# data.py — aggregate open orders (offene Auftraege) from the EDS C11 CSV export.
  1. import pandas as pd
  2. import numpy as np
  3. from functools import reduce
  4. debug = False
  5. csv_file = "data/offene_auftraege_eds_c11.csv"
  6. cols_pkey = ["Hauptbetrieb", "Standort", "Nr", "Auftragsdatum"]
  7. cols_str = [
  8. "Serviceberater",
  9. "Order Number",
  10. "Fabrikat",
  11. "Model",
  12. "Fahrzeug",
  13. "Kostenstelle",
  14. "Marke",
  15. "Kunde",
  16. "Turnover_Type_Desc",
  17. ]
  18. cols_float = [
  19. "Durchg\u00e4nge (Auftrag)",
  20. "Arbeitswerte",
  21. "Teile",
  22. "Fremdl.",
  23. "Anzahl Tage",
  24. ]
  25. def update(d, other):
  26. d.update(dict(dict(other)))
  27. return d
  28. def get_dict(cols, type):
  29. return dict(dict(zip(cols, [type] * len(cols))))
  30. cols_dict = reduce(
  31. update,
  32. (
  33. get_dict(cols_pkey, np.str),
  34. get_dict(cols_str, np.str),
  35. get_dict(cols_float, np.float),
  36. ),
  37. {},
  38. )
# Load the export using German CSV conventions: ';' field separator,
# ',' decimal mark, Latin-1 encoding.  Only the typed columns are read,
# and each is parsed with the dtype declared in cols_dict.
df = pd.read_csv(
    csv_file,
    decimal=",",
    sep=";",
    encoding="latin-1",
    usecols=cols_dict.keys(),
    dtype=cols_dict,
)
# Build a synthetic primary key by concatenating the key columns with '_'
# (the result always carries a leading '_' from the "" seed).
df["pkey"] = reduce(lambda x, y: x + "_" + df[y], cols_pkey, "")
# Aggregate per order.  NOTE(review): .sum() is applied to the string
# columns as well (string concatenation per group) — presumably only the
# float columns are meaningful here; on newer pandas, consider
# groupby("pkey")[cols_float].sum() — confirm against downstream use.
df_sum = df.groupby("pkey").sum()
# One identifying row per pkey so the string columns can be re-attached.
df_unique = df[cols_pkey + cols_str + ["pkey"]].drop_duplicates()
# Join aggregates with the identifying columns; colliding names from the
# right side get the "_other" suffix.
df_join = df_sum.join(df_unique.set_index("pkey"), rsuffix="_other")
# Order total = labour values + parts + external services.
df_join["Gesamt"] = df_join["Arbeitswerte"] + df_join["Teile"] + df_join["Fremdl."]
# Keep orders with a non-zero total and a non-empty service advisor.
df_result = df_join[(df_join["Gesamt"] != 0) & (df_join["Serviceberater"] != "")]
  53. with open("data/offene_auftraege.json", "w") as f:
  54. f.write(df_result.to_json(orient="split", indent=2))
  55. print(df_result.shape)