data.py 1.3 KB

12345678910111213141516171819202122232425262728293031323334
  1. import pandas as pd
  2. import numpy as np
  3. import json
  4. from functools import reduce
  5. debug = False
  6. csv_file = "data/offene_auftraege_eds_c11.csv"
  7. cols_pkey = ["Hauptbetrieb", "Standort", "Nr", "Auftragsdatum"]
  8. cols_str = ["Serviceberater", "Order Number", "Fabrikat", "Model", "Fahrzeug", "Kostenstelle", "Marke", "Kunde", "Turnover_Type_Desc"]
  9. cols_float = ["Durchg\u00e4nge (Auftrag)", "Arbeitswerte", "Teile", "Fremdl.", "Anzahl Tage"]
  10. def update(d, other):
  11. d.update(dict(dict(other)))
  12. return d
  13. def get_dict(cols, type):
  14. return dict(dict(zip(cols,[type] * len(cols))))
  15. cols_dict = reduce(update, (get_dict(cols_pkey, np.str), get_dict(cols_str, np.str), get_dict(cols_float, np.float)), {})
  16. df = pd.read_csv(csv_file, decimal=",", sep=";", encoding="ansi", usecols=cols_dict.keys(), dtype=cols_dict)
  17. df['pkey'] = reduce(lambda x, y: x + "_" + df[y], cols_pkey, "")
  18. df_sum = df.groupby("pkey").sum()
  19. df_unique = df[cols_pkey + cols_str + ['pkey']].drop_duplicates()
  20. df_join = df_sum.join(df_unique.set_index('pkey'), rsuffix='_other')
  21. df_join['Gesamt'] = df_join['Arbeitswerte'] + df_join['Teile'] + df_join['Fremdl.']
  22. df_result = df_join[(df_join['Gesamt'] != 0) & (df_join['Serviceberater'] != "")]
  23. with open("data/offene_auftraege.json", "w") as f:
  24. f.write(df_result.to_json(orient="split", indent=2))
  25. print(df_result.shape)