import pandas as pd import numpy as np from functools import reduce debug = False csv_file = "data/offene_auftraege_eds_c11.csv" cols_pkey = ["Hauptbetrieb", "Standort", "Nr", "Auftragsdatum"] cols_str = [ "Serviceberater", "Order Number", "Fabrikat", "Model", "Fahrzeug", "Kostenstelle", "Marke", "Kunde", "Turnover_Type_Desc", ] cols_float = [ "Durchg\u00e4nge (Auftrag)", "Arbeitswerte", "Teile", "Fremdl.", "Anzahl Tage", ] def update(d, other): d.update(dict(dict(other))) return d def get_dict(cols, type): return dict(dict(zip(cols, [type] * len(cols)))) cols_dict = reduce( update, ( get_dict(cols_pkey, np.str), get_dict(cols_str, np.str), get_dict(cols_float, np.float), ), {}, ) df = pd.read_csv( csv_file, decimal=",", sep=";", encoding="latin-1", usecols=cols_dict.keys(), dtype=cols_dict, ) df["pkey"] = reduce(lambda x, y: x + "_" + df[y], cols_pkey, "") df_sum = df.groupby("pkey").sum() df_unique = df[cols_pkey + cols_str + ["pkey"]].drop_duplicates() df_join = df_sum.join(df_unique.set_index("pkey"), rsuffix="_other") df_join["Gesamt"] = df_join["Arbeitswerte"] + df_join["Teile"] + df_join["Fremdl."] df_result = df_join[(df_join["Gesamt"] != 0) & (df_join["Serviceberater"] != "")] with open("data/offene_auftraege.json", "w") as f: f.write(df_result.to_json(orient="split", indent=2)) print(df_result.shape)