12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667 |
- import pandas as pd
- import numpy as np
- from functools import reduce
- debug = False
- csv_file = "data/offene_auftraege_eds_c11.csv"
- cols_pkey = ["Hauptbetrieb", "Standort", "Nr", "Auftragsdatum"]
- cols_str = [
- "Serviceberater",
- "Order Number",
- "Fabrikat",
- "Model",
- "Fahrzeug",
- "Kostenstelle",
- "Marke",
- "Kunde",
- "Turnover_Type_Desc",
- ]
- cols_float = [
- "Durchg\u00e4nge (Auftrag)",
- "Arbeitswerte",
- "Teile",
- "Fremdl.",
- "Anzahl Tage",
- ]
- def update(d, other):
- d.update(dict(dict(other)))
- return d
- def get_dict(cols, type):
- return dict(dict(zip(cols, [type] * len(cols))))
- cols_dict = reduce(
- update,
- (
- get_dict(cols_pkey, np.str),
- get_dict(cols_str, np.str),
- get_dict(cols_float, np.float),
- ),
- {},
- )
- df = pd.read_csv(
- csv_file,
- decimal=",",
- sep=";",
- encoding="latin-1",
- usecols=cols_dict.keys(),
- dtype=cols_dict,
- )
- df["pkey"] = reduce(lambda x, y: x + "_" + df[y], cols_pkey, "")
- df_sum = df.groupby("pkey").sum()
- df_unique = df[cols_pkey + cols_str + ["pkey"]].drop_duplicates()
- df_join = df_sum.join(df_unique.set_index("pkey"), rsuffix="_other")
- df_join["Gesamt"] = df_join["Arbeitswerte"] + df_join["Teile"] + df_join["Fremdl."]
- df_result = df_join[(df_join["Gesamt"] != 0) & (df_join["Serviceberater"] != "")]
- with open("data/offene_auftraege.json", "w") as f:
- f.write(df_result.to_json(orient="split", indent=2))
- print(df_result.shape)
|