123456789101112131415161718192021222324252627282930313233343536 |
- import pandas as pd
- import numpy as np
- from functools import reduce
- debug = False
- csv_file = 'data/offene_auftraege_eds_c11.csv'
- cols_pkey = ["Hauptbetrieb", "Standort", "Nr", "Auftragsdatum"]
- cols_str = ["Serviceberater", "Order Number", "Fabrikat", "Model", "Fahrzeug", "Kostenstelle", "Marke", "Kunde", "Turnover_Type_Desc"]
- cols_float = ["Durchg\u00e4nge (Auftrag)", "Arbeitswerte", "Teile", "Fremdl.", "Anzahl Tage"]
- def update(d, other):
- d.update(dict(dict(other)))
- return d
- def get_dict(cols, type):
- return dict(dict(zip(cols, [type] * len(cols))))
- cols_dict = reduce(update, (get_dict(cols_pkey, np.str), get_dict(cols_str, np.str), get_dict(cols_float, np.float)), {})
- df = pd.read_csv(csv_file, decimal=',', sep=';', encoding='latin-1', usecols=cols_dict.keys(), dtype=cols_dict)
- df['pkey'] = reduce(lambda x, y: x + '_' + df[y], cols_pkey, '')
- df_sum = df.groupby('pkey').sum()
- df_unique = df[cols_pkey + cols_str + ['pkey']].drop_duplicates()
- df_join = df_sum.join(df_unique.set_index('pkey'), rsuffix='_other')
- df_join['Gesamt'] = df_join['Arbeitswerte'] + df_join['Teile'] + df_join['Fremdl.']
- df_result = df_join[(df_join['Gesamt'] != 0) & (df_join['Serviceberater'] != '')]
- with open('data/offene_auftraege.json', 'w') as f:
- f.write(df_result.to_json(orient='split', indent=2))
- print(df_result.shape)
|