import pandas as pd import numpy as np from pathlib import Path import json base_dir = Path(__file__).parent.parent filename = base_dir / 'data' / 'Marketingplanung_AHR_2023_V1.xls' output = base_dir / 'export' / 'marketing_2023.json' output_csv = base_dir / 'export' / 'marketing_2023.csv' debug_csv = base_dir / 'export' / 'marketing_2023_rest.csv' department = { ' MM': '10', ' ULM': '40', ' LL': '50', 'KRU': '30', 'GZ': '55', 'AAM': '82', 'AAM-MOT': '81' } columns = [ 'Bezeichnung', 'Segment', 'Termin', 'Wert1', 'Wert2', 'Wert3', 'Wert4', 'Wert5', 'Wert6', 'Wert7', 'Wert8', 'Wert9', 'Wert10', 'Wert11', 'Wert12', 'Summe' ] defaults = ['', '', '', 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0] types = [str] * 3 # + [float] * 13 xls = pd.read_excel( filename, sheet_name=list(department.keys()), skiprows=4, usecols='A:P', names=columns, converters=dict(zip(columns, types)), ) xls_temp = [] for sheet, dept_no in department.items(): xls[sheet]['Betrieb Nr'] = dept_no xls[sheet] = xls[sheet].fillna(value=dict(zip(columns, defaults))) xls[sheet]['Konto Nr'] = np.where(xls[sheet]['Segment'].str.startswith('Kto '), xls[sheet]['Segment'].str.slice(4, 9), 'Rest') # xls[sheet]['Konto Nr'] = np.where(xls[sheet]['Segment'].str.startswith('50% '), xls[sheet]['Segment'].str.slice(-5), xls[sheet]['Konto Nr']) temp = xls[sheet][xls[sheet]['Segment'].str.startswith('50% ')].copy() temp['Konto Nr'] = temp['Segment'].str.slice(8, 13) xls_temp.append(temp) temp2 = temp.copy() temp2['Konto Nr'] = temp2['Segment'].str.slice(-5) xls_temp.append(temp2) xls_temp_concat = pd.concat(xls_temp) for c in columns[3:]: xls_temp_concat[c] = xls_temp_concat[c] * 0.5 df = pd.concat(list(xls.values()) + [xls_temp_concat]) df_rest = df[(df['Konto Nr'] == 'Rest') & (df['Summe'] != 0)] df_rest.to_csv(debug_csv, sep=';', decimal=',') df = df[df['Konto Nr'] != 'Rest'] df['Wert11'] = df['Wert11'].astype(float) df['Wert12'] = df['Wert12'].astype(float) for c in columns[3:]: df[c] = df[c].round() df = df.drop(labels=['Summe'], axis=1) df = df.groupby(['Konto Nr', 'Betrieb Nr']).sum() print(df.info()) # df.reset_index(inplace=True) df.to_csv(output_csv, sep=';', decimal=',') dict_split = df.to_dict(orient='split') res = {} for index, data in zip(dict_split['index'], dict_split['data']): kto, dept = index if kto not in res: res[kto] = {} res[kto][dept] = data json.dump(res, open(output, 'w'), indent=2)