import pandas as pd import numpy as np import xml.etree.ElementTree as ET import json import csv import re from bs4 import BeautifulSoup from functools import reduce config = { 'path': 'c:/projekte/python/gcstruct', 'path2': 'c:/projekte/python/gcstruct', 'output': 'gcstruct.json', 'default': [], 'special': {}, 'special2': { 'Planner': ['Kostenstelle', 'Ebene1', 'Ebene2'], 'Test': ['Ebene1', 'Ebene2'] }, 'columns': ['Konto_Nr', 'Konto_Bezeichnung', 'Konto_Art', 'Konto_KST', 'Konto_STK', 'Konto_1', 'Konto_2', 'Konto_3', 'Konto_4', 'Konto_5'], 'struct': {} } json_result = {'accounts': {}, 'tree': {}, 'flat': {}, 'struct_export': {}, 'skr51_vars': {}} def get_tree_root(node, structure): id = ';' * 9 return { 'id': id, 'text': node.attrib['Name'], 'children': get_tree(node, [], structure), 'parents': [], 'accounts': [], 'level': 0, 'form': '' } def get_tree(node, parents, structure): result = [] for child in node: p = get_parents_list(parents) parents.append(child.attrib['Name']) id = ';'.join(parents) + ';' * (10 - len(parents)) result.append({ 'id': id, 'text': child.attrib['Name'], 'children': get_tree(child, parents, structure), 'parents': p, 'accounts': get_accounts(structure, id), 'level': len(parents), 'form': child.attrib.get('Split', '') }) parents.pop() return result def get_flat(node): result = [{ 'id': node['id'], 'text': node['text'], 'children': [x['id'] for x in node['children']], 'children2': [], 'parents': node['parents'], 'accounts': node['accounts'], 'costcenter': '', 'level': node['level'], 'drilldown': node['level'] < 2, # (node['level'] != 2 and len(node['accounts']) == 0), 'form': node['form'], 'accountlevel': False, 'absolute': True, 'seasonal': True, 'status': "0", 'values': [], 'values2': {} }] for child in node['children']: result += get_flat(child) return result def get_accounts(structure, id): return [x['Konto_Nr'] for x in json_result['accounts'] if x[structure] == id] def get_parents_list(p_list): id = ';'.join(p_list) + ';' * (10 - len(p_list)) if len(p_list) > 0: return [id] + get_parents_list(p_list[:-1]) return [';' * 9] def structure_from_tree(node): result = [] result.append(node['id']) for child in node['children']: result.extend(structure_from_tree(child)) return result def xml_from_tree(xml_node, tree_node): for child in tree_node['children']: element = ET.SubElement(xml_node, 'Ebene') element.set("Name", child['text']) xml_from_tree(element, child) def split_it(text, index): try: return re.findall(r'([^;]+) - ([^;]*);;', text)[0][index] except Exception: return '' def last_layer(text): try: return re.findall(r'([^;]+);;', text)[0] except Exception: return '' def get_default_cols(i): return ['Ebene' + str(i) for i in range(i * 10 + 1, (i + 1) * 10 + 1)] def accounts_from_csv(struct): max_rows = (len(config['default']) + 1) * 10 with open(f"{config['path']}/Kontenrahmen/Kontenrahmen.csv", 'r', encoding='ansi') as f: csv_reader = csv.reader(f, delimiter=';') imported_csv = [row[:max_rows] for row in csv_reader] df = pd.DataFrame.from_records(np.array(imported_csv[1:], dtype='object'), columns=imported_csv[0]).fillna(value='') df = df.rename(columns={'Kostenstelle': 'Konto_KST', 'STK': 'Konto_STK'}) for i, (s, cols) in enumerate(struct.items()): df[s] = reduce(lambda x, y: x + ";" + df[y], cols, '') df[s] = df[s].apply(lambda x: x[1:]) df['LetzteEbene' + str(i + 1)] = df[s].apply(lambda x: last_layer(x)) df['LetzteEbene' + str(i + 1) + '_Nr'] = df[s].apply(lambda x: split_it(x, 0)) df['LetzteEbene' + str(i + 1) + '_Bez'] = df[s].apply(lambda x: split_it(x, 1)) return df def tree_from_xml(struct, df): result = {} for (s, cols) in struct.items(): try: tree = ET.parse(f"{config['path']}/Xml/{s}.xml") result[s] = get_tree_root(tree.getroot(), s) except FileNotFoundError: print('XML-Datei fehlt') used_entries = [x.split(";")[1:] for x in set(df[s].to_numpy())] print(used_entries) root = ET.Element('Ebene') root.set('Name', s) result[s] = get_tree_root(root, s) # json_result["tree"][s] = get_tree_from_accounts(cols, []) return result def get_structure_and_tree(): df = accounts_from_csv(config['struct']) json_result['accounts'] = df.to_dict('records') json_result['tree'] = tree_from_xml(config['struct'], df) for (s, cols) in config['struct'].items(): json_result['flat'][s] = get_flat(json_result['tree'][s]) for (s, entries) in json_result['flat'].items(): cols = config['struct'][s] df_temp = pd.DataFrame([x['id'].split(';') for x in entries], columns=cols) json_result['struct_export'][s] = df_temp.to_dict(orient='records') # json.dump(json_result, open(f"{config['path2']}/{config['output']}", 'w'), indent=2) return df def post_structure_and_tree(): json_post = json.load(open(f"{config['path']}/{config['output']}", 'r')) # Kontenrahmen.csv ebenen = ['Ebene' + str(i) for i in range(1, len(config['default']) * 10 + 1)] header = ';'.join(config['columns'] + ebenen) cols = config['columns'] + config['default'] with open(config['path'] + '/Kontenrahmen/Kontenrahmen_out.csv', 'w', encoding='ansi') as f: f.write(header + '\n') for row in json_post['Kontenrahmen']: f.write(';'.join([row[e] for e in cols]) + '\n') # print(header) # xml und evtl. Struktur.csv for i, s in enumerate(config['default']): with open(f"{config['path']}/Strukturen/Kontenrahmen.csv/{s}_out.csv", 'w', encoding='ansi') as f: f.write(';'.join(['Ebene' + str(i * 10 + j) for j in range(1, 11)]) + '\n') rows = structure_from_tree({'id': ";" * 9, 'children': json_post[s]}) f.write('\n'.join(rows)) # with open(config['path'] + "/Strukturen/Kontenrahmen.csv/" + structure + "_2.csv", "w", encoding="ansi") as f: root = ET.Element('Ebene') root.set('Name', s) xml_from_tree(root, {'id': ";" * 9, 'children': json_post[s]}) with open(f"{config['path']}/Xml/{s}_out.xml", 'w', encoding='utf-8') as f: f.write(BeautifulSoup(ET.tostring(root), 'xml').prettify()) def struct_config(struct_path): config['path'] = struct_path # config['path2'] = struct_path cfg = ET.parse(f"{config['path']}/config/config.xml") config['default'] = [s.find('Name').text for s in cfg.getroot().find('Strukturdefinitionen').findall('Struktur')] struct = dict([(x, get_default_cols(i)) for (i, x) in enumerate(config['default'])]) struct.update(config['special']) config['struct'] = struct def skr51_translate(): df = accounts_from_csv(config['struct']) translate = {'Konto_Nr': 'SKR51', 'Kostenstelle': 'KST', 'Absatzkanal': 'ABS', 'Kostenträger': 'KTR', 'Marke': 'MAR', 'Standort': 'STA'} df_translate = {} for i, (_, t_to) in enumerate(translate.items()): last = 'LetzteEbene' + str(i + 1) from_label = ['Konto_Nr', last, last + '_Nr', last + '_Bez', 'Ebene' + str(i * 10 + 1), 'Ebene' + str(i * 10 + 2)] to_label = [t_to, t_to + '_Ebene', t_to + '_Nr', t_to + '_Bez', 'Ebene1', 'Ebene2'] df_translate[t_to] = df[df[last + '_Nr'] != ''][from_label].rename(columns=dict(zip(from_label, to_label))) # print(df_translate[t_to].head()) df_source = pd.read_csv(f"{config['path']}/Export/Kontenrahmen_kombiniert.csv", decimal=',', sep=';', encoding='ansi', converters={i: str for i in range(0, 200)}) for t_from, t_to in translate.items(): if t_to == 'SKR51': df_source['SKR51'] = df_source['Konto_Nr'] # print(df_translate[t_to].info()) elif t_to == 'KTR': df_source['KTR'] = 'KTR_SC_' + df_source['Marke'] + '_' + df_source['Kostenträger'] df_source['KTR'] = np.where(df_source['Ebene1_KST'] == 'Neuwagen', 'KTR_NW_' + df_source['Marke'] + '_' + df_source['Kostenträger'], df_source[t_to]) df_source['KTR'] = np.where(df_source['Ebene1_KST'] == 'Teiledienst', 'KTR_TZ_' + df_source['Kostenträger'], df_source[t_to]) else: df_source[t_to] = df_source[t_from].apply(lambda x: t_to + '_' + x) df_source = df_source.merge(df_translate[t_to], how='left', on=[t_to], suffixes=(None, '_' + t_to)) df_source[t_to + '_Nr'] = np.where(df_source[t_to + '_Nr'].isna(), df_source[t_from], df_source[t_to + '_Nr']) # df_all[df_all['Ebene1'] == ] # print(df_source.head()) df_source['Konto_Nr_SKR51'] = df_source['MAR_Nr'] + '-' + df_source['STA_Nr'] + '-' + df_source['SKR51_Nr'] + '-' + df_source['KST_Nr'] + '-' + df_source['ABS_Nr'] + '-' + df_source['KTR_Nr'] df_source['Konto_Nr_Händler'] = df_source['Marke'] + '-' + df_source['Standort'] + '-' + df_source['Konto_Nr'] + '-' + df_source['Kostenstelle'] + '-' + df_source['Absatzkanal'] + '-' + df_source['Kostenträger'] df_source.to_csv(f"{config['path2']}/SKR51_Uebersetzung.csv", sep=';', encoding='ansi', index=False) from_label = ['MAR_Nr', 'STA_Nr', 'SKR51_Nr', 'KST_Nr', 'ABS_Nr', 'KTR_Nr', 'KTR_Ebene', 'Konto_Nr_Händler'] to_label = ['Marke', 'Standort', 'Konto_Nr', 'Kostenstelle', 'Absatzkanal', 'Kostenträger', 'Kostenträger_Ebene', 'Konto_Nr_Händler'] df_combined = df_source[from_label].rename(columns=dict(zip(from_label, to_label))) df_combined.to_csv(f"{config['path2']}/Kontenrahmen_kombiniert.csv", sep=';', encoding='ansi', index=False) def skr51_translate2(): df = accounts_from_csv(config['struct']) translate = {'Konto_Nr': 'SKR51', 'Kostenstelle': 'KST', 'Absatzkanal': 'ABS', 'Kostenträger': 'KTR', 'Marke': 'MAR', 'Standort': 'STA'} df_list = [] for i, s in enumerate(config['struct'].keys()): from_label = ['Konto_Nr', 'Ebene' + str(i * 10 + 1), 'Ebene' + str(i * 10 + 2), 'Ebene' + str(i * 10 + 3)] to_label = ['Konto_Nr', 'key', 'value', 'value2'] df_temp = df[from_label].rename(columns=dict(zip(from_label, to_label))) df_temp['key'] = '{' + s + '/' + df_temp['key'] + '}' df_list.append(df_temp[df_temp['value'] != '']) df_translate = pd.concat(df_list) print(df_translate.head()) df_source = pd.read_csv(f"{config['path']}/Export/Kontenrahmen_kombiniert.csv", decimal=',', sep=';', encoding='ansi', converters={i: str for i in range(0, 200)}) df_source = df_source.merge(df, how='left', on=['Konto_Nr']) rows = df_source.shape[0] df_source['value'] = '' for t_from, t_to in translate.items(): df_source[t_to] = t_to + '_' + df_source[t_from] for e in config['struct']['SKR51']: df_source = df_source.merge(df_translate, how='left', left_on=[t_to, e], right_on=['Konto_Nr', 'key'], suffixes=(None, '_' + t_to + '_' + e)) df_source[e] = np.where(df_source['value_' + t_to + '_' + e].notna(), df_source['value_' + t_to + '_' + e], df_source[e]) if df_source.shape[0] > rows: print(t_to + '_' + e + ': ' + str(df_source.shape[0])) # df_source[t_to + '_Nr'] = np.where(df_source[t_to + '_Nr'].isna(), df_source[t_from], df_source[t_to + '_Nr']) for e in config['struct']['SKR51']: # df_source[e] = np.where(df_source[e].str.startswith('{'), 'falsch', df_source[e]) # df_source[e].str.extract(r'/(.*)}') + df_source[e] = np.where(df_source[e] == '[KTR]', df_source['Kostenträger_Ebene'], df_source[e]) # df_all[df_all['Ebene1'] == ] print(df_source.head()) df_source['Konto_neu'] = df_source['Marke'] + '-' + df_source['Standort'] + '-' + df_source['Konto_Nr'] + '-' + df_source['Kostenstelle'] + '-' + df_source['Absatzkanal'] + '-' + df_source['Kostenträger'] + ' - ' + df_source['Konto_Bezeichnung'] df_source = df_source[['Konto', 'Konto_Nr_Händler', 'Konto_Bezeichnung', 'Konto_Art', 'Konto_KST', 'Konto_STK', 'Konto_1', 'Konto_2', 'Konto_3', 'Konto_4', 'Konto_5', 'Ebene1', 'Ebene2', 'Ebene3', 'Ebene4', 'Ebene5', 'Ebene6', 'Ebene7', 'Ebene8', 'Ebene9', 'Ebene10']] from_label = get_default_cols(0) to_label = get_default_cols(9) df_source = df_source.rename(columns=dict(zip(from_label, to_label))) # 'Marke', 'Standort', 'Konto_Nr', 'Kostenstelle', 'Absatzkanal', 'Kostenträger', df_source.to_csv(f"{config['path2']}/SKR51_Uebersetzung.csv", sep=';', encoding='ansi', index=False) # df_source['Konto_Nr_SKR51'] = df_source['MAR_Nr'] + '-' + df_source['STA_Nr'] + '-' + df_source['SKR51_Nr'] + '-' + df_source['KST_Nr'] + '-' + df_source['ABS_Nr'] + '-' + df_source['KTR_Nr'] # from_label = ['MAR_Nr', 'STA_Nr', 'SKR51_Nr', 'KST_Nr', 'ABS_Nr', 'KTR_Nr', 'KTR_Ebene', 'Konto_Nr_Händler'] # to_label = ['Marke', 'Standort', 'Konto_Nr', 'Kostenstelle', 'Absatzkanal', 'Kostenträger', 'Kostenträger_Ebene', 'Konto_Nr_Händler'] # df_combined = df_source[from_label].rename(columns=dict(zip(from_label, to_label))) # df_combined.to_csv(f"{config['path2']}/Kontenrahmen_kombiniert.csv", sep=';', encoding='ansi', index=False) def skr51_vars(): get_structure_and_tree() cols = get_default_cols(0) df_list = [] for (s, entries) in json_result['flat'].items(): df = pd.DataFrame([x['id'].split(';') for x in entries], columns=cols) df['key'] = df[cols[0]].apply(lambda x: '{' + s + '/' + x + '}') df['value'] = df[cols[1]] df_list.append(df[['key', 'value']]) df = pd.concat(df_list) df_vars = df[df['value'] != ''] df_main = pd.DataFrame([x['id'].split(';') for x in json_result['flat']['SKR51']], columns=cols) df_main['value'] = '' for c in cols: df_main = df_main.merge(df_vars, how='left', left_on=c, right_on='key', suffixes=(None, '_' + c)) df_main[c] = np.where(df_main['value_' + c].isna(), df_main[c], df_main['value_' + c]) df_main[cols].to_csv(f"{config['path2']}/SKR51_Struktur.csv", sep=';', encoding='ansi', index_label='Sortierung') if __name__ == '__main__': # struct_config('c:/projekte/gcstruct_dresen') # struct_config('c:/projekte/python/gcstruct') # struct_config('c:/projekte/python/gcstruct_reisacher_planung') # struct_config('X:/Robert/Planung Reisacher/GCStruct_neue_Struktur_Planung') # struct_config('P:/SKR51_GCStruct/GCStruct_Siebrecht') struct_config('P:/SKR51_GCStruct/GCStruct_Portal') # print(config['struct']) # get_structure_and_tree() # post_structure_and_tree() # skr51_translate() skr51_translate2() # skr51_vars()