"""Convert a GCStruct folder (account CSV plus structure XML) to JSON and back.

``main`` reads ``config/config.xml`` to learn the structure names, then
``get_structure_and_tree`` combines ``Kontenrahmen/Kontenrahmen.csv`` with
``Xml/<structure>.xml`` and writes the result as JSON.
``post_structure_and_tree`` goes the other way and writes ``*_out`` CSV/XML
files from a JSON file.

Node ids are the semicolon-joined level names padded so that every id
contains exactly nine semicolons; the root id is nine semicolons.
"""
import csv
import json
import re
import xml.etree.ElementTree as ET
from functools import reduce

import numpy as np
import pandas as pd

try:
    from bs4 import BeautifulSoup
except ImportError:
    # BeautifulSoup is only used by post_structure_and_tree() to pretty-print
    # XML. Importing it lazily-by-fallback keeps the JSON export usable on
    # machines without bs4; post_structure_and_tree() raises if it is missing.
    BeautifulSoup = None

config = {
    'path': 'c:/projekte/python/gcstruct',
    'path2': 'c:/projekte/python/gcstruct',
    'output': 'gcstruct.json',
    'default': [],
    'special': {},
    'special2': {
        'Planner': ['Kostenstelle', 'Ebene1', 'Ebene2'],
        'Test': ['Ebene1', 'Ebene2']
    }
}

columns = ['Konto_Nr', 'Konto_Bezeichnung', 'Konto_Art', 'Konto_KST', 'Konto_STK',
           'Konto_1', 'Konto_2', 'Konto_3', 'Konto_4', 'Konto_5']

# Filled by get_structure_and_tree(); 'accounts' becomes a list of row dicts.
json_result = {'accounts': {}, 'tree': {}, 'flat': {}}

# Id of the root node of every structure.
_ROOT_ID = ';' * 9


def _node_id(path):
    """Return the id for a non-empty list of level names (always 9 semicolons)."""
    return ';'.join(path) + ';' * (10 - len(path))


def get_tree_root(node, structure):
    """Build the nested tree dict for the XML root element *node*.

    *structure* is the name of the account column that holds the node id an
    account belongs to; it is passed through to ``get_accounts``.
    """
    return {
        'id': _ROOT_ID,
        'text': node.attrib['Name'],
        'children': get_tree(node, [], structure),
        'parents': [],
        'accounts': [],
        'level': 0,
        'form': ''
    }


def get_tree(node, parents, structure):
    """Return the list of tree dicts for all child elements of *node*.

    *parents* is the list of level names leading to *node*; it is not
    modified. Each child gets its id, its ancestor ids (nearest first, root
    last), the account numbers assigned to it and its optional ``Split``
    attribute as ``form``.
    """
    ancestor_ids = get_parents_list(parents)
    result = []
    for child in node:
        name = child.attrib['Name']
        path = parents + [name]
        node_id = _node_id(path)
        result.append({
            'id': node_id,
            'text': name,
            'children': get_tree(child, path, structure),
            # Copy so that sibling nodes do not share one list object.
            'parents': list(ancestor_ids),
            'accounts': get_accounts(structure, node_id),
            'level': len(path),
            'form': child.attrib.get('Split', '')
        })
    return result


def get_flat(node):
    """Flatten a tree dict into a pre-order list of node records.

    Children are referenced by id only; the remaining fields are filled with
    fixed defaults. ``drilldown`` is true for levels 0 and 1.
    """
    result = [{
        'id': node['id'],
        'text': node['text'],
        'children': [child['id'] for child in node['children']],
        'children2': [],
        'parents': node['parents'],
        'accounts': node['accounts'],
        'costcenter': '',
        'level': node['level'],
        'drilldown': node['level'] < 2,
        'form': node['form'],
        'accountlevel': False,
        'absolute': True,
        'seasonal': True,
        'status': "0",
        'values': [],
        'values2': {}
    }]
    for child in node['children']:
        result.extend(get_flat(child))
    return result


def get_accounts(structure, id):
    """Return the ``Konto_Nr`` of every loaded account whose *structure* column equals *id*.

    The parameter name ``id`` shadows the builtin but is kept so that keyword
    callers continue to work.
    """
    return [acc['Konto_Nr'] for acc in json_result['accounts'] if acc[structure] == id]


def get_parents_list(p_list):
    """Return the ids of the path *p_list* and all its prefixes, ending with the root id.

    For an empty path only the root id is returned.
    """
    ids = [_node_id(p_list[:depth]) for depth in range(len(p_list), 0, -1)]
    ids.append(_ROOT_ID)
    return ids


def structure_from_tree(node):
    """Return the ids of *node* and all its descendants in pre-order."""
    result = [node['id']]
    for child in node['children']:
        result.extend(structure_from_tree(child))
    return result


def xml_from_tree(xml_node, tree_node):
    """Append one ``Ebene`` element per descendant of *tree_node* below *xml_node*."""
    for child in tree_node['children']:
        element = ET.SubElement(xml_node, 'Ebene')
        element.set("Name", child['text'])
        xml_from_tree(element, child)


def split_it(text, index):
    """Return part *index* of the first ``"<nr> - <name>;;"`` entry in *text*.

    Index 0 is the part before ``" - "``, index 1 the part after it. Returns
    ``''`` when nothing matches or *text* is not a string.
    """
    try:
        return re.findall(r'([^;]+) - ([^;]*);;', text)[0][index]
    except (IndexError, TypeError):
        return ''


def last_layer(text):
    """Return the first non-empty field of *text* that is followed by ``;;``.

    Returns ``''`` when nothing matches or *text* is not a string.
    """
    try:
        return re.findall(r'([^;]+);;', text)[0]
    except (IndexError, TypeError):
        return ''


def get_default_cols(i):
    """Return the ten column names ``Ebene<10*i+1>`` .. ``Ebene<10*i+10>``."""
    return ['Ebene' + str(n) for n in range(i * 10 + 1, (i + 1) * 10 + 1)]


def get_structure_and_tree(struct):
    """Read accounts and structure XML files and write the combined JSON file.

    *struct* maps a structure name to the list of CSV columns that make up
    its levels. The result is stored in ``json_result`` and dumped to
    ``config['path2']/config['output']``.
    """
    # Number of leading CSV columns to keep per row.
    max_cols = (len(config['default']) + 1) * 10
    # NOTE: 'ansi' is an alias of the 'mbcs' codec, which exists on Windows only.
    with open(f"{config['path']}/Kontenrahmen/Kontenrahmen.csv", 'r', encoding='ansi') as f:
        csv_reader = csv.reader(f, delimiter=';')
        imported_csv = [row[:max_cols] for row in csv_reader]

    df = pd.DataFrame.from_records(
        np.array(imported_csv[1:], dtype='object'),
        columns=imported_csv[0]
    ).fillna(value='')
    df = df.rename(columns={'Kostenstelle': 'Konto_KST', 'STK': 'Konto_STK'})

    for i, (s, cols) in enumerate(struct.items()):
        # Join the level columns with ';' and drop the leading separator that
        # the empty initial value of the fold produces.
        df[s] = reduce(lambda x, y: x + ";" + df[y], cols, '')
        df[s] = df[s].apply(lambda x: x[1:])
        prefix = 'LetzteEbene' + str(i + 1)
        df[prefix] = df[s].apply(last_layer)
        df[prefix + '_Nr'] = df[s].apply(lambda x: split_it(x, 0))
        df[prefix + '_Bez'] = df[s].apply(lambda x: split_it(x, 1))

    json_result['accounts'] = df.to_dict('records')

    for s in struct:
        try:
            tree = ET.parse(f"{config['path']}/Xml/{s}.xml")
        except FileNotFoundError:
            # No XML for this structure: report it and fall back to a tree
            # that consists of the root node only.
            print('XML-Datei fehlt')
            used_entries = [x.split(";")[1:] for x in set(df[s].to_numpy())]
            print(used_entries)
            root = ET.Element('Ebene')
            root.set('Name', s)
        else:
            root = tree.getroot()
        json_result['tree'][s] = get_tree_root(root, s)
        json_result['flat'][s] = get_flat(json_result['tree'][s])

    with open(f"{config['path2']}/{config['output']}", 'w') as f:
        json.dump(json_result, f, indent=2)


def post_structure_and_tree():
    """Write ``*_out`` CSV and XML files from the JSON file in ``config['path']``.

    The JSON is expected to contain a ``Kontenrahmen`` list of account rows
    and one tree (list of child nodes) per name in ``config['default']``.

    Raises:
        ImportError: if BeautifulSoup (bs4) is not installed.
    """
    if BeautifulSoup is None:
        # Fail before any output file has been touched.
        raise ImportError('bs4 (BeautifulSoup) is required for post_structure_and_tree()')

    with open(f"{config['path']}/{config['output']}", 'r') as f:
        json_post = json.load(f)

    # Chart of accounts (Kontenrahmen_out.csv)
    ebenen = ['Ebene' + str(i) for i in range(1, len(config['default']) * 10 + 1)]
    header = ';'.join(columns + ebenen)
    cols = columns + config['default']
    # NOTE: 'ansi' is an alias of the 'mbcs' codec, which exists on Windows only.
    with open(config['path'] + '/Kontenrahmen/Kontenrahmen_out.csv', 'w', encoding='ansi') as f:
        f.write(header + '\n')
        for row in json_post['Kontenrahmen']:
            f.write(';'.join(row[e] for e in cols) + '\n')

    # XML and the per-structure CSV
    for i, s in enumerate(config['default']):
        tree_node = {'id': _ROOT_ID, 'children': json_post[s]}

        with open(f"{config['path']}/Strukturen/Kontenrahmen.csv/{s}_out.csv", 'w', encoding='ansi') as f:
            f.write(';'.join(['Ebene' + str(i * 10 + j) for j in range(1, 11)]) + '\n')
            f.write('\n'.join(structure_from_tree(tree_node)))

        root = ET.Element('Ebene')
        root.set('Name', s)
        xml_from_tree(root, tree_node)

        with open(f"{config['path']}/Xml/{s}_out.xml", 'w', encoding='utf-8') as f:
            f.write(BeautifulSoup(ET.tostring(root), 'xml').prettify())


def main(struct_path):
    """Export the GCStruct folder *struct_path* to JSON.

    Only ``config['path']`` is redirected to *struct_path*; the JSON output
    still goes to the preconfigured ``config['path2']``.
    """
    config['path'] = struct_path

    cfg = ET.parse(f"{config['path']}/config/config.xml")
    config['default'] = [
        s.find('Name').text
        for s in cfg.getroot().find('Strukturdefinitionen').findall('Struktur')
    ]
    struct = {name: get_default_cols(i) for i, name in enumerate(config['default'])}
    struct.update(config['special'])
    print(struct)
    get_structure_and_tree(struct)
    # NOTE: post_structure_and_tree() is intentionally not called here.


if __name__ == '__main__':
    # Other project folders this script has been run against:
    #   c:/projekte/gcstruct_dresen
    #   c:/projekte/python/gcstruct
    #   c:/projekte/python/gcstruct_reisacher_planung
    #   X:/Robert/Planung Reisacher/GCStruct_neue_Struktur_Planung
    main('P:\\SKR51_GCStruct\\GCStruct_Siebrecht')