"""Round-trip the GCStruct chart of accounts between CSV/XML files and JSON.

``get_structure_and_tree`` reads ``Kontenrahmen.csv`` plus one XML tree per
entry in ``config`` and writes everything into ``gcstruct/SKR51.json``.
``post_structure_and_tree`` reads that JSON file back and writes ``*_out``
CSV and XML files next to the originals.
"""
import json
import re
import xml.etree.ElementTree as ET

import pandas as pd

try:
    from bs4 import BeautifulSoup
except ImportError:
    # Only the XML export in post_structure_and_tree() needs bs4; the import
    # direction keeps working without it.
    BeautifulSoup = None

# Names of the structures; each one owns ten consecutive "Ebene<n>" columns
# in Kontenrahmen.csv and one XML file under gcstruct/Xml/.
config = ["SKR51", "Herkunft_KST", "Absatzkanal", "Kostentraeger", "Marke", "Standort", "Manuelle_Konten"]
# Leading account columns that are copied through unchanged.
columns = ["Konto_Nr", "Konto_Bezeichnung", "Konto_Art", "Kostenstelle", "STK", "Konto_1", "Konto_2", "Konto_3", "Konto_4", "Konto_5"]

# Number of "Ebene" (level) columns per structure.
_LEVELS = 10
# NOTE(review): "ansi" is a Windows-only codec alias in CPython; on other
# platforms opening these files raises LookupError. Kept as in the original.
_CSV_ENCODING = "ansi"
# Matches "<number> - <label>" directly followed by an empty level (";;").
_LAST_LEVEL_RE = re.compile(r"([^;]+) - ([^;]*);;")


def get_tree(node, parents):
    """Convert the children of an XML element into nested dicts.

    Args:
        node: XML element whose children all carry a ``Name`` attribute.
        parents: Names of the ancestors of ``node``'s children (outermost
            first). The list is not modified.

    Returns:
        A list with one dict per child, with the keys ``id`` (the path of
        names joined by ";" and padded with ";" to ten fields), ``text``
        (the child's name) and ``children`` (the same structure, recursively).

    Raises:
        KeyError: If a child element has no ``Name`` attribute.
    """
    result = []
    for child in node:
        name = child.attrib["Name"]
        # Build a fresh path per child instead of push/pop on the caller's list.
        path = [*parents, name]
        result.append({
            "id": ";".join(path) + ";" * (_LEVELS - len(path)),
            "text": name,
            "children": get_tree(child, path),
        })
    return result


def structure_from_tree(node):
    """Return the ``id`` of ``node`` and of all descendants in pre-order."""
    result = [node["id"]]
    for child in node["children"]:
        result.extend(structure_from_tree(child))
    return result


def xml_from_tree(xml_node, tree_node):
    """Append one ``<Ebene Name=...>`` element per tree child to ``xml_node``.

    Args:
        xml_node: XML element that receives the new sub-elements.
        tree_node: Dict with a ``children`` list; every child needs the keys
            ``text`` and ``children``.
    """
    for child in tree_node["children"]:
        element = ET.SubElement(xml_node, "Ebene")
        element.set("Name", child["text"])
        xml_from_tree(element, child)


def split_it(text, index):
    """Extract one part of the first ``"<nr> - <label>;;"`` entry in ``text``.

    Args:
        text: The ";"-joined levels of one structure.
        index: 0 for the part before " - ", 1 for the part after it.

    Returns:
        The requested part, or ``""`` when nothing matches, ``index`` is out
        of range or ``text`` is not a string. A path without any ";;" (for
        example one with all ten levels filled) therefore also yields ``""``.
    """
    try:
        return _LAST_LEVEL_RE.findall(text)[0][index]
    except (IndexError, TypeError):
        # IndexError: no match / bad index; TypeError: non-string input.
        return ""


def get_structure_and_tree():
    """Read Kontenrahmen.csv and the structure XML files into SKR51.json."""
    df = pd.read_csv(
        "gcstruct/Kontenrahmen/Kontenrahmen.csv",
        sep=";",
        encoding=_CSV_ENCODING,
        decimal=",",
        # Read every column as text.
        converters={i: str for i in range(0, 200)},
    )

    for i, structure in enumerate(config):
        first = i * _LEVELS + 1
        # Join the ten level columns of this structure into one ";" path.
        joined = df["Ebene" + str(first)]
        for offset in range(1, _LEVELS):
            joined = joined + ";" + df["Ebene" + str(first + offset)]
        df[structure] = joined
        df["LetzteEbene" + str(i + 1) + "_Nr"] = df[structure].apply(lambda x: split_it(x, 0))
        df["LetzteEbene" + str(i + 1) + "_Bez"] = df[structure].apply(lambda x: split_it(x, 1))

    nr_cols = ["LetzteEbene" + str(i + 1) + "_Nr" for i in range(len(config))]
    bez_cols = ["LetzteEbene" + str(i + 1) + "_Bez" for i in range(len(config))]
    df = df[columns + config + nr_cols + bez_cols]

    json_result = {"Kontenrahmen": df.to_dict("records")}
    for structure in config:
        tree = ET.parse("gcstruct/Xml/" + structure + ".xml")
        json_result[structure] = get_tree(tree.getroot(), [])

    # Context manager so the file is flushed and closed deterministically.
    with open("gcstruct/SKR51.json", "w") as f:
        json.dump(json_result, f, indent=2)


def post_structure_and_tree():
    """Write the ``*_out`` CSV and XML files from gcstruct/SKR51.json.

    Raises:
        ImportError: If bs4 is not installed (needed to pretty-print XML).
    """
    if BeautifulSoup is None:
        # Fail before any output file is touched.
        raise ImportError("bs4 (beautifulsoup4) is required to write the XML files")

    with open("gcstruct/SKR51.json", "r") as f:
        json_post = json.load(f)

    # Kontenrahmen.csv
    ebenen = ["Ebene" + str(i) for i in range(1, len(config) * _LEVELS + 1)]
    header = ";".join(columns + ebenen)
    # Each structure value already holds its ten levels joined by ";".
    cols = columns + config
    with open("gcstruct/Kontenrahmen/Kontenrahmen_out.csv", "w", encoding=_CSV_ENCODING) as f:
        f.write(header + "\n")
        for row in json_post["Kontenrahmen"]:
            f.write(";".join([row[e] for e in cols]) + "\n")

    # XML and, where applicable, the structure CSV
    for i, structure in enumerate(config):
        # Synthetic root whose id is an all-empty ten-field path.
        root_node = {"id": ";" * (_LEVELS - 1), "children": json_post[structure]}

        with open("gcstruct/Strukturen/Kontenrahmen.csv/" + structure + "_out.csv", "w", encoding=_CSV_ENCODING) as f:
            f.write(";".join(["Ebene" + str(i * _LEVELS + j) for j in range(1, _LEVELS + 1)]) + "\n")
            f.write("\n".join(structure_from_tree(root_node)))

        root = ET.Element("Ebene")
        root.set("Name", structure)
        xml_from_tree(root, root_node)

        with open("gcstruct/Xml/" + structure + "_out.xml", "w", encoding="utf-8") as f:
            f.write(BeautifulSoup(ET.tostring(root), "xml").prettify())


if __name__ == "__main__":
    get_structure_and_tree()
    # post_structure_and_tree()