"""Export the axis layout of a .ppx report file to JSON.

The script parses the report XML, collects the dimensions, categories,
category expressions and advanced subsets declared in it, resolves the
categories referenced by every ``ReportAxis`` to readable names and writes
the result to ``cognos7/data/ppx/export.json``.
"""
import json
import re
from pathlib import Path

from bs4 import BeautifulSoup

dimensions = []
categories = []
cat_expressions = []
advanced_subset = []
report_specs = []

# RefIds containing "T<digits>" are resolved via the category expressions
# instead of the plain categories.  Compiled once because it is used inside
# the nested axis loops; ``search`` semantics of the original are kept.
expression_ref = re.compile(r"T\d+")

with open("cognos7/data/ppx/1_3_Kostenstellenbericht_Vormonat.ppx", "rb") as frh:
    bs = BeautifulSoup(frh, "xml", from_encoding="latin-1")

# The cube name carries a 4-character suffix that is stripped to build the
# name of the optional "<cube>_ids.json" label file.
cube_name = bs.find("PPDSConnection")["Cube"]
cube_ids_file = "cognos7/data/ppx/" + cube_name[:-4] + "_ids.json"

# Optional mapping "<dimension>//<category>" -> label; stays empty when the
# file does not exist.
cat_name_to_label = {}
if Path(cube_ids_file).exists():
    with open(cube_ids_file, "r") as frh:
        cat_name_to_label = json.load(frh)

for entry in bs.find("DataPool"):
    # Text nodes between the elements have no tag name.
    if entry.name is None:
        continue
    if entry.name == "CatExp":
        cat_expressions.append(
            {
                "ID": entry["ExpId"],
                "Operation": entry.Op["Code"],
                "Elements": [e["RefId"] for e in entry.find_all("Ref")],
            }
        )
    elif entry.name == "Dimension":
        dimensions.append({"Name": entry["Code"], "ID": entry["DimIdx"]})
    elif entry.name == "Level":
        categories.append(
            {
                "ID": entry["LevelId"],
                "Name": entry["Code"],
                "Dimension": entry["DimIdx"],
            }
        )
    elif entry.name == "PPDSID":
        categories.append(
            {
                "ID": entry["CatId"],
                "Name": entry["Code"],
                "Dimension": entry["DimIdx"],
            }
        )

# find() returns None when the document has no QueryList element; treat that
# as "no subsets" instead of failing with a TypeError while iterating None.
query_list = bs.find("QueryList")
for entry in query_list if query_list is not None else []:
    if entry.name is None:
        continue
    advanced_subset.append(
        {
            "ID": entry["Key"],
            "Name": entry["Alias"],
            "Dimension": entry["Dimension"],
            "Level": entry.find("Level")["RefId"],
        }
    )

# Lookup tables keyed by the IDs used inside the report file.
dim_id_to_name = {e["ID"]: e["Name"] for e in dimensions}
cat_id_to_name = {
    e["ID"]: dim_id_to_name[e["Dimension"]] + "//" + e["Name"] for e in categories
}
cat_exp_to_name = {
    e["ID"]: e["Operation"]
    + "('"
    + "', '".join(cat_id_to_name[ref_id] for ref_id in e["Elements"])
    + "')"
    for e in cat_expressions
}
query_to_name = {e["ID"]: e["Name"] for e in advanced_subset}
query_to_members = {e["ID"]: cat_id_to_name[e["Level"]] for e in advanced_subset}

for entry in bs.find_all("ReportAxis"):
    for level, group in enumerate(entry.find_all("NestGroup")):
        for pos, category in enumerate(group.find_all("Category")):
            query_key = category.get("QueryKey")
            if query_key is not None:
                # Category produced by an advanced subset.
                # NOTE(review): this branch stores the axis identifier under
                # "Axis" while the two branches below use "Type" - confirm
                # which key the consumers of export.json expect.
                report_specs.append(
                    {
                        "ID": query_key,
                        "Name": "members( '" + query_to_members[query_key] + "' )",
                        "Axis": entry["Identifier"],
                        "Level": str(level),
                        "Position": str(pos),
                        "Label": query_to_name[query_key],
                        "Width": category.get("ColExtent", ""),
                    }
                )
                continue

            ref_id = category["RefId"]
            if expression_ref.search(ref_id):
                # Reference to a category expression.
                report_specs.append(
                    {
                        "ID": ref_id,
                        "Name": cat_exp_to_name[ref_id],
                        "Type": entry["Identifier"],
                        "Level": str(level),
                        "Position": str(pos),
                        "Label": category.get("Label", ""),
                        "Width": category.get("ColExtent", ""),
                    }
                )
            else:
                # Plain category; without an explicit Label attribute the
                # label from the optional ids file is used, else "".
                name = cat_id_to_name[ref_id]
                report_specs.append(
                    {
                        "ID": ref_id,
                        "Name": name,
                        "Type": entry["Identifier"],
                        "Level": str(level),
                        "Position": str(pos),
                        "Label": category.get(
                            "Label", cat_name_to_label.get(name, "")
                        ),
                        "Width": category.get("ColExtent", ""),
                    }
                )

with open("cognos7/data/ppx/export.json", "w") as fwh:
    json.dump(
        {
            "Dim": dim_id_to_name,
            "Cat": cat_id_to_name,
            "Exp": cat_exp_to_name,
            "Sub": query_to_name,
            "Specs": report_specs,
        },
        fwh,
        indent=2,
    )