# from itertools import chain
"""Collect status information about cubes, models and data sources from an
unzipped support snapshot and export it as JSON."""
import json
import re
from collections import defaultdict
from pathlib import Path

import pandas as pd
from bs4 import BeautifulSoup

from config import Config  # expected to expose system_dir and portal_dir


def get_path_info(base_dir):
    """Load the file listing (path_info.csv), indexed by lower-cased filename."""
    path_info_df = pd.read_csv(
        base_dir + "/logs/status/path_info.csv", sep=";", encoding="latin-1", converters={"process": str}
    )
    path_info_df.rename(columns={"name": "filename"}, inplace=True)
    path_info_df["filename"] = path_info_df["filename"].str.lower()
    path_info_df.set_index("filename", inplace=True, drop=False)
    return path_info_df.to_dict(orient="index")


def get_cubes(base_dir, cfg: Config, path_info, cubes_models):
    """Pair every cube's deployed .mdc file with its freshly built counterpart in cube_out."""
    ver_files = [k for k in path_info.keys() if re.search(r"\\cubes\\.*\.ver", k)]
    cubes = {}
    for file in ver_files:
        match = re.search(r"\\cubes\\(.*)__\d+\.ver$", file)
        cube_name = match[1]
        cube_subdir = f"{file[:-4]}\\{cube_name}.mdc"
        cube_out = f"{cfg.system_dir}\\cube_out\\{cube_name}.mdc"
        cubes[cube_name] = {
            "deployed_mdc": path_info[cube_subdir],
            "cube_out_mdc": path_info[cube_out],
            "model": cubes_models[cube_name],
            "errors": [],
        }
    return cubes


def get_models(base_dir, cfg: Config, path_info, fm_sources):
    """Parse the model logs and resolve each model's data sources and related files."""
    models = {}
    for file in Path(base_dir + "\\config\\models").glob("*.log"):
        with open(file, "r") as frh:
            model_infos = frh.read().lower().replace('"', "").replace(",", "").split("--")
        datasources = model_infos[1].split("\n")
        datasources = [d for d in datasources if d != ""]
        fm_src = []
        fm_package = None
        if "[" in datasources[0]:
            # A bracketed first entry is a Framework Manager package; its
            # sources are resolved via the fm_sources mapping.
            fm_package = datasources.pop(0).upper()
            fm_src = datasources
            datasources = []
        for src in fm_src:
            if src in fm_sources:
                datasources.extend(fm_sources[src])
        datasources = sorted(set(datasources))
        cube = re.search(r"\\cube_out\\(.*)\.mdc", model_infos[0])[1]
        # file.name is apparently "<model>.mdl.log": [:-8] strips ".mdl.log",
        # [:-4] strips just ".log".
        models[file.name[:-8].lower()] = {
            "framework_manager": "J" if fm_package else "N",  # J/N = ja/nein (yes/no)
            "fm_package": fm_package,
            "fm_sources": fm_src,
            "datasources": datasources,
            "cube": cube,
            "logfile_tasks": path_info.get(
                cfg.portal_dir + "\\tasks\\logs\\" + file.name[:-8] + ".log", {"mtime": "0"}
            ),
            "logfile_system": path_info.get(cfg.system_dir + "\\logs\\" + file.name[:-8] + ".log", {"mtime": "0"}),
            "model_file": path_info.get(cfg.system_dir + "\\models\\" + file.name[:-4], {"mtime": "0"}),
            "model_file_filled": path_info.get(
                cfg.system_dir + "\\models\\gefuellt\\" + file.name[:-4], {"mtime": "0"}
            ),
        }
    return models


def get_database_info(base_dir, cfg: Config):
    """Load db_info.csv and index the GC database's tables by lower-cased name."""
    db_info_df = pd.read_csv(base_dir + "/logs/status/db_info.csv", sep=";", encoding="latin-1")
    db_info_df = db_info_df[db_info_df["DatabaseName"] == "GC"]
    db_info_df["table"] = db_info_df["TableName"].str.lower()
    db_info_df.set_index("table", inplace=True)
    return db_info_df.to_dict(orient="index")


def get_fm_sources(base_dir, cfg):
    """Map each [Schnittstelle] (interface) query subject in the Framework
    Manager model to the [Import] fields it ultimately references."""
    with open(base_dir + "\\config\\fm\\model.xml", "r") as fh:
        bs = BeautifulSoup(fh, "xml")
    sources = defaultdict(list)
    for item in bs.find_all("queryItem"):
        p = item.parent.parent.find("name").string
        if item.parent.parent.name == "folder":
            p = item.parent.parent.parent.find("name").string
        parent = "[{0}].[{1}]".format(p, item.parent.find("name").string)
        src = ""
        exp = ""
        if item.expression:
            if item.expression.refobj:
                src = item.expression.refobj.string
            else:
                exp = item.expression.string
        elif item.externalName:
            exp = item.externalName.string
        sources[parent].append((item.find("name").string, src, exp))
    interface = {}
    for k, fields in sources.items():
        if "[Schnittstelle]" not in k:
            continue
        key = k.split(".")[-1][1:-1].lower()
        links = []
        for field in fields:
            links.append(follow_links(sources, field, ""))
        interface[key] = sorted({re.search(r"\.\[(.*)\]$", e)[1].lower() for e in links if "[Import]" in e})
    return interface


def follow_links(sources, field, value):
    """Follow refobj references recursively until a physical source field is reached."""
    if field[1] == "":
        if field[2] == field[0]:
            return value
        return field[2]
    match = re.search(r"(\[.*\]\.\[.*\])\.\[(.*)\]", field[1])
    key1 = match[1]
    val1 = match[2]
    if key1 in sources:
        for field2 in sources[key1]:
            if field2[0] != val1:
                continue
            return follow_links(sources, field2, key1)
    return key1


def get_datasources(base_dir, cfg, path_info):
    """Collect the .imr/.iqd/.csv files belonging to each data source."""
    all_datasources = {
        re.search(r"\\iqd\\.*\\(.*)\.imr", k)[1] for k in path_info.keys() if re.search(r"\\iqd\\.*\\.*\.imr", k)
    }
    datasources = {}
    for ds in all_datasources:
        ds_search = f"\\{ds}.imr"
        imr_files = [
            k for k in path_info.keys() if re.search(r"\\iqd\\.*\.imr", k) and ds_search in k and "austausch" not in k
        ]
        if len(imr_files) == 0:
            imr_file = "0.imr"
        else:
            imr_file = imr_files.pop(0)
        datasources[ds] = {
            "imr_file": path_info.get(imr_file, {"mtime": "0"}),
            "iqd_file": path_info.get(imr_file[:-4] + ".iqd", {"mtime": "0"}),
            "csv_file": path_info.get(cfg.system_dir + "\\export\\" + ds + ".csv", {"mtime": "0"}),
            "csv_file_iqd_folder": path_info.get(imr_file[:-4] + ".csv", {"mtime": "0"}),
            "duplicates": imr_files,
        }
    return datasources


def cubes_to_models(models):
    """Map each cube to the model that touched it most recently.

    Models are sorted ascending by their log and model-file timestamps, so when
    several models feed the same cube, the newest entry wins.
    """
    models_sort = sorted(
        (
            v.get("logfile_tasks")["mtime"],
            v.get("logfile_system")["mtime"],
            v.get("model_file_filled")["mtime"],
            v.get("model_file")["mtime"],
            k,
            v["cube"],
        )
        for k, v in models.items()
    )
    result = {}
    for m in models_sort:
        result[m[5]] = m[4]  # m[5] = cube name, m[4] = model name
    return result


def main():
    base_dir = "app/temp/unzipped/loeffler-c11_2022-07-08_112547"
    cfg = Config(str(Path(base_dir + "\\gaps.ini").absolute()))
    # File listing
    path_info = get_path_info(base_dir)
    # random_bat_file = [k for k in path_info.keys() if re.search(r'\\Tasks\\.*\.bat', k)][0]
    # portal_dir = re.search(r'(.*)\\Tasks\\.*\.bat', random_bat_file)[1]
    # print(path_info)
    # List of all cubes
    result = {}
    # Models and data sources
    result["fm_sources"] = get_fm_sources(base_dir, cfg)
    result["models"] = get_models(base_dir, cfg, path_info, result["fm_sources"])
    result["database"] = get_database_info(base_dir, cfg)
    result["datasources"] = get_datasources(base_dir, cfg, path_info)
    cubes_models = cubes_to_models(result["models"])
    result["cubes"] = get_cubes(base_dir, cfg, path_info, cubes_models)
    # Cubes up to date?
    # Rest up to date?
    with open("app/temp/logs/export.json", "w") as fh:
        json.dump(result, fh, indent=2)


if __name__ == "__main__":
    main()
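

# For orientation, a sketch of the export.json structure this script produces,
# derived from the return values of the functions above. Key names come from
# the code; leaf values are placeholders (the file-info dicts are rows from
# path_info.csv, or {"mtime": "0"} when a file is missing):
#
# {
#   "fm_sources":  {"<interface>": ["<import field>", ...]},
#   "models":      {"<model>": {"framework_manager": "J", "fm_package": "[...]",
#                               "fm_sources": [...], "datasources": [...],
#                               "cube": "<cube>", "logfile_tasks": {...},
#                               "logfile_system": {...}, "model_file": {...},
#                               "model_file_filled": {...}}},
#   "database":    {"<table>": {...}},
#   "datasources": {"<ds>": {"imr_file": {...}, "iqd_file": {...},
#                            "csv_file": {...}, "csv_file_iqd_folder": {...},
#                            "duplicates": [...]}},
#   "cubes":       {"<cube>": {"deployed_mdc": {...}, "cube_out_mdc": {...},
#                              "model": "<model>", "errors": []}}
# }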