# inspect_files.py
  1. # from itertools import chain
  2. import json
  3. import re
  4. from collections import defaultdict
  5. from pathlib import Path
  6. import pandas as pd
  7. from bs4 import BeautifulSoup
  8. from config import Config
  9. def get_path_info(base_dir):
  10. path_info_df = pd.read_csv(
  11. base_dir + "/logs/status/path_info.csv", sep=";", encoding="latin-1", converters={"process": str}
  12. )
  13. path_info_df.rename(columns={"name": "filename"}, inplace=True)
  14. path_info_df["filename"] = path_info_df["filename"].str.lower()
  15. path_info_df.set_index("filename", inplace=True, drop=False)
  16. return path_info_df.to_dict(orient="index")
  17. def get_cubes(base_dir, cfg: Config, path_info, cubes_models):
  18. ver_files = [k for k in path_info.keys() if re.search(r"\\cubes\\.*\.ver", k)]
  19. cubes = {}
  20. for file in ver_files:
  21. match = re.search(r"\\cubes\\(.*)__\d+\.ver$", file)
  22. cube_name = match[1]
  23. cube_subdir = f"{file[:-4]}\\{cube_name}.mdc"
  24. cube_out = f"{cfg.system_dir}\\cube_out\\{cube_name}.mdc"
  25. cubes[cube_name] = {
  26. "deployed_mdc": path_info[cube_subdir],
  27. "cube_out_mdc": path_info[cube_out],
  28. "model": cubes_models[cube_name],
  29. "errors": [],
  30. }
  31. return cubes
  32. def get_models(base_dir, cfg: Config, path_info, fm_sources):
  33. models = {}
  34. for file in Path(base_dir + "\\config\\models").glob("*.log"):
  35. with open(file, "r") as frh:
  36. model_infos = frh.read().lower().replace('"', "").replace(",", "").split("--")
  37. datasources = model_infos[1].split("\n")
  38. datasources = [d for d in datasources if d != ""]
  39. fm_src = []
  40. fm_package = None
  41. if "[" in datasources[0]:
  42. fm_package = datasources.pop(0).upper()
  43. fm_src = datasources
  44. datasources = []
  45. for src in fm_src:
  46. if src in fm_sources:
  47. datasources.extend(fm_sources[src])
  48. datasources = sorted(set(datasources))
  49. cube = re.search(r"\\cube_out\\(.*)\.mdc", model_infos[0])[1]
  50. models[file.name[:-8].lower()] = {
  51. "framework_manager": "J" if fm_package else "N",
  52. "fm_package": fm_package,
  53. "fm_sources": fm_src,
  54. "datasources": datasources,
  55. "cube": cube,
  56. "logfile_tasks": path_info.get(
  57. cfg.portal_dir + "\\tasks\\logs\\" + file.name[:-8] + ".log", {"mtime": "0"}
  58. ),
  59. "logfile_system": path_info.get(cfg.system_dir + "\\logs\\" + file.name[:-8] + ".log", {"mtime": "0"}),
  60. "model_file": path_info.get(cfg.system_dir + "\\models\\" + file.name[:-4], {"mtime": "0"}),
  61. "model_file_filled": path_info.get(
  62. cfg.system_dir + "\\models\\gefuellt\\" + file.name[:-4], {"mtime": "0"}
  63. ),
  64. }
  65. return models
  66. def get_database_info(base_dir, cfg: Config):
  67. db_info_df = pd.read_csv(base_dir + "/logs/status/db_info.csv", sep=";", encoding="latin-1")
  68. db_info_df = db_info_df[db_info_df["DatabaseName"] == "GC"]
  69. db_info_df["table"] = db_info_df["TableName"].str.lower()
  70. db_info_df.set_index("table", inplace=True)
  71. return db_info_df.to_dict(orient="index")
  72. def get_fm_sources(base_dir, cfg):
  73. bs = BeautifulSoup(open(base_dir + "\\config\\fm\\model.xml", "r"), "xml")
  74. sources = defaultdict(list)
  75. for item in bs.find_all("queryItem"):
  76. p = item.parent.parent.find("name").string
  77. if item.parent.parent.name == "folder":
  78. p = item.parent.parent.parent.find("name").string
  79. parent = "[{0}].[{1}]".format(p, item.parent.find("name").string)
  80. src = ""
  81. exp = ""
  82. if item.expression:
  83. if item.expression.refobj:
  84. src = item.expression.refobj.string
  85. else:
  86. exp = item.expression.string
  87. elif item.externalName:
  88. exp = item.externalName.string
  89. sources[parent].append((item.find("name").string, src, exp))
  90. interface = {}
  91. for k, fields in sources.items():
  92. if "[Schnittstelle]" not in k:
  93. continue
  94. key = k.split(".")[-1][1:-1].lower()
  95. links = []
  96. for field in fields:
  97. links.append(follow_links(sources, field, ""))
  98. interface[key] = sorted(list(set([re.search(r"\.\[(.*)\]$", e)[1].lower() for e in links if "[Import]" in e])))
  99. return interface
  100. def follow_links(sources, field, value):
  101. if field[1] == "":
  102. if field[2] == field[0]:
  103. return value
  104. return field[2]
  105. match = re.search(r"(\[.*\]\.\[.*\])\.\[(.*)\]", field[1])
  106. key1 = match[1]
  107. val1 = match[2]
  108. if key1 in sources:
  109. for field2 in sources[key1]:
  110. if field2[0] != val1:
  111. continue
  112. return follow_links(sources, field2, key1)
  113. return key1
  114. def get_datasources(base_dir, cfg, path_info):
  115. all_datasources = set(
  116. [re.search(r"\\iqd\\.*\\(.*)\.imr", k)[1] for k in path_info.keys() if re.search(r"\\iqd\\.*\\.*\.imr", k)]
  117. )
  118. datasources = {}
  119. for ds in all_datasources:
  120. ds_search = f"\\{ds}.imr"
  121. imr_files = [
  122. k for k in path_info.keys() if re.search(r"\\iqd\\.*\.imr", k) and ds_search in k and "austausch" not in k
  123. ]
  124. if len(imr_files) == 0:
  125. imr_file = "0.imr"
  126. else:
  127. imr_file = imr_files.pop(0)
  128. datasources[ds] = {
  129. "imr_file": path_info.get(imr_file, {"mtime": "0"}),
  130. "iqd_file": path_info.get(imr_file[:-4] + ".iqd", {"mtime": "0"}),
  131. "csv_file": path_info.get(cfg.system_dir + "\\export\\" + ds + ".csv", {"mtime": "0"}),
  132. "csv_file_iqd_folder": path_info.get(imr_file[:-4] + ".csv", {"mtime": "0"}),
  133. "duplicates": imr_files,
  134. }
  135. return datasources
  136. def cubes_to_models(models):
  137. models_sort = sorted(
  138. [
  139. (
  140. v.get("logfile_tasks")["mtime"],
  141. v.get("logfile_system")["mtime"],
  142. v.get("model_file_filled")["mtime"],
  143. v.get("model_file")["mtime"],
  144. k,
  145. v["cube"],
  146. )
  147. for k, v in models.items()
  148. ]
  149. )
  150. result = {}
  151. for m in models_sort:
  152. result[m[5]] = m[4]
  153. return result
  154. def main():
  155. base_dir = "app/temp/unzipped/loeffler-c11_2022-07-08_112547"
  156. cfg = Config(str(Path(base_dir + "\\gaps.ini").absolute()))
  157. # Dateiliste
  158. path_info = get_path_info(base_dir)
  159. # random_bat_file = [k for k in path_info.keys() if re.search(r'\\Tasks\\.*\.bat', k)][0]
  160. # portal_dir = re.search(r'(.*)\\Tasks\\.*\.bat', random_bat_file)[1]
  161. # print(path_info)
  162. # Liste aller Cubes
  163. result = {}
  164. # Modelle und Datenquellen
  165. result["fm_sources"] = get_fm_sources(base_dir, cfg)
  166. result["models"] = get_models(base_dir, cfg, path_info, result["fm_sources"])
  167. result["database"] = get_database_info(base_dir, cfg)
  168. result["datasources"] = get_datasources(base_dir, cfg, path_info)
  169. cubes_models = cubes_to_models(result["models"])
  170. result["cubes"] = get_cubes(base_dir, cfg, path_info, cubes_models)
  171. # Cubes aktuell?
  172. # Rest aktuell?
  173. json.dump(result, open("app/temp/logs/export.json", "w"), indent=2)
  174. if __name__ == "__main__":
  175. main()