mdl_convert.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309
  1. import json
  2. import os
  3. import re
  4. from pathlib import Path
  5. CONVERSION = [
  6. "Name",
  7. "CognosSource",
  8. "CognosPackageDatasourceConnection",
  9. "DataSource",
  10. "OrgName",
  11. "Dimension",
  12. "Root",
  13. "Drill",
  14. "Levels",
  15. "Associations",
  16. "Category",
  17. "SpecialCategory",
  18. "MapDrills",
  19. "ViewName",
  20. "Measure",
  21. "Signon",
  22. "Cube",
  23. "CustomView",
  24. "CustomViewChildList",
  25. "SecurityNameSpace",
  26. "SecurityObject",
  27. "AllocationAdd",
  28. ]
  29. id_lookup = {}
  30. find_words = re.compile(r'("[^"]+"|\w+) ')
  31. def convert_block(block):
  32. block = block.replace("\n", "")
  33. block_type = block.split(" ")[0]
  34. words = find_words.findall(block)
  35. if len(words) < 3:
  36. return {"Type": block_type}
  37. result = {"Type": words[0], "ID": words[1], "Name": words[2].strip('"')}
  38. offset = 0
  39. for i in range(3, len(words), 2):
  40. if len(words) < i + offset + 2:
  41. break
  42. key = words[i + offset]
  43. if key in ["PackageReportSource", "Database"]:
  44. result[key] = {
  45. "ID": words[i + offset + 1],
  46. "Name": words[i + offset + 2].strip('"'),
  47. }
  48. offset += 1
  49. elif key in ["DimensionView"]:
  50. if key + "s" not in result:
  51. result[key + "s"] = []
  52. result[key + "s"].append({"ID": words[i + offset + 1], "Name": words[i + offset + 2].strip('"')})
  53. offset += 1
  54. elif key in ["MeasureInclude"]:
  55. if key + "s" not in result:
  56. result[key + "s"] = []
  57. result[key + "s"].append({"ID": words[i + offset + 1], "Include": words[i + offset + 2]})
  58. offset += 1
  59. elif key == "Calc":
  60. for j in range(i + offset + 1, len(words)):
  61. if words[j] in ["Sign", "Format", "Filtered"] or j == len(words) - 1:
  62. result["Calc"] = " ".join(words[i + offset + 1 : j])
  63. offset = j - i - 1
  64. break # for
  65. elif key == "EncryptedPW":
  66. result["EncryptedPW"] = words[i + offset + 1].strip('"')
  67. result["Salt"] = words[i + offset + 2].strip('"')
  68. offset += 1
  69. elif key == "AllocationAdd":
  70. if key + "s" not in result:
  71. result[key + "s"] = []
  72. result[key + "s"].append({"Measure": words[i + offset + 2], "Type": words[i + offset + 4]})
  73. offset += 3
  74. elif key in [
  75. "CustomViewList",
  76. "DrillThrough",
  77. "DeployLocations",
  78. "PowerCubeCustomViewList",
  79. "StartList",
  80. "TransientLevelList",
  81. ]:
  82. for j in range(i + offset + 1, len(words)):
  83. if words[j] in ["EndList"]:
  84. result[key] = " ".join(words[i + offset + 1 : j])
  85. offset = j - i - 1
  86. break # for
  87. # elif words[i + offset].isnumeric() or words[i + offset].startswith('"'):
  88. # offset += 1
  89. else:
  90. result[key] = words[i + offset + 1].strip('"')
  91. if block_type == "DataSource":
  92. result["Columns"] = []
  93. if block_type in ["OrgName", "Levels", "Measure"]:
  94. result["Associations"] = []
  95. if block_type == "Dimension":
  96. result["Root"] = {}
  97. result["Levels"] = []
  98. result["Categories"] = []
  99. result["SpecialCategories"] = []
  100. if block_type == "Root":
  101. result["Drill"] = {}
  102. if block_type == "Associations":
  103. result["Parent"] = 0
  104. if block_type == "CustomView":
  105. result["ChildList"] = {}
  106. if block_type == "SecurityNameSpace":
  107. result["Objects"] = []
  108. return result
  109. def remove_ids(nested):
  110. nested.pop("ID", "")
  111. nested.pop("DateDrill", "")
  112. nested.pop("Primary", "")
  113. nested.pop("Lastuse", "")
  114. nested.pop("AssociationContext", "")
  115. if nested.get("Type", "") == "SpecialCategory" and "Label" in nested and "20" in nested["Label"]:
  116. nested.pop("Label", "")
  117. for col in ["Parent", "Levels", "CustomViewList"]:
  118. if col not in nested:
  119. continue
  120. if col == "Levels" and (isinstance(nested["Levels"], list) or nested["Levels"] == "0"):
  121. continue
  122. nested[col] = id_lookup.get(nested[col], {}).get("Name", "undefined")
  123. for child in nested.values():
  124. if isinstance(child, dict):
  125. remove_ids(child)
  126. if isinstance(child, list):
  127. for entry in child:
  128. remove_ids(entry)
  129. return nested
  130. def prepare_mdl_str(mdl_str):
  131. mdl_str = re.sub(r"\n+", "\n", mdl_str)
  132. mdl_str = re.sub(r"^\n?Name ", "ModelName 1 ", mdl_str)
  133. mdl_str = re.sub(r'\nLevels (\d+ [^"])', r"Levels \1", mdl_str)
  134. mdl_str = re.sub(r" Associations ", " \nAssociations ", mdl_str)
  135. mdl_str = re.sub(r'([^ ])""', r"\1'", mdl_str)
  136. mdl_str = re.sub(r'""([^ ])', r"'\1", mdl_str)
  137. tags = "|".join(CONVERSION)
  138. mdl_str = re.sub(r"\n(" + tags + r") ", r"\n\n\1 ", mdl_str)
  139. return mdl_str
  140. def group_mdl_blocks(converted):
  141. result = {
  142. "Model": {},
  143. "Connections": [],
  144. "DataSources": [],
  145. "Dimensions": [],
  146. "Measures": [],
  147. "Signons": [],
  148. "CustomViews": [],
  149. "Security": [],
  150. "Cubes": [],
  151. }
  152. types = [c["Type"] for c in converted]
  153. ids = [c.get("ID", "0") for c in converted]
  154. id_lookup.update(dict(zip(ids, converted)))
  155. current = None
  156. level_ids = []
  157. for c, t in zip(converted, types):
  158. if t in [""]:
  159. continue
  160. if t in ["Category", "SpecialCategory"] and result["Dimensions"][-1]["Name"] == "Zeit":
  161. if t == "Category" or c["Name"][0].isnumeric():
  162. continue
  163. if t in ["ModelName"]:
  164. result["Model"] = c
  165. elif t in ["CognosSource", "CognosPackageDatasourceConnection"]:
  166. result["Connections"].append(c)
  167. elif t in ["DataSource"]:
  168. result["DataSources"].append(c)
  169. elif t in ["OrgName"]:
  170. result["DataSources"][-1]["Columns"].append(c)
  171. elif t in ["Dimension"]:
  172. level_ids = []
  173. result["Dimensions"].append(c)
  174. elif t in ["Root"]:
  175. result["Dimensions"][-1]["Root"] = c
  176. elif t in ["Drill"]:
  177. result["Dimensions"][-1]["Root"]["Drill"] = c
  178. elif t in ["Levels"]:
  179. current = c
  180. level_ids.append(c["ID"])
  181. result["Dimensions"][-1]["Levels"].append(c)
  182. elif t in ["Category"]:
  183. if c.get("Levels", "") in level_ids[0:2]:
  184. result["Dimensions"][-1]["Categories"].append(c)
  185. elif t in ["SpecialCategory"]:
  186. result["Dimensions"][-1]["SpecialCategories"].append(c)
  187. elif t in ["Measure"]:
  188. current = c
  189. result["Measures"].append(c)
  190. elif t in ["Associations"]:
  191. c["Parent"] = current["ID"]
  192. current["Associations"].append(c)
  193. for ds in result["DataSources"]:
  194. for col in ds["Columns"]:
  195. if col["Column"] == c["AssociationReferenced"]:
  196. col["Associations"].append(c)
  197. elif t in ["Signon"]:
  198. result["Signons"].append(c)
  199. elif t in ["Cube"]:
  200. result["Cubes"].append(c)
  201. elif t in ["CustomView"]:
  202. result["CustomViews"].append(c)
  203. elif t in ["CustomViewChildList"]:
  204. for cv in result["CustomViews"]:
  205. if cv["ID"] == c["ID"]:
  206. cv["ChildList"] = c
  207. elif t in ["SecurityNameSpace"]:
  208. result["Security"].append(c)
  209. elif t in ["SecurityObject"]:
  210. result["Security"][-1]["Objects"].append(c)
  211. # else:
  212. # print(t, c)
  213. return result
  214. def build_query(datasource):
  215. table = datasource["Name"]
  216. # suffix = "_fm" if datasource["SourceType"] == "CognosSourceQuery" else "_imr"
  217. # table_name = f"[staging].[{table}{suffix}]"
  218. table_name = f"[export_csv].[{table}]"
  219. view_name = f"[load].[{table}]"
  220. columns = ",\n\t".join([extract_column(c) for c in datasource["Columns"]])
  221. return f"CREATE\n\tOR\n\nALTER VIEW {view_name}\nAS\nSELECT {columns} \nFROM {table_name}\nGO\n\n"
  222. def extract_column(col):
  223. name = col["Name"]
  224. if "]." in name:
  225. name = name.split("].")[-1]
  226. alias = col["Column"]
  227. is_used = "" if len(col["Associations"]) > 0 else "--"
  228. return f"{is_used}[{name}] AS [{alias}]"
  229. def convert_file(filename: str) -> None:
  230. with open(filename, "r", encoding="latin-1") as frh:
  231. mdl_str = frh.read()
  232. mdl_str = prepare_mdl_str(mdl_str)
  233. mdl_blocks = mdl_str.split("\n\n")
  234. converted = [convert_block(b) for b in mdl_blocks]
  235. grouped = group_mdl_blocks(converted)
  236. with open(filename[:-4] + "_ori.json", "w") as fwh:
  237. json.dump(grouped, fwh, indent=2)
  238. # yaml.safe_dump(result, open(filename[:-4] + ".yaml", "w"))
  239. without_ids = remove_ids(grouped)
  240. with open(filename[:-4] + ".json", "w") as fwh:
  241. json.dump(without_ids, fwh, indent=2)
  242. queries = {ds["Name"]: build_query(ds) for ds in grouped["DataSources"]}
  243. with open(filename[:-4] + "_queries.sql", "w", encoding="latin-1") as fwh:
  244. fwh.writelines(queries.values())
  245. base_dir = str(Path(filename).parent.parent / "SQL")
  246. os.makedirs(base_dir, exist_ok=True)
  247. model = Path(filename).stem
  248. for ds, query in queries.items():
  249. with open(f"{base_dir}\\{ds}_{model}.sql", "w", encoding="latin-1") as fwh:
  250. fwh.write(query)
  251. cat_name_to_label = dict(
  252. [
  253. (d["Name"] + "//" + c["Name"], c.get("Label", c.get("SourceValue", "")))
  254. for d in grouped["Dimensions"]
  255. for c in d["Categories"]
  256. ]
  257. )
  258. filename_ids = filename[:-4] + "_ids.json"
  259. if len(grouped["Cubes"]):
  260. cube_name = Path(grouped["Cubes"][0]["MdcFile"]).name
  261. filename_ids = str(Path(filename).parent / cube_name[:-4]) + "_ids.json"
  262. with open(filename_ids, "w") as fwh:
  263. json.dump(cat_name_to_label, fwh, indent=2)
  264. def convert_folder(base_dir: str) -> None:
  265. files = sorted([(f.stat().st_mtime, f) for f in Path(base_dir).rglob("*.mdl")])
  266. for _, filename in files:
  267. convert_file(str(filename))
  268. if __name__ == "__main__":
  269. # convert_file("data/S_Offene_Auftraege.mdl")
  270. # convert_file("data/F_Belege_SKR_SKR_Boettche.mdl")
  271. convert_folder("cognos7/data/mdl/")