mdl_convert.py 9.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301
  1. import json
  2. from pathlib import Path
  3. import re
  4. CONVERSION = [
  5. "Name",
  6. "CognosSource",
  7. "CognosPackageDatasourceConnection",
  8. "DataSource",
  9. "OrgName",
  10. "Dimension",
  11. "Root",
  12. "Drill",
  13. "Levels",
  14. "Associations",
  15. "Category",
  16. "SpecialCategory",
  17. "MapDrills",
  18. "ViewName",
  19. "Measure",
  20. "Signon",
  21. "Cube",
  22. "CustomView",
  23. "CustomViewChildList",
  24. "SecurityNameSpace",
  25. "SecurityObject",
  26. ]
  27. id_lookup = {}
  28. find_words = re.compile(r'("[^"]+"|\w+) ')
  29. def convert_block(block):
  30. block = block.replace("\n", "")
  31. block_type = block.split(" ")[0]
  32. words = find_words.findall(block)
  33. if len(words) < 3:
  34. return {"Type": block_type}
  35. result = {"Type": words[0], "ID": words[1], "Name": words[2].strip('"')}
  36. offset = 0
  37. for i in range(3, len(words), 2):
  38. if len(words) < i + offset + 2:
  39. break
  40. key = words[i + offset]
  41. if key in ["PackageReportSource", "Database"]:
  42. result[key] = {
  43. "ID": words[i + offset + 1],
  44. "Name": words[i + offset + 2].strip('"'),
  45. }
  46. offset += 1
  47. elif key in ["DimensionView"]:
  48. if key + "s" not in result:
  49. result[key + "s"] = []
  50. result[key + "s"].append(
  51. {"ID": words[i + offset + 1], "Name": words[i + offset + 2].strip('"')}
  52. )
  53. offset += 1
  54. elif key in ["MeasureInclude"]:
  55. if key + "s" not in result:
  56. result[key + "s"] = []
  57. result[key + "s"].append(
  58. {"ID": words[i + offset + 1], "Include": words[i + offset + 2]}
  59. )
  60. offset += 1
  61. elif key == "Calc":
  62. for j in range(i + offset + 1, len(words)):
  63. if words[j] in ["Sign", "Format", "Filtered"] or j == len(words) - 1:
  64. result["Calc"] = " ".join(words[i + offset + 1 : j])
  65. offset = j - i - 1
  66. break # for
  67. elif key == "EncryptedPW":
  68. result["EncryptedPW"] = words[i + offset + 1].strip('"')
  69. result["Salt"] = words[i + offset + 2].strip('"')
  70. offset += 1
  71. elif key == "AllocationAdd":
  72. if key + "s" not in result:
  73. result[key + "s"] = []
  74. result[key + "s"].append(
  75. {"Measure": words[i + offset + 2], "Type": words[i + offset + 4]}
  76. )
  77. offset += 3
  78. elif key in [
  79. "CustomViewList",
  80. "DrillThrough",
  81. "DeployLocations",
  82. "PowerCubeCustomViewList",
  83. "StartList",
  84. "TransientLevelList",
  85. ]:
  86. for j in range(i + offset + 1, len(words)):
  87. if words[j] in ["EndList"]:
  88. result[key] = " ".join(words[i + offset + 1 : j])
  89. offset = j - i - 1
  90. break # for
  91. # elif words[i + offset].isnumeric() or words[i + offset].startswith('"'):
  92. # offset += 1
  93. else:
  94. result[key] = words[i + offset + 1].strip('"')
  95. if block_type == "DataSource":
  96. result["Columns"] = []
  97. if block_type in ["OrgName", "Levels", "Measure"]:
  98. result["Associations"] = []
  99. if block_type == "Dimension":
  100. result["Root"] = {}
  101. result["Levels"] = []
  102. result["Categories"] = []
  103. result["SpecialCategories"] = []
  104. if block_type == "Root":
  105. result["Drill"] = {}
  106. if block_type == "Associations":
  107. result["Parent"] = 0
  108. if block_type == "CustomView":
  109. result["ChildList"] = {}
  110. if block_type == "SecurityNameSpace":
  111. result["Objects"] = []
  112. return result
  113. def remove_ids(nested):
  114. nested.pop("ID", "")
  115. nested.pop("DateDrill", "")
  116. nested.pop("Primary", "")
  117. nested.pop("Lastuse", "")
  118. nested.pop("AssociationContext", "")
  119. if (
  120. nested.get("Type", "") == "SpecialCategory"
  121. and "Label" in nested
  122. and "20" in nested["Label"]
  123. ):
  124. nested.pop("Label", "")
  125. for col in ["Parent", "Levels", "CustomViewList"]:
  126. if col not in nested:
  127. continue
  128. if col == "Levels" and (
  129. isinstance(nested["Levels"], list) or nested["Levels"] == "0"
  130. ):
  131. continue
  132. nested[col] = id_lookup.get(nested[col], {}).get("Name", "undefined")
  133. for child in nested.values():
  134. if isinstance(child, dict):
  135. remove_ids(child)
  136. if isinstance(child, list):
  137. for entry in child:
  138. remove_ids(entry)
  139. return nested
  140. def prepare_mdl_str(mdl_str):
  141. mdl_str = re.sub(r"\n+", "\n", mdl_str)
  142. mdl_str = re.sub(r"^\n?Name ", "ModelName 1 ", mdl_str)
  143. mdl_str = re.sub(r'\nLevels (\d+ [^"])', r"Levels \1", mdl_str)
  144. mdl_str = re.sub(r" Associations ", " \nAssociations ", mdl_str)
  145. mdl_str = re.sub(r'([^ ])""', r"\1'", mdl_str)
  146. mdl_str = re.sub(r'""([^ ])', r"'\1", mdl_str)
  147. tags = "|".join(CONVERSION)
  148. mdl_str = re.sub(r"\n(" + tags + r") ", r"\n\n\1 ", mdl_str)
  149. return mdl_str
  150. def group_mdl_blocks(converted):
  151. result = {
  152. "Model": {},
  153. "Connections": [],
  154. "DataSources": [],
  155. "Dimensions": [],
  156. "Measures": [],
  157. "Signons": [],
  158. "CustomViews": [],
  159. "Security": [],
  160. "Cubes": [],
  161. }
  162. types = [c["Type"] for c in converted]
  163. ids = [c.get("ID", "0") for c in converted]
  164. id_lookup.update(dict(zip(ids, converted)))
  165. current = None
  166. level_ids = []
  167. for c, t in zip(converted, types):
  168. if t in [""]:
  169. continue
  170. if (
  171. t in ["Category", "SpecialCategory"]
  172. and result["Dimensions"][-1]["Name"] == "Zeit"
  173. ):
  174. if t == "Category" or c["Name"][0].isnumeric():
  175. continue
  176. if t in ["ModelName"]:
  177. result["Model"] = c
  178. elif t in ["CognosSource", "CognosPackageDatasourceConnection"]:
  179. result["Connections"].append(c)
  180. elif t in ["DataSource"]:
  181. result["DataSources"].append(c)
  182. elif t in ["OrgName"]:
  183. result["DataSources"][-1]["Columns"].append(c)
  184. elif t in ["Dimension"]:
  185. level_ids = []
  186. result["Dimensions"].append(c)
  187. elif t in ["Root"]:
  188. result["Dimensions"][-1]["Root"] = c
  189. elif t in ["Drill"]:
  190. result["Dimensions"][-1]["Root"]["Drill"] = c
  191. elif t in ["Levels"]:
  192. current = c
  193. level_ids.append(c["ID"])
  194. result["Dimensions"][-1]["Levels"].append(c)
  195. elif t in ["Category"]:
  196. if c["Levels"] in level_ids[0:2]:
  197. result["Dimensions"][-1]["Categories"].append(c)
  198. elif t in ["SpecialCategory"]:
  199. result["Dimensions"][-1]["SpecialCategories"].append(c)
  200. elif t in ["Measure"]:
  201. current = c
  202. result["Measures"].append(c)
  203. elif t in ["Associations"]:
  204. c["Parent"] = current["ID"]
  205. current["Associations"].append(c)
  206. for ds in result["DataSources"]:
  207. for col in ds["Columns"]:
  208. if col["Column"] == c["AssociationReferenced"]:
  209. col["Associations"].append(c)
  210. elif t in ["Signon"]:
  211. result["Signons"].append(c)
  212. elif t in ["Cube"]:
  213. result["Cubes"].append(c)
  214. elif t in ["CustomView"]:
  215. result["CustomViews"].append(c)
  216. elif t in ["CustomViewChildList"]:
  217. for cv in result["CustomViews"]:
  218. if cv["ID"] == c["ID"]:
  219. cv["ChildList"] = c
  220. elif t in ["SecurityNameSpace"]:
  221. result["Security"].append(c)
  222. elif t in ["SecurityObject"]:
  223. result["Security"][-1]["Objects"].append(c)
  224. # else:
  225. # print(t, c)
  226. return result
  227. def build_query(datasource):
  228. table = datasource["Name"]
  229. suffix = "_fm" if datasource["SourceType"] == "CognosSourceQuery" else "_imr"
  230. table_name = f"[staging].[{table}{suffix}]"
  231. view_name = f"[load].[{table}]"
  232. columns = ",\n ".join([extract_column(c) for c in datasource["Columns"]])
  233. return f"CREATE VIEW {view_name}\nAS\nSELECT {columns} \nFROM {table_name}\nGO\n\n"
  234. def extract_column(col):
  235. name = col["Name"]
  236. if "]." in name:
  237. name = name.split("].")[-1]
  238. alias = col["Column"]
  239. is_used = "" if len(col["Associations"]) > 0 else "--"
  240. return f'{is_used}{name} AS "{alias}"'
  241. def convert_file(filename):
  242. with open(filename, "r", encoding="latin-1") as frh:
  243. mdl_str = frh.read()
  244. mdl_str = prepare_mdl_str(mdl_str)
  245. mdl_blocks = mdl_str.split("\n\n")
  246. converted = [convert_block(b) for b in mdl_blocks]
  247. grouped = group_mdl_blocks(converted)
  248. with open(filename[:-4] + "_ori.json", "w") as fwh:
  249. json.dump(grouped, fwh, indent=2)
  250. # yaml.safe_dump(result, open(filename[:-4] + ".yaml", "w"))
  251. without_ids = remove_ids(grouped)
  252. with open(filename[:-4] + ".json", "w") as fwh:
  253. json.dump(without_ids, fwh, indent=2)
  254. queries = [build_query(ds) for ds in grouped["DataSources"]]
  255. with open(filename[:-4] + "_queries.txt", "w", encoding="latin-1") as fwh:
  256. fwh.writelines(queries)
  257. def convert_folder(base_dir):
  258. files = sorted([(f.stat().st_mtime, f) for f in Path(base_dir).rglob("*.mdl")])
  259. for timestamp, filename in files:
  260. convert_file(str(filename))
  261. if __name__ == "__main__":
  262. # convert_file("data/S_Offene_Auftraege.mdl")
  263. # convert_file("data/F_Belege_SKR_SKR_Boettche.mdl")
  264. convert_folder("data/")