mdl_convert.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607
  1. import json
  2. from pathlib import Path
  3. import re
  4. id_lookup = {}
  5. def get_field(field, block, num=1):
  6. if num == 2:
  7. res = re.search(field + r' (\d+) "([^"]*)" ', block)
  8. if res:
  9. return res[1], res[2]
  10. return "0", ""
  11. res = re.search(field + r' "([^"]*)" ', block)
  12. if res:
  13. return res[1]
  14. res = re.search(field + r" (\w*) ", block)
  15. if res:
  16. return res[1]
  17. return ""
  18. def ignore(block):
  19. s = block.split(" ")
  20. return (s[0], block)
  21. def model_name(block):
  22. return {
  23. "Type": "ModelName",
  24. "Name": get_field("Name", block),
  25. "ModelCodePage": get_field("ModelCodePage", block),
  26. "AutoAccess": get_field("AutoAccess", block),
  27. "UpdateCycle": get_field("UpdateCycle", block),
  28. "ModelStamp": get_field("ModelStamp", block),
  29. "Version": get_field("Version", block),
  30. "ModelCategoryOrderDefault": get_field("ModelCategoryOrderDefault", block),
  31. "ModelOrderedByDefault": get_field("ModelOrderedByDefault", block),
  32. "ModelNonRollupHierarchies": get_field("ModelNonRollupHierarchies", block),
  33. }
  34. def cognos_source(block):
  35. id, name = get_field("CognosSource", block, 2)
  36. return {
  37. "Type": "CognosSource",
  38. "ID": id,
  39. "Name": name,
  40. "SourceType": get_field("SourceType", block),
  41. "SourcePath": get_field("SourcePath", block),
  42. "PackageTimeStamp": get_field("PackageTimeStamp", block),
  43. }
  44. def cognos_package_datasource_connection(block):
  45. id, name = get_field("CognosPackageDatasourceConnection", block, 2)
  46. prs_id, prs_name = get_field("PackageReportSource", block, 2)
  47. return {
  48. "Type": "CognosPackageDatasourceConnection",
  49. "ID": id,
  50. "Name": name,
  51. "PackageReportSource": {
  52. "ID": prs_id,
  53. "Name": prs_name,
  54. },
  55. "CognosPackageConnection": get_field("CognosPackageConnection", block),
  56. "CognosPackageConnectionSignon": get_field(
  57. "CognosPackageConnectionSignon", block
  58. ),
  59. "CognosPackageAlwaysUseTransformerSignon": get_field(
  60. "CognosPackageAlwaysUseTransformerSignon", block
  61. ),
  62. "CognosPackagePowercubeSource": get_field(
  63. "CognosPackagePowercubeSource", block
  64. ),
  65. }
  66. def data_source(block):
  67. id, name = get_field("DataSource", block, 2)
  68. prs_id, prs_name = get_field("PackageReportSource", block, 2)
  69. return {
  70. "Type": "DataSource",
  71. "ID": id,
  72. "Name": name,
  73. "Separator": get_field("Separator", block),
  74. "SourceType": get_field("SourceType", block),
  75. "CharacterSet": get_field("CharacterSet", block),
  76. "DecimalSep": get_field("DecimalSep", block),
  77. "Thousandsep": get_field("Thousandsep", block),
  78. "HasColumns": get_field("Columns", block),
  79. "Timing": get_field("Timing", block),
  80. "PackageReportSource": {
  81. "ID": prs_id,
  82. "Name": prs_name,
  83. },
  84. "AutoSummary": get_field("AutoSummary", block),
  85. "SetCurrent": get_field("SetCurrent", block),
  86. "ServerSource": get_field("ServerSource", block),
  87. "Speed": get_field("Speed", block),
  88. "Presummarized": get_field("Presummarized", block),
  89. "StreamExtractSize": get_field("StreamExtractSize", block),
  90. "Columns": [],
  91. }
  92. def org_name(block):
  93. id, name = get_field("OrgName", block, 2)
  94. return {
  95. "Type": "OrgName",
  96. "ID": id,
  97. "Name": name,
  98. "Origin": get_field("Origin", block),
  99. "Offset": get_field("Offset", block),
  100. "Column": get_field("Column", block),
  101. "Storage": get_field("Storage", block),
  102. "Scale": get_field("Scale", block),
  103. "Size": get_field("Size", block),
  104. "Decimals": get_field("Decimals", block),
  105. "Class": get_field("Class", block),
  106. "InputScale": get_field("InputScale", block),
  107. "TimeArray": get_field("TimeArray", block),
  108. "ColSrcType": get_field("ColSrcType", block),
  109. "Associations": [],
  110. }
  111. def dimension(block):
  112. id, name = get_field("Dimension", block, 2)
  113. return {
  114. "Type": "Dimension",
  115. "ID": id,
  116. "Name": name,
  117. "DimType": get_field("DimType", block),
  118. "EarliestDate": get_field("EarliestDate", block),
  119. "LatestDate": get_field("LatestDate", block),
  120. "ManualPeriods": get_field("ManualPeriods", block),
  121. "DaysInWeek": get_field("DaysInWeek", block),
  122. "NewCatsLock": get_field("NewCatsLock", block),
  123. "ExcludeAutoPartitioning": get_field("ExcludeAutoPartitioning", block),
  124. "DimDefaultCategory": get_field("DimDefaultCategory", block),
  125. "Root": {},
  126. "Levels": [],
  127. "Categories": [],
  128. "SpecialCategories": [],
  129. }
  130. def root(block):
  131. id, name = get_field("Root", block, 2)
  132. return {
  133. "Type": "Root",
  134. "ID": id,
  135. "Name": name,
  136. "Inclusion": get_field("Inclusion", block),
  137. "Lastuse": get_field("Lastuse", block),
  138. "Date": get_field("Date", block),
  139. "Filtered": get_field("Filtered", block),
  140. "Suppressed": get_field("Suppressed", block),
  141. "Sign": get_field("Sign", block),
  142. "HideValue": get_field("HideValue", block),
  143. "IsKeyOrphanage": get_field("IsKeyOrphanage", block),
  144. "IsTruncated": get_field("IsTruncated", block),
  145. "Blanks": get_field("Blanks", block),
  146. "Drill": {},
  147. }
  148. def levels(block):
  149. id, name = get_field("Levels", block, 2)
  150. return {
  151. "Type": "Levels",
  152. "ID": id,
  153. "Name": name,
  154. "Blanks": get_field("Blanks", block),
  155. "Inclusion": get_field("Inclusion", block),
  156. "DateFunction": get_field("DateFunction", block),
  157. "Generate": get_field("Generate", block),
  158. "RefreshLabel": get_field("RefreshLabel", block),
  159. "RefreshDescription": get_field("RefreshDescription", block),
  160. "RefreshShortName": get_field("RefreshShortName", block),
  161. "NewCatsLock": get_field("NewCatsLock", block),
  162. "CatLabFormat": get_field("CatLabFormat", block),
  163. "Timerank": get_field("Timerank", block),
  164. "UniqueCategories": get_field("UniqueCategories", block),
  165. "UniqueMove": get_field("UniqueMove", block),
  166. "Associations": [],
  167. }
  168. def category(block):
  169. id, name = get_field("Category", block, 2)
  170. return {
  171. "Type": "Category",
  172. "ID": id,
  173. "Name": name,
  174. "Parent": get_field("Parent", block),
  175. "Levels": get_field("Levels", block),
  176. "OrderBy": get_field("OrderBy", block),
  177. "Value": get_field("Value", block),
  178. "Label": get_field("Label", block),
  179. "Lastuse": get_field("Lastuse", block),
  180. "SourceValue": get_field("SourceValue", block),
  181. "Date": get_field("Date", block),
  182. "Filtered": get_field("Filtered", block),
  183. "Suppressed": get_field("Suppressed", block),
  184. "Sign": get_field("Sign", block),
  185. "HideValue": get_field("HideValue", block),
  186. "IsKeyOrphanage": get_field("IsKeyOrphanage", block),
  187. "IsTruncated": get_field("IsTruncated", block),
  188. "Blanks": get_field("Blanks", block),
  189. }
  190. def special_category(block):
  191. id, name = get_field("SpecialCategory", block, 2)
  192. return {
  193. "Type": "SpecialCategory",
  194. "ID": id,
  195. "Name": name,
  196. "Parent": get_field("Parent", block),
  197. "Levels": get_field("Levels", block),
  198. "Lastuse": get_field("Lastuse", block),
  199. "Rollup": get_field("Rollup", block),
  200. "TimeAggregate": get_field("TimeAggregate", block),
  201. "RunningPeriods": get_field("RunningPeriods", block),
  202. "TargetOffset": get_field("TargetOffset", block),
  203. "TargetLevel": get_field("TargetLevel", block),
  204. "ContextOffset": get_field("ContextOffset", block),
  205. "DateDrill": get_field("DateDrill", block),
  206. "Primary": get_field("Primary", block),
  207. "Sign": get_field("Sign", block),
  208. }
  209. def map_drills(block):
  210. return {
  211. "Type": "MapDrills",
  212. # "MapDrills MapDrill 1469 "
  213. }
  214. def view_name(block):
  215. id, name = get_field("ViewName", block, 2)
  216. return {
  217. "Type": "ViewName",
  218. "ID": id,
  219. "Name": name,
  220. "ViewType": get_field("Type", block),
  221. "ViewCustomView": get_field("ViewCustomView", block),
  222. }
  223. def associations(block):
  224. id, name = get_field("Associations", block, 2)
  225. return {
  226. "Type": "Associations",
  227. "ID": id,
  228. "Name": name,
  229. "AssociationType": get_field("AssociationType", block),
  230. "AssociationRole": get_field("AssociationRole", block),
  231. "AssociationReferenced": get_field("AssociationReferenced", block),
  232. "SortOrder": get_field("SortOrder", block),
  233. "SortAs": get_field("SortAs", block),
  234. "Parent": "0",
  235. }
  236. def drill(block):
  237. id, name = get_field("Drill", block, 2)
  238. return {
  239. "Type": "Drill",
  240. "ID": id,
  241. "Name": name,
  242. "Label": get_field("Label", block),
  243. "Inclusion": get_field("Inclusion", block),
  244. "Filtered": get_field("Filtered", block),
  245. "Suppressed": get_field("Suppressed", block),
  246. "PrimaryDrill": get_field("PrimaryDrill", block),
  247. "HideValue": get_field("HideValue", block),
  248. "YearBegins": get_field("YearBegins", block),
  249. "PartialWeek": get_field("PartialWeek", block),
  250. "ExtraWeek": get_field("ExtraWeek", block),
  251. "WeekBegins": get_field("WeekBegins", block),
  252. }
  253. def measure(block):
  254. id, name = get_field("Measure", block, 2)
  255. return {
  256. "Type": "Measure",
  257. "ID": id,
  258. "Name": name,
  259. "Missing": get_field("Missing", block),
  260. "IgnoreMissingValue": get_field("IgnoreMissingValue", block),
  261. "Storage": get_field("Storage", block),
  262. "OutPutScale": get_field("OutPutScale", block),
  263. "Decimals": get_field("Decimals", block),
  264. "ReverseSign": get_field("ReverseSign", block),
  265. "IsCurrency": get_field("IsCurrency", block),
  266. "IsFolder": get_field("IsFolder", block),
  267. "Format": get_field("Format", block),
  268. "DrillThrough": get_field("DrillThrough", block),
  269. "Associations": [],
  270. }
  271. def signon(block):
  272. id, name = get_field("Signon", block, 2)
  273. return {
  274. "Type": "Signon",
  275. "ID": id,
  276. "Name": name,
  277. "UserId": get_field("UserId", block),
  278. "PromptForPassword": get_field("PromptForPassword", block),
  279. "EncryptedPW": get_field("EncryptedPW", block),
  280. "AutoLogon": get_field("AutoLogon", block),
  281. "SignonType": get_field("SignonType", block),
  282. }
  283. def dimension_view(block):
  284. id, name = get_field("DimensionView", block, 2)
  285. return {
  286. "Type": "DimensionView",
  287. "ID": id,
  288. "Name": name
  289. # "DimensionView 1463 \"All Categories\" DimensionView 1521 \"All Categories\" DimensionView 1551 \"All Categories\"
  290. # DimensionView 1575 \"All Categories\" DimensionView 1591 \"All Categories\" DimensionView 1651 \"All Categories\"
  291. # DimensionView 1665 \"All Categories\" DimensionView 1693 \"All Categories\" DimensionView 15741 \"All Categories\"
  292. # MeasureInclude 9829 Yes MeasureInclude 10053 Yes MeasureInclude 10309 Yes MeasureInclude 10313 Yes
  293. # MeasureInclude 10317 Yes MeasureInclude 15761 Yes "
  294. }
  295. def allocation_add(block):
  296. # only in DimensionView
  297. return {
  298. "Type": "AllocationAdd",
  299. # "AllocationAdd Measure 9829 Type Default AllocationAdd Measure 10053 Type Default AllocationAdd Measure 10309 Type Default
  300. # AllocationAdd Measure 10313 Type Default AllocationAdd Measure 10317 Type Default AllocationAdd Measure 15761 Type Default "
  301. }
  302. def cube(block):
  303. id, name = get_field("Cube", block, 2)
  304. return {
  305. "Type": "Cube",
  306. "ID": id,
  307. "Name": name,
  308. "MdcFile": get_field("MdcFile", block),
  309. "EncryptedPW": get_field("EncryptedPW", block),
  310. "Status": get_field("Status", block),
  311. "CubeCreation": get_field("CubeCreation", block),
  312. "Optimize": get_field("Optimize", block),
  313. "ConsolidatedRecords": get_field("ConsolidatedRecords", block),
  314. "PartitionSize": get_field("PartitionSize", block),
  315. "PassesNumber": get_field("PassesNumber", block),
  316. "Compress": get_field("Compress", block),
  317. "IncrementalUpdate": get_field("IncrementalUpdate", block),
  318. "ServerCube": get_field("ServerCube", block),
  319. "CubeStamp": get_field("CubeStamp", block),
  320. "CubeCycle": get_field("CubeCycle", block),
  321. "BlockParentTotals": get_field("BlockParentTotals", block),
  322. "Caching": get_field("Caching", block),
  323. "UseAlternateFileName": get_field("UseAlternateFileName", block),
  324. "DeployType": get_field("DeployType", block),
  325. "DeployLocations": get_field("DeployLocations", block),
  326. "DeployToAvailableLocationsAutomatic": get_field(
  327. "DeployToAvailableLocationsAutomatic", block
  328. ),
  329. "DeployCleanupEnabled": get_field("DeployCleanupEnabled", block),
  330. "DeployCleanupNumberOfCubes": get_field("DeployCleanupNumberOfCubes", block),
  331. "DrillThrough": get_field("DrillThrough", block),
  332. "DataSourceSignon": get_field("DataSourceSignon", block),
  333. "PublishEnable": get_field("PublishEnable", block),
  334. "PublishStatus": get_field("PublishStatus", block),
  335. "PublishAllowNullSuppression": get_field("PublishAllowNullSuppression", block),
  336. "PublishAllowMultiEdgeSuppression": get_field(
  337. "PublishAllowMultiEdgeSuppression", block
  338. ),
  339. "PublishAllowAccessToSuppressionOptions": get_field(
  340. "PublishAllowAccessToSuppressionOptions", block
  341. ),
  342. }
  343. def custom_view(block):
  344. id, name = get_field("CustomView", block, 2)
  345. return {
  346. "Type": "CustomView",
  347. "ID": id,
  348. "Name": name,
  349. "DimensionView": [
  350. {"ID": b[0], "Name": b[1]}
  351. for b in re.findall(r'DimensionView (\d+) "([^"]+)"', block)
  352. ],
  353. "MeasureInclude": [
  354. {"ID": b[0], "Name": b[1]}
  355. for b in re.findall(r"MeasureInclude (\d+) (\w+) ", block)
  356. ],
  357. "ChildList": {},
  358. }
  359. def custom_view_child_list(block):
  360. id, name = get_field("CustomViewChildList", block, 2)
  361. return {
  362. "Type": "CustomViewChildList",
  363. "ID": id,
  364. "Name": name,
  365. }
  366. def security_namespace(block):
  367. id, name = get_field("SecurityNameSpace", block, 2)
  368. return {
  369. "Type": "SecurityNameSpace",
  370. "ID": id,
  371. "Name": name,
  372. "SecurityNameSpaceCAMID": get_field("SecurityNameSpaceCAMID", block),
  373. "Objects": [],
  374. }
  375. def security_object(block):
  376. id, name = get_field("SecurityObject", block, 2)
  377. return {
  378. "Type": "SecurityObject",
  379. "ID": id,
  380. "Name": name,
  381. "SecurityObjectDisplayName": get_field("SecurityObjectDisplayName", block),
  382. "SecurityObjectType": get_field("SecurityObjectType", block),
  383. "CustomViewList": get_field("CustomViewList", block),
  384. }
  385. CONVERSION = {
  386. "Name": model_name,
  387. "CognosSource": cognos_source,
  388. "CognosPackageDatasourceConnection": cognos_package_datasource_connection,
  389. "DataSource": data_source,
  390. "OrgName": org_name,
  391. "Dimension": dimension,
  392. "Root": root,
  393. "Drill": drill,
  394. "Levels": levels,
  395. "Associations": associations,
  396. "Category": category,
  397. "SpecialCategory": special_category,
  398. "MapDrills": map_drills,
  399. "ViewName": view_name,
  400. "Measure": measure,
  401. "Signon": signon,
  402. "Cube": cube,
  403. "CustomView": custom_view,
  404. "CustomViewChildList": custom_view_child_list,
  405. "SecurityNameSpace": security_namespace,
  406. "SecurityObject": security_object,
  407. }
  408. def convert_block(block):
  409. block = block.replace("\n", "")
  410. block_type = block.split(" ")[0]
  411. # block_pair = re.findall(r'("[^"]+"|\w+) ', block)
  412. # return (block_type, list(zip((block_pair[::2], block_pair[1::2]))))
  413. if block_type in CONVERSION:
  414. return CONVERSION[block_type](block)
  415. return {"Type": block_type}
  416. def remove_ids(nested):
  417. nested.pop("ID", "")
  418. nested.pop("DateDrill", "")
  419. nested.pop("Primary", "")
  420. nested.pop("Lastuse", "")
  421. for col in ["Parent", "Levels", "CustomViewList"]:
  422. if col not in nested:
  423. continue
  424. if col == "Levels" and (
  425. isinstance(nested["Levels"], list) or nested["Levels"] == "0"
  426. ):
  427. continue
  428. nested[col] = id_lookup.get(nested[col], {}).get("Name", "undefined")
  429. for child in nested.values():
  430. if isinstance(child, dict):
  431. remove_ids(child)
  432. if isinstance(child, list):
  433. for entry in child:
  434. remove_ids(entry)
  435. return nested
  436. def convert_file(filename):
  437. with open(filename, "r", encoding="latin-1") as frh:
  438. mdl_str = frh.read()
  439. mdl_str = re.sub(r"\n+", "\n", mdl_str)
  440. mdl_str = re.sub(r'\nLevels (\d+ [^"])', r"Levels \1", mdl_str)
  441. mdl_str = re.sub(r" Associations ", " \nAssociations ", mdl_str)
  442. mdl_str = re.sub(r'([^ ])""', r"\1'", mdl_str)
  443. mdl_str = re.sub(r'""([^ ])', r"'\1", mdl_str)
  444. tags = "|".join(list(CONVERSION.keys()))
  445. mdl_str = re.sub(r"\n(" + tags + r") ", r"\n\n\1 ", mdl_str)
  446. mdl_blocks = mdl_str.split("\n\n")
  447. converted = [convert_block(b) for b in mdl_blocks]
  448. result = {
  449. "Model": {},
  450. "Connections": [],
  451. "DataSources": [],
  452. "Dimensions": [],
  453. "Measures": [],
  454. "Signons": [],
  455. "CustomViews": [],
  456. "Security": [],
  457. "Cubes": [],
  458. }
  459. types = [c["Type"] for c in converted]
  460. ids = [c.get("ID", "0") for c in converted]
  461. id_lookup.update(dict(zip(ids, converted)))
  462. current = None
  463. level_ids = []
  464. for c, t in zip(converted, types):
  465. if t in [""]:
  466. continue
  467. if (
  468. t in ["Category", "SpecialCategory"]
  469. and result["Dimensions"][-1]["Name"] == "Zeit"
  470. ):
  471. if t == "Category" or c["Name"][0].isnumeric():
  472. continue
  473. if t in ["ModelName"]:
  474. result["Model"] = c
  475. elif t in ["CognosSource", "CognosPackageDatasourceConnection"]:
  476. result["Connections"].append(c)
  477. elif t in ["DataSource"]:
  478. result["DataSources"].append(c)
  479. elif t in ["OrgName"]:
  480. result["DataSources"][-1]["Columns"].append(c)
  481. elif t in ["Dimension"]:
  482. level_ids = []
  483. result["Dimensions"].append(c)
  484. elif t in ["Root"]:
  485. result["Dimensions"][-1]["Root"] = c
  486. elif t in ["Drill"]:
  487. result["Dimensions"][-1]["Root"]["Drill"] = c
  488. elif t in ["Levels"]:
  489. current = c
  490. level_ids.append(c["ID"])
  491. result["Dimensions"][-1]["Levels"].append(c)
  492. elif t in ["Category"]:
  493. if c["Levels"] in level_ids[0:2]:
  494. result["Dimensions"][-1]["Categories"].append(c)
  495. elif t in ["SpecialCategory"]:
  496. result["Dimensions"][-1]["SpecialCategories"].append(c)
  497. elif t in ["Measure"]:
  498. current = c
  499. result["Measures"].append(c)
  500. elif t in ["Associations"]:
  501. c["Parent"] = current["ID"]
  502. current["Associations"].append(c)
  503. for ds in result["DataSources"]:
  504. for col in ds["Columns"]:
  505. if col["Column"] == c["AssociationReferenced"]:
  506. col["Associations"].append(c)
  507. elif t in ["Signon"]:
  508. result["Signons"].append(c)
  509. elif t in ["Cube"]:
  510. result["Cubes"].append(c)
  511. elif t in ["CustomView"]:
  512. result["CustomViews"].append(c)
  513. elif t in ["CustomViewChildList"]:
  514. for cv in result["CustomViews"]:
  515. if cv["ID"] == c["ID"]:
  516. cv["ChildList"] = c
  517. elif t in ["SecurityNameSpace"]:
  518. result["Security"].append(c)
  519. elif t in ["SecurityObject"]:
  520. result["Security"][-1]["Objects"].append(c)
  521. # else:
  522. # print(t, c)
  523. json.dump(result, open(filename[:-4] + "_ori.json", "w"), indent=2)
  524. # yaml.safe_dump(result, open(filename[:-4] + ".yaml", "w"))
  525. result = remove_ids(result)
  526. json.dump(result, open(filename[:-4] + ".json", "w"), indent=2)
  527. def convert_folder(base_dir):
  528. files = sorted([(f.stat().st_mtime, f) for f in Path(base_dir).rglob("*.mdl")])
  529. for timestamp, filename in files:
  530. convert_file(str(filename))
  531. if __name__ == "__main__":
  532. # convert_file("data/S_Offene_Auftraege.mdl")
  533. # convert_file("data/F_Belege_SKR_SKR_Boettche.mdl")
  534. convert_folder("data/")