inspect_files.py

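"""Inspect an unzipped system snapshot: collect metadata about models,
Framework Manager sources, datasources, database tables, and cubes, and
dump the result to a JSON file.
"""
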
# from itertools import chain
import json
import re
import sys
from collections import defaultdict
from pathlib import Path

import pandas as pd
from bs4 import BeautifulSoup

from config import Config


def get_path_info(base_dir):
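    """Read logs/status/path_info.csv into a dict of file metadata,
    keyed by lowercased filename."""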
    path_info_df = pd.read_csv(base_dir + '/logs/status/path_info.csv', sep=';',
                               encoding='latin-1', converters={'process': str})
    path_info_df.rename(columns={'name': 'filename'}, inplace=True)
    path_info_df['filename'] = path_info_df['filename'].str.lower()
    path_info_df.set_index('filename', inplace=True, drop=False)
    return path_info_df.to_dict(orient='index')


def get_cubes(base_dir, cfg: Config, path_info, cubes_models):
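    """For every versioned cube found in path_info, collect the deployed
    .mdc, the .mdc in cube_out, the model that builds it, and an
    (initially empty) error list."""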
    ver_files = [k for k in path_info.keys() if re.search(r'\\cubes\\.*\.ver', k)]
    cubes = {}
    for file in ver_files:
        match = re.search(r'\\cubes\\(.*)__\d+\.ver$', file)
        if match is None:  # skip .ver files not named <cube>__<number>.ver
            continue
        cube_name = match[1]
        cube_subdir = f"{file[:-4]}\\{cube_name}.mdc"
        cube_out = f"{cfg.system_dir}\\cube_out\\{cube_name}.mdc"
        cubes[cube_name] = {
            'deployed_mdc': path_info[cube_subdir],
            'cube_out_mdc': path_info[cube_out],
            'model': cubes_models[cube_name],
            'errors': []
        }
    return cubes


def get_models(base_dir, cfg: Config, path_info, fm_sources):
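    """Parse the model logs under config\\models: per model, record the
    Framework Manager package and sources (if any), the resolved
    datasources, the target cube, and the related log/model files."""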
    models = {}
    for file in Path(base_dir + '\\config\\models').glob('*.log'):
        with open(file, 'r') as frh:
            model_infos = frh.read().lower().replace('"', '').replace(',', '').split('--')
        datasources = model_infos[1].split('\n')
        datasources = [d for d in datasources if d != '']
        fm_src = []
        fm_package = None
        if '[' in datasources[0]:
            # Framework Manager package: resolve its sources to datasources
            fm_package = datasources.pop(0).upper()
            fm_src = datasources
            datasources = []
            for src in fm_src:
                if src in fm_sources:
                    datasources.extend(fm_sources[src])
            datasources = sorted(set(datasources))
        cube = re.search(r'\\cube_out\\(.*)\.mdc', model_infos[0])[1]
        # [:-8] strips an 8-char '.xxx.log' suffix, [:-4] only '.log'
        # (assumed naming scheme of the model log files)
        models[file.name[:-8].lower()] = {
            'framework_manager': 'J' if fm_package else 'N',  # J/N = yes/no
            'fm_package': fm_package,
            'fm_sources': fm_src,
            'datasources': datasources,
            'cube': cube,
            'logfile_tasks': path_info.get(cfg.portal_dir + '\\tasks\\logs\\' + file.name[:-8] + '.log', {'mtime': '0'}),
            'logfile_system': path_info.get(cfg.system_dir + '\\logs\\' + file.name[:-8] + '.log', {'mtime': '0'}),
            'model_file': path_info.get(cfg.system_dir + '\\models\\' + file.name[:-4], {'mtime': '0'}),
            'model_file_filled': path_info.get(cfg.system_dir + '\\models\\gefuellt\\' + file.name[:-4], {'mtime': '0'}),
        }
    return models


def get_database_info(base_dir, cfg: Config):
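    """Read logs/status/db_info.csv and return the rows of database 'GC',
    keyed by lowercased table name."""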
    db_info_df = pd.read_csv(base_dir + '/logs/status/db_info.csv', sep=';',
                             encoding='latin-1')
    db_info_df = db_info_df[db_info_df['DatabaseName'] == 'GC']
    db_info_df['table'] = db_info_df['TableName'].str.lower()
    db_info_df.set_index('table', inplace=True)
    return db_info_df.to_dict(orient='index')


def get_fm_sources(base_dir, cfg):
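    """Parse config\\fm\\model.xml and map every query subject in the
    [Schnittstelle] (interface) namespace to the [Import] fields its
    query items ultimately reference."""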
    with open(base_dir + '\\config\\fm\\model.xml', 'r') as frh:
        bs = BeautifulSoup(frh, 'xml')
    sources = defaultdict(list)
    for item in bs.find_all('queryItem'):
        p = item.parent.parent.find('name').string
        if item.parent.parent.name == 'folder':
            p = item.parent.parent.parent.find('name').string
        parent = "[{0}].[{1}]".format(
            p,
            item.parent.find('name').string
        )
        src = ''
        exp = ''
        if item.expression:
            if item.expression.refobj:
                src = item.expression.refobj.string
            else:
                exp = item.expression.string
        elif item.externalName:
            exp = item.externalName.string
        sources[parent].append((item.find('name').string, src, exp))
    interface = {}
    for k, fields in sources.items():
        if '[Schnittstelle]' not in k:  # only interface query subjects
            continue
        key = k.split('.')[-1][1:-1].lower()
        links = []
        for field in fields:
            links.append(follow_links(sources, field, ''))
        interface[key] = sorted(set(
            re.search(r'\.\[(.*)\]$', e)[1].lower()
            for e in links if '[Import]' in e
        ))
    return interface


def follow_links(sources, field, value):
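    """Recursively follow a query item's reference chain through
    `sources` until a plain expression or an unknown key is reached."""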
    if field[1] == '':
        if field[2] == field[0]:
            return value
        return field[2]
    match = re.search(r'(\[.*\]\.\[.*\])\.\[(.*)\]', field[1])
    key1 = match[1]
    val1 = match[2]
    if key1 in sources:
        for field2 in sources[key1]:
            if field2[0] != val1:
                continue
            return follow_links(sources, field2, key1)
    return key1


def get_datasources(base_dir, cfg, path_info):
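    """For every *.imr report below an iqd folder, record the matching
    .imr/.iqd/.csv files from path_info (mtime '0' when missing) plus
    any duplicate .imr hits."""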
    all_datasources = set([re.search(r'\\iqd\\.*\\(.*)\.imr', k)[1]
                           for k in path_info.keys()
                           if re.search(r'\\iqd\\.*\\.*\.imr', k)])
    datasources = {}
    for ds in all_datasources:
        ds_search = f'\\{ds}.imr'
        imr_files = [k for k in path_info.keys()
                     if re.search(r'\\iqd\\.*\.imr', k) and ds_search in k
                     and 'austausch' not in k]
        if len(imr_files) == 0:
            imr_file = '0.imr'  # placeholder when no .imr file was found
        else:
            imr_file = imr_files.pop(0)
        datasources[ds] = {
            'imr_file': path_info.get(imr_file, {'mtime': '0'}),
            'iqd_file': path_info.get(imr_file[:-4] + '.iqd', {'mtime': '0'}),
            'csv_file': path_info.get(cfg.system_dir + '\\export\\' + ds + '.csv', {'mtime': '0'}),
            'csv_file_iqd_folder': path_info.get(imr_file[:-4] + '.csv', {'mtime': '0'}),
            'duplicates': imr_files
        }
    return datasources


def cubes_to_models(models):
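    """Map each cube to the name of a model that builds it. Models are
    sorted by their log/model-file mtimes, so when several models build
    the same cube, the most recently touched one wins."""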
    models_sort = sorted([(v.get('logfile_tasks')['mtime'],
                           v.get('logfile_system')['mtime'],
                           v.get('model_file_filled')['mtime'],
                           v.get('model_file')['mtime'],
                           k, v['cube']) for k, v in models.items()])
    result = {}
    for m in models_sort:
        result[m[5]] = m[4]  # cube -> model name; later (newer) entries win
    return result


def main():
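    """Collect all file, model, database, datasource, and cube
    information for one unzipped snapshot and write it to export.json."""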
    base_dir = 'app/temp/unzipped/loeffler-c11_2022-07-08_112547'
    cfg = Config(str(Path(base_dir + '\\gaps.ini').absolute()))
    # File list
    path_info = get_path_info(base_dir)
    # random_bat_file = [k for k in path_info.keys() if re.search(r'\\Tasks\\.*\.bat', k)][0]
    # portal_dir = re.search(r'(.*)\\Tasks\\.*\.bat', random_bat_file)[1]
    # print(path_info)
    # List of all cubes
    result = {}
    # Models and datasources
    result['fm_sources'] = get_fm_sources(base_dir, cfg)
    result['models'] = get_models(base_dir, cfg, path_info, result['fm_sources'])
    result['database'] = get_database_info(base_dir, cfg)
    result['datasources'] = get_datasources(base_dir, cfg, path_info)
    cubes_models = cubes_to_models(result['models'])
    result['cubes'] = get_cubes(base_dir, cfg, path_info, cubes_models)
    # Cubes up to date?
    # Everything else up to date?
    with open('app/temp/logs/export.json', 'w') as fwh:
        json.dump(result, fwh, indent=2)


if __name__ == '__main__':
    main()