db_create.py

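"""Generate Windows batch files that copy tables from a source MSSQL
database into a target database via bcp.

For every row of a control CSV this writes a <target>.bat that exports
the source table per client into a stage CSV and bulk-loads it into the
target table, plus a master batch file _<name>.bat that calls them all.
"""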
import json
from collections import namedtuple
from pathlib import Path

import pandas as pd
import plac
from sqlalchemy import create_engine, inspect
from sqlalchemy.types import VARCHAR

from database import bcp_conn_params, conn_string
DbCreateConfig = namedtuple('DbCreateConfig', 'name csv_file clients filter source_dsn target_dsn stage_dir batch_dir')
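# Fields: name = config name (taken from the JSON file name), csv_file =
# control CSV listing the tables to copy, clients = {Client_DB key: table
# prefix}, filter = [from_date, to_date] passed into query/filter templates,
# source_dsn/target_dsn = connection settings, stage_dir = directory for the
# staged CSV exports, batch_dir = output directory for the generated .bat files.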

cfg = DbCreateConfig(**{
    'name': 'CARLO',
    'csv_file': 'CARLO.csv',
    'clients': {'1': 'M und S Fahrzeughandel GmbH'},
    'filter': ['01.01.2018', '01.01.2019'],
    'source_dsn': {'user': 'sa', 'pass': 'Mffu3011#', 'server': 'GC-SERVER1\\GLOBALCUBE', 'database': 'DE0017', 'driver': 'mssql', 'schema': 'dbo'},
    'target_dsn': {'user': 'sa', 'pass': 'Mffu3011#', 'server': 'GC-SERVER1\\GLOBALCUBE', 'database': 'CARLO2', 'driver': 'mssql', 'schema': 'import'},
    'stage_dir': '..\\temp',
    'batch_dir': '..\\batch'
})
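# Note: this module-level cfg is only an example/default; create() builds its
# own DbCreateConfig from the JSON config file, which shadows this one.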

@plac.pos('config_file', '', type=str)
def create(config_file='dbtools\\OPTIMA.json'):
    cfg_import = json.load(open(config_file, 'r', encoding='ansi'))
    base_dir = Path(config_file).resolve().parent
    cfg_import['name'] = Path(config_file).stem
    # Resolve stage/batch directories given relative to the config file
    if cfg_import['stage_dir'][:2] == '..':
        cfg_import['stage_dir'] = str(base_dir.joinpath(cfg_import['stage_dir']).resolve())
    if cfg_import['batch_dir'][:2] == '..':
        cfg_import['batch_dir'] = str(base_dir.joinpath(cfg_import['batch_dir']).resolve())
    cfg = DbCreateConfig(**cfg_import)
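
    # The control CSV must provide at least the columns 'source', 'target',
    # 'query' and 'filter'; rows without a 'target' value are ignored.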
    df = pd.read_csv(str(base_dir) + "\\" + cfg.csv_file, sep=';', encoding='ansi')
    config = df[df['target'].notnull()]
    print(config.head())

    source_db = create_engine(conn_string(cfg.source_dsn))
    source_insp = inspect(source_db)
    source_tables = source_insp.get_table_names(schema=cfg.source_dsn['schema']) + source_insp.get_view_names(schema=cfg.source_dsn['schema'])
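    # Diagnostics to help fill in the 'clients' mapping: print the distinct
    # '<prefix>$' table prefixes, or the available databases if there are none.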
    source_tables_prefix = dict(enumerate(sorted({t.split('$')[0] for t in source_tables if '$' in t}), 1))
    if len(source_tables_prefix) > 0:
        print(source_tables_prefix)
    else:
        q = source_db.execute('SELECT name FROM sys.databases')
        print([x[0] for x in q.fetchall()])

    target_db = create_engine(conn_string(cfg.target_dsn))
    target_insp = inspect(target_db)
    target_tables = target_insp.get_table_names(schema=cfg.target_dsn['schema'])

    for index, current_table in config.iterrows():
        with open(cfg.batch_dir + "\\" + current_table['target'] + '.bat', 'w', encoding='cp850') as f:
            f.write('@echo off \n')
            f.write('rem ==' + current_table['target'] + '==\n')

            if current_table['target'] not in target_tables:
                f.write(f"echo Ziel-Tabelle '{current_table['target']}' existiert nicht!\n")
                print(f"Ziel-Tabelle '{current_table['target']}' existiert nicht!")
                continue

            # Start clean: delete stale stage files and truncate the target table
            f.write(f"del {cfg.stage_dir}\\{current_table['target']}*.* /Q /F >nul 2>nul \n")
            f.write(f"sqlcmd.exe {bcp_conn_params(cfg.target_dsn)} -p -Q \"TRUNCATE TABLE [{cfg.target_dsn['schema']}].[{current_table['target']}]\" \n")
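
            # Collect the target table's columns; VARCHAR columns are flagged so
            # that their values can be trimmed on export.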
            target_insp_cols = target_insp.get_columns(current_table['target'], schema=cfg.target_dsn['schema'])
            target_columns_list = [col['name'] for col in target_insp_cols]
            target_column_types = [isinstance(col['type'], VARCHAR) for col in target_insp_cols]
            # print(target_column_types)
            if 'CLIENT_DB' in target_columns_list:
                # Rename the client column to 'Client_DB' and move it to the end;
                # move its type flag as well so both lists stay aligned
                pos = target_columns_list.index('CLIENT_DB')
                target_columns_list.pop(pos)
                target_columns_list.append('Client_DB')
                target_column_types.append(target_column_types.pop(pos))
            target_columns = set(target_columns_list)
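
            # One export/import pair per configured client: resolve the
            # client-specific source table, compare the column sets and emit
            # the bcp commands.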
            for client_db, prefix in cfg.clients.items():
                source_table = current_table['source'].format(prefix)
                source_table2 = source_table  # fallback: name without schema/brackets
                if source_table not in source_tables:
                    if '.' in source_table2:
                        source_table2 = source_table2.split('.')[-1]
                    if '[' in source_table2:
                        source_table2 = source_table2[1:-1]
                    if source_table2 not in source_tables:
                        f.write(f"echo Quell-Tabelle '{source_table}' existiert nicht!\n")
                        print(f"Quell-Tabelle '{source_table}' existiert nicht!")
                        continue

                source_insp_cols = source_insp.get_columns(source_table)
                if len(source_insp_cols) == 0:
                    q = source_db.execute(f"SELECT COLUMN_NAME as name FROM information_schema.columns WHERE TABLE_NAME = '{source_table2}'")
                    source_insp_cols = q.fetchall()
                source_columns = {col['name'] for col in source_insp_cols}
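
                # 'query' and 'filter' are format templates: {0} receives the client
                # prefix (query) or '' (filter), {1} and {2} the two dates from
                # cfg.filter.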
                if not pd.isnull(current_table['query']):
                    select_query = current_table['query'].format(prefix, cfg.filter[0], cfg.filter[1])
                elif '.' in source_table:
                    select_query = f"SELECT T1.* FROM {source_table} T1 "
                else:
                    select_query = f"SELECT T1.* FROM [{cfg.source_dsn['schema']}].[{source_table}] T1 "

                if not pd.isnull(current_table['filter']):
                    select_query += " WHERE " + current_table['filter'].format("", cfg.filter[0], cfg.filter[1])

                intersect = source_columns.intersection(target_columns)
                # print("Present on both sides: " + ";".join(intersect))
                diff1 = source_columns.difference(target_columns)
                if len(diff1) > 0:
                    f.write("rem Nur in Quelle: " + ";".join(diff1) + "\n")
                diff2 = target_columns.difference(source_columns)
                if 'Client_DB' not in diff2:
                    f.write("echo Spalte 'Client_DB' fehlt!\n")
                    print(f"Ziel-Tabelle '{current_table['target']}' Spalte 'Client_DB' fehlt!")
                    continue
                diff2.remove('Client_DB')
                if len(diff2) > 0:
                    f.write("rem Nur in Ziel: " + ";".join(diff2) + "\n")
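
                # Build the SELECT list in target column order: shared columns are
                # copied (VARCHARs trimmed), 'Client_DB' is filled with the client
                # key, and target-only columns are exported as empty strings.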
                # select_columns = "T1.[" + "], T1.[".join(intersect) + "],"
                select_columns = ''
                for col, col_type in zip(target_columns_list, target_column_types):
                    if col in intersect:
                        if col_type:
                            select_columns += "trim(T1.[" + col + "]), "
                        else:
                            select_columns += "T1.[" + col + "], "
                    elif col == 'Client_DB':
                        select_columns += "'" + client_db + "' as \\\"Client_DB\\\", "
                    else:
                        select_columns += "'' as \\\"" + col + "\\\", "

                select_query = select_query.replace("T1.*", select_columns[:-2])
                select_query = select_query.replace("%", "%%")  # escape % for the batch file
                stage_csv = f"{cfg.stage_dir}\\{current_table['target']}_{client_db}.csv"
                # insert_query = f"LOAD DATA INFILE '{stage_csv}' INTO TABLE {current_table['target']} FIELDS TERMINATED BY ',' ENCLOSED BY '\"' LINES TERMINATED BY '\n';"
                # print(select_query)
                # Export via "bcp ... queryout" into the stage CSV, then bulk-load it
                # into the target table; findstr -v "1000" keeps the echoed bcp logs
                # short (presumably by dropping the "1000 rows ..." progress lines).
                f.write(f"bcp \"{select_query}\" queryout \"{stage_csv}\" {bcp_conn_params(cfg.source_dsn)} -c -C 65001 -e \"{stage_csv[:-4]}.queryout.log\" > \"{stage_csv[:-4]}.bcp1.log\" \n")
                f.write(f"type \"{stage_csv[:-4]}.bcp1.log\" | findstr -v \"1000\" \n")
                f.write(f"bcp [{cfg.target_dsn['schema']}].[{current_table['target']}] in \"{stage_csv}\" {bcp_conn_params(cfg.target_dsn)} -c -C 65001 -m 1000 -e \"{stage_csv[:-4]}.in.log\" > \"{stage_csv[:-4]}.bcp2.log\" \n")
                f.write(f"type \"{stage_csv[:-4]}.bcp2.log\" | findstr -v \"1000\" \n")
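
    # Master batch file: clears the stage directory, then calls all per-table
    # batch files in sequence.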
    with open(cfg.batch_dir + "\\_" + cfg.name + ".bat", "w", encoding="cp850") as f:
        f.write("@echo off & cd /d %~dp0 \n")
        f.write(f"del {cfg.stage_dir}\\*.* /Q /F >nul 2>nul \n\n")

        for index, current_table in config.iterrows():
            f.write("echo ==" + current_table['target'] + "==\n")
            f.write("echo " + current_table['target'] + " >CON \n")
            f.write("call " + current_table['target'] + ".bat\n\n")


if __name__ == '__main__':
    plac.call(create)
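

# Usage (illustrative; config and CSV names are project-specific):
#   python db_create.py dbtools\CARLO.json
#
# A hypothetical control CSV (';'-separated) for the example config above:
#   source;target;filter;query
#   {0}$Vehicle;Vehicle;;
#   {0}$Invoice;Invoice;T1.[InvoiceDate] >= '{1}';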