import.py 5.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117
  1. import pandas as pd
  2. from sqlalchemy import create_engine, inspect
  3. cfg = {
  4. 'csv_file': 'CARLO.csv',
  5. 'clients': {'1': 'M und S Fahrzeughandel GmbH'},
  6. 'filter': {'01.01.2018', '2019-01-01'},
  7. 'source_dsn': {'user': 'sa', 'pass': 'Mffu3011#', 'server': 'GC-SERVER1\\GLOBALCUBE', 'database': 'DE0017', 'schema': 'dbo'},
  8. 'target_dsn': {'user': 'sa', 'pass': 'Mffu3011#', 'server': 'GC-SERVER1\\GLOBALCUBE', 'database': 'CARLO2', 'schema': 'import'},
  9. 'stage_dir': 'C:\\GlobalCube\\System\\CARLO\\Export\\stage'
  10. }
  11. def db_import(select_query, source_db, current_table, target_db, target_schema):
  12. pd.read_sql(select_query, source_db).to_sql(current_table['target'], target_db, schema=target_schema, index=False, if_exists='append')
  13. def conn_string(dsn):
  14. return f"mssql+pyodbc://{dsn['user']}:{dsn['pass']}@{dsn['server']}/{dsn['database']}?driver=SQL+Server+Native+Client+11.0"
  15. def conn_params(dsn):
  16. return f"-S {dsn['server']} -d {dsn['database']} -U {dsn['user']} -P {dsn['pass']}"
  17. df = pd.read_csv(cfg['csv_file'], sep=';', encoding='latin-1')
  18. config = df[df['target'].notnull()]
  19. print(config.head())
  20. source_db = create_engine(conn_string(cfg['source_dsn']))
  21. source_insp = inspect(source_db)
  22. source_tables = source_insp.get_table_names(schema=cfg['source_dsn']['schema'])
  23. source_tables_prefix = set([t.split('$')[0] for t in source_tables if '$' in t])
  24. print(source_tables_prefix)
  25. target_db = create_engine(conn_string(cfg['target_dsn']))
  26. target_insp = inspect(target_db)
  27. target_tables = target_insp.get_table_names(schema=cfg['target_dsn']['schema'])
  28. for index, current_table in config.iterrows():
  29. with open(cfg['stage_dir'] + "\\batch\\" + current_table['target'] + ".bat", "w", encoding="cp850") as f:
  30. f.write("@echo off \n")
  31. f.write("rem ==" + current_table['target'] + "==\n")
  32. if not current_table['target'] in target_tables:
  33. f.write(f"echo Ziel-Tabelle '{current_table['target']}' existiert nicht!\n")
  34. continue
  35. f.write(f"del {cfg['stage_dir']}\\{current_table['target']}*.* /Q /F >nul 2>nul \n")
  36. f.write(f"sqlcmd.exe {conn_params(cfg['target_dsn'])} -p " +
  37. f"-Q \"TRUNCATE TABLE [{cfg['target_dsn']['schema']}].[{current_table['target']}]\" \n")
  38. target_insp_cols = target_insp.get_columns(current_table['target'], schema=cfg['target_dsn']['schema'])
  39. target_columns_list = [col['name'] for col in target_insp_cols]
  40. target_columns = set(target_columns_list)
  41. for client_db, prefix in cfg['clients'].items():
  42. source_table = current_table['source'].format(prefix)
  43. if source_table not in source_tables:
  44. f.write(f"echo Quell-Tabelle '{source_table}' existiert nicht!\n")
  45. continue
  46. stage_csv = f"{cfg['stage_dir']}\\{current_table['target']}_{client_db}.csv"
  47. if not pd.isnull(current_table['query']):
  48. select_query = current_table['query'].format(prefix, cfg['filter'][0], cfg['filter'][1])
  49. else:
  50. select_query = f"SELECT T1.* FROM [{cfg['source_dsn']['schema']}].[{source_table}] T1 "
  51. if not pd.isnull(current_table['filter']):
  52. select_query += " WHERE " + current_table['filter'].format("", cfg['filter'][0], cfg['filter'][1])
  53. source_insp_cols = source_insp.get_columns(source_table)
  54. source_columns = set([col['name'] for col in source_insp_cols])
  55. intersect = source_columns.intersection(target_columns)
  56. # print("Auf beiden Seiten: " + ";".join(intersect))
  57. diff1 = source_columns.difference(target_columns)
  58. if len(diff1) > 0:
  59. f.write("rem Nur in Quelle: " + ";".join(diff1) + "\n")
  60. diff2 = target_columns.difference(source_columns)
  61. if "Client_DB" not in diff2:
  62. f.write("echo Spalte 'Client_DB' fehlt!\n")
  63. continue
  64. diff2.remove("Client_DB")
  65. if len(diff2) > 0:
  66. f.write("rem Nur in Ziel: " + ";".join(diff2) + "\n")
  67. # select_columns = "T1.[" + "], T1.[".join(intersect) + "],"
  68. select_columns = ""
  69. for col in target_columns_list:
  70. if col in intersect:
  71. select_columns += "T1.[" + col + "], "
  72. elif col == "Client_DB":
  73. select_columns += "'" + client_db + "' as \\\"Client_DB\\\", "
  74. else:
  75. select_columns += "'' as \\\"" + col + "\\\", "
  76. select_query = select_query.replace("T1.*", select_columns[:-2])
  77. select_query = select_query.replace("%", "%%") # batch-Problem
  78. # print(select_query)
  79. f.write(f"bcp \"{select_query}\" queryout \"{stage_csv}\" {conn_params(cfg['source_dsn'])} " +
  80. f"-c -C 65001 -e \"{stage_csv[:-4]}.queryout.log\" > \"{stage_csv[:-4]}.bcp1.log\" \n")
  81. f.write(f"type \"{stage_csv[:-4]}.bcp1.log\" | findstr -v \"1000\" \n")
  82. f.write(f"bcp [{cfg['target_dsn']['schema']}].[{current_table['target']}] in \"{stage_csv}\" " +
  83. f"-c -C 65001 {conn_params(cfg['target_dsn'])} -e \"{stage_csv[:-4]}.in.log\" > \"{stage_csv[:-4]}.bcp2.log\" \n")
  84. f.write(f"type \"{stage_csv[:-4]}.bcp2.log\" | findstr -v \"1000\" \n")
  85. with open(cfg['stage_dir'] + "\\batch\\_all.bat", "w", encoding="cp850") as f:
  86. f.write("@echo off & cd /d %~dp0 \n")
  87. f.write(f"del {cfg['stage_dir']}\\*.* /Q /F >nul 2>nul \n\n")
  88. for index, current_table in config.iterrows():
  89. f.write("echo ==" + current_table['target'] + "==\n")
  90. f.write("echo " + current_table['target'] + " >CON \n")
  91. f.write("call " + current_table['target'] + ".bat\n\n")