
database: formatted with Black

gc-server6, 1 year ago
parent commit 6cc86caffa

+ 5 - 1
.vscode/settings.json

@@ -1,4 +1,8 @@
-{
+{  
+    "[python]": {
+        "editor.defaultFormatter": "ms-python.black-formatter",
+        "editor.formatOnSave": true
+    },
     "python.testing.pytestEnabled": false,
     "python.testing.unittestEnabled": true,
     "python.linting.pylintEnabled": false,

+ 7 - 7
database/autoline.py

@@ -3,23 +3,23 @@ from unlzw import unlzw
 from pathlib import Path
 
 
-@plac.pos('csv_dir', '', type=Path)
+@plac.pos("csv_dir", "", type=Path)
 def main(csv_dir):
-    for zip_file in csv_dir.glob('*Z.*'):
+    for zip_file in csv_dir.glob("*Z.*"):
         if zip_file.is_dir():
             continue
         # print(zip_file.stat())
         size = zip_file.stat().st_size
         if size > 100:
-            new_file = str(zip_file)[:-8] + str(zip_file)[-1:] + '.csv'
+            new_file = str(zip_file)[:-8] + str(zip_file)[-1:] + ".csv"
             try:
-                with open(zip_file, 'rb') as zip:
-                    with open(new_file, 'wb') as f_out:
+                with open(zip_file, "rb") as zip:
+                    with open(new_file, "wb") as f_out:
                         f_out.write(unlzw(zip.read()))
             except OSError:
                 print(str(zip_file) + ": " + str(size))
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     # plac.call(main)
-    main(Path('C:\\GlobalCube\\System\\AUTOLINE\\Datenbank\\Full_zusammengesetllt'))
+    main(Path("C:\\GlobalCube\\System\\AUTOLINE\\Datenbank\\Full_zusammengesetllt"))

+ 38 - 26
database/bcp_log.py

@@ -19,44 +19,54 @@ class BulkcopyResult:
         return self.exported - self.imported - self.ignored
 
     def to_csv(self):
-        return (f"{self.filename};{self.timestamp.strftime('%d.%m.%Y %H:%M:%S')};"
-                + f"{self.exported};{self.imported};{self.ignored};{self.missing};"
-                + f"{self.export_duration};{self.import_duration}")
+        return (
+            f"{self.filename};{self.timestamp.strftime('%d.%m.%Y %H:%M:%S')};"
+            + f"{self.exported};{self.imported};{self.ignored};{self.missing};"
+            + f"{self.export_duration};{self.import_duration}"
+        )
 
     def __str__(self) -> str:
-        return "\n".join([f'Filename: {self.filename}',
-                          f"Last run: {self.timestamp.strftime('%d.%m.%Y %H:%M')}",
-                          '',
-                          f'Exported: {self.exported:>7}',
-                          f'Imported: {self.imported:>7}',
-                          f'Ignored:  {self.ignored:>7}',
-                          f'Missing:  {self.missing:>7}',
-                          '',
-                          f'Duration: {self.export_duration:>11} s',
-                          f'          {self.import_duration:>11} s'])
+        return "\n".join(
+            [
+                f"Filename: {self.filename}",
+                f"Last run: {self.timestamp.strftime('%d.%m.%Y %H:%M')}",
+                "",
+                f"Exported: {self.exported:>7}",
+                f"Imported: {self.imported:>7}",
+                f"Ignored:  {self.ignored:>7}",
+                f"Missing:  {self.missing:>7}",
+                "",
+                f"Duration: {self.export_duration:>11} s",
+                f"          {self.import_duration:>11} s",
+            ]
+        )
 
 
 def check_logfiles(prefix, base_dir):
     ts = datetime.fromtimestamp(Path(f"{base_dir}/{prefix}.in.log").stat().st_mtime)
     result = BulkcopyResult(filename=prefix, timestamp=ts)
 
-    with open(f"{base_dir}/{prefix}.in.log", 'r') as frh:
+    with open(f"{base_dir}/{prefix}.in.log", "r") as frh:
         result.ignored = len(frh.readlines())
 
     # info output of export
-    with open(f"{base_dir}/{prefix}.bcp1.log", 'r', encoding='cp850', errors='ignore') as frh:
+    with open(
+        f"{base_dir}/{prefix}.bcp1.log", "r", encoding="cp850", errors="ignore"
+    ) as frh:
         raw_logs = frh.read()
-        match = re.search(r'(\d+) Zeilen kopiert.', raw_logs)
+        match = re.search(r"(\d+) Zeilen kopiert.", raw_logs)
         result.exported = int(match.group(1)) if match else 0
-        match2 = re.search(r'Zeit .* gesamt: (\d+)', raw_logs)
+        match2 = re.search(r"Zeit .* gesamt: (\d+)", raw_logs)
         result.export_duration = int(match2.group(1)) / 1000 if match2 else 0
 
     # info output of import
-    with open(f"{base_dir}/{prefix}.bcp2.log", 'r', encoding='cp850', errors='ignore') as frh:
+    with open(
+        f"{base_dir}/{prefix}.bcp2.log", "r", encoding="cp850", errors="ignore"
+    ) as frh:
         raw_logs = frh.read()
-        match = re.search(r'(\d+) Zeilen kopiert.', raw_logs)
+        match = re.search(r"(\d+) Zeilen kopiert.", raw_logs)
         result.imported = int(match.group(1)) if match else 0
-        match2 = re.search(r'Zeit .* gesamt: (\d+)', raw_logs)
+        match2 = re.search(r"Zeit .* gesamt: (\d+)", raw_logs)
         result.import_duration = int(match2.group(1)) / 1000 if match2 else 0
 
     return result
@@ -64,14 +74,16 @@ def check_logfiles(prefix, base_dir):
 
 def check_directory(base_dir):
     res = []
-    for filename in Path(base_dir).glob('*.bcp1.log'):
+    for filename in Path(base_dir).glob("*.bcp1.log"):
         stem = filename.name[:-9]
         res.append(check_logfiles(stem, base_dir).to_csv())
-    with open(base_dir + '/info.log', 'w') as fwh:
-        fwh.write('filename;timestamp;imported;exported;ignored;import_duration;export_duration\n')
-        fwh.write('\n'.join(res))
+    with open(base_dir + "/info.log", "w") as fwh:
+        fwh.write(
+            "filename;timestamp;imported;exported;ignored;import_duration;export_duration\n"
+        )
+        fwh.write("\n".join(res))
 
 
-if __name__ == '__main__':
-    check_directory('/home/robert/projekte/python/dbtools/SQL/temp')
+if __name__ == "__main__":
+    check_directory("/home/robert/projekte/python/dbtools/SQL/temp")
     # check_logfiles('ORDER_LINE_1')

+ 33 - 23
database/cet.py

@@ -3,39 +3,49 @@ import pyodbc
 from os import path
 
 
-@plac.pos('query', 'SQL Query', type=str)
-@plac.pos('mode', '', choices=['in', 'out', 'queryout'])
-@plac.pos('csv_file', '', type=str)
-@plac.opt('Server', 'Hostname or DSN', type=str)
-@plac.opt('database', '', type=str)
-@plac.opt('User', '', type=str)
-@plac.opt('Password', '', type=str)
-@plac.flg('charset', '')
-@plac.opt('Codepage', '', type=str)
-@plac.opt('errorlog', '', type=str)
-def run(query, mode, csv_file, Server='localhost\\GLOBALCUBE', database='master',
-        User='sa', Password='Mffu3011#', charset=False, Codepage='65001', errorlog='error.log'):
+@plac.pos("query", "SQL Query", type=str)
+@plac.pos("mode", "", choices=["in", "out", "queryout"])
+@plac.pos("csv_file", "", type=str)
+@plac.opt("Server", "Hostname or DSN", type=str)
+@plac.opt("database", "", type=str)
+@plac.opt("User", "", type=str)
+@plac.opt("Password", "", type=str)
+@plac.flg("charset", "")
+@plac.opt("Codepage", "", type=str)
+@plac.opt("errorlog", "", type=str)
+def run(
+    query,
+    mode,
+    csv_file,
+    Server="localhost\\GLOBALCUBE",
+    database="master",
+    User="sa",
+    Password="Mffu3011#",
+    charset=False,
+    Codepage="65001",
+    errorlog="error.log",
+):
     dsn = f"dsn={Server};uid={User};pwd={Password}"
     if Codepage.isnumeric():
-        Codepage = 'cp' + Codepage
-    if mode == 'queryout':
+        Codepage = "cp" + Codepage
+    if mode == "queryout":
         queryout(dsn, query, csv_file, Codepage, errorlog)
         return
-    print('This is madness')
+    print("This is madness")
 
 
 def convert_data(element):
     txt = str(element)
-    txt = txt.replace('None', '')
-    txt = txt.replace('False', '0').replace('True', '1')
-    txt = txt.replace('\t', '').replace('\r', '').replace('\n', '')
-    txt = txt.replace('\x81', '').replace('\x90', '')
+    txt = txt.replace("None", "")
+    txt = txt.replace("False", "0").replace("True", "1")
+    txt = txt.replace("\t", "").replace("\r", "").replace("\n", "")
+    txt = txt.replace("\x81", "").replace("\x90", "")
     return txt
 
 
 def queryout(dsn, query, csv_file, codepage, errorlog):
     if path.exists(query):
-        with open(query, 'r', encoding=codepage) as frh:
+        with open(query, "r", encoding=codepage) as frh:
             query = frh.read()
 
     try:
@@ -45,13 +55,13 @@ def queryout(dsn, query, csv_file, codepage, errorlog):
     except pyodbc.InterfaceError as e:
         print(e.args[1])
 
-    with open(csv_file, 'w', encoding=codepage) as fwh:
+    with open(csv_file, "w", encoding=codepage) as fwh:
         while row := cursor.fetchone():
             try:
-                fwh.write(('\t'.join(map(convert_data, row)) + '\n'))
+                fwh.write(("\t".join(map(convert_data, row)) + "\n"))
             except pyodbc.DataError as e:
                 print(e.args[1])
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     plac.call(run)

+ 54 - 29
database/csv_import.py

@@ -6,17 +6,26 @@ import pandas as pd
 from sqlalchemy import create_engine, inspect
 import json
 
-csv_dir = Path('C:\\GlobalCube\\System\\AUTOLINE\\Datenbank\\Full_zusammengesetllt')
-target_dsn = {'user': 'sa', 'pass': 'Mffu3011#', 'server': 'GC-SERVER1\\GLOBALCUBE', 'database': 'AUTOLINE'}
-temp_schema = 'temp'
-target_schema = 'import'
+csv_dir = Path("C:\\GlobalCube\\System\\AUTOLINE\\Datenbank\\Full_zusammengesetllt")
+target_dsn = {
+    "user": "sa",
+    "pass": "Mffu3011#",
+    "server": "GC-SERVER1\\GLOBALCUBE",
+    "database": "AUTOLINE",
+}
+temp_schema = "temp"
+target_schema = "import"
 transform = []
 
 
 def get_dtype(db_type):
     if db_type == "DATETIME":
         return "datetime64"
-    if db_type == "DECIMAL(28, 8)" or db_type == "DECIMAL(18, 0)" or db_type == "NUMERIC(18, 0)":
+    if (
+        db_type == "DECIMAL(28, 8)"
+        or db_type == "DECIMAL(18, 0)"
+        or db_type == "NUMERIC(18, 0)"
+    ):
         return "float64"
     return "object"
 
@@ -40,7 +49,7 @@ def columns_from_csv(source_csv):
 
 def transform_template(target_insp, source_csv, table, target_schema):
     target_insp_cols = target_insp.get_columns(table, schema=target_schema)
-    target_columns_list = [col['name'] for col in target_insp_cols]
+    target_columns_list = [col["name"] for col in target_insp_cols]
     source_columns_list = columns_from_csv(source_csv)
 
     target_columns = set(target_columns_list)
@@ -60,7 +69,7 @@ def transform_template(target_insp, source_csv, table, target_schema):
             pos = source_columns_list.index(col)
         else:
             pos = -1
-        template.append((pos, get_dtype(str(target_insp_cols[i]['type']))))
+        template.append((pos, get_dtype(str(target_insp_cols[i]["type"]))))
     return template
 
 
@@ -88,7 +97,7 @@ def transform_line(line):
 
 def fix_nulls(s):
     for line in s:
-        yield line.replace('\0', ' ')
+        yield line.replace("\0", " ")
 
 
 def transform_file(source_csv, template):
@@ -101,12 +110,13 @@ def transform_file(source_csv, template):
         return False
 
     print(f"Importiere {source_csv.name}...")
-    with open(source_csv, "r", encoding="utf-8", errors="ignore", newline="") as source_file, \
-         open(stage_csv, "w", encoding="utf-8", newline="") as target_file:
+    with open(
+        source_csv, "r", encoding="utf-8", errors="ignore", newline=""
+    ) as source_file, open(stage_csv, "w", encoding="utf-8", newline="") as target_file:
         csv_read = csv.reader(fix_nulls(source_file), delimiter=",")
         csv_write = csv.writer(target_file, delimiter="\t")
 
-        next(csv_read)    # ignore header
+        next(csv_read)  # ignore header
         i = 0
         for cols in csv_read:
             csv_write.writerow(transform_line(cols))
@@ -147,34 +157,49 @@ def batch(csv_dir, action):
     stage_schema = target_schema if action == "overwrite" else temp_schema
     print("@echo off")
     print("cd /d %~dp0")
-    print("set PYTHON=\"C:\\dev\\Python\\Python38-32\"")
+    print('set PYTHON="C:\\dev\\Python\\Python38-32"')
 
-    for (table, source_csv) in csv_tables(csv_dir, target_tables_ci):
+    for table, source_csv in csv_tables(csv_dir, target_tables_ci):
         print(f"echo =={table}==")
         stage_csv = Path(f"{source_csv.parent}\\stage\\{source_csv.name}")
         try:
-            tf_template = transform_template(target_insp, source_csv, table, target_schema)
-            template_json = json.dumps(tf_template).replace("\"", "\\\"")
-            print(f"sqlcmd.exe {conn_params(target_dsn)} -p -Q \"TRUNCATE TABLE [{stage_schema}].[{table}]\" ")
-            print(f"%PYTHON%\\python.exe csv_import.py transform \"{source_csv}\" -t \"{template_json}\" ")
-
-            print(f"bcp.exe [{stage_schema}].[{table}] in \"{stage_csv}\" {conn_params(target_dsn)} -c -C 65001 -e \"{stage_csv}.log\" ")
+            tf_template = transform_template(
+                target_insp, source_csv, table, target_schema
+            )
+            template_json = json.dumps(tf_template).replace('"', '\\"')
+            print(
+                f'sqlcmd.exe {conn_params(target_dsn)} -p -Q "TRUNCATE TABLE [{stage_schema}].[{table}]" '
+            )
+            print(
+                f'%PYTHON%\\python.exe csv_import.py transform "{source_csv}" -t "{template_json}" '
+            )
+
+            print(
+                f'bcp.exe [{stage_schema}].[{table}] in "{stage_csv}" {conn_params(target_dsn)} -c -C 65001 -e "{stage_csv}.log" '
+            )
             pkeys = target_insp.get_pk_constraint(table, schema=target_schema)
-            if len(pkeys['constrained_columns']) > 0:
-                delete_sql = f"DELETE T1 FROM [{target_schema}].[{table}] T1 INNER JOIN [{temp_schema}].[{table}] T2 ON " + \
-                             " AND ".join([f"T1.[{col}] = T2.[{col}]" for col in pkeys['constrained_columns']])
-                print(f"sqlcmd.exe {conn_params(target_dsn)} -p -Q \"{delete_sql}\" ")
+            if len(pkeys["constrained_columns"]) > 0:
+                delete_sql = (
+                    f"DELETE T1 FROM [{target_schema}].[{table}] T1 INNER JOIN [{temp_schema}].[{table}] T2 ON "
+                    + " AND ".join(
+                        [
+                            f"T1.[{col}] = T2.[{col}]"
+                            for col in pkeys["constrained_columns"]
+                        ]
+                    )
+                )
+                print(f'sqlcmd.exe {conn_params(target_dsn)} -p -Q "{delete_sql}" ')
             insert_sql = f"INSERT INTO [{target_schema}].[{table}] SELECT * FROM [{temp_schema}].[{table}]"
-            print(f"sqlcmd.exe {conn_params(target_dsn)} -p -Q \"{insert_sql}\" ")
+            print(f'sqlcmd.exe {conn_params(target_dsn)} -p -Q "{insert_sql}" ')
             print("")
         except Exception:
             print(f"rem {source_csv} fehlerhaft!")
 
 
-@plac.pos('action', "", choices=['batch', 'transform'])
-@plac.pos('csv_dir', "", type=Path)
-@plac.opt('mode', "", choices=['overwrite', 'append', 'update'])
-@plac.opt('template', "")
+@plac.pos("action", "", choices=["batch", "transform"])
+@plac.pos("csv_dir", "", type=Path)
+@plac.opt("mode", "", choices=["overwrite", "append", "update"])
+@plac.opt("template", "")
 def main(action, csv_dir, mode="overwrite", template="[]"):
     if action == "transform":
         transform_file(csv_dir, template)
@@ -182,6 +207,6 @@ def main(action, csv_dir, mode="overwrite", template="[]"):
         batch(csv_dir, mode)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     plac.call(main)
     # main("batch", csv_dir, "append")

+ 31 - 13
database/csv_kontenrahmen.py

@@ -2,18 +2,36 @@ import pandas as pd
 import numpy as np
 
 
-base_dir = 'V:\\Kunden\\Luchtenberg\\1 Umstellung SKR51\\GCStruct_Modell\\Kontenrahmen\\'
-source_csv = base_dir + 'Kontenrahmen_Export.csv'
-mul_csv = base_dir + 'Kontenrahmen_mul.csv'
-result_csv = base_dir + 'Kontenrahmen_Vorlage.csv'
+base_dir = (
+    "V:\\Kunden\\Luchtenberg\\1 Umstellung SKR51\\GCStruct_Modell\\Kontenrahmen\\"
+)
+source_csv = base_dir + "Kontenrahmen_Export.csv"
+mul_csv = base_dir + "Kontenrahmen_mul.csv"
+result_csv = base_dir + "Kontenrahmen_Vorlage.csv"
 
-df_source = pd.read_csv(source_csv, decimal=',', sep=';', encoding='latin-1', converters={i: str for i in range(0, 200)})
-df_source['Konto_Klasse'] = df_source['Konto_Nr'].str.slice(0, 1)
-df_source['Konto_Art'] = np.where(df_source['Konto_Klasse'].isin(['4', '5', '7', '8']), '2', '1')
-df_mul = pd.read_csv(mul_csv, decimal=',', sep=';', encoding='latin-1', converters={i: str for i in range(0, 200)})
-df_join = df_source.merge(df_mul, how='inner', on=['Konto_Klasse'], suffixes=[None, '_other'])
-df_join['Konto_Nr'] = df_join['Konto_Nr'] + '_' + df_join['Suffix']
-df_join.drop(['Konto_Klasse', 'Suffix'], axis=1, inplace=True)
-df_source.drop(['Konto_Klasse'], axis=1, inplace=True)
+df_source = pd.read_csv(
+    source_csv,
+    decimal=",",
+    sep=";",
+    encoding="latin-1",
+    converters={i: str for i in range(0, 200)},
+)
+df_source["Konto_Klasse"] = df_source["Konto_Nr"].str.slice(0, 1)
+df_source["Konto_Art"] = np.where(
+    df_source["Konto_Klasse"].isin(["4", "5", "7", "8"]), "2", "1"
+)
+df_mul = pd.read_csv(
+    mul_csv,
+    decimal=",",
+    sep=";",
+    encoding="latin-1",
+    converters={i: str for i in range(0, 200)},
+)
+df_join = df_source.merge(
+    df_mul, how="inner", on=["Konto_Klasse"], suffixes=[None, "_other"]
+)
+df_join["Konto_Nr"] = df_join["Konto_Nr"] + "_" + df_join["Suffix"]
+df_join.drop(["Konto_Klasse", "Suffix"], axis=1, inplace=True)
+df_source.drop(["Konto_Klasse"], axis=1, inplace=True)
 df_join = df_join.append(df_source).sort_index()
-df_join.to_csv(result_csv, decimal=',', sep=';', encoding='latin-1', index=None)
+df_join.to_csv(result_csv, decimal=",", sep=";", encoding="latin-1", index=None)

+ 31 - 16
database/csv_update.py

@@ -7,13 +7,15 @@ from functools import reduce
 # source_csv = 'P:\\SKR51_GCStruct\\Kontenrahmen_Vorlage.csv'
 # target_csv = 'P:\\SKR51_GCStruct\\GCStruct_Portal\\Kontenrahmen\\Kontenrahmen.csv'
 # result_csv = 'P:\\SKR51_GCStruct\\GCStruct_Portal\\Kontenrahmen\\Kontenrahmen_neu.csv'
-base_dir = 'V:\\Kunden\\Luchtenberg\\1 Umstellung SKR51\\GCStruct_Modell\\Kontenrahmen\\'
-source_csv = base_dir + 'Kontenrahmen_Vorlage.csv'
-target_csv = base_dir + 'Kontenrahmen.csv'
-result_csv = base_dir + 'Kontenrahmen_neu.csv'
+base_dir = (
+    "V:\\Kunden\\Luchtenberg\\1 Umstellung SKR51\\GCStruct_Modell\\Kontenrahmen\\"
+)
+source_csv = base_dir + "Kontenrahmen_Vorlage.csv"
+target_csv = base_dir + "Kontenrahmen.csv"
+result_csv = base_dir + "Kontenrahmen_neu.csv"
 
 debug = False
-cols_pkey = ['Konto_Nr']
+cols_pkey = ["Konto_Nr"]
 
 
 def update(d, other):
@@ -24,24 +26,37 @@ def update(d, other):
 def get_dict(cols, type):
     return dict(dict(zip(cols, [type] * len(cols))))
 
-# cols_dict = reduce(update, (get_dict(cols_pkey, np.str), get_dict(cols_str, np.str), get_dict(cols_float, np.float)), {})
 
+# cols_dict = reduce(update, (get_dict(cols_pkey, np.str), get_dict(cols_str, np.str), get_dict(cols_float, np.float)), {})
 
-df_source = pd.read_csv(source_csv, decimal=',', sep=';', encoding='latin-1', converters={i: str for i in range(0, 200)})
-df_source['pkey'] = reduce(lambda x, y: x + '_' + df_source[y], cols_pkey, '')
-df_source = df_source.set_index('pkey')
 
-df_target = pd.read_csv(target_csv, decimal=',', sep=';', encoding='latin-1', converters={i: str for i in range(0, 200)})
+df_source = pd.read_csv(
+    source_csv,
+    decimal=",",
+    sep=";",
+    encoding="latin-1",
+    converters={i: str for i in range(0, 200)},
+)
+df_source["pkey"] = reduce(lambda x, y: x + "_" + df_source[y], cols_pkey, "")
+df_source = df_source.set_index("pkey")
+
+df_target = pd.read_csv(
+    target_csv,
+    decimal=",",
+    sep=";",
+    encoding="latin-1",
+    converters={i: str for i in range(0, 200)},
+)
 target_columns = df_target.columns
-df_target['pkey'] = reduce(lambda x, y: x + '_' + df_target[y], cols_pkey, '')
-df_target = df_target.set_index('pkey')
+df_target["pkey"] = reduce(lambda x, y: x + "_" + df_target[y], cols_pkey, "")
+df_target = df_target.set_index("pkey")
 
-df_join = df_source.join(df_target, how='left', rsuffix='_other')[target_columns]
-df_remain = df_target.join(df_source, how='left', rsuffix='_other')
-df_remain = df_remain[pd.isna(df_remain[cols_pkey[0] + '_other'])][target_columns]
+df_join = df_source.join(df_target, how="left", rsuffix="_other")[target_columns]
+df_remain = df_target.join(df_source, how="left", rsuffix="_other")
+df_remain = df_remain[pd.isna(df_remain[cols_pkey[0] + "_other"])][target_columns]
 # df_result = df_join[(df_join['Gesamt'] != 0) & (df_join['Serviceberater'] != "")]
 
 # veraltete Konten nicht löschen
 df_join = df_join.append(df_remain).sort_index()
 
-df_join.to_csv(result_csv, decimal=',', sep=';', encoding='latin-1', index=None)
+df_join.to_csv(result_csv, decimal=",", sep=";", encoding="latin-1", index=None)

+ 44 - 13
database/data.py

@@ -4,10 +4,26 @@ from functools import reduce
 
 debug = False
 
-csv_file = 'data/offene_auftraege_eds_c11.csv'
+csv_file = "data/offene_auftraege_eds_c11.csv"
 cols_pkey = ["Hauptbetrieb", "Standort", "Nr", "Auftragsdatum"]
-cols_str = ["Serviceberater", "Order Number", "Fabrikat", "Model", "Fahrzeug", "Kostenstelle", "Marke", "Kunde", "Turnover_Type_Desc"]
-cols_float = ["Durchg\u00e4nge (Auftrag)", "Arbeitswerte", "Teile", "Fremdl.", "Anzahl Tage"]
+cols_str = [
+    "Serviceberater",
+    "Order Number",
+    "Fabrikat",
+    "Model",
+    "Fahrzeug",
+    "Kostenstelle",
+    "Marke",
+    "Kunde",
+    "Turnover_Type_Desc",
+]
+cols_float = [
+    "Durchg\u00e4nge (Auftrag)",
+    "Arbeitswerte",
+    "Teile",
+    "Fremdl.",
+    "Anzahl Tage",
+]
 
 
 def update(d, other):
@@ -19,18 +35,33 @@ def get_dict(cols, type):
     return dict(dict(zip(cols, [type] * len(cols))))
 
 
-cols_dict = reduce(update, (get_dict(cols_pkey, np.str), get_dict(cols_str, np.str), get_dict(cols_float, np.float)), {})
+cols_dict = reduce(
+    update,
+    (
+        get_dict(cols_pkey, np.str),
+        get_dict(cols_str, np.str),
+        get_dict(cols_float, np.float),
+    ),
+    {},
+)
 
-df = pd.read_csv(csv_file, decimal=',', sep=';', encoding='latin-1', usecols=cols_dict.keys(), dtype=cols_dict)
-df['pkey'] = reduce(lambda x, y: x + '_' + df[y], cols_pkey, '')
-df_sum = df.groupby('pkey').sum()
-df_unique = df[cols_pkey + cols_str + ['pkey']].drop_duplicates()
+df = pd.read_csv(
+    csv_file,
+    decimal=",",
+    sep=";",
+    encoding="latin-1",
+    usecols=cols_dict.keys(),
+    dtype=cols_dict,
+)
+df["pkey"] = reduce(lambda x, y: x + "_" + df[y], cols_pkey, "")
+df_sum = df.groupby("pkey").sum()
+df_unique = df[cols_pkey + cols_str + ["pkey"]].drop_duplicates()
 
-df_join = df_sum.join(df_unique.set_index('pkey'), rsuffix='_other')
-df_join['Gesamt'] = df_join['Arbeitswerte'] + df_join['Teile'] + df_join['Fremdl.']
-df_result = df_join[(df_join['Gesamt'] != 0) & (df_join['Serviceberater'] != '')]
+df_join = df_sum.join(df_unique.set_index("pkey"), rsuffix="_other")
+df_join["Gesamt"] = df_join["Arbeitswerte"] + df_join["Teile"] + df_join["Fremdl."]
+df_result = df_join[(df_join["Gesamt"] != 0) & (df_join["Serviceberater"] != "")]
 
-with open('data/offene_auftraege.json', 'w') as f:
-    f.write(df_result.to_json(orient='split', indent=2))
+with open("data/offene_auftraege.json", "w") as f:
+    f.write(df_result.to_json(orient="split", indent=2))
 
 print(df_result.shape)

+ 2 - 2
database/database.py

@@ -1,7 +1,7 @@
 def conn_string(dsn):
-    if dsn['driver'] == 'mssql':
+    if dsn["driver"] == "mssql":
         return f"mssql+pyodbc://{dsn['user']}:{dsn['pass']}@{dsn['server']}/{dsn['database']}?driver=SQL+Server+Native+Client+11.0"
-    if dsn['driver'] == 'mysql':
+    if dsn["driver"] == "mysql":
         return f"mysql+pymysql://{dsn['user']}:{dsn['pass']}@{dsn['server']}/{dsn['database']}?charset=utf8mb4"
     return f"pyodbc://{dsn['user']}:{dsn['pass']}@{dsn['server']}/{dsn['database']}?driver={dsn['driver']}"
 

+ 17 - 5
database/db_copy.py

@@ -2,11 +2,23 @@ import sqlalchemy.dialects.mssql.base
 from sqlalchemy import create_engine, MetaData, Boolean, DATETIME
 from database import conn_string
 
-source_dsn = {'user': 'sa', 'pass': 'Mffu3011#', 'server': 'GC-SERVER1\\GLOBALCUBE', 'database': 'OPTIMA', 'driver': 'mssql'}
-source_schema = 'import'
+source_dsn = {
+    "user": "sa",
+    "pass": "Mffu3011#",
+    "server": "GC-SERVER1\\GLOBALCUBE",
+    "database": "OPTIMA",
+    "driver": "mssql",
+}
+source_schema = "import"
 
-target_dsn = {'user': 'root', 'pass': '', 'server': 'localhost', 'database': 'OPTIMA', 'driver': 'mysql'}
-target_schema = ''
+target_dsn = {
+    "user": "root",
+    "pass": "",
+    "server": "localhost",
+    "database": "OPTIMA",
+    "driver": "mysql",
+}
+target_schema = ""
 
 source_db = create_engine(conn_string(source_dsn))
 source_meta = MetaData()
@@ -19,7 +31,7 @@ for table in source_meta.sorted_tables:
     table.schema = None
 
     for col in table.c:
-        if getattr(col.type, 'collation', None) is not None:
+        if getattr(col.type, "collation", None) is not None:
             col.type.collation = None
         if type(col.type) is sqlalchemy.dialects.mssql.base.BIT:
             col.type = Boolean()

+ 10 - 8
database/gebos_backup.py

@@ -4,18 +4,20 @@ from pathlib import Path
 
 def gebos_backup(base_dir=None):
     if base_dir is None:
-        base_dir = 'E:\\GEBOS'
+        base_dir = "E:\\GEBOS"
 
-    source_path = Path(base_dir) / 'data'
-    target_path = base_dir + '/archive'
+    source_path = Path(base_dir) / "data"
+    target_path = base_dir + "/archive"
 
-    for source_file in source_path.glob('*.csv'):
+    for source_file in source_path.glob("*.csv"):
         # print(source_file)
-        table, timestamp = source_file.name.split('2', 1)
-        timestamp = '2' + timestamp
+        table, timestamp = source_file.name.split("2", 1)
+        timestamp = "2" + timestamp
         year = timestamp[:4]
         month = timestamp[4:6]
-        target = Path(f"{target_path}/{year}/{table}/{year}-{month}/{source_file.name}").absolute()
+        target = Path(
+            f"{target_path}/{year}/{table}/{year}-{month}/{source_file.name}"
+        ).absolute()
         os.makedirs(target.parent, exist_ok=True)
         if target.exists():
             target.unlink()
@@ -23,5 +25,5 @@ def gebos_backup(base_dir=None):
         source_file.rename(target)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     gebos_backup()

+ 10 - 10
database/gebos_convert.py

@@ -5,33 +5,33 @@ from pathlib import Path
 
 def convert_dir(path):
     source_path = Path(path)
-    target_path = source_path.parent.joinpath('staging')
+    target_path = source_path.parent.joinpath("staging")
 
-    for source_file in source_path.glob('*.csv'):
+    for source_file in source_path.glob("*.csv"):
         print(source_file.name)
         target_file = target_path / source_file.name
         convert_file(source_file, target_file)
 
 
 def convert_file(source, target):
-    with open(source, 'r', encoding='utf-8', newline='') as frh:
-        with open(target, 'w', encoding='latin-1', newline='') as fwh:
-            csv_reader = csv.reader(frh, delimiter=';')
-            csv_writer = csv.writer(fwh, delimiter='\t')
+    with open(source, "r", encoding="utf-8", newline="") as frh:
+        with open(target, "w", encoding="latin-1", newline="") as fwh:
+            csv_reader = csv.reader(frh, delimiter=";")
+            csv_writer = csv.writer(fwh, delimiter="\t")
             for row in csv_reader:
                 converted = [convert_field(col) for col in row]
                 csv_writer.writerow(converted)
 
 
 def convert_field(col):
-    if re.match(r'\d:\d\d:\d\d', col):
-        col = '0' + col
+    if re.match(r"\d:\d\d:\d\d", col):
+        col = "0" + col
     return col
 
 
 def main():
-    convert_dir('E:\\GEBOS\\data')
+    convert_dir("E:\\GEBOS\\data")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()

+ 21 - 21
database/gebos_import.py

@@ -2,21 +2,21 @@ from pathlib import Path
 import subprocess
 
 cfg = {
-    'schema': 'temp',
-    'scripts_dir': 'E:\\GlobalCube\\Tasks\\scripts',
-    'query_dir': 'E:\\GlobalCube\\System\\NAVISION\\SQL\\exec',
+    "schema": "temp",
+    "scripts_dir": "E:\\GlobalCube\\Tasks\\scripts",
+    "query_dir": "E:\\GlobalCube\\System\\NAVISION\\SQL\\exec",
 }
 target_tables = {
-    'account2': 'GEBOS_Account',
-    'activity2': 'GEBOS_Activity',
-    'activitytype2': 'GEBOS_Activity_Type',
-    'costcenter2': 'GEBOS_Cost_Center',
-    'department2': 'GEBOS_Department',
-    'employee2': 'GEBOS_Employee',
-    'employeeaccount2': 'GEBOS_Employee_Account',
-    'employmenttype2': 'GEBOS_Employment_Type',
-    'timemodel2': 'GEBOS_Time_Model',
-    'timetracking2': 'GEBOS_Time_Tracking'
+    "account2": "GEBOS_Account",
+    "activity2": "GEBOS_Activity",
+    "activitytype2": "GEBOS_Activity_Type",
+    "costcenter2": "GEBOS_Cost_Center",
+    "department2": "GEBOS_Department",
+    "employee2": "GEBOS_Employee",
+    "employeeaccount2": "GEBOS_Employee_Account",
+    "employmenttype2": "GEBOS_Employment_Type",
+    "timemodel2": "GEBOS_Time_Model",
+    "timetracking2": "GEBOS_Time_Tracking",
 }
 
 
@@ -25,21 +25,21 @@ def task(args):
 
 
 def gebos_import(csv_file, target_table):
-    bulkcopy = str(Path(cfg['scripts_dir']) / 'bcp_import.bat')
-    sqlquery = str(Path(cfg['scripts_dir']) / 'sqlquery.bat')
-    sqlexec = str(Path(cfg['scripts_dir']) / 'sqlexec.bat')
-    query_file = target_table + '.sql'
+    bulkcopy = str(Path(cfg["scripts_dir"]) / "bcp_import.bat")
+    sqlquery = str(Path(cfg["scripts_dir"]) / "sqlquery.bat")
+    sqlexec = str(Path(cfg["scripts_dir"]) / "sqlexec.bat")
+    query_file = target_table + ".sql"
 
     task(f"{sqlquery} \"TRUNCATE TABLE [{cfg['schema']}].[{target_table}]\" ")
     task(f"{bulkcopy} [{cfg['schema']}].[{target_table}] \"{csv_file}\" ")
-    task(f"{sqlexec} {query_file} \"{csv_file}\" ")
+    task(f'{sqlexec} {query_file} "{csv_file}" ')
 
 
 def import_dir(base_dir=None):
     if base_dir is None:
-        base_dir = 'E:\\GEBOS\\staging'
+        base_dir = "E:\\GEBOS\\staging"
 
-    for csv_file in Path(base_dir).glob('*.csv'):
+    for csv_file in Path(base_dir).glob("*.csv"):
         print(csv_file.name)
         for key, table in target_tables.items():
             if not csv_file.name.startswith(key):
@@ -48,5 +48,5 @@ def import_dir(base_dir=None):
             csv_file.unlink()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     import_dir()

+ 10 - 8
database/ift_backup.py

@@ -4,19 +4,21 @@ from pathlib import Path
 
 def ift_backup(base_dir=None):
     if base_dir is None:
-        base_dir = 'E:\\IFT'
+        base_dir = "E:\\IFT"
 
-    source_path = Path(base_dir) / 'prod'
-    target_path = base_dir + '/archive'
+    source_path = Path(base_dir) / "prod"
+    target_path = base_dir + "/archive"
 
-    for source_file in source_path.glob('*'):
+    for source_file in source_path.glob("*"):
         # print(source_file)
         file_temp = source_file.name
-        if file_temp.count('_') == 2:
-            prefix, filetype, timestamp = file_temp.split('_')
+        if file_temp.count("_") == 2:
+            prefix, filetype, timestamp = file_temp.split("_")
             year = timestamp[:4]
             month = timestamp[4:6]
-            target = Path(f"{target_path}/{year}/{filetype}/{year}-{month}/{source_file.name}").absolute()
+            target = Path(
+                f"{target_path}/{year}/{filetype}/{year}-{month}/{source_file.name}"
+            ).absolute()
             os.makedirs(target.parent, exist_ok=True)
             if target.exists():
                 target.unlink()
@@ -24,5 +26,5 @@ def ift_backup(base_dir=None):
             source_file.rename(target)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     ift_backup()

+ 32 - 39
database/ift_convert.py

@@ -7,86 +7,79 @@ from itertools import chain
 class IFTConverter:
     def __init__(self, base_dir):
         self.config = {
-            '_A_': {
-                '1': self.import_config(base_dir + 'actuals_header.txt'),
-                '2': self.import_config(base_dir + 'actuals_item.txt')
+            "_A_": {
+                "1": self.import_config(base_dir + "actuals_header.txt"),
+                "2": self.import_config(base_dir + "actuals_item.txt"),
             },
-            '_B_': {
-                '010': self.import_config(base_dir + 'budget_line.txt')
+            "_B_": {"010": self.import_config(base_dir + "budget_line.txt")},
+            "_C_": {"010": self.import_config(base_dir + "commitment_line.txt")},
+            "_K_": {
+                "1": self.import_config(base_dir + "controllingdoc_header.txt"),
+                "2": self.import_config(base_dir + "controllingdoc_item.txt"),
             },
-            '_C_': {
-                '010': self.import_config(base_dir + 'commitment_line.txt')
-            },
-            '_K_': {
-                '1': self.import_config(base_dir + 'controllingdoc_header.txt'),
-                '2': self.import_config(base_dir + 'controllingdoc_item.txt')
-            },
-            '_P_': {
-                '010': self.import_config(base_dir + 'plan_line.txt')
-            }
+            "_P_": {"010": self.import_config(base_dir + "plan_line.txt")},
         }
-        self.is_number = re.compile(r'\d+\.\d+\-?$')
+        self.is_number = re.compile(r"\d+\.\d+\-?$")
 
     def import_config(self, filename):
-        with open(filename, 'r') as frh:
-
-            return [(int(line['start']) - 1, int(line['start']) + int(line['length']) - 1)
-                    for line in csv.DictReader(frh, delimiter='\t')]
+        with open(filename, "r") as frh:
+            return [
+                (int(line["start"]) - 1, int(line["start"]) + int(line["length"]) - 1)
+                for line in csv.DictReader(frh, delimiter="\t")
+            ]
 
     def convert_dir(self, path):
         source_path = Path(path)
-        target_path = source_path.parent.joinpath('staging')
+        target_path = source_path.parent.joinpath("staging")
 
-        for filename in source_path.glob('*'):
+        for filename in source_path.glob("*"):
             print(filename.name)
-            if filename.name.count('_') < 2:
-                print('-> wrong file format')
+            if filename.name.count("_") < 2:
+                print("-> wrong file format")
             elif filename.stat().st_size == 0:
-                print('-> file is empty!')
+                print("-> file is empty!")
             else:
                 self.convert_file(filename, target_path)
 
     def convert_file(self, source, target_path):
         cfg = self.conversion_config(source.name)
-        target = target_path.joinpath(source.name + '.csv')
+        target = target_path.joinpath(source.name + ".csv")
 
         content = {}
         last_key = list(cfg.keys())[-1]
 
-        with open(source, 'r', encoding='utf-8', errors='ignore') as frh:
-            with open(target, 'w', encoding='utf-8') as fwh:
+        with open(source, "r", encoding="utf-8", errors="ignore") as frh:
+            with open(target, "w", encoding="utf-8") as fwh:
                 for line in frh.readlines():
                     for key, rules in cfg.items():
                         if line.startswith(key):
                             content[key] = self.convert_line(line, rules)
                             break
                     if line.startswith(last_key):
-                        fwh.write('\t'.join(chain(*content.values())) + '\n')
+                        fwh.write("\t".join(chain(*content.values())) + "\n")
 
     def convert_line(self, line, rules):
         return [self.convert_field(line, rule) for rule in rules]
 
     def convert_field(self, line, rule):
-        field = line[rule[0]:rule[1]].strip()
+        field = line[rule[0] : rule[1]].strip()
         if self.is_number.search(field):
-            field = field.replace(',', '')
-            if field[-1] == '-':
-                field = '-' + field[:-1]
+            field = field.replace(",", "")
+            if field[-1] == "-":
+                field = "-" + field[:-1]
         return field
 
     def conversion_config(self, filename):
         for key, cfg in self.config.items():
             if key in filename:
                 return cfg
-        return {
-            '0': []
-        }
+        return {"0": []}
 
 
 def main():
-    ift_conv = IFTConverter('E:\\GlobalCube\\Tasks\\Import\\config\\IFT\\')
-    ift_conv.convert_dir('E:\\IFT\\prod')
+    ift_conv = IFTConverter("E:\\GlobalCube\\Tasks\\Import\\config\\IFT\\")
+    ift_conv.convert_dir("E:\\IFT\\prod")
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     main()

+ 18 - 18
database/ift_import.py

@@ -2,16 +2,16 @@ from pathlib import Path
 import subprocess
 
 cfg = {
-    'schema': 'temp',
-    'scripts_dir': 'E:\\GlobalCube\\Tasks\\scripts',
-    'query_dir': 'E:\\GlobalCube\\System\\NAVISION\\SQL\\exec',
+    "schema": "temp",
+    "scripts_dir": "E:\\GlobalCube\\Tasks\\scripts",
+    "query_dir": "E:\\GlobalCube\\System\\NAVISION\\SQL\\exec",
 }
 target_tables = {
-    'A': 'IFT_Actuals',
-    'B': 'IFT_Budget',
-    'C': 'IFT_Commitment',
-    'K': 'IFT_Controlling_Documents',
-    'P': 'IFT_Plan'
+    "A": "IFT_Actuals",
+    "B": "IFT_Budget",
+    "C": "IFT_Commitment",
+    "K": "IFT_Controlling_Documents",
+    "P": "IFT_Plan",
 }
 
 
@@ -20,27 +20,27 @@ def task(args):
 
 
 def ift_import(csv_file, target_table):
-    bulkcopy = str(Path(cfg['scripts_dir']) / 'bcp_import.bat')
-    sqlquery = str(Path(cfg['scripts_dir']) / 'sqlquery.bat')
-    sqlexec = str(Path(cfg['scripts_dir']) / 'sqlexec.bat')
-    query_file = target_table + '.sql'
+    bulkcopy = str(Path(cfg["scripts_dir"]) / "bcp_import.bat")
+    sqlquery = str(Path(cfg["scripts_dir"]) / "sqlquery.bat")
+    sqlexec = str(Path(cfg["scripts_dir"]) / "sqlexec.bat")
+    query_file = target_table + ".sql"
 
     task(f"{sqlquery} \"TRUNCATE TABLE [{cfg['schema']}].[{target_table}]\" ")
     task(f"{bulkcopy} [{cfg['schema']}].[{target_table}] \"{csv_file}\" ")
-    task(f"{sqlexec} {query_file} \"{csv_file}\" ")
+    task(f'{sqlexec} {query_file} "{csv_file}" ')
 
 
 def import_dir(base_dir=None):
     if base_dir is None:
-        base_dir = 'E:\\IFT\\staging'
+        base_dir = "E:\\IFT\\staging"
 
-    for csv_file in Path(base_dir).glob('*.csv'):
+    for csv_file in Path(base_dir).glob("*.csv"):
         print(csv_file.name)
-        if csv_file.name.count('_') == 2:
-            _, filetype, _ = csv_file.name.split('_')
+        if csv_file.name.count("_") == 2:
+            _, filetype, _ = csv_file.name.split("_")
             ift_import(str(csv_file), target_tables[filetype])
             csv_file.unlink()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     import_dir()

+ 75 - 18
database/sql_query.py

@@ -3,32 +3,89 @@ from sqlalchemy import create_engine
 from database import conn_string
 
 cfg = {
-    'clients': ['deop01', 'deop02', 'deop03', 'deop04', 'deop05', 'deop06', 'deop07', 'deop08', 'deop09', 'deop10',
-                'deop11', 'deop12', 'deop13', 'deop14', 'deop15', 'deop16', 'deop17', 'deop18', 'deop19', 'deop20',
-                'deop21', 'deop22', 'deop23', 'deop24', 'deop25', 'deop26', 'deop27', 'deop28', 'deop29', 'deho02', 'deni02'],
-    'date_filter': "'2019-01-01'",
-    'source_dsn': {'user': 'sa', 'pass': 'Mffu3011#', 'server': 'GC-SERVER1\\GLOBALCUBE', 'database': 'desk01', 'driver': 'mssql'},
-    'source_schema': 'dbo',
-    'source_query': 'Belege_Planung_Ist_FC.sql',
-    'source_header': ['Datenbank', 'Betrieb_Nr', 'Marke', 'Konto_Nr', 'Bookkeep_Period', 'Betrag', 'Menge'],
-    'target_csv': 'Belege_Planung_Ist_FC.csv'
+    "clients": [
+        "deop01",
+        "deop02",
+        "deop03",
+        "deop04",
+        "deop05",
+        "deop06",
+        "deop07",
+        "deop08",
+        "deop09",
+        "deop10",
+        "deop11",
+        "deop12",
+        "deop13",
+        "deop14",
+        "deop15",
+        "deop16",
+        "deop17",
+        "deop18",
+        "deop19",
+        "deop20",
+        "deop21",
+        "deop22",
+        "deop23",
+        "deop24",
+        "deop25",
+        "deop26",
+        "deop27",
+        "deop28",
+        "deop29",
+        "deho02",
+        "deni02",
+    ],
+    "date_filter": "'2019-01-01'",
+    "source_dsn": {
+        "user": "sa",
+        "pass": "Mffu3011#",
+        "server": "GC-SERVER1\\GLOBALCUBE",
+        "database": "desk01",
+        "driver": "mssql",
+    },
+    "source_schema": "dbo",
+    "source_query": "Belege_Planung_Ist_FC.sql",
+    "source_header": [
+        "Datenbank",
+        "Betrieb_Nr",
+        "Marke",
+        "Konto_Nr",
+        "Bookkeep_Period",
+        "Betrag",
+        "Menge",
+    ],
+    "target_csv": "Belege_Planung_Ist_FC.csv",
 }
 
 
 def db_import(select_query, source_db, current_table, target_db, target_schema):
-    pd.read_sql(select_query, source_db).to_sql(current_table['target'], target_db, schema=target_schema, index=False, if_exists='append')
+    pd.read_sql(select_query, source_db).to_sql(
+        current_table["target"],
+        target_db,
+        schema=target_schema,
+        index=False,
+        if_exists="append",
+    )
 
 
-source_db = create_engine(conn_string(cfg['source_dsn']))
-with open(cfg['source_query'], 'r') as f:
+source_db = create_engine(conn_string(cfg["source_dsn"]))
+with open(cfg["source_query"], "r") as f:
     select_query = f.read()
 
 
-with open(cfg['target_csv'], 'w') as f:
-    f.write(';'.join(cfg['source_header']) + '\n')
+with open(cfg["target_csv"], "w") as f:
+    f.write(";".join(cfg["source_header"]) + "\n")
 
-for current_client in cfg['clients']:
-    current_query = select_query.replace('desk01', current_client)
-    pd.read_sql(current_query, source_db).to_csv(cfg['target_csv'], sep=';', decimal=',', encoding='latin-1',
-                                                 index=False, header=False, mode='a')
+for current_client in cfg["clients"]:
+    current_query = select_query.replace("desk01", current_client)
+    pd.read_sql(current_query, source_db).to_csv(
+        cfg["target_csv"],
+        sep=";",
+        decimal=",",
+        encoding="latin-1",
+        index=False,
+        header=False,
+        mode="a",
+    )
     print(current_client)