Script generation: load csv files from the Export folder into SQL Server

gc-server3 2 months ago
parent
commit
90110bbddd
4 changed files with 227 additions and 36 deletions
  1. +2 -1
      cognos7/__init__.py
  2. +70 -0
      cognos7/csv_column_types.py
  3. +13 -28
      cognos7/mdl_convert.py
  4. +142 -7
      cognos7/schema_ini_convert.py
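
The commit exposes two new entry points: get_column_types (re-exported from cognos7/__init__.py) and create_format_xml_from_folder in schema_ini_convert.py. A minimal usage sketch follows; the Export path is a placeholder and not part of the commit:

    # Minimal sketch with a placeholder path, not taken from the commit.
    from cognos7 import get_column_types

    # Samples the first 2000 rows of a semicolon-separated, latin-1 encoded CSV
    # and infers a SQL Server type per column.
    col_types = get_column_types("C:\\Export\\example.csv")

    # schema_ini_convert.py uses a bare "from csv_column_types import ..." and has
    # its own __main__ block, so the folder-level generation is run as a script:
    #   python cognos7\schema_ini_convert.py
    # It writes Format\<table>.csv.xml plus scripts under SQL\views_export,
    # SQL\views_load, SQL\exec_drop_create and SQL\exec_update for every *.csv found.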

+ 2 - 1
cognos7/__init__.py

@@ -1,2 +1,3 @@
-from cognos7.mdl_convert import convert_file, convert_folder
+from cognos7.csv_column_types import get_column_types
 from cognos7.iqd_convert import IqdConverter
+from cognos7.mdl_convert import convert_file, convert_folder

+ 70 - 0
cognos7/csv_column_types.py

@@ -0,0 +1,70 @@
+import re
+
+import pandas as pd
+
+data_types = {
+    "int64": "varchar(20)",
+    "float64": "decimal(18,8)",
+}
+
+int_values = ["Menge", "Anzahl"]
+decimal_values = ["Betrag"]
+
+
+def get_column_types(csv_file, header=None):
+    skip = 1 if header else 0
+    df = pd.read_csv(
+        csv_file,
+        sep=";",
+        decimal=",",
+        encoding="latin-1",
+        nrows=2000,
+        names=header,
+        skiprows=skip,
+    )
+
+    col_types = dict([(c, get_type(df[c])) for c in df.columns])
+    return col_types
+
+
+def is_datetime(value: str):
+    return re.match(r"\d{4}-\d{2}-\d{2} \d{2}:\d{2}", value)
+
+
+def is_date(value: str):
+    return re.match(r"\d{4}-\d{2}-\d{2}", value)
+
+
+def get_type(df_col: pd.Series) -> str:
+    if str(df_col.dtype) == "float64":
+        if all(df_col.isna()):
+            return "varchar(255)"
+        return "decimal(18,8)"
+
+    if str(df_col.dtype) == "int64":
+        for entry in decimal_values:
+            if entry in df_col.name:
+                return "decimal(18,8)"
+        for entry in int_values:
+            if entry in df_col.name:
+                return "int"
+        return "varchar(20)"
+
+    if all([is_datetime(str(value)) for value in df_col]):
+        return "datetime"
+
+    if all([is_date(str(value)) for value in df_col]):
+        return "date"
+
+    max_len = max([len(str(value)) for value in df_col])
+    if max_len < 15:
+        return "varchar(20)"
+    if max_len < 35:
+        return "varchar(50)"
+    if max_len < 85:
+        return "varchar(100)"
+    return "varchar(255)"
+
+
+if __name__ == "__main__":
+    get_column_types("C:\\GlobalCube_Entwicklung\\Export\\belege_eds_stk_ohne_service_aw.csv")
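
For illustration, the inference in get_type maps a sampled CSV to SQL Server types roughly as below; the column names and data in this sketch are hypothetical, not taken from the commit:

    # Hypothetical example: a CSV with header "Belegnummer;Buchungsdatum;Menge;Betrag"
    # where Belegnummer holds short strings, Buchungsdatum holds "2023-01-31"-style
    # dates, Menge parses as int64 and Betrag as float64.
    expected = {
        "Belegnummer": "varchar(20)",   # object column, longest value under 15 characters
        "Buchungsdatum": "date",        # every value matches \d{4}-\d{2}-\d{2}, no time part
        "Menge": "int",                 # int64 column whose name contains "Menge"
        "Betrag": "decimal(18,8)",      # float64 column (decimal="," parses the comma)
    }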

+ 13 - 28
cognos7/mdl_convert.py

@@ -1,7 +1,6 @@
 import json
-from pathlib import Path
 import re
-
+from pathlib import Path
 
 CONVERSION = [
     "Name",
@@ -55,16 +54,12 @@ def convert_block(block):
         elif key in ["DimensionView"]:
             if key + "s" not in result:
                 result[key + "s"] = []
-            result[key + "s"].append(
-                {"ID": words[i + offset + 1], "Name": words[i + offset + 2].strip('"')}
-            )
+            result[key + "s"].append({"ID": words[i + offset + 1], "Name": words[i + offset + 2].strip('"')})
             offset += 1
         elif key in ["MeasureInclude"]:
             if key + "s" not in result:
                 result[key + "s"] = []
-            result[key + "s"].append(
-                {"ID": words[i + offset + 1], "Include": words[i + offset + 2]}
-            )
+            result[key + "s"].append({"ID": words[i + offset + 1], "Include": words[i + offset + 2]})
             offset += 1
         elif key == "Calc":
             for j in range(i + offset + 1, len(words)):
@@ -79,9 +74,7 @@ def convert_block(block):
         elif key == "AllocationAdd":
             if key + "s" not in result:
                 result[key + "s"] = []
-            result[key + "s"].append(
-                {"Measure": words[i + offset + 2], "Type": words[i + offset + 4]}
-            )
+            result[key + "s"].append({"Measure": words[i + offset + 2], "Type": words[i + offset + 4]})
             offset += 3
         elif key in [
             "CustomViewList",
@@ -128,19 +121,13 @@ def remove_ids(nested):
     nested.pop("Primary", "")
     nested.pop("Lastuse", "")
     nested.pop("AssociationContext", "")
-    if (
-        nested.get("Type", "") == "SpecialCategory"
-        and "Label" in nested
-        and "20" in nested["Label"]
-    ):
+    if nested.get("Type", "") == "SpecialCategory" and "Label" in nested and "20" in nested["Label"]:
         nested.pop("Label", "")
 
     for col in ["Parent", "Levels", "CustomViewList"]:
         if col not in nested:
             continue
-        if col == "Levels" and (
-            isinstance(nested["Levels"], list) or nested["Levels"] == "0"
-        ):
+        if col == "Levels" and (isinstance(nested["Levels"], list) or nested["Levels"] == "0"):
             continue
         nested[col] = id_lookup.get(nested[col], {}).get("Name", "undefined")
 
@@ -188,10 +175,7 @@ def group_mdl_blocks(converted):
     for c, t in zip(converted, types):
         if t in [""]:
             continue
-        if (
-            t in ["Category", "SpecialCategory"]
-            and result["Dimensions"][-1]["Name"] == "Zeit"
-        ):
+        if t in ["Category", "SpecialCategory"] and result["Dimensions"][-1]["Name"] == "Zeit":
             if t == "Category" or c["Name"][0].isnumeric():
                 continue
 
@@ -250,11 +234,12 @@ def group_mdl_blocks(converted):
 
 def build_query(datasource):
     table = datasource["Name"]
-    suffix = "_fm" if datasource["SourceType"] == "CognosSourceQuery" else "_imr"
-    table_name = f"[staging].[{table}{suffix}]"
+    # suffix = "_fm" if datasource["SourceType"] == "CognosSourceQuery" else "_imr"
+    # table_name = f"[staging].[{table}{suffix}]"
+    table_name = f"[export_csv].[{table}]"
     view_name = f"[load].[{table}]"
-    columns = ",\n    ".join([extract_column(c) for c in datasource["Columns"]])
-    return f"CREATE VIEW {view_name}\nAS\nSELECT {columns} \nFROM {table_name}\nGO\n\n"
+    columns = ",\n\t".join([extract_column(c) for c in datasource["Columns"]])
+    return f"CREATE\n\tOR\n\nALTER VIEW {view_name}\nAS\nSELECT {columns} \nFROM {table_name}\nGO\n\n"
 
 
 def extract_column(col):
@@ -314,4 +299,4 @@ def convert_folder(base_dir):
 if __name__ == "__main__":
     # convert_file("data/S_Offene_Auftraege.mdl")
     # convert_file("data/F_Belege_SKR_SKR_Boettche.mdl")
-    convert_folder("data/mdl/")
+    convert_folder("cognos7/data/mdl/")

+ 142 - 7
cognos7/schema_ini_convert.py

@@ -1,7 +1,23 @@
+import json
+import os
+from itertools import count
 from pathlib import Path
 
+from csv_column_types import get_column_types
 
-def schema_convert(base_dir: str):
+col_type_convert = {
+    "varchar(20)": "to_varchar20",
+    "varchar(50)": "to_varchar50",
+    "varchar(100)": "to_varchar100",
+    "varchar(255)": "to_varchar255",
+    "decimal(18,8)": "to_decimal",
+    "int": "to_int",
+    "datetime": "to_datetime",
+    "date": "to_datetime",
+}
+
+
+def schema_convert(base_dir: str) -> None:
     base_dir = Path(base_dir).absolute()
     schema_file = base_dir / "schema.ini"
     with open(schema_file, "r", encoding="latin-1") as frh:
@@ -19,10 +35,10 @@ def schema_convert(base_dir: str):
         col_names = [c.split(" ")[0] for c in details["columns"].values()]
         tables_with_columns[table] = col_names
 
-    create_format_xml_files(base_dir, tables_with_columns)
+    create_format_xml_files(str(base_dir), tables_with_columns)
 
 
-def convert_table_info(table_info):
+def convert_table_info(table_info: str) -> tuple[str, dict]:
     info = table_info.split("]\n")
     if len(info) < 2:
         return ("", "")
@@ -34,9 +50,12 @@ def convert_table_info(table_info):
     return (info[0], details)
 
 
-def create_format_xml_files(base_dir, tables_with_columns: dict[str, list[str]]):
+def create_format_xml_files(base_dir: str, tables_with_columns: dict[str, list[str]]) -> None:
     for table, columns in tables_with_columns.items():
-        format_file = base_dir / (table + ".xml")
+        table_file = table
+        if table[-4:] != ".csv":
+            table_file = f"{table}.csv"
+        format_file = f"{base_dir}\\{table_file}.xml"
 
         record = []
         row = []
@@ -45,7 +64,8 @@ def create_format_xml_files(base_dir, tables_with_columns: dict[str, list[str]])
             record.append(
                 f'    <FIELD ID="{i}" xsi:type="CharTerm" TERMINATOR=";" MAX_LENGTH="255" COLLATION="SQL_Latin1_General_CP1_CI_AS"/>'
             )
-            row.append(f'    <COLUMN SOURCE="{i}" NAME="{col_name}" xsi:type="SQLVARYCHAR"/>')
+            col_name_escape = col_name.encode("ascii", "xmlcharrefreplace").decode()
+            row.append(f'    <COLUMN SOURCE="{i}" NAME="{col_name_escape}" xsi:type="SQLVARYCHAR"/>')
         record[-1] = record[-1].replace(";", "\\r\\n")
 
         with open(format_file, "w") as fwh:
@@ -59,5 +79,120 @@ def create_format_xml_files(base_dir, tables_with_columns: dict[str, list[str]])
             fwh.write("\n  </ROW>\n</BCPFORMAT>")
 
 
+def create_format_xml_from_folder(base_dir: str) -> None:
+    format_folder = f"{base_dir}\\Format"
+    os.makedirs(format_folder, exist_ok=True)
+    sql_folder = f"{base_dir}\\SQL"
+    os.makedirs(f"{sql_folder}\\views_export", exist_ok=True)
+    os.makedirs(f"{sql_folder}\\views_load", exist_ok=True)
+    os.makedirs(f"{sql_folder}\\exec_drop_create", exist_ok=True)
+    os.makedirs(f"{sql_folder}\\exec_update", exist_ok=True)
+    tables_with_columns = get_tables_with_columns_from_folder(base_dir)
+    with open(f"{format_folder}\\tables.json", "w") as fwh:
+        json.dump(tables_with_columns, fwh, indent=2)
+    create_format_xml_files(format_folder, tables_with_columns)
+    create_openrowset_sql_files(base_dir, format_folder, sql_folder, tables_with_columns)
+    create_load_sql_files(sql_folder, tables_with_columns)
+    create_drop_create_sql_files(sql_folder, tables_with_columns)
+    create_update_sql_files(sql_folder, tables_with_columns)
+
+
+def create_openrowset_sql_files(
+    base_dir: str, format_folder: str, sql_folder: str, tables_with_columns: dict[str, list[str]]
+) -> None:
+    for table, columns in tables_with_columns.items():
+        csv_file = f"{base_dir}\\{table}.csv"
+        format_file = f"{format_folder}\\{table}.csv.xml"
+        sql_file = f"{sql_folder}\\views_export\\export_csv.{table}.sql"
+
+        col_types = get_column_types(csv_file, columns)
+        select_fields = [get_select_statement(col, col_type) for col, col_type in col_types.items()]
+        query = (
+            "SELECT "
+            + ",\n\t".join(select_fields)
+            + "\nFROM OPENROWSET(\n"
+            + f"\tBULK '{csv_file}',\n"
+            + f"\tFORMATFILE = '{format_file}',\n"
+            + "\tFIRSTROW = 2\n"
+            + ") AS T1"
+        )
+
+        create_view = get_create_view("export_csv", table, query)
+        with open(sql_file, "w", encoding="latin-1") as fwh:
+            fwh.write(create_view)
+
+
+def get_create_view(schema: str, table: str, query: str) -> str:
+    create_view = (
+        "SET QUOTED_IDENTIFIER ON\nGO\n\nSET ANSI_NULLS ON\nGO\n\n"
+        + f"CREATE\n\tOR\n\nALTER VIEW [{schema}].[{table}]\nAS\n{query}\n"
+        + "GO\n\nSET QUOTED_IDENTIFIER OFF\nGO\n\nSET ANSI_NULLS OFF\nGO\n\n\nGO\n\n\n"
+    )
+    return create_view
+
+
+def get_select_statement(col: str, col_type: str) -> str:
+    convert = col_type_convert.get(col_type)
+    if convert:
+        return f"dbo.{convert}(T1.[{col}]) AS [{col}]"
+    return f"T1.[{col}]"
+
+
+def create_load_sql_files(sql_folder: str, tables_with_columns: dict[str, list[str]]):
+    for table, columns in tables_with_columns.items():
+        sql_file = f"{sql_folder}\\views_load\\load.{table}.sql"
+        cols = [f"[{c}]" for c in columns]
+        query = "SELECT " + ",\n\t".join(cols) + f"\nFROM [export_csv].[{table}]"
+        create_view = get_create_view("load", table, query)
+        with open(sql_file, "w", encoding="latin-1") as fwh:
+            fwh.write(create_view)
+
+
+def create_drop_create_sql_files(sql_folder: str, tables_with_columns: dict[str, list[str]], system: str = "OPTIMA"):
+    for table in tables_with_columns.keys():
+        sql_file = f"{sql_folder}\\exec_drop_create\\{table}.sql"
+        query = (
+            f"USE [GC]\nGO\n\nDROP TABLE IF EXISTS [{system.lower()}].[{table}]\nGO\n\n"
+            + f"SELECT *\nINTO [{system.lower()}].[{table}]\nFROM [{system}].[load].[{table}]"
+        )
+        with open(sql_file, "w", encoding="latin-1") as fwh:
+            fwh.write(query)
+
+
+def create_update_sql_files(sql_folder: str, tables_with_columns: dict[str, list[str]], system: str = "OPTIMA"):
+    for table in tables_with_columns.keys():
+        sql_file = f"{sql_folder}\\exec_update\\{table}.sql"
+        query = (
+            f"USE [GC]\nGO\n\nTRUNCATE TABLE [{system.lower()}].[{table}]\nGO\n\n"
+            + f"INSERT INTO [{system.lower()}].[{table}]\n"
+            + f"SELECT *\nFROM [{system}].[load].[{table}]"
+        )
+        with open(sql_file, "w", encoding="latin-1") as fwh:
+            fwh.write(query)
+
+
+def get_tables_with_columns_from_folder(base_dir: str) -> dict[str, list[str]]:
+    tables_with_columns = {}
+
+    for csv_file in Path(base_dir).glob("*.csv"):
+        table_name = csv_file.stem
+        with open(csv_file, "r", encoding="latin-1") as frh:
+            cols = frh.readline().strip("\n").split(";")
+        cols_unique = []
+        for c in cols:
+            c1 = c.strip('"')
+            if c1 not in cols_unique:
+                cols_unique.append(c1)
+                continue
+            for i in count(1):
+                c2 = f"{c1}_{i}"
+                if c2 not in cols and c2 not in cols_unique:
+                    cols_unique.append(c2)
+                    break
+        tables_with_columns[table_name] = cols_unique
+    return tables_with_columns
+
+
 if __name__ == "__main__":
-    schema_convert("C:\\GlobalCube_LOCOSOFT\\GCStruct_SKR51\\Kontenrahmen")
+    # schema_convert("C:\\GlobalCube_LOCOSOFT\\GCStruct_SKR51\\Kontenrahmen")
+    create_format_xml_from_folder("C:\\GlobalCube_LOCOSOFT\\System\\LOCOSOFT\\Export")