Эх сурвалжийг харах

csv bereinigen - erste Ansätze

gc-server3 1 жил өмнө
parent
commit
78320d9fdd
2 өөрчлөгдсөн 45 нэмэгдсэн, 20 устгасан
  1. 26 6
      database/db_create.py
  2. 19 14
      misc/csv_cleanup.py

+ 26 - 6
database/db_create.py

@@ -47,20 +47,36 @@ cfg = DbCreateConfig(
 class database_inspect:
     tables = []
 
-    def __init__(self, dsn):
+    def __init__(self, dsn, source=False):
         self.dsn = DsnConfig(**dsn)
+        self.type = "SOURCE" if source else "DEST"
         self.cursor = self.connect()
 
     def conn_string(self):
         if self.dsn.driver == "mssql":
-            return (
-                "Driver={SQL Server Native Client 11.0};"
-                + f"Server={self.dsn.server};Database={self.dsn.database};Uid={self.dsn.user};Pwd={self.dsn.password}"
+            return ";".join(
+                [
+                    "Driver={SQL Server Native Client 11.0}",
+                    f"Server={self.dsn.server}",
+                    f"Database={self.dsn.database}",
+                    f"Uid={self.dsn.user}",
+                    f"Pwd={self.dsn.password}",
+                ]
             )
         if self.dsn.driver == "mysql":
             return f"mysql+pymysql://{self.dsn.user}:{self.dsn.password}@{self.dsn.server}/{self.dsn.database}?charset=utf8mb4"
         return f"DSN={self.dsn.server};UID={self.dsn.user};PWD={self.dsn.password}"
 
+    def conn_ini(self):
+        return "\r\n".join(
+            [
+                f'{self.type}_SERVER="{self.dsn.server}"',
+                f'{self.type}_USER="{self.dsn.user}"',
+                f'{self.type}_PASSWORD="{self.dsn.password}"',
+                f'{self.type}_DATABASE="{self.dsn.database}"',
+            ]
+        )
+
     def bcp_conn_params(self):
         return f"-S {self.dsn.server} -d {self.dsn.database} -U {self.dsn.user} -P {self.dsn.password}"
 
@@ -131,17 +147,21 @@ def create(config_file="dbtools/OPTIMA.json"):  #
     base_dir = str(Path(cfg.batch_dir).parent)
 
     df = pd.read_csv(f"{base_dir}/{cfg.csv_file}", sep=";", encoding="latin-1")
+    if "cols" not in df.columns:
+        df["target_db"] = ""
+        df["cols"] = ""
+        df.to_csv(f"{base_dir}/{cfg.csv_file}", sep=";", encoding="latin-1")
     config = df[df["target"].notnull()]
     # print(config.head())
 
-    source_db = database_inspect(cfg.source_dsn)
+    source_db = database_inspect(cfg.source_dsn, source=True)
     source_tables = source_db.get_tables()
     print(source_db.get_prefix())
 
     target_db = database_inspect(cfg.target_dsn)
     target_tables = target_db.get_tables()
 
-    for index, current_table in config.iterrows():
+    for _, current_table in config.iterrows():
         with open(
             f"{cfg.batch_dir}/{current_table['target']}.bat", "w", encoding="cp850"
         ) as f:

+ 19 - 14
misc/csv_cleanup.py

@@ -8,23 +8,28 @@ MIN_AGE = datetime.now().timestamp() - 12 * 60 * 60
 
 
 def csv_cleanup(dirname: str = "misc/data"):
-    for csv_file in Path(dirname).glob("*.csv"):
-        temp_file = Path(str(csv_file) + ".tmp")
-        if csv_file.stat().st_mtime < MIN_AGE:
-            continue
-        print(csv_file.name)
-
-        with (
-            open(csv_file, "r", encoding="latin-1") as frh,
-            open(temp_file, "w", encoding="latin-1") as fwh,
-        ):
-            buffer = " "
-            while buffer != "":
+    if Path(dirname).is_file():
+        csv_cleanup_file(Path(dirname))
+    else:
+        for csv_file in Path(dirname).glob("*.csv"):
+            csv_cleanup_file(csv_file)
+
+
+def csv_cleanup_file(csv_file: Path):
+    temp_file = Path(str(csv_file) + ".tmp")
+    if csv_file.stat().st_mtime < MIN_AGE:
+        return
+    print(csv_file.name)
+    with open(csv_file, "r", encoding="latin-1") as frh:
+        with open(temp_file, "w", encoding="latin-1") as fwh:
+            while True:
                 buffer = frh.read(10_000)
                 fwh.write(re.sub(r'(?<!")\r?\n', "", buffer))
+                if buffer == "":
+                    break
 
-        csv_file.unlink()
-        temp_file.rename(csv_file)
+    csv_file.unlink()
+    temp_file.rename(csv_file)
 
 
 if __name__ == "__main__":