
DB-Tools and PDF

- Parallel execution of the import
- plac factored out
- Central entry point
Global Cube, 2 years ago
commit be77fe457c
5 changed files with 98 additions and 30 deletions
  1. dbtools/db.py (+24 -0)
  2. dbtools/db_create.py (+3 -3)
  3. dbtools/db_run.py (+18 -0)
  4. gctools/apache_ldap.py (+2 -4)
  5. gctools/pdf_test.py (+51 -23)

dbtools/db.py (+24 -0)

@@ -0,0 +1,24 @@
+import bcp_log
+import db_create
+import db_run
+import plac
+
+
+class DBTools:
+    commands = ['create', 'logs', 'run']
+
+    @plac.pos('config_file', '', type=str)
+    def create(self, config_file='dbtools/OPTIMA.json'):
+        db_create.create(config_file)
+
+    @plac.pos('system', '', type=str)
+    def logs(self, system):
+        bcp_log.check_directory(system)
+
+    @plac.pos('system', '', type=str)
+    def run(self, system):
+        db_run.run(system)
+
+
+if __name__ == '__main__':
+    plac.Interpreter.call(DBTools)
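
The new db.py is the central entry point announced in the commit message: plac.Interpreter dispatches the first command-line argument to the matching method listed in commands. A minimal usage sketch, assuming plac's documented behavior of executing a single command when arguments are given (the config path is the default from this diff, and the run argument is a batch directory, since db_run.run globs it for *.bat files):

    python dbtools/db.py create dbtools/OPTIMA.json
    python dbtools/db.py run C:\GlobalCube\System\OPTIMA\SQL\batch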

dbtools/db_create.py (+3 -3)

@@ -4,8 +4,9 @@ from pathlib import Path
 from re import escape
 from numpy import select
 import pandas as pd
-import plac
 import pyodbc
+from dataclasses import dataclass
+
 
 DbCreateConfig = namedtuple('DbCreateConfig', 'name csv_file clients filter source_dsn target_dsn stage_dir batch_dir')
 DsnConfig = namedtuple('DsnConfig', 'user password server database driver schema')
@@ -77,7 +78,6 @@ class database_inspect():
         return table
 
 
-@plac.pos('config_file', '', type=str)
 def create(config_file='dbtools/OPTIMA.json'):
     cfg_import = json.load(open(config_file, 'r', encoding='latin-1'))
     base_dir = Path(config_file).resolve().parent
@@ -191,4 +191,4 @@ def create(config_file='dbtools/OPTIMA.json'):
 
 
 if __name__ == '__main__':
-    plac.call(create)
+    create()
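
With the plac decorator removed, create() is a plain function that db.py now wraps. The configuration it loads keeps the namedtuple shape shown in the hunk context above; a hypothetical DsnConfig instance with placeholder values (not taken from the actual OPTIMA.json) would look like:

    cfg = DsnConfig(user='sa', password='***', server='localhost\\SQLEXPRESS',
                    database='OPTIMA', driver='mssql', schema='import')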

dbtools/db_run.py (+18 -0)

@@ -0,0 +1,18 @@
+from concurrent.futures import ThreadPoolExecutor
+from pathlib import Path
+import subprocess
+
+
+def task(name):
+    return subprocess.Popen(f'C:\\Windows\\System32\\cmd.exe /C "{name}"', stdout=subprocess.DEVNULL).wait()
+
+
+def run(base_dir):
+    files = [str(f) for f in Path(base_dir).glob('*.bat') if not f.name.startswith('_')]
+
+    with ThreadPoolExecutor(max_workers=5) as executor:
+        executor.map(task, files)
+
+
+if __name__ == '__main__':
+    run('C:\\GlobalCube\\System\\OPTIMA\\SQL\\batch')
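
task() returns the exit code from Popen(...).wait(), and executor.map yields those codes in input order, but run() currently discards them. A hypothetical extension (not part of this commit) that surfaces failing batch files:

    with ThreadPoolExecutor(max_workers=5) as executor:
        for name, code in zip(files, executor.map(task, files)):
            if code != 0:
                print(f'{name} exited with code {code}')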

gctools/apache_ldap.py (+2 -4)

@@ -24,14 +24,12 @@ def connect_ldap3():
         print(conn.result)
 
 
-
 if __name__ == '__main__':
     # connect_pyldap()
-   connect_ldap3()
-
+    connect_ldap3()
 
 # from ldap3 import Server, Connection, AUTH_SIMPLE, STRATEGY_SYNC, ALL
 # s = Server(HOST, port=389, get_info=ALL)
 # c = Connection(s, authentication=AUTH_SIMPLE, user=user_dn, password=PASSWORD, check_names=True, lazy=False, client_strategy=STRATEGY_SYNC, raise_exceptions=True)
 # c.open()
-# c.bind()
+# c.bind()

gctools/pdf_test.py (+51 -23)

@@ -4,26 +4,54 @@ import json
 from pathlib import Path
 from datetime import datetime, timedelta
 
-today = datetime.now()
-yesterday = today - timedelta(days=1)
-current_date = [today.strftime('%d.%m.%Y'), today.strftime('%d/%m/%Y'), yesterday.strftime('%d.%m.%Y'), yesterday.strftime('%d/%m/%Y')]
-errors = {
-    'empty': [],
-    'outdated': []
-}
-
-files = [f for f in Path('C:\\GAPS_BMW\\Portal\\Publish\\daten\\GAPS_BMW_neu').glob('*.pdf')]
-files_count = len(files)
-
-for i, f in enumerate(files):
-    print(f'({i}/{files_count}) {f.name}                 ', end='\r')
-    with pdfplumber.open(str(f)) as pdf:
-        text = pdf.pages[0].extract_text()
-        if re.search(r'\d+ von \d+$', text):
-            errors['empty'].append(f.name)
-        report_date = re.search(r'\d{2}[\./]\d{2}[\./]\d{4}', text.split('\n')[0])
-        if report_date is not None and report_date.group() not in current_date:
-            errors['outdated'].append([f.name, report_date.group()])
-
-print()
-print(json.dumps(errors, indent=2))
+
+def current_date_test(base_dir: str):
+    today = datetime.now()
+    yesterday = today - timedelta(days=1)
+    current_date = [today.strftime('%d.%m.%Y'), today.strftime('%d/%m/%Y'), yesterday.strftime('%d.%m.%Y'), yesterday.strftime('%d/%m/%Y')]
+    errors = {
+        'empty': [],
+        'outdated': []
+    }
+
+    files = [f for f in Path(base_dir).glob('*.pdf')]
+    files_count = len(files)
+
+    for i, f in enumerate(files):
+        print(f'({i}/{files_count}) {f.name}                 ', end='\r')
+        with pdfplumber.open(str(f)) as pdf:
+            text = pdf.pages[0].extract_text()
+            if re.search(r'\d+ von \d+$', text):
+                errors['empty'].append(f.name)
+            report_date = re.search(r'\d{2}[\./]\d{2}[\./]\d{4}', text.split('\n')[0])
+            if report_date is not None and report_date.group() not in current_date:
+                errors['outdated'].append([f.name, report_date.group()])
+
+    print()
+    print(json.dumps(errors, indent=2))
+
+
+def missing_data(base_dir: str) -> list[str]:
+    errors = []
+    for f in Path(base_dir).glob('*.pdf'):
+        with pdfplumber.open(str(f)) as pdf:
+            pages = len(pdf.pages)
+            text = pdf.pages[0].extract_text()
+
+        if pages < 2 and (re.search(r'Daten nicht verf', text) or (len(text) < 650 and text.count('\n') < 8)):
+            errors.append(f.name)
+            f.rename(base_dir + '/empty/' + f.name)
+            continue
+
+        with open(base_dir + '/plain/' + f.name + '.txt', 'wb') as fwh:
+            fwh.write(text.encode('utf8'))
+    return errors
+
+
+def main():
+    # current_date_test('C:\\GAPS_BMW\\Portal\\Publish\\daten\\GAPS_BMW_neu')
+    missing_data('C:\\GlobalCube\\ReportOutput')
+
+
+if __name__ == '__main__':
+    main()
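
missing_data() moves suspect PDFs into an empty/ subfolder and dumps the extracted text into plain/; both directories are assumed to exist. A minimal guard before calling it (hypothetical, not part of this commit):

    base = Path('C:\\GlobalCube\\ReportOutput')
    (base / 'empty').mkdir(exist_ok=True)
    (base / 'plain').mkdir(exist_ok=True)

Note that pdfplumber's extract_text() can return None for pages without a text layer, so guarding with text = pdf.pages[0].extract_text() or '' would avoid a TypeError in the re.search calls.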