
DB-Tools and PDF

- Parallel execution of the import
- plac factored out
- Central entry point
Global Cube, 2 years ago
commit be77fe457c
5 changed files with 98 additions and 30 deletions
  1. dbtools/db.py (+24 -0)
  2. dbtools/db_create.py (+3 -3)
  3. dbtools/db_run.py (+18 -0)
  4. gctools/apache_ldap.py (+2 -4)
  5. gctools/pdf_test.py (+51 -23)

dbtools/db.py (+24 -0)

@@ -0,0 +1,24 @@
+import bcp_log
+import db_create
+import db_run
+import plac
+
+
+class DBTools:
+    commands = ['create', 'logs', 'run']
+
+    @plac.pos('config_file', '', type=str)
+    def create(self, config_file='dbtools/OPTIMA.json'):
+        db_create.create(config_file)
+
+    @plac.pos('system', '', type=str)
+    def logs(self, system):
+        bcp_log.check_directory(system)
+
+    @plac.pos('system', '', type=str)
+    def run(self, system):
+        db_run.run(system)
+
+
+if __name__ == '__main__':
+    plac.Interpreter.call(DBTools)
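
The new db.py is the central entry point announced in the commit message: plac.Interpreter dispatches the first command-line argument to the matching method listed in commands. A minimal usage sketch, assuming plac's documented behavior of executing a single command when arguments are given (the config path is the default from this diff, and the run argument is a batch directory, since db_run.run globs it for *.bat files):

    python dbtools/db.py create dbtools/OPTIMA.json
    python dbtools/db.py run C:\GlobalCube\System\OPTIMA\SQL\batch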

dbtools/db_create.py (+3 -3)

@@ -4,8 +4,9 @@ from pathlib import Path
 from re import escape
 from numpy import select
 import pandas as pd
-import plac
 import pyodbc
+from dataclasses import dataclass
+
 
 DbCreateConfig = namedtuple('DbCreateConfig', 'name csv_file clients filter source_dsn target_dsn stage_dir batch_dir')
 DsnConfig = namedtuple('DsnConfig', 'user password server database driver schema')
@@ -77,7 +78,6 @@ class database_inspect():
         return table
 
 
-@plac.pos('config_file', '', type=str)
 def create(config_file='dbtools/OPTIMA.json'):
     cfg_import = json.load(open(config_file, 'r', encoding='latin-1'))
     base_dir = Path(config_file).resolve().parent
@@ -191,4 +191,4 @@ def create(config_file='dbtools/OPTIMA.json'):
 
 
 if __name__ == '__main__':
-    plac.call(create)
+    create()
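
With the plac decorator removed, create() is a plain function that db.py now wraps. The configuration it loads keeps the namedtuple shape shown in the hunk context above; a hypothetical DsnConfig instance with placeholder values (not taken from the actual OPTIMA.json) would look like:

    cfg = DsnConfig(user='sa', password='***', server='localhost\\SQLEXPRESS',
                    database='OPTIMA', driver='mssql', schema='import')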

dbtools/db_run.py (+18 -0)

@@ -0,0 +1,18 @@
+from concurrent.futures import ThreadPoolExecutor
+from pathlib import Path
+import subprocess
+
+
+def task(name):
+    return subprocess.Popen(f'C:\\Windows\\System32\\cmd.exe /C "{name}"', stdout=subprocess.DEVNULL).wait()
+
+
+def run(base_dir):
+    files = [str(f) for f in Path(base_dir).glob('*.bat') if not f.name.startswith('_')]
+
+    with ThreadPoolExecutor(max_workers=5) as executor:
+        executor.map(task, files)
+
+
+if __name__ == '__main__':
+    run('C:\\GlobalCube\\System\\OPTIMA\\SQL\\batch')
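
task() returns the exit code from Popen(...).wait(), and executor.map yields those codes in input order, but run() currently discards them. A hypothetical extension (not part of this commit) that surfaces failing batch files:

    with ThreadPoolExecutor(max_workers=5) as executor:
        for name, code in zip(files, executor.map(task, files)):
            if code != 0:
                print(f'{name} exited with code {code}')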

gctools/apache_ldap.py (+2 -4)

@@ -24,14 +24,12 @@ def connect_ldap3():
         print(conn.result)
 
 
-
 if __name__ == '__main__':
     # connect_pyldap()
-   connect_ldap3()
-
+    connect_ldap3()
 
 # from ldap3 import Server, Connection, AUTH_SIMPLE, STRATEGY_SYNC, ALL
 # s = Server(HOST, port=389, get_info=ALL)
 # c = Connection(s, authentication=AUTH_SIMPLE, user=user_dn, password=PASSWORD, check_names=True, lazy=False, client_strategy=STRATEGY_SYNC, raise_exceptions=True)
 # c.open()
-# c.bind()
+# c.bind()

gctools/pdf_test.py (+51 -23)

@@ -4,26 +4,54 @@ import json
 from pathlib import Path
 from datetime import datetime, timedelta
 
-today = datetime.now()
-yesterday = today - timedelta(days=1)
-current_date = [today.strftime('%d.%m.%Y'), today.strftime('%d/%m/%Y'), yesterday.strftime('%d.%m.%Y'), yesterday.strftime('%d/%m/%Y')]
-errors = {
-    'empty': [],
-    'outdated': []
-}
-
-files = [f for f in Path('C:\\GAPS_BMW\\Portal\\Publish\\daten\\GAPS_BMW_neu').glob('*.pdf')]
-files_count = len(files)
-
-for i, f in enumerate(files):
-    print(f'({i}/{files_count}) {f.name}                 ', end='\r')
-    with pdfplumber.open(str(f)) as pdf:
-        text = pdf.pages[0].extract_text()
-        if re.search(r'\d+ von \d+$', text):
-            errors['empty'].append(f.name)
-        report_date = re.search(r'\d{2}[\./]\d{2}[\./]\d{4}', text.split('\n')[0])
-        if report_date is not None and report_date.group() not in current_date:
-            errors['outdated'].append([f.name, report_date.group()])
-
-print()
-print(json.dumps(errors, indent=2))
+
+def current_date_test(base_dir: str):
+    today = datetime.now()
+    yesterday = today - timedelta(days=1)
+    current_date = [today.strftime('%d.%m.%Y'), today.strftime('%d/%m/%Y'), yesterday.strftime('%d.%m.%Y'), yesterday.strftime('%d/%m/%Y')]
+    errors = {
+        'empty': [],
+        'outdated': []
+    }
+
+    files = [f for f in Path(base_dir).glob('*.pdf')]
+    files_count = len(files)
+
+    for i, f in enumerate(files):
+        print(f'({i}/{files_count}) {f.name}                 ', end='\r')
+        with pdfplumber.open(str(f)) as pdf:
+            text = pdf.pages[0].extract_text()
+            if re.search(r'\d+ von \d+$', text):
+                errors['empty'].append(f.name)
+            report_date = re.search(r'\d{2}[\./]\d{2}[\./]\d{4}', text.split('\n')[0])
+            if report_date is not None and report_date.group() not in current_date:
+                errors['outdated'].append([f.name, report_date.group()])
+
+    print()
+    print(json.dumps(errors, indent=2))
+
+
+def missing_data(base_dir: str) -> list[str]:
+    errors = []
+    for f in Path(base_dir).glob('*.pdf'):
+        with pdfplumber.open(str(f)) as pdf:
+            pages = len(pdf.pages)
+            text = pdf.pages[0].extract_text()
+
+        if pages < 2 and (re.search(r'Daten nicht verf', text) or (len(text) < 650 and text.count('\n') < 8)):
+            errors.append(f.name)
+            f.rename(base_dir + '/empty/' + f.name)
+            continue
+
+        with open(base_dir + '/plain/' + f.name + '.txt', 'wb') as fwh:
+            fwh.write(text.encode('utf8'))
+    return errors
+
+
+def main():
+    # current_date_test('C:\\GAPS_BMW\\Portal\\Publish\\daten\\GAPS_BMW_neu')
+    missing_data('C:\\GlobalCube\\ReportOutput')
+
+
+if __name__ == '__main__':
+    main()
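
missing_data() moves suspect PDFs into an empty/ subfolder and dumps the extracted text into plain/; both directories are assumed to exist. A minimal guard before calling it (hypothetical, not part of this commit):

    base = Path('C:\\GlobalCube\\ReportOutput')
    (base / 'empty').mkdir(exist_ok=True)
    (base / 'plain').mkdir(exist_ok=True)

Note that pdfplumber's extract_text() can return None for pages without a text layer, so guarding with text = pdf.pages[0].extract_text() or '' would avoid a TypeError in the re.search calls.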