Browse Source

PDF-Tools von Schneider-Gruppe

Global Cube 2 years ago
parent
commit
de1feaa3b0
2 changed files with 26 additions and 18 deletions
  1. 21 11
      pdf/pdf_merge.py
  2. 5 7
      pdf/pdf_test.py

+ 21 - 11
pdf/pdf_merge.py

@@ -1,20 +1,30 @@
 from PyPDF2 import PdfMerger
 from pathlib import Path
+import re
+import json
 
 
-def main(base_dir: str, report_name):
-    target = f"{base_dir}/{report_name}_gesamt.pdf"
-    Path(target).unlink()
-    pdfs = [f for f in Path(base_dir).glob(report_name + '_*.pdf')]
+def pdf_merge(base_dir: str, report_name):
+    report_prefix = re.sub(r'\[([^\]]+)\]', '', report_name)
+    target = f"{base_dir}\\{report_prefix}_0.pdf"
+    if Path(target).exists():
+        Path(target).unlink()
+    pdfs = [f for f in Path(base_dir).glob(report_prefix + '*.pdf')]
+    if len(pdfs) > 0:
+        with PdfMerger() as merger:
+            for pdf in sorted(pdfs, key=lambda f: f.stat().st_mtime):
+                merger.append(pdf)
+            merger.write(target)
 
-    merger = PdfMerger()
 
-    for pdf in pdfs:
-        merger.append(pdf)
-
-    merger.write(target)
-    merger.close()
+def get_reports(config_dir, folder):
+    filename = config_dir + '\\reports.json'
+    reports = json.load(open(filename, 'r'))
+    return [r['name'] for r in reports if r['path'].startswith(folder)]
 
 
 if __name__ == '__main__':
-    main('C:\\GlobalCube\\ReportOutput', 'Z.07 Einzelbetrachtung_lfd_Monat')
+    reports = get_reports('C:\\GlobalCube\\Tasks\\gctools\\logs\\config', 'Team Content/ReportOutput')
+    for r in reports:
+        print(r)
+        pdf_merge('C:\\GlobalCube\\ReportOutput\\raw', r)

+ 5 - 7
pdf/pdf_test.py

@@ -31,26 +31,24 @@ def current_date_test(base_dir: str):
     print(json.dumps(errors, indent=2))
 
 
-def missing_data(base_dir: str) -> list[str]:
+def missing_data(base_dir: str):
     errors = []
     for f in Path(base_dir).glob('*.pdf'):
         with pdfplumber.open(str(f)) as pdf:
             pages = len(pdf.pages)
             text = pdf.pages[0].extract_text()
 
-        if pages < 2 and (re.search(r'Daten nicht verf', text) or (len(text) < 650 and text.count('\n') < 8)):
+        # print(text)
+        if pages < 2 and (re.search(r'Daten nicht verf', text) or text.endswith('unprod.abw.') or (len(text) < 650 and text.count('\n') < 8)):
             errors.append(f.name)
-            f.rename(base_dir + '/empty/' + f.name)
-            continue
+            f.rename(base_dir + '/../leer/' + f.name)
 
-        with open(base_dir + '/plain/' + f.name + '.txt', 'wb') as fwh:
-            fwh.write(text.encode('utf8'))
     return errors
 
 
 def main():
     # current_date_test('C:\\GAPS_BMW\\Portal\\Publish\\daten\\GAPS_BMW_neu')
-    missing_data('C:\\GlobalCube\\ReportOutput')
+    missing_data('C:\\GlobalCube\\ReportOutput\\raw')
 
 
 if __name__ == '__main__':