| 1234567891011121314151617181920212223242526272829 | 
							- import pdfplumber
 
- import re
 
- import json
 
- from pathlib import Path
 
- from datetime import datetime, timedelta
 
# Scan every PDF report in the publish folder and list the ones whose first
# page looks empty or carries a date other than today/yesterday.

# Compiled once; both patterns are applied to every file in the loop below.
EMPTY_REPORT_RE = re.compile(r'\d+ von \d+$')
REPORT_DATE_RE = re.compile(r'\d{2}[\./]\d{2}[\./]\d{4}')


def classify_report_text(text, accepted_dates):
    """Classify the extracted text of a report's first page.

    Args:
        text: Text of the first page, or ``None``/``''`` when the page has no
            extractable text.
        accepted_dates: Date strings that count as up to date.

    Returns:
        A ``(is_empty, stale_date)`` tuple. ``is_empty`` is True when the text
        ends with an ``<n> von <m>`` marker. ``stale_date`` is the date string
        found on the first line when it is not one of ``accepted_dates``,
        otherwise ``None``.
    """
    # A page without a text layer would otherwise crash re.search/str.split;
    # treat it as blank text so the scan continues and nothing is flagged.
    text = text or ''
    is_empty = EMPTY_REPORT_RE.search(text) is not None

    # Only the first line is searched for the report date.
    first_line = text.partition('\n')[0]
    found = REPORT_DATE_RE.search(first_line)
    stale_date = None
    if found is not None and found.group() not in accepted_dates:
        stale_date = found.group()
    return is_empty, stale_date


today = datetime.now()
yesterday = today - timedelta(days=1)

# Accepted report dates: today and yesterday, each in dotted and slashed form.
current_date = [
    day.strftime(fmt)
    for day in (today, yesterday)
    for fmt in ('%d.%m.%Y', '%d/%m/%Y')
]

errors = {
    'empty': [],
    'outdated': [],
}

files = list(Path('C:\\GAPS_BMW\\Portal\\Publish\\daten\\GAPS_BMW_neu').glob('*.pdf'))
files_count = len(files)

# Count from 1 so the progress line ends at (N/N) instead of (N-1/N).
for i, f in enumerate(files, start=1):
    # Trailing spaces + '\r' overwrite the previous (possibly longer) name.
    print(f'({i}/{files_count}) {f.name}                 ', end='\r')
    with pdfplumber.open(str(f)) as pdf:
        # Guard against a PDF with no pages; extract_text() itself may return
        # None or '' for a page without text (depends on pdfplumber version).
        text = pdf.pages[0].extract_text() if pdf.pages else None

    is_empty, stale_date = classify_report_text(text, current_date)
    if is_empty:
        errors['empty'].append(f.name)
    if stale_date is not None:
        errors['outdated'].append([f.name, stale_date])

# Move off the progress line before printing the report.
print()
print(json.dumps(errors, indent=2))
 
 
  |