|  | @@ -4,26 +4,54 @@ import json
 | 
	
		
			
				|  |  |  from pathlib import Path
 | 
	
		
			
				|  |  |  from datetime import datetime, timedelta
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -today = datetime.now()
 | 
	
		
			
				|  |  | -yesterday = today - timedelta(days=1)
 | 
	
		
			
				|  |  | -current_date = [today.strftime('%d.%m.%Y'), today.strftime('%d/%m/%Y'), yesterday.strftime('%d.%m.%Y'), yesterday.strftime('%d/%m/%Y')]
 | 
	
		
			
				|  |  | -errors = {
 | 
	
		
			
				|  |  | -    'empty': [],
 | 
	
		
			
				|  |  | -    'outdated': []
 | 
	
		
			
				|  |  | -}
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -files = [f for f in Path('C:\\GAPS_BMW\\Portal\\Publish\\daten\\GAPS_BMW_neu').glob('*.pdf')]
 | 
	
		
			
				|  |  | -files_count = len(files)
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -for i, f in enumerate(files):
 | 
	
		
			
				|  |  | -    print(f'({i}/{files_count}) {f.name}                 ', end='\r')
 | 
	
		
			
				|  |  | -    with pdfplumber.open(str(f)) as pdf:
 | 
	
		
			
				|  |  | -        text = pdf.pages[0].extract_text()
 | 
	
		
			
				|  |  | -        if re.search(r'\d+ von \d+$', text):
 | 
	
		
			
				|  |  | -            errors['empty'].append(f.name)
 | 
	
		
			
				|  |  | -        report_date = re.search(r'\d{2}[\./]\d{2}[\./]\d{4}', text.split('\n')[0])
 | 
	
		
			
				|  |  | -        if report_date is not None and report_date.group() not in current_date:
 | 
	
		
			
				|  |  | -            errors['outdated'].append([f.name, report_date.group()])
 | 
	
		
			
				|  |  | -
 | 
	
		
			
				|  |  | -print()
 | 
	
		
			
				|  |  | -print(json.dumps(errors, indent=2))
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def current_date_test(base_dir: str):
 | 
	
		
			
				|  |  | +    today = datetime.now()
 | 
	
		
			
				|  |  | +    yesterday = today - timedelta(days=1)
 | 
	
		
			
				|  |  | +    current_date = [today.strftime('%d.%m.%Y'), today.strftime('%d/%m/%Y'), yesterday.strftime('%d.%m.%Y'), yesterday.strftime('%d/%m/%Y')]
 | 
	
		
			
				|  |  | +    errors = {
 | 
	
		
			
				|  |  | +        'empty': [],
 | 
	
		
			
				|  |  | +        'outdated': []
 | 
	
		
			
				|  |  | +    }
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    files = [f for f in Path(base_dir).glob('*.pdf')]
 | 
	
		
			
				|  |  | +    files_count = len(files)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    for i, f in enumerate(files):
 | 
	
		
			
				|  |  | +        print(f'({i}/{files_count}) {f.name}                 ', end='\r')
 | 
	
		
			
				|  |  | +        with pdfplumber.open(str(f)) as pdf:
 | 
	
		
			
				|  |  | +            text = pdf.pages[0].extract_text()
 | 
	
		
			
				|  |  | +            if re.search(r'\d+ von \d+$', text):
 | 
	
		
			
				|  |  | +                errors['empty'].append(f.name)
 | 
	
		
			
				|  |  | +            report_date = re.search(r'\d{2}[\./]\d{2}[\./]\d{4}', text.split('\n')[0])
 | 
	
		
			
				|  |  | +            if report_date is not None and report_date.group() not in current_date:
 | 
	
		
			
				|  |  | +                errors['outdated'].append([f.name, report_date.group()])
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +    print()
 | 
	
		
			
				|  |  | +    print(json.dumps(errors, indent=2))
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def missing_data(base_dir: str) -> list[str]:
 | 
	
		
			
				|  |  | +    errors = []
 | 
	
		
			
				|  |  | +    for f in Path(base_dir).glob('*.pdf'):
 | 
	
		
			
				|  |  | +        with pdfplumber.open(str(f)) as pdf:
 | 
	
		
			
				|  |  | +            pages = len(pdf.pages)
 | 
	
		
			
				|  |  | +            text = pdf.pages[0].extract_text()
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        if pages < 2 and (re.search(r'Daten nicht verf', text) or (len(text) < 650 and text.count('\n') < 8)):
 | 
	
		
			
				|  |  | +            errors.append(f.name)
 | 
	
		
			
				|  |  | +            f.rename(base_dir + '/empty/' + f.name)
 | 
	
		
			
				|  |  | +            continue
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +        with open(base_dir + '/plain/' + f.name + '.txt', 'wb') as fwh:
 | 
	
		
			
				|  |  | +            fwh.write(text.encode('utf8'))
 | 
	
		
			
				|  |  | +    return errors
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +def main():
 | 
	
		
			
				|  |  | +    # current_date_test('C:\\GAPS_BMW\\Portal\\Publish\\daten\\GAPS_BMW_neu')
 | 
	
		
			
				|  |  | +    missing_data('C:\\GlobalCube\\ReportOutput')
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +
 | 
	
		
			
				|  |  | +if __name__ == '__main__':
 | 
	
		
			
				|  |  | +    main()
 |