|
@@ -31,26 +31,24 @@ def current_date_test(base_dir: str):
|
|
print(json.dumps(errors, indent=2))
|
|
print(json.dumps(errors, indent=2))
|
|
|
|
|
|
|
|
|
|
def missing_data(base_dir: str):
    """Move PDF reports that appear to contain no data out of *base_dir*.

    Every ``*.pdf`` directly inside *base_dir* is opened with pdfplumber and
    the text of its first page is inspected.  A report with fewer than two
    pages is treated as empty when that text

    * contains ``'Daten nicht verf'``, or
    * ends with ``'unprod.abw.'``, or
    * is shorter than 650 characters and has fewer than 8 line breaks.

    Empty reports are moved to the sibling directory ``<base_dir>/../leer``,
    which is created on demand.

    Args:
        base_dir: Directory that holds the PDF reports to check.

    Returns:
        A list with the file names (without directory) of all reports that
        were classified as empty and moved.
    """
    source_dir = Path(base_dir)
    target_dir = source_dir / '..' / 'leer'
    errors = []

    # Take a snapshot of the listing before touching anything: files are
    # moved out of the directory while it is being processed.
    for f in sorted(source_dir.glob('*.pdf')):
        with pdfplumber.open(str(f)) as pdf:
            pages = len(pdf.pages)
            # A PDF without pages, or a first page without a text layer,
            # yields no text; normalise to '' so the checks below cannot
            # fail on None or on an empty page list.
            text = (pdf.pages[0].extract_text() or '') if pages else ''

        if pages >= 2:
            continue

        # NOTE(review): 'Daten nicht verf' is presumably the start of the
        # German "Daten nicht verfügbar" (data not available), cut before
        # the umlaut - confirm against a real report.
        looks_empty = (
            'Daten nicht verf' in text
            or text.endswith('unprod.abw.')
            or (len(text) < 650 and text.count('\n') < 8)
        )
        if not looks_empty:
            continue

        errors.append(f.name)
        # The PDF handle is already closed here; renaming a file that is
        # still open fails on Windows.
        target_dir.mkdir(exist_ok=True)
        f.rename(target_dir / f.name)

    return errors
|
|
|
|
|
|
|
|
|
|
def main():
    """Run the empty-report check on the raw report output folder.

    The list returned by ``missing_data`` is not used here.
    """
    # Disabled check, kept for reference:
    # current_date_test('C:\\GAPS_BMW\\Portal\\Publish\\daten\\GAPS_BMW_neu')
    report_dir = 'C:\\GlobalCube\\ReportOutput\\raw'
    missing_data(report_dir)
|
|
|
|
|
|
|
|
|
|
if __name__ == '__main__':
|
|
if __name__ == '__main__':
|