|
@@ -18,42 +18,53 @@ def csv_cleanup(dirname: str = "misc/data"):
|
|
|
|
|
|
def csv_cleanup_file(csv_file: Path):
|
|
def csv_cleanup_file(csv_file: Path):
|
|
temp_file = Path(str(csv_file) + ".tmp")
|
|
temp_file = Path(str(csv_file) + ".tmp")
|
|
|
|
+ error_file = Path(str(csv_file) + ".err")
|
|
file_mtime = csv_file.stat().st_mtime
|
|
file_mtime = csv_file.stat().st_mtime
|
|
if file_mtime < MIN_AGE:
|
|
if file_mtime < MIN_AGE:
|
|
return
|
|
return
|
|
print(csv_file.name)
|
|
print(csv_file.name)
|
|
with open(csv_file, "r", encoding="latin-1") as frh:
|
|
with open(csv_file, "r", encoding="latin-1") as frh:
|
|
with open(temp_file, "w", encoding="latin-1") as fwh:
|
|
with open(temp_file, "w", encoding="latin-1") as fwh:
|
|
- header = frh.readline()
|
|
|
|
- fwh.write(header)
|
|
|
|
- sep_count = header.count(";")
|
|
|
|
|
|
+ with open(error_file, "w", encoding="latin-1") as ewh:
|
|
|
|
+ header = frh.readline()
|
|
|
|
+ fwh.write(header)
|
|
|
|
+ ewh.write(header)
|
|
|
|
+ sep_count = header.count(";")
|
|
|
|
|
|
- buffer = ""
|
|
|
|
- buffer_count = 0
|
|
|
|
|
|
+ buffer = ""
|
|
|
|
+ buffer_count = 0
|
|
|
|
+ solved_count = 0
|
|
|
|
+ error_count = 0
|
|
|
|
|
|
- for line in frh.readlines():
|
|
|
|
- line = line.replace("\t", "")
|
|
|
|
- current_count = line.count(";")
|
|
|
|
- if buffer_count + current_count == sep_count:
|
|
|
|
- if buffer == "":
|
|
|
|
- fwh.write(line)
|
|
|
|
- continue
|
|
|
|
- fwh.write(buffer + line)
|
|
|
|
- print("*", end="")
|
|
|
|
- buffer = ""
|
|
|
|
- buffer_count = 0
|
|
|
|
- elif buffer_count + current_count < sep_count:
|
|
|
|
- buffer += re.subn(r"[\r\n]+", "", line)[0]
|
|
|
|
- buffer_count = buffer.count(";")
|
|
|
|
- else:
|
|
|
|
- line2 = cleanup_line(buffer + line)
|
|
|
|
- if line2.count(";") == sep_count:
|
|
|
|
- fwh.write(line2)
|
|
|
|
|
|
+ for line in frh.readlines():
|
|
|
|
+ line = line.replace("\t", "")
|
|
|
|
+ current_count = line.count(";")
|
|
|
|
+ if buffer_count + current_count == sep_count:
|
|
|
|
+ if buffer == "":
|
|
|
|
+ fwh.write(line)
|
|
|
|
+ continue
|
|
|
|
+ fwh.write(buffer + line)
|
|
|
|
+ print("*", end="")
|
|
|
|
+ solved_count += 1
|
|
|
|
+ buffer = ""
|
|
|
|
+ buffer_count = 0
|
|
|
|
+ elif buffer_count + current_count < sep_count:
|
|
|
|
+ buffer += re.subn(r"[\r\n]+", "", line)[0]
|
|
|
|
+ buffer_count = buffer.count(";")
|
|
else:
|
|
else:
|
|
- print(f"Error: {line2}")
|
|
|
|
- buffer = ""
|
|
|
|
- buffer_count = 0
|
|
|
|
- print("")
|
|
|
|
|
|
+ line2 = cleanup_line(buffer + line)
|
|
|
|
+ if line2.count(";") == sep_count:
|
|
|
|
+ fwh.write(line2)
|
|
|
|
+ else:
|
|
|
|
+ ewh.write(line2)
|
|
|
|
+ print("-", end="")
|
|
|
|
+ error_count += 1
|
|
|
|
+ buffer = ""
|
|
|
|
+ buffer_count = 0
|
|
|
|
+ if error_count + solved_count > 0:
|
|
|
|
+ print("")
|
|
|
|
+ if error_count > 0:
|
|
|
|
+ print(f"!! Anzahl Fehler: {error_count} !!")
|
|
|
|
|
|
os.utime(temp_file, (file_mtime, file_mtime))
|
|
os.utime(temp_file, (file_mtime, file_mtime))
|
|
csv_file.unlink()
|
|
csv_file.unlink()
|