|
@@ -3,8 +3,6 @@ import re
|
|
|
from datetime import datetime
|
|
|
from pathlib import Path
|
|
|
|
|
|
-import plac
|
|
|
-
|
|
|
MIN_AGE = datetime.now().timestamp() - 12 * 60 * 60
|
|
|
|
|
|
|
|
@@ -23,7 +21,7 @@ def csv_cleanup_file(csv_file: Path):
|
|
|
if file_mtime < MIN_AGE:
|
|
|
return
|
|
|
print(csv_file.name)
|
|
|
- with open(csv_file, "r", encoding="latin-1") as frh:
|
|
|
+ with open(csv_file, "r", encoding="latin-1", errors="ignore") as frh:
|
|
|
with open(temp_file, "w", encoding="latin-1") as fwh:
|
|
|
with open(error_file, "w", encoding="latin-1") as ewh:
|
|
|
header = frh.readline()
|
|
@@ -63,10 +61,15 @@ def csv_cleanup_file(csv_file: Path):
|
|
|
buffer_count = 0
|
|
|
if error_count + solved_count > 0:
|
|
|
print("")
|
|
|
- if error_count > 0:
|
|
|
- print(f"!! Anzahl Fehler: {error_count} !!")
|
|
|
|
|
|
os.utime(temp_file, (file_mtime, file_mtime))
|
|
|
+ if solved_count > 0:
|
|
|
+ print(f"Zeilenverschiebungen behoben: {solved_count}")
|
|
|
+
|
|
|
+ if error_count == 0:
|
|
|
+ error_file.unlink()
|
|
|
+ else:
|
|
|
+ print(f"!! Anzahl Fehler in '{csv_file.name}': {error_count} !!")
|
|
|
csv_file.unlink()
|
|
|
temp_file.rename(csv_file)
|
|
|
|
|
@@ -75,32 +78,17 @@ def cleanup_line(line):
|
|
|
res = []
|
|
|
buffer = ""
|
|
|
|
|
|
- for col in line.strip("\r\n").split(";"):
|
|
|
+ line_iter = iter(line.strip("\r\n").split(";"))
|
|
|
+
|
|
|
+ for col in line_iter:
|
|
|
if '"' not in col:
|
|
|
+ # no string
|
|
|
res.append(col)
|
|
|
continue
|
|
|
- if col == '"':
|
|
|
- # special case
|
|
|
- if buffer == "":
|
|
|
- buffer = col
|
|
|
- else:
|
|
|
- res.append(buffer + col)
|
|
|
- buffer = ""
|
|
|
- continue
|
|
|
- if col[0] == '"' and col[-1] == '"':
|
|
|
- res.append(col)
|
|
|
- continue
|
|
|
- if col[0] == '"':
|
|
|
- buffer = col
|
|
|
- continue
|
|
|
- if col[-1] == '"':
|
|
|
- res.append(buffer + col)
|
|
|
- buffer = ""
|
|
|
- continue
|
|
|
-
|
|
|
- return ";".join(res) + "\n"
|
|
|
|
|
|
+ buffer = col
|
|
|
+ while buffer == '"' or buffer[-1] != '"':
|
|
|
+ buffer += next(line_iter)
|
|
|
+ res.append(buffer)
|
|
|
|
|
|
-if __name__ == "__main__":
|
|
|
- plac.call(csv_cleanup)
|
|
|
- # csv_cleanup("C:\\Users\\GAPS\\Desktop\\GuV_8_O21_csv.csv")
|
|
|
+ return ";".join(res) + "\n"
|