# csv_cleanup.py

  1. import plac
  2. import re
  3. from pathlib import Path
  4. from datetime import datetime
  5. MIN_AGE = datetime.now().timestamp() - 12 * 60 * 60
  6. def csv_cleanup(dirname: str = "misc/data"):
  7. for csv_file in Path(dirname).glob("*.csv"):
  8. temp_file = Path(str(csv_file) + ".tmp")
  9. if csv_file.stat().st_mtime < MIN_AGE:
  10. continue
  11. print(csv_file.name)
  12. with (
  13. open(csv_file, "r", encoding="latin-1") as frh,
  14. open(temp_file, "w", encoding="latin-1") as fwh,
  15. ):
  16. buffer = " "
  17. while buffer != "":
  18. buffer = frh.read(10_000)
  19. fwh.write(re.sub(r'(?<!")\r?\n', "", buffer))
  20. csv_file.unlink()
  21. temp_file.rename(csv_file)
  22. if __name__ == "__main__":
  23. plac.call(csv_cleanup)