# csv_cleanup.py
  1. import plac
  2. import re
  3. from pathlib import Path
  4. from datetime import datetime
  5. MIN_AGE = datetime.now().timestamp() - 12 * 60 * 60
  6. def csv_cleanup(dirname: str = "misc/data"):
  7. if Path(dirname).is_file():
  8. csv_cleanup_file(Path(dirname))
  9. else:
  10. for csv_file in Path(dirname).glob("*.csv"):
  11. csv_cleanup_file(csv_file)
  12. def csv_cleanup_file(csv_file: Path):
  13. temp_file = Path(str(csv_file) + ".tmp")
  14. if csv_file.stat().st_mtime < MIN_AGE:
  15. return
  16. print(csv_file.name)
  17. with open(csv_file, "r", encoding="latin-1") as frh:
  18. with open(temp_file, "w", encoding="latin-1") as fwh:
  19. while True:
  20. buffer = frh.read(10_000)
  21. fwh.write(re.sub(r'(?<!")\r?\n', "", buffer))
  22. if buffer == "":
  23. break
  24. csv_file.unlink()
  25. temp_file.rename(csv_file)
  26. if __name__ == "__main__":
  27. plac.call(csv_cleanup)