csv_trim.py 923 B

12345678910111213141516171819202122232425262728293031
  1. import os
  2. import re
  3. from datetime import datetime
  4. from pathlib import Path
  5. MAX_AGE = datetime.now().timestamp() - 12 * 60 * 60
  6. def csv_trim(dirname: str = "misc/data"):
  7. if Path(dirname).is_file():
  8. csv_trim_file(Path(dirname), True)
  9. else:
  10. for csv_file in Path(dirname).glob("*.csv"):
  11. csv_trim_file(csv_file)
  12. def csv_trim_file(csv_file: Path, ignore_age: bool = False):
  13. temp_file = Path(str(csv_file) + ".tmp")
  14. file_mtime = csv_file.stat().st_mtime
  15. if not ignore_age and file_mtime < MAX_AGE:
  16. return
  17. print(csv_file.name)
  18. with open(csv_file, "r", encoding="latin-1", errors="ignore") as frh:
  19. with open(temp_file, "w", encoding="latin-1") as fwh:
  20. for line in frh.readlines():
  21. fwh.write(re.sub(r"[ ]+\t", "\t", line))
  22. os.utime(temp_file, (file_mtime, file_mtime))
  23. csv_file.unlink()
  24. temp_file.rename(csv_file)