123456789101112131415161718192021222324252627282930313233343536 |
- import plac
- import re
- from pathlib import Path
- from datetime import datetime
# Modification-time cutoff as a POSIX timestamp, computed once at import time:
# 12 hours before "now". csv_cleanup_file() skips any file whose mtime is
# earlier than this value.
_CUTOFF_WINDOW_SECONDS = 12 * 60 * 60
MIN_AGE = datetime.now().timestamp() - _CUTOFF_WINDOW_SECONDS
def csv_cleanup(dirname: str = "misc/data"):
    """Clean a single CSV file, or every ``*.csv`` file directly in a directory.

    Args:
        dirname: Path to one file, or to a directory whose immediate
            ``*.csv`` entries are each handed to ``csv_cleanup_file``.
    """
    target = Path(dirname)
    # A file path is processed on its own; anything else is treated as a
    # directory and searched (non-recursively) for CSV files.
    candidates = [target] if target.is_file() else target.glob("*.csv")
    for candidate in candidates:
        csv_cleanup_file(candidate)
def csv_cleanup_file(csv_file: Path):
    """Rewrite *csv_file* in place with unquoted line breaks removed.

    Every newline that is not immediately preceded by a double quote is
    deleted; a newline that directly follows ``"`` is kept. (Presumably this
    re-joins records that were broken across physical lines -- confirm
    against the producer of these files.)

    The file is read and written as latin-1 in 10,000-character chunks
    through a sibling ``<name>.tmp`` file, which then replaces the original.
    Files whose modification time is earlier than the module-level
    ``MIN_AGE`` cutoff are left untouched. The name of every processed file
    is printed.

    Args:
        csv_file: Path of the CSV file to clean.
    """
    if csv_file.stat().st_mtime < MIN_AGE:
        return

    print(csv_file.name)
    temp_file = Path(str(csv_file) + ".tmp")
    unquoted_newline = re.compile(r'(?<!")\r?\n')

    with open(csv_file, "r", encoding="latin-1") as frh:
        with open(temp_file, "w", encoding="latin-1") as fwh:
            # The lookbehind cannot see past the start of a chunk, so a
            # newline that opens a chunk would be deleted even when the
            # previous chunk ended with a quote. Prefix the chunk with that
            # quote as context and drop it again after substitution (the
            # pattern never removes a quote, so the prefix survives intact).
            context = ""
            for chunk in iter(lambda: frh.read(10_000), ""):
                cleaned = unquoted_newline.sub("", context + chunk)
                fwh.write(cleaned[len(context):])
                context = '"' if chunk.endswith('"') else ""

    # replace() overwrites the destination in a single step, so there is no
    # window in which the original is deleted but the new file is not yet in
    # place (and it also works on platforms where rename() refuses to
    # overwrite an existing file).
    temp_file.replace(csv_file)
# Script entry point: plac.call() invokes csv_cleanup when this file is run
# directly rather than imported.
if __name__ == "__main__":
    plac.call(csv_cleanup)
|