# mazda_archive.py
  1. import hashlib
  2. import logging
  3. import os
  4. import re
  5. import shutil
  6. from datetime import datetime, timedelta
  7. from pathlib import Path
  8. logger = logging.getLogger("mazda")
  9. def file_get_hash(filename: str) -> str:
  10. with open(filename, "r") as frh:
  11. data = frh.read()
  12. return calculate_sha256(data)
  13. def calculate_sha256(data: str) -> str:
  14. return hashlib.sha256(data.encode()).hexdigest()
  15. def archive_files(export_dir: str):
  16. last_week = (datetime.now() - timedelta(days=6)).timestamp()
  17. for file in Path(export_dir).glob("*.json"):
  18. if file.stat().st_ctime < last_week:
  19. file.unlink()
  20. archive_path = Path(export_dir + "/Archiv")
  21. for file in Path(export_dir + "/temp").glob("*.json"):
  22. p = re.search(r"order-report_(\d*)_(20\d{4})_", file.name)
  23. if not p:
  24. continue
  25. order_no = p[1]
  26. period = p[2]
  27. year = period[:4]
  28. dest_folder = archive_path / year / period
  29. os.makedirs(dest_folder, exist_ok=True)
  30. file_hash = file_get_hash(file)
  31. if has_identical_file(dest_folder, file_hash, order_no):
  32. file.unlink()
  33. continue
  34. shutil.copy(file, archive_path.parent / file.name)
  35. file.rename(dest_folder / file.name)
  36. def has_identical_file(target: Path, file_hash: str, order_no: str) -> bool:
  37. for archived_file in Path(target).glob("*.json"):
  38. if order_no not in archived_file.name:
  39. continue
  40. if file_get_hash(archived_file) == file_hash:
  41. return True
  42. return False
  43. def main():
  44. base_dir = "C:/projekte/mazda/"
  45. archive_files(base_dir + "export/Mazda")
  46. if __name__ == "__main__":
  47. main()