# nasa_archive.py
  1. import hashlib
  2. import logging
  3. import os
  4. import re
  5. import shutil
  6. from datetime import datetime, timedelta
  7. from pathlib import Path
# Module-wide logger; uses the fixed name "nasa" (not __name__), presumably
# so the application configures handlers under that name — confirm before renaming.
logger = logging.getLogger("nasa")
  9. def file_get_hash(filename: str) -> str:
  10. with open(filename, "r") as frh:
  11. data = frh.read()
  12. return calculate_sha256(data)
  13. def calculate_sha256(data: str) -> str:
  14. return hashlib.sha256(data.encode()).hexdigest()
  15. def archive_files(export_dir: str):
  16. last_week = (datetime.now() - timedelta(days=6)).timestamp()
  17. for file in Path(export_dir).glob("*.json"):
  18. if file.stat().st_ctime < last_week:
  19. file.unlink()
  20. archive_path = Path(export_dir + "/Archiv")
  21. for file in Path(export_dir + "/temp").glob("*.json"):
  22. p = re.search(r"NASA_\d{5}_(20\d{4})_", file.name)
  23. if not p:
  24. continue
  25. period = p[1]
  26. year = period[:4]
  27. dest_folder = archive_path / year / period
  28. os.makedirs(dest_folder, exist_ok=True)
  29. file_hash = file_get_hash(file)
  30. if has_identical_file(dest_folder, file_hash):
  31. file.unlink()
  32. continue
  33. shutil.copy(file, archive_path.parent / file.name)
  34. file.rename(dest_folder / file.name)
  35. def has_identical_file(target: Path, file_hash: str) -> bool:
  36. for archived_file in Path(target).glob("*.json"):
  37. if file_get_hash(archived_file) == file_hash:
  38. return True
  39. return False