1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950 |
- import hashlib
- import logging
- import os
- import re
- import shutil
- from datetime import datetime, timedelta
- from pathlib import Path
# Module-wide logger for the NASA export/archive utilities.
logger = logging.getLogger("nasa")
def file_get_hash(filename: str) -> str:
    """Return the SHA-256 hex digest of the file's contents.

    Reads in binary mode so the digest does not depend on platform
    newline translation and works for non-UTF-8 files (the previous
    text-mode read would raise UnicodeDecodeError on binary content).
    Streams in chunks to avoid loading large files into memory.

    Args:
        filename: Path to the file to hash (str or os.PathLike).

    Returns:
        Lowercase hexadecimal SHA-256 digest of the raw file bytes.
    """
    digest = hashlib.sha256()
    with open(filename, "rb") as frh:
        # iter() with a sentinel yields fixed-size chunks until EOF.
        for chunk in iter(lambda: frh.read(65536), b""):
            digest.update(chunk)
    return digest.hexdigest()
def calculate_sha256(data: str) -> str:
    """Return the lowercase hex SHA-256 digest of the UTF-8 encoding of *data*."""
    digest = hashlib.sha256()
    digest.update(data.encode())
    return digest.hexdigest()
def archive_files(export_dir: str):
    """Prune stale exports and move temp exports into the archive tree.

    Two passes over *export_dir*:
      1. Delete top-level ``*.json`` exports older than six days.
      2. For each ``*.json`` in ``<export_dir>/temp`` whose name matches
         ``NASA_#####_<YYYYMM>_...``, file it under
         ``<export_dir>/Archiv/<YYYY>/<YYYYMM>/``, skipping (and
         deleting) files whose content is already archived there, and
         leaving a working copy directly in *export_dir*.

    Args:
        export_dir: Root directory of the export area.
    """
    export_path = Path(export_dir)

    # NOTE(review): the cutoff is 6 days, not a full week -- confirm the
    # intended retention window. st_ctime is inode-change time on POSIX
    # but creation time on Windows; verify which semantics are wanted.
    cutoff = (datetime.now() - timedelta(days=6)).timestamp()
    for file in export_path.glob("*.json"):
        if file.stat().st_ctime < cutoff:
            file.unlink()

    archive_path = export_path / "Archiv"
    # Compile once instead of re-scanning the pattern per file.
    period_pattern = re.compile(r"NASA_\d{5}_(20\d{4})_")
    for file in (export_path / "temp").glob("*.json"):
        match = period_pattern.search(file.name)
        if not match:
            # Not a recognized export name; leave it alone.
            continue
        period = match[1]  # e.g. "202405"; first four chars are the year
        dest_folder = archive_path / period[:4] / period
        dest_folder.mkdir(parents=True, exist_ok=True)

        file_hash = file_get_hash(file)
        if has_identical_file(dest_folder, file_hash):
            # Identical content already archived; drop the duplicate.
            file.unlink()
            continue

        # Keep a working copy in the export root, then move the original
        # into its archive folder.
        shutil.copy(file, export_path / file.name)
        file.rename(dest_folder / file.name)
def has_identical_file(target: Path, file_hash: str) -> bool:
    """Return True if any ``*.json`` file under *target* hashes to *file_hash*."""
    return any(
        file_get_hash(candidate) == file_hash
        for candidate in Path(target).glob("*.json")
    )
|