"""Housekeeping for exported order-report JSON files.

New reports found in ``<export_dir>/temp`` are filed under
``<export_dir>/Archiv/<year>/<period>``; a copy is also placed directly in
``<export_dir>``, where old ``*.json`` files are pruned on every run.
"""

import hashlib
import logging
import os
import re
import shutil
from datetime import datetime, timedelta
from pathlib import Path
from typing import Union

logger = logging.getLogger("mazda")

# Group 1 is the (possibly empty) run of digits after "order-report_",
# group 2 a six-digit period starting with "20" whose first four digits
# are used as the year folder.
_REPORT_NAME = re.compile(r"order-report_(\d*)_(20\d{4})_")


def file_get_hash(filename: Union[str, Path]) -> str:
    """Return the SHA-256 hex digest of a file's content.

    The file is read as bytes so the result does not depend on the
    platform's default text encoding and undecodable bytes cannot raise
    ``UnicodeDecodeError``. Line endings are normalised to ``\\n`` so that
    two files differing only in newline style still hash identically, which
    is what reading in text mode (universal newlines) used to give.

    Args:
        filename: Path of the file to hash.

    Returns:
        The hexadecimal SHA-256 digest.

    Raises:
        OSError: If the file cannot be opened or read.
    """
    with open(filename, "rb") as frh:
        raw = frh.read()
    normalized = raw.replace(b"\r\n", b"\n").replace(b"\r", b"\n")
    return hashlib.sha256(normalized).hexdigest()


def calculate_sha256(data: str) -> str:
    """Return the SHA-256 hex digest of *data* encoded as UTF-8."""
    return hashlib.sha256(data.encode()).hexdigest()


def archive_files(export_dir: Union[str, Path], *, max_age_days: int = 6) -> None:
    """Prune old exports and archive the reports waiting in ``temp``.

    First, every ``*.json`` directly inside *export_dir* whose ``st_ctime``
    is older than *max_age_days* is deleted. Then each ``*.json`` in
    ``<export_dir>/temp`` whose name matches the order-report pattern is
    handled as follows:

    * if the destination folder already holds a file for the same order
      number with identical content, the temp file is simply deleted;
    * otherwise it is copied into *export_dir* and moved into
      ``<export_dir>/Archiv/<year>/<period>``.

    Files in ``temp`` that do not match the pattern are left untouched.

    Args:
        export_dir: Directory containing the ``temp`` and ``Archiv`` folders.
        max_age_days: Age in days after which files in *export_dir* are
            removed.

    Raises:
        OSError: If a file cannot be read, copied, moved or deleted.
    """
    export_path = Path(export_dir)

    # NOTE: st_ctime is the creation time on Windows but the time of the
    # last metadata change on Unix.
    cutoff = (datetime.now() - timedelta(days=max_age_days)).timestamp()
    for exported in export_path.glob("*.json"):
        if exported.stat().st_ctime < cutoff:
            logger.debug("Removing expired export %s", exported)
            exported.unlink()

    archive_path = export_path / "Archiv"
    for report in (export_path / "temp").glob("*.json"):
        match = _REPORT_NAME.search(report.name)
        if not match:
            continue
        order_no, period = match[1], match[2]
        dest_folder = archive_path / period[:4] / period
        os.makedirs(dest_folder, exist_ok=True)

        if has_identical_file(dest_folder, file_get_hash(report), order_no):
            logger.debug("Dropping %s: identical report already archived", report)
            report.unlink()
            continue

        # Publish a copy in the export directory, then move the original
        # into the archive.
        shutil.copy(report, export_path / report.name)
        # NOTE: Path.rename raises FileExistsError on Windows when the
        # target already exists; on Unix the target is silently replaced.
        report.rename(dest_folder / report.name)
        logger.debug("Archived %s to %s", report.name, dest_folder)


def has_identical_file(target: Path, file_hash: str, order_no: str) -> bool:
    """Tell whether *target* already holds a report with the same content.

    Only ``*.json`` files whose name contains *order_no* are hashed; the
    name check is a cheap pre-filter, the hash comparison decides.

    Args:
        target: Folder to search (not recursive).
        file_hash: Digest to look for, as produced by ``file_get_hash``.
        order_no: Order number that must appear in a candidate's file name.

    Returns:
        True if a matching file exists, False otherwise.
    """
    return any(
        file_get_hash(archived_file) == file_hash
        for archived_file in Path(target).glob("*.json")
        if order_no in archived_file.name
    )


def main(base_dir: str = "C:/projekte/mazda/") -> None:
    """Run the archiving for ``<base_dir>/export/Mazda``.

    Args:
        base_dir: Project root; a trailing slash is optional.
    """
    archive_files(Path(base_dir) / "export" / "Mazda")


if __name__ == "__main__":
    main()