bcp_log.py
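"""Summarise SQL Server bcp (bulk copy) runs from their log files.

For each file prefix this script reads the export log
('<prefix>.bcp1.log'), the import log ('<prefix>.bcp2.log') and the
list of ignored rows ('<prefix>.in.log'), then writes one CSV line per
prefix with row counts, durations and the size of the exported file.
"""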

import re
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path


@dataclass
class BulkcopyResult:
    file_name: str
    timestamp: datetime
    imported: int = -1
    exported: int = -1
    ignored: int = 0
    import_duration: float = 0.0
    export_duration: float = 0.0
    file_size: int = -1

    @property
    def missing(self) -> int:
        return self.exported - self.imported - self.ignored

    def to_csv(self) -> str:
        return (
            f"{self.file_name};{self.timestamp.strftime('%Y-%m-%dT%H:%M:%S')};"
            + f"{self.exported};{self.imported};{self.ignored};{self.missing};"
            + f"{self.export_duration};{self.import_duration};"
            + f"{self.file_size}"
        )

    def __str__(self) -> str:
        return "\n".join(
            [
                f"Filename: {self.file_name}",
                f"Last run: {self.timestamp.strftime('%d.%m.%Y %H:%M')}",
                "",
                f"Exported: {self.exported:>7}",
                f"Imported: {self.imported:>7}",
                f"Ignored:  {self.ignored:>7}",
                f"Missing:  {self.missing:>7}",
                "",
                f"Duration: {self.export_duration:>11} s",
                f"          {self.import_duration:>11} s",
                "",
                f"Filesize: {self.file_size:>7}",
            ]
        )


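# A report rendered by BulkcopyResult.__str__ looks roughly like this
# (values are illustrative placeholders, not real data):
#
#   Filename: ORDER_LINE_1
#   Last run: 01.01.2024 12:00
#
#   Exported:    1000
#   Imported:     998
#   Ignored:        2
#   Missing:        0
#
#   Duration:        12.5 s
#                     8.3 s
#
#   Filesize:  123456

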
def check_logfiles(prefix: str, base_dir: str) -> BulkcopyResult:
    """Collect export/import statistics for a single file prefix."""
    # use the modification time of the import log as the run timestamp
    ts = datetime.fromtimestamp(Path(f"{base_dir}/{prefix}.bcp2.log").stat().st_mtime)
    result = BulkcopyResult(file_name=prefix, timestamp=ts)
    # the '.in.log' file holds two lines for every ignored row
    with open(f"{base_dir}/{prefix}.in.log", "r") as frh:
        result.ignored = len(frh.readlines()) // 2
    # info output of the export step
    with open(f"{base_dir}/{prefix}.bcp1.log", "r", encoding="cp850", errors="ignore") as frh:
        raw_logs = frh.read()
        result.exported = rows_copied(raw_logs)
        result.export_duration = total_time(raw_logs)
    # info output of the import step
    with open(f"{base_dir}/{prefix}.bcp2.log", "r", encoding="cp850", errors="ignore") as frh:
        raw_logs = frh.read()
        result.imported = rows_copied(raw_logs)
        result.import_duration = total_time(raw_logs)
    # size of the exported data file, if it still exists
    csv_file = Path(f"{base_dir}/{prefix}.csv")
    if csv_file.exists():
        result.file_size = csv_file.stat().st_size
    return result


def rows_copied(raw_logs: str) -> int:
    """Extract the number of copied rows from bcp output (German or English)."""
    match = re.search(r"(\d+) Zeilen kopiert\.", raw_logs)
    if match:
        return int(match.group(1))
    match = re.search(r"(\d+) rows copied\.", raw_logs)
    if match:
        return int(match.group(1))
    return -1


def total_time(raw_logs: str) -> float:
    """Extract the total clock time in seconds from bcp output (German or English)."""
    match = re.search(r"Zeit .* gesamt\s*: (\d+)", raw_logs)
    if match:
        return int(match.group(1)) / 1000
    match = re.search(r"Clock Time .* Total\s*: (\d+)", raw_logs)
    if match:
        return int(match.group(1)) / 1000
    return 0.0


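# Illustrative tail of a bcp log file that the two parsers above can
# handle (an assumed excerpt, not verbatim tool output):
#
#   1000 rows copied.
#   Network packet size (bytes): 4096
#   Clock Time (ms.) Total     : 15625  Average : (64.00 rows per sec.)
#
# The German-locale equivalents are "1000 Zeilen kopiert." and
# "Zeit (ms.) gesamt    : 15625"; bcp reports milliseconds, hence the
# division by 1000 above.

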
def check_directory(base_dir: str, res: list[BulkcopyResult] | None = None) -> list[BulkcopyResult]:
    """Recursively collect results for every '*.bcp2.log' file below base_dir."""
    if res is None:
        res = []
    for folder in Path(base_dir).glob("*"):
        if not folder.is_dir():
            continue
        res = check_directory(str(folder), res)
    for filename in Path(base_dir).glob("*.bcp2.log"):
        stem = filename.name[:-9]  # strip the '.bcp2.log' suffix
        res.append(check_logfiles(stem, base_dir))
    return res


def export_log_csv(res: list[BulkcopyResult], output_file: str):
    """Write all results to a semicolon-separated CSV file."""
    with open(output_file, "w") as fwh:
        # column order must match BulkcopyResult.to_csv()
        fwh.write(
            "filename;timestamp;exported;imported;ignored;missing;"
            "export_duration;import_duration;file_size\n"
        )
        for log in res:
            fwh.write(log.to_csv() + "\n")


def bcp_log(logs_dir: str, output_file: str):
    """Scan logs_dir for bcp log files and write the summary CSV."""
    res = check_directory(logs_dir)
    export_log_csv(res, output_file)


if __name__ == "__main__":
    base_dir = str(Path(__file__).parent)
    bcp_log(base_dir + "/SQL/temp", base_dir + "/SQL/bcp.csv.log")
    # check_logfiles('ORDER_LINE_1')