imap.py 6.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176
  1. from imap_tools import MailBox, AND, MailMessage
  2. import re
  3. import json
  4. import os
  5. import plac
  6. from datetime import datetime, date
  7. class Imap:
  8. commands = ["cleanup", "move", "remove", "add_to_whitelist", "undelivered"]
  9. whitelist = {}
  10. blacklist = []
  11. def __init__(self):
  12. with open(os.path.dirname(__file__) + "/whitelist.json", "r") as frh:
  13. self.whitelist = json.load(frh)
  14. with open(os.path.dirname(__file__) + "/blacklist.json", "r") as frh:
  15. self.blacklist = json.load(frh)
  16. def cleanup(self):
  17. date_now = datetime.now()
  18. date_criteria = date(date_now.year, date_now.month - 1, 1)
  19. msg_limit = 100
  20. with MailBox("mail.global-cube.com").login("archiv", "Gc01am64!") as mb:
  21. folder_list = [f.name for f in mb.folder.list() if "Archive." in f.name]
  22. for folder in folder_list:
  23. msg_count = mb.folder.status(folder)["MESSAGES"]
  24. if msg_count < msg_limit:
  25. continue
  26. mb.folder.set(folder)
  27. uids = [0] * 101
  28. while len(uids) > 100:
  29. messages = mb.fetch(
  30. criteria=AND(date_lt=date_criteria), mark_seen=True, limit=1000, headers_only=True
  31. )
  32. uids = [msg.uid for msg in messages]
  33. mb.delete(uids[0 : (msg_count - msg_limit)])
  34. def move(self):
  35. with MailBox("mail.global-cube.com").login("archiv", "Gc01am64!") as mb:
  36. messages = mb.fetch(
  37. criteria=AND(from_="@global-cube.com"),
  38. mark_seen=True,
  39. bulk=True,
  40. limit=1000,
  41. headers_only=True,
  42. )
  43. for msg in messages:
  44. match = re.findall(r"\+(.*)@", msg.from_)
  45. if not match:
  46. # print(msg.from_, msg.to, msg.subject)
  47. continue
  48. domain = "@" + match[0]
  49. subfolder = "Archive." + domain.replace("@", "").replace(".", "_")
  50. if not mb.folder.exists(subfolder):
  51. mb.folder.create(subfolder)
  52. if self.is_valid_message(msg, domain):
  53. mb.move(msg.uid, subfolder)
  54. # else:
  55. # print(domain, ', '.join(msg.to), msg.subject)
  56. # print(msg.text)
  57. # print([att.filename for att in msg.attachments if att.filename.endswith('.pdf')])
  58. def is_valid_message(self, msg, domain):
  59. for to in msg.to:
  60. if domain in to or "@global-cube.de" in to or msg.from_ == to:
  61. continue
  62. if domain not in self.whitelist:
  63. return False
  64. valid_domain = [entry in to for entry in self.whitelist[domain]]
  65. if not any(valid_domain):
  66. return False
  67. return True
  68. def add_to_whitelist(self):
  69. with MailBox("mail.global-cube.com").login("archiv", "Gc01am64!") as mb:
  70. mb.folder.set("whitelist")
  71. messages = mb.fetch(
  72. criteria=AND(from_="@global-cube.com"),
  73. mark_seen=True,
  74. bulk=True,
  75. limit=1000,
  76. headers_only=True,
  77. )
  78. for msg in messages:
  79. match = re.findall(r"\+(.*)@", msg.from_)
  80. if not match:
  81. # print(msg.from_, msg.to, msg.subject)
  82. continue
  83. domain = "@" + match[0]
  84. if not self.is_valid_message(msg, domain):
  85. if domain not in self.whitelist:
  86. self.whitelist[domain] = []
  87. for to in msg.to:
  88. if domain not in to and to not in self.whitelist[domain]:
  89. self.whitelist[domain].append(to)
  90. with open(os.path.dirname(__file__) + "/whitelist.json", "w") as fwh:
  91. json.dump(self.whitelist, fwh, indent=2)
  92. uids = [msg.uid for msg in messages]
  93. mb.delete(uids)
  94. def remove_absence_messages(self):
  95. with MailBox("mail.global-cube.com").login("versand", "y6!avXX3tQvr") as mb:
  96. messages = mb.fetch(
  97. criteria=AND(to="@global-cube.com"),
  98. mark_seen=False,
  99. bulk=True,
  100. limit=1000,
  101. )
  102. selected = []
  103. for msg in messages:
  104. for b in self.blacklist:
  105. if b in msg.subject or b in msg.text:
  106. selected.append(msg.uid)
  107. mb.delete(selected)
  108. def remove(self):
  109. self.remove_absence_messages()
  110. def undelivered(self):
  111. with MailBox("mail.global-cube.com").login("versand", "y6!avXX3tQvr") as mb:
  112. selected = []
  113. for msg in mb.fetch(
  114. criteria=AND(to="@global-cube.com"),
  115. mark_seen=False,
  116. bulk=True,
  117. limit=1000,
  118. ):
  119. for k in ["undeliverable", "returned", "zurückgeschickt"]:
  120. if k in msg.subject.lower():
  121. match = re.search(
  122. r"<([\w\-\+\.]+@[\w\-\.]+\.[\w\-\.]+)>(.*)",
  123. msg.text,
  124. re.DOTALL,
  125. )
  126. if match:
  127. mail_to = match[1]
  128. text = self.get_error_message(match[2])
  129. selected.append(";".join([mail_to, text]))
  130. if len(msg.attachments) > 0:
  131. msg2 = MailMessage.from_bytes(msg.attachments[0].payload)
  132. selected[-1] += ";" + ";".join([msg2.from_, msg2.subject, msg2.date_str])
  133. with open(os.path.dirname(__file__) + "/undelivered.csv", "w") as fwh:
  134. fwh.write("\n".join(selected))
  135. def get_error_message(self, text):
  136. m1 = re.search(r"550 5.1.1 (Mailbox <.*> does not exist)", text)
  137. if m1:
  138. return m1[1]
  139. m2 = re.search(r"(Host or domain name not found)", text)
  140. if m2:
  141. return m2[1]
  142. m3 = re.search(r"550 5.1.1 (User unknown: .*)\(", text)
  143. if m3:
  144. return m3[1]
  145. m4 = re.search(r"550 5.1.1 (.*)", text)
  146. if m4:
  147. return m4[1].replace("\n", "").replace("\r", "")
  148. m5 = re.search(r"(X-Microsoft-Antispam)", text)
  149. if m5:
  150. return m5[1]
  151. return text[:100]
  152. if __name__ == "__main__":
  153. # plac.Interpreter.call(Imap)
  154. Imap().undelivered()