# imap.py (6.9 KB)
  1. import json
  2. import os
  3. import re
  4. from dataclasses import dataclass
  5. from datetime import datetime, timedelta
  6. import plac
  7. from imap_tools import AND, MailBox, MailMessage
  8. @dataclass
  9. class ImapCredentials:
  10. server: str
  11. username: str
  12. password: str
  13. class Imap:
  14. commands = ["cleanup", "move", "remove", "add_to_whitelist", "undelivered"]
  15. whitelist = {}
  16. blacklist = []
  17. credentials = {
  18. "archiv": ImapCredentials("mail.global-cube.com", "archiv", "Gc01am64!"),
  19. "versand": ImapCredentials("mail.global-cube.com", "versand", "y6!avXX3tQvr"),
  20. }
  21. def __init__(self):
  22. with open(os.path.dirname(__file__) + "/whitelist.json", "r") as frh:
  23. self.whitelist = json.load(frh)
  24. with open(os.path.dirname(__file__) + "/blacklist.json", "r") as frh:
  25. self.blacklist = json.load(frh)
  26. def connect(self, key):
  27. creds = self.credentials[key]
  28. return MailBox(creds.server).login(creds.username, creds.password)
  29. def cleanup(self):
  30. date_now = datetime.now()
  31. date_criteria = (date_now - timedelta(days=60)).date()
  32. msg_limit = 100
  33. with self.connect("archiv") as mb:
  34. folder_list = [f.name for f in mb.folder.list() if "Archive." in f.name]
  35. folder_list.sort()
  36. with open("mailserver/folder_list.json", "w") as fwh:
  37. json.dump(folder_list, fwh, indent=2)
  38. for folder in folder_list:
  39. msg_count = mb.folder.status(folder)["MESSAGES"]
  40. if msg_count < msg_limit:
  41. continue
  42. mb.folder.set(folder)
  43. uids = [0] * 101
  44. while len(uids) > 100:
  45. messages = mb.fetch(
  46. criteria=AND(date_lt=date_criteria), mark_seen=True, limit=1000, headers_only=True
  47. )
  48. uids = [msg.uid for msg in messages]
  49. mb.delete(uids[0 : (msg_count - msg_limit)])
  50. def move(self):
  51. with self.connect("archiv") as mb:
  52. messages = mb.fetch(
  53. criteria=AND(from_="@global-cube.com"), mark_seen=True, bulk=True, limit=1000, headers_only=True
  54. )
  55. for msg in messages:
  56. if msg.subject.count(";") > 3:
  57. # statusmail
  58. mb.delete([msg.uid])
  59. continue
  60. match = re.findall(r"\+(.*)@", msg.from_)
  61. if not match:
  62. # print(msg.from_, msg.to, msg.subject)
  63. continue
  64. domain = "@" + match[0]
  65. subfolder = "Archive." + domain.replace("@", "").replace(".", "_")
  66. if not mb.folder.exists(subfolder):
  67. mb.folder.create(subfolder)
  68. if self.is_valid_message(msg, domain):
  69. mb.move(msg.uid, subfolder)
  70. # else:
  71. # print(domain, ', '.join(msg.to), msg.subject)
  72. # print(msg.text)
  73. # print([att.filename for att in msg.attachments if att.filename.endswith('.pdf')])
  74. def is_valid_message(self, msg, domain):
  75. for to in msg.to:
  76. if domain in to or "@global-cube.de" in to or msg.from_ == to:
  77. continue
  78. if domain not in self.whitelist:
  79. return False
  80. valid_domain = [entry in to for entry in self.whitelist[domain]]
  81. if not any(valid_domain):
  82. return False
  83. return True
  84. def add_to_whitelist(self):
  85. with self.connect("archiv") as mb:
  86. mb.folder.set("whitelist")
  87. messages = mb.fetch(
  88. criteria=AND(from_="@global-cube.com"), mark_seen=True, bulk=True, limit=1000, headers_only=True
  89. )
  90. for msg in messages:
  91. match = re.findall(r"\+(.*)@", msg.from_)
  92. if not match:
  93. # print(msg.from_, msg.to, msg.subject)
  94. continue
  95. domain = "@" + match[0]
  96. if not self.is_valid_message(msg, domain):
  97. if domain not in self.whitelist:
  98. self.whitelist[domain] = []
  99. for to in msg.to:
  100. if domain not in to and to not in self.whitelist[domain]:
  101. self.whitelist[domain].append(to)
  102. with open(os.path.dirname(__file__) + "/whitelist.json", "w") as fwh:
  103. json.dump(self.whitelist, fwh, indent=2)
  104. uids = [msg.uid for msg in messages]
  105. mb.delete(uids)
  106. def remove_absence_messages(self):
  107. with self.connect("versand") as mb:
  108. messages = mb.fetch(
  109. criteria=AND(to="@global-cube.com"), mark_seen=False, bulk=True, limit=1000, headers_only=True
  110. )
  111. selected = []
  112. for msg in messages:
  113. for b in self.blacklist:
  114. if b in msg.subject or b in msg.text:
  115. selected.append(msg.uid)
  116. mb.delete(selected)
  117. def remove(self):
  118. self.remove_absence_messages()
  119. def undelivered(self):
  120. with self.conncet("versand") as mb:
  121. selected = []
  122. for msg in mb.fetch(
  123. criteria=AND(to="@global-cube.com"),
  124. mark_seen=False,
  125. bulk=True,
  126. limit=1000,
  127. ):
  128. for k in ["undeliverable", "returned", "zurückgeschickt"]:
  129. if k in msg.subject.lower():
  130. match = re.search(
  131. r"<([\w\-\+\.]+@[\w\-\.]+\.[\w\-\.]+)>(.*)",
  132. msg.text,
  133. re.DOTALL,
  134. )
  135. if match:
  136. mail_to = match[1]
  137. text = self.get_error_message(match[2])
  138. selected.append(";".join([mail_to, text]))
  139. if len(msg.attachments) > 0:
  140. msg2 = MailMessage.from_bytes(msg.attachments[0].payload)
  141. selected[-1] += ";" + ";".join([msg2.from_, msg2.subject, msg2.date_str])
  142. with open(os.path.dirname(__file__) + "/undelivered.csv", "w") as fwh:
  143. fwh.write("\n".join(selected))
  144. def get_error_message(self, text):
  145. m1 = re.search(r"550 5.1.1 (Mailbox <.*> does not exist)", text)
  146. if m1:
  147. return m1[1]
  148. m2 = re.search(r"(Host or domain name not found)", text)
  149. if m2:
  150. return m2[1]
  151. m3 = re.search(r"550 5.1.1 (User unknown: .*)\(", text)
  152. if m3:
  153. return m3[1]
  154. m4 = re.search(r"550 5.1.1 (.*)", text)
  155. if m4:
  156. return m4[1].replace("\n", "").replace("\r", "")
  157. m5 = re.search(r"(X-Microsoft-Antispam)", text)
  158. if m5:
  159. return m5[1]
  160. return text[:100]
# Run as a script: let plac build the command-line interface from the Imap class.
if __name__ == "__main__":
    plac.Interpreter.call(Imap)
    # Debugging shortcut: call a single command directly instead of going through plac.
    # Imap().undelivered()