# imap.py
import csv
import json
import os
import re
from dataclasses import dataclass
from datetime import datetime, timedelta

import plac
from imap_tools import AND, MailBox, MailMessage


  8. @dataclass
  9. class ImapCredentials:
  10. server: str
  11. username: str
  12. password: str
  13. class Imap:
  14. commands = ["cleanup", "move", "remove", "add_to_whitelist", "undelivered"]
  15. whitelist = {}
  16. blacklist = []
  17. credentials = {
  18. "archiv": ImapCredentials("mail.global-cube.com", "archiv", "Gc01am64!"),
  19. "versand": ImapCredentials("mail.global-cube.com", "versand", "y6!avXX3tQvr"),
  20. }
  21. def __init__(self):
  22. with open(os.path.dirname(__file__) + "/whitelist.json", "r") as frh:
  23. self.whitelist = json.load(frh)
  24. with open(os.path.dirname(__file__) + "/blacklist.json", "r") as frh:
  25. self.blacklist = json.load(frh)
  26. def connect(self, key):
  27. creds = self.credentials[key]
  28. return MailBox(creds.server).login(creds.username, creds.password)
  29. def cleanup(self):
  30. date_now = datetime.now()
  31. date_criteria = (date_now - timedelta(days=60)).date()
  32. msg_limit = 100
  33. with self.connect("archiv") as mb:
  34. folder_list = [f.name for f in mb.folder.list() if "Archive." in f.name]
  35. folder_list.append("INBOX")
  36. folder_list.sort()
  37. with open("mailserver/folder_list.json", "w") as fwh:
  38. json.dump(folder_list, fwh, indent=2)
  39. for folder in folder_list:
  40. msg_count = mb.folder.status(folder)["MESSAGES"]
  41. if msg_count < msg_limit:
  42. continue
  43. mb.folder.set(folder)
  44. uids = [0] * (msg_limit + 1)
  45. while len(uids) > msg_limit:
  46. messages = mb.fetch(
  47. criteria=AND(date_lt=date_criteria), mark_seen=True, limit=1000, headers_only=True
  48. )
  49. uids = [msg.uid for msg in messages]
  50. mb.delete(uids[0 : (msg_count - msg_limit)])
  51. def move(self):
  52. with self.connect("archiv") as mb:
  53. messages = mb.fetch(
  54. criteria=AND(from_="@global-cube.com"), mark_seen=True, bulk=True, limit=1000, headers_only=True
  55. )
  56. for msg in messages:
  57. if (
  58. msg.subject.count(";") > 3
  59. or "status@global-cube.com" in msg.to
  60. or "Kontenaktualisierung" in msg.subject
  61. or "Wochenbericht" in msg.subject
  62. ):
  63. # statusmail
  64. mb.delete([msg.uid])
  65. continue
  66. match = re.findall(r"\+(.*)@", msg.from_)
  67. if not match:
  68. # print(msg.from_, msg.to, msg.subject)
  69. continue
  70. domain = "@" + match[0]
  71. subfolder = "Archive." + domain.replace("@", "").replace(".", "_")
  72. if not mb.folder.exists(subfolder):
  73. mb.folder.create(subfolder)
  74. if self.is_valid_message(msg, domain):
  75. mb.move(msg.uid, subfolder)
  76. # else:
  77. # print(domain, ', '.join(msg.to), msg.subject)
  78. # print(msg.text)
  79. # print([att.filename for att in msg.attachments if att.filename.endswith('.pdf')])
  80. def is_valid_message(self, msg, domain):
  81. for to in msg.to:
  82. if domain in to or "@global-cube.de" in to or msg.from_ == to:
  83. continue
  84. if domain not in self.whitelist:
  85. return False
  86. valid_domain = [entry in to for entry in self.whitelist[domain]]
  87. if not any(valid_domain):
  88. return False
  89. return True
  90. def add_to_whitelist(self):
  91. with self.connect("archiv") as mb:
  92. mb.folder.set("whitelist")
  93. messages = mb.fetch(
  94. criteria=AND(from_="@global-cube.com"), mark_seen=True, bulk=True, limit=1000, headers_only=True
  95. )
  96. for msg in messages:
  97. match = re.findall(r"\+(.*)@", msg.from_)
  98. if not match:
  99. # print(msg.from_, msg.to, msg.subject)
  100. continue
  101. domain = "@" + match[0]
  102. if not self.is_valid_message(msg, domain):
  103. if domain not in self.whitelist:
  104. self.whitelist[domain] = []
  105. for to in msg.to:
  106. if domain not in to and to not in self.whitelist[domain]:
  107. self.whitelist[domain].append(to)
  108. with open(os.path.dirname(__file__) + "/whitelist.json", "w") as fwh:
  109. json.dump(self.whitelist, fwh, indent=2)
  110. uids = [msg.uid for msg in messages]
  111. mb.delete(uids)
  112. def remove_absence_messages(self):
  113. with self.connect("versand") as mb:
  114. messages = mb.fetch(
  115. criteria=AND(to="@global-cube.com"), mark_seen=False, bulk=True, limit=1000, headers_only=True
  116. )
  117. selected = []
  118. for msg in messages:
  119. for b in self.blacklist:
  120. if b in msg.subject or b in msg.text:
  121. selected.append(msg.uid)
  122. mb.delete(selected)
  123. def remove(self):
  124. self.remove_absence_messages()
  125. def undelivered(self):
  126. with self.conncet("versand") as mb:
  127. selected = []
  128. for msg in mb.fetch(
  129. criteria=AND(to="@global-cube.com"),
  130. mark_seen=False,
  131. bulk=True,
  132. limit=1000,
  133. ):
  134. for k in ["undeliverable", "returned", "zurückgeschickt"]:
  135. if k in msg.subject.lower():
  136. match = re.search(
  137. r"<([\w\-\+\.]+@[\w\-\.]+\.[\w\-\.]+)>(.*)",
  138. msg.text,
  139. re.DOTALL,
  140. )
  141. if match:
  142. mail_to = match[1]
  143. text = self.get_error_message(match[2])
  144. selected.append(";".join([mail_to, text]))
  145. if len(msg.attachments) > 0:
  146. msg2 = MailMessage.from_bytes(msg.attachments[0].payload)
  147. selected[-1] += ";" + ";".join([msg2.from_, msg2.subject, msg2.date_str])
  148. with open(os.path.dirname(__file__) + "/undelivered.csv", "w") as fwh:
  149. fwh.write("\n".join(selected))
  150. def get_error_message(self, text):
  151. m1 = re.search(r"550 5.1.1 (Mailbox <.*> does not exist)", text)
  152. if m1:
  153. return m1[1]
  154. m2 = re.search(r"(Host or domain name not found)", text)
  155. if m2:
  156. return m2[1]
  157. m3 = re.search(r"550 5.1.1 (User unknown: .*)\(", text)
  158. if m3:
  159. return m3[1]
  160. m4 = re.search(r"550 5.1.1 (.*)", text)
  161. if m4:
  162. return m4[1].replace("\n", "").replace("\r", "")
  163. m5 = re.search(r"(X-Microsoft-Antispam)", text)
  164. if m5:
  165. return m5[1]
  166. return text[:100]
  167. if __name__ == "__main__":
  168. plac.Interpreter.call(Imap)
  169. # Imap().undelivered()