imap.py 6.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
import json
import os
import re
from dataclasses import dataclass, field
from datetime import datetime, timedelta

import plac
from imap_tools import AND, MailBox, MailMessage
  8. @dataclass
  9. class ImapCredentials:
  10. server: str
  11. username: str
  12. password: str
  13. class Imap:
  14. commands = ["cleanup", "move", "remove", "add_to_whitelist", "undelivered"]
  15. whitelist = {}
  16. blacklist = []
  17. credentials = {
  18. "archiv": ImapCredentials("mail.global-cube.com", "archiv", "Gc01am64!"),
  19. "versand": ImapCredentials("mail.global-cube.com", "versand", "y6!avXX3tQvr"),
  20. }
  21. def __init__(self):
  22. with open(os.path.dirname(__file__) + "/whitelist.json", "r") as frh:
  23. self.whitelist = json.load(frh)
  24. with open(os.path.dirname(__file__) + "/blacklist.json", "r") as frh:
  25. self.blacklist = json.load(frh)
  26. def connect(self, key):
  27. creds = self.credentials[key]
  28. return MailBox(creds.server).login(creds.username, creds.password)
  29. def cleanup(self):
  30. date_now = datetime.now()
  31. date_criteria = (date_now - timedelta(days=60)).date()
  32. msg_limit = 100
  33. with self.connect("archiv") as mb:
  34. folder_list = [f.name for f in mb.folder.list() if "Archive." in f.name]
  35. for folder in folder_list:
  36. msg_count = mb.folder.status(folder)["MESSAGES"]
  37. if msg_count < msg_limit:
  38. continue
  39. mb.folder.set(folder)
  40. uids = [0] * 101
  41. while len(uids) > 100:
  42. messages = mb.fetch(
  43. criteria=AND(date_lt=date_criteria), mark_seen=True, limit=1000, headers_only=True
  44. )
  45. uids = [msg.uid for msg in messages]
  46. mb.delete(uids[0 : (msg_count - msg_limit)])
  47. def move(self):
  48. with self.connect("archiv") as mb:
  49. messages = mb.fetch(
  50. criteria=AND(from_="@global-cube.com"), mark_seen=True, bulk=True, limit=1000, headers_only=True
  51. )
  52. for msg in messages:
  53. if msg.subject.count(";") > 3:
  54. # statusmail
  55. mb.delete([msg.uid])
  56. continue
  57. match = re.findall(r"\+(.*)@", msg.from_)
  58. if not match:
  59. # print(msg.from_, msg.to, msg.subject)
  60. continue
  61. domain = "@" + match[0]
  62. subfolder = "Archive." + domain.replace("@", "").replace(".", "_")
  63. if not mb.folder.exists(subfolder):
  64. mb.folder.create(subfolder)
  65. if self.is_valid_message(msg, domain):
  66. mb.move(msg.uid, subfolder)
  67. # else:
  68. # print(domain, ', '.join(msg.to), msg.subject)
  69. # print(msg.text)
  70. # print([att.filename for att in msg.attachments if att.filename.endswith('.pdf')])
  71. def is_valid_message(self, msg, domain):
  72. for to in msg.to:
  73. if domain in to or "@global-cube.de" in to or msg.from_ == to:
  74. continue
  75. if domain not in self.whitelist:
  76. return False
  77. valid_domain = [entry in to for entry in self.whitelist[domain]]
  78. if not any(valid_domain):
  79. return False
  80. return True
  81. def add_to_whitelist(self):
  82. with self.connect("archiv") as mb:
  83. mb.folder.set("whitelist")
  84. messages = mb.fetch(
  85. criteria=AND(from_="@global-cube.com"), mark_seen=True, bulk=True, limit=1000, headers_only=True
  86. )
  87. for msg in messages:
  88. match = re.findall(r"\+(.*)@", msg.from_)
  89. if not match:
  90. # print(msg.from_, msg.to, msg.subject)
  91. continue
  92. domain = "@" + match[0]
  93. if not self.is_valid_message(msg, domain):
  94. if domain not in self.whitelist:
  95. self.whitelist[domain] = []
  96. for to in msg.to:
  97. if domain not in to and to not in self.whitelist[domain]:
  98. self.whitelist[domain].append(to)
  99. with open(os.path.dirname(__file__) + "/whitelist.json", "w") as fwh:
  100. json.dump(self.whitelist, fwh, indent=2)
  101. uids = [msg.uid for msg in messages]
  102. mb.delete(uids)
  103. def remove_absence_messages(self):
  104. with self.connect("versand") as mb:
  105. messages = mb.fetch(
  106. criteria=AND(to="@global-cube.com"), mark_seen=False, bulk=True, limit=1000, headers_only=True
  107. )
  108. selected = []
  109. for msg in messages:
  110. for b in self.blacklist:
  111. if b in msg.subject or b in msg.text:
  112. selected.append(msg.uid)
  113. mb.delete(selected)
  114. def remove(self):
  115. self.remove_absence_messages()
  116. def undelivered(self):
  117. with self.conncet("versand") as mb:
  118. selected = []
  119. for msg in mb.fetch(
  120. criteria=AND(to="@global-cube.com"),
  121. mark_seen=False,
  122. bulk=True,
  123. limit=1000,
  124. ):
  125. for k in ["undeliverable", "returned", "zurückgeschickt"]:
  126. if k in msg.subject.lower():
  127. match = re.search(
  128. r"<([\w\-\+\.]+@[\w\-\.]+\.[\w\-\.]+)>(.*)",
  129. msg.text,
  130. re.DOTALL,
  131. )
  132. if match:
  133. mail_to = match[1]
  134. text = self.get_error_message(match[2])
  135. selected.append(";".join([mail_to, text]))
  136. if len(msg.attachments) > 0:
  137. msg2 = MailMessage.from_bytes(msg.attachments[0].payload)
  138. selected[-1] += ";" + ";".join([msg2.from_, msg2.subject, msg2.date_str])
  139. with open(os.path.dirname(__file__) + "/undelivered.csv", "w") as fwh:
  140. fwh.write("\n".join(selected))
  141. def get_error_message(self, text):
  142. m1 = re.search(r"550 5.1.1 (Mailbox <.*> does not exist)", text)
  143. if m1:
  144. return m1[1]
  145. m2 = re.search(r"(Host or domain name not found)", text)
  146. if m2:
  147. return m2[1]
  148. m3 = re.search(r"550 5.1.1 (User unknown: .*)\(", text)
  149. if m3:
  150. return m3[1]
  151. m4 = re.search(r"550 5.1.1 (.*)", text)
  152. if m4:
  153. return m4[1].replace("\n", "").replace("\r", "")
  154. m5 = re.search(r"(X-Microsoft-Antispam)", text)
  155. if m5:
  156. return m5[1]
  157. return text[:100]
  158. if __name__ == "__main__":
  159. plac.Interpreter.call(Imap)
  160. # Imap().undelivered()