# apachelog.py (1.7 KB)

  1. import re
  2. from datetime import datetime
  3. import xml.etree.ElementTree as ET
  4. import csv
  5. base_dir = '/home/robert/projekte/python/logviewer'
  6. def convert_log(line):
  7. # 10.6.173.69 - - [01/Dec/2020:10:03:16 +0100] "GET /GAPS_BMW/index.php5?&rc=MISView&rm=getReport&ras[]=49331 HTTP/1.1" 200 64512
  8. match = list(re.findall(r"([\d\.]+) - - \[(.*)\] \"GET /.*&ras\[\]=(\d+) .* 200 (\d+)", line)[0])
  9. match[1] = datetime.strptime(match[1], "%d/%b/%Y:%H:%M:%S %z").isoformat(timespec='seconds')
  10. return match
  11. def parse_log(filename):
  12. with open(filename, 'r') as frh:
  13. logs = [convert_log(line) for line in frh.readlines() if line.find('rc=MISView&rm=getReport') > -1]
  14. return logs
  15. def parse_portal_xml(filename):
  16. portal = ET.parse(filename)
  17. folders = portal.getroot().find('Publishes').findall('Publish')
  18. res = {}
  19. for p in folders:
  20. user = p.get('User')
  21. name = p.get('Name')
  22. for i in p.find('Images').findall('Image'):
  23. res[i.get('ID')] = [user, name, i.find('Report').text]
  24. return res
  25. def combine_logs_reports(logs, reports):
  26. return [line + reports.get(line[2], [''] * 3) for line in logs]
  27. def main():
  28. reports = parse_portal_xml(base_dir + '/config/GAPS_BMW_NEU.xml')
  29. logs = parse_log(base_dir + '/apache/access.log')
  30. header = ['ip', 'timestamp', 'id', 'bytes', 'user', 'section', 'report']
  31. logs = combine_logs_reports(logs, reports)
  32. with open(base_dir + '/export/accesslog.csv', 'w') as fwh:
  33. csv_writer = csv.writer(fwh, delimiter=';')
  34. csv_writer.writerow(header)
  35. csv_writer.writerows(logs)
  36. print(len(logs))
  37. print(logs[0])
  38. if __name__ == '__main__':
  39. main()