# pdf_watermark.py
#
# Merge the first page of a letterhead PDF onto every page of the PDFs
# found in a folder (.doc files in that folder are converted to PDF first).
  1. import PyPDF2
  2. import plac
  3. from pathlib import Path
  4. from doc2pdf import doc2pdf
  5. def add_watermark(input_file):
  6. watermark_file = 'c:/tools/template/Briefbogen_GC_Jahnstr_GmbH.pdf'
  7. output_file = Path(input_file).parent.joinpath("mit_Briefpapier", Path(input_file).name)
  8. if not output_file.parent.exists():
  9. output_file.parent.mkdir()
  10. with open(input_file, "rb") as filehandle_input:
  11. # read content of the original file
  12. pdf = PyPDF2.PdfFileReader(filehandle_input)
  13. with open(watermark_file, "rb") as filehandle_watermark:
  14. # read content of the watermark
  15. watermark = PyPDF2.PdfFileReader(filehandle_watermark)
  16. # get first page of the watermark PDF
  17. first_page_watermark = watermark.getPage(0)
  18. # create a pdf writer object for the output file
  19. pdf_writer = PyPDF2.PdfFileWriter()
  20. # get first page of the original PDF
  21. for i in range(pdf.getNumPages()):
  22. current_page = pdf.getPage(i)
  23. # merge the two pages
  24. current_page.mergePage(first_page_watermark)
  25. # add page
  26. pdf_writer.addPage(current_page)
  27. with open(output_file, "wb") as filehandle_output:
  28. # write the watermarked file to the new file
  29. pdf_writer.write(filehandle_output)
  30. def add_watermark_to_folder(base_dir):
  31. for filename in Path(base_dir).glob('*.doc'):
  32. doc2pdf(str(filename))
  33. for filename in Path(base_dir).glob('*.pdf'):
  34. add_watermark(str(filename))
  35. print(filename.name)
  36. if __name__ == '__main__':
  37. plac.call(add_watermark_to_folder)
  38. # add_watermark_to_folder(str(Path(__file__).parent) + '/data/')