"""
Winterpixel
6/18/2019 - 1:21 PM

Reading and Editing PDFs
"""

import os
import PyPDF2

# Working dir: the relative PDF file names used below resolve against it.
os.chdir('c:\\users\\torre\\documents')

# Open in read-binary mode. The `with` block closes the file even if PyPDF2
# raises while parsing; previously this handle was never closed.
# NOTE(review): PdfFileReader / numPages / getPage / extractText are the legacy
# camelCase PyPDF2 names -- confirm the installed PyPDF2 version provides them.
with open('meetingminutes1.pdf', 'rb') as pdf_file:
    reader = PyPDF2.PdfFileReader(pdf_file)
    print(reader.numPages)

    # Text of the first page only
    page = reader.getPage(0)
    print(page.extractText())

    # All text in doc, printed one page at a time. Page access is kept inside
    # the `with` block so the underlying file is still open when it happens.
    for page_num in range(reader.numPages):
        print(reader.getPage(page_num).extractText())

# Combine both minutes files into a single PDF. The inputs are opened with
# `with` so nothing is left open, even if reading or writing raises part-way.
with open('meetingminutes1.pdf', 'rb') as pdf1_file, \
        open('meetingminutes2.pdf', 'rb') as pdf2_file:
    # One reader for each file
    reader1 = PyPDF2.PdfFileReader(pdf1_file)
    reader2 = PyPDF2.PdfFileReader(pdf2_file)

    # Loop through each reader in order and add its pages to a new doc
    writer = PyPDF2.PdfFileWriter()
    for source_reader in (reader1, reader2):
        for page_num in range(source_reader.numPages):
            page = source_reader.getPage(page_num)
            writer.addPage(page)

    # Save the output file. As in the original ordering, the write happens
    # before either input file is closed.
    with open('combinedminutes.pdf', 'wb') as output_file:
        writer.write(output_file)