import os
import PyPDF2
# Working dir: the relative PDF file names used below are resolved against it.
os.chdir('c:\\users\\torre\\documents')

# Open in read-binary mode. The context manager closes the handle even if
# parsing or text extraction raises; every use of the reader stays inside the
# `with` body so the underlying file is still open while it is being read.
with open('meetingminutes1.pdf', 'rb') as pdf_file:
    reader = PyPDF2.PdfFileReader(pdf_file)

    # Number of pages in the document
    print(reader.numPages)

    # Text of the first page only
    page = reader.getPage(0)
    print(page.extractText())

    # All text in doc, printed one page at a time
    for page_num in range(reader.numPages):
        print(reader.getPage(page_num).extractText())
# Combine both minutes files into a single PDF.
# The input files are managed by `with` so they are closed even when reading
# or writing fails. They are kept open until the writer has produced the
# output, because the pages added to the writer come from these readers.
with open('meetingminutes1.pdf', 'rb') as pdf1_file, \
        open('meetingminutes2.pdf', 'rb') as pdf2_file:
    # One reader for each file
    reader1 = PyPDF2.PdfFileReader(pdf1_file)
    reader2 = PyPDF2.PdfFileReader(pdf2_file)

    # Loop through each and add the pages to a new doc, first file first
    writer = PyPDF2.PdfFileWriter()
    for page_num in range(reader1.numPages):
        page = reader1.getPage(page_num)
        writer.addPage(page)
    for page_num in range(reader2.numPages):
        page = reader2.getPage(page_num)
        writer.addPage(page)

    # Save the output file; opened only after all pages were collected so a
    # failure while reading does not truncate an existing combinedminutes.pdf.
    with open('combinedminutes.pdf', 'wb') as output_file:
        writer.write(output_file)
# Nothing is left open here: all three handles were closed by the `with` blocks.