iamsk
1/19/2019 - 6:59 AM

pdf_signed_info.py

# -*- coding: utf-8 -*-

import re

from pdfminer.pdfparser import PDFParser
from pdfminer.pdfdocument import PDFDocument
from pdfminer.pdftypes import resolve1
from pdfrw import PdfReader


class PDFSignedInfo(object):
    """Extract signer ("main body") names from the signature fields of a PDF.

    Two interchangeable extractors are provided, one backed by pdfminer
    (``use_pdfminer``) and one backed by pdfrw (``get_main_body``).  Both
    walk the AcroForm fields, take the ``/Contents`` of each field value and
    hand it to ``get_main_body_name`` to pull a name out of it.
    """

    # The same two patterns in text and in UTF-8 byte form, tried in order:
    # first a name ending in "公司" ("company"), then anything between two "@".
    # Signature contents arrive as bytes, and ``re`` refuses to mix a text
    # pattern with bytes data on Python 3, hence both variants.
    _TEXT_PATTERNS = (
        re.compile(u'[@&]([^@&]*公司)'),
        re.compile(u'@([^@&]*)@'),
    )
    _BYTE_PATTERNS = (
        re.compile(u'[@&]([^@&]*公司)'.encode('utf-8')),
        re.compile(u'@([^@&]*)@'.encode('utf-8')),
    )

    def use_pdfminer(self, filename):
        """Return the names found in the signature fields, using pdfminer.

        :param filename: path of the PDF file to inspect.
        :returns: list with one entry per form field that carries a value
            with ``/Contents``; each entry is the result of
            ``get_main_body_name`` (possibly ``None``).  An empty list is
            returned when the document has no AcroForm or no fields.
        """
        names = []
        # Everything stays inside the ``with`` block: the objects are
        # resolved through the parser, which reads from the open file.
        with open(filename, 'rb') as fp:
            doc = PDFDocument(PDFParser(fp))
            acro_form = resolve1(doc.catalog.get('AcroForm'))
            if not acro_form:
                return names
            fields = resolve1(acro_form.get('Fields')) or []
            for ref in fields:
                field = resolve1(ref)
                # resolve1 copes with both direct and indirect values, and
                # passes None through for fields without a value.
                value = resolve1(field.get('V'))
                if not isinstance(value, dict):
                    continue
                contents = resolve1(value.get('Contents'))
                if contents is None:
                    continue
                names.append(self.get_main_body_name(contents))
        return names

    def get_main_body(self, filename):
        """Return the names found in the signature fields, using pdfrw.

        :param filename: path of the PDF file to inspect.
        :returns: list with one entry per form field that carries a value
            with ``/Contents``; each entry is the result of
            ``get_main_body_name`` (possibly ``None``).  An empty list is
            returned when the document has no AcroForm or no fields.
        """
        reader = PdfReader(filename)
        acro_form = reader.Root.AcroForm
        fields = (acro_form.Fields if acro_form else None) or []
        names = []
        for field in fields:
            # Fields without a value, or whose value has no /Contents
            # entry, have nothing to search and are skipped.
            contents = getattr(field.V, 'Contents', None)
            if contents is None:
                continue
            names.append(self.get_main_body_name(contents.to_bytes()))
        return names

    @classmethod
    def get_main_body_name(cls, data):
        """Extract a company or user name from ``data``.

        A company name (text ending in "公司" that follows an ``@`` or
        ``&``) is preferred; otherwise the text enclosed between two ``@``
        characters is used.

        :param data: text or bytes to search (bytes are matched against
            the UTF-8 encoded patterns).
        :returns: the matched name, of the same type as ``data``, or
            ``None`` when neither pattern matches.
        """
        if isinstance(data, bytes):
            patterns = cls._BYTE_PATTERNS
        else:
            patterns = cls._TEXT_PATTERNS
        for pattern in patterns:
            match = pattern.search(data)
            if match:
                return match.group(1)
        return None


if __name__ == '__main__':
    # Script entry point: print the names found in the signature fields of
    # the PDF given as the first command-line argument (default: 'c.pdf').
    import sys

    pdf_path = sys.argv[1] if len(sys.argv) > 1 else 'c.pdf'
    p = PDFSignedInfo()
    # p.use_pdfminer(pdf_path) is the pdfminer-based alternative.
    # Single-argument print(...) behaves the same on Python 2 and Python 3.
    print(p.get_main_body(pdf_path))