Albeiro-Dev
4/20/2020 - 3:51 PM

Extraire du texte avec Python

https://pdfminersix.readthedocs.io/en/latest/tutorials/highlevel.html


from pdfminer.layout import LAParams
from io import StringIO
from pdfminer.high_level import extract_text_to_fp
from shutil import copyfileobj

# Convert a PDF to HTML with pdfminer.six, echo the result to stdout and
# save it to disk.

# In-memory text buffer that receives the converted document.
output_string = StringIO()

# The PDF must be opened in binary mode. codec=None follows the pdfminer.six
# high-level tutorial for writing into a text (StringIO) buffer.
with open('/home/spiritusspei/Downloads/CV.pdf', 'rb') as fin:
    extract_text_to_fp(
        fin,
        output_string,
        laparams=LAParams(),
        output_type='html',
        codec=None,
    )

# The input handle is released above: everything below only needs the buffer.
print(output_string.getvalue())

# An explicit encoding keeps the written file independent of the platform
# locale and avoids UnicodeEncodeError on non-ASCII text in the document.
# NOTE(review): the content is HTML (output_type='html') although the target
# path ends in .xml -- confirm which of the two is intended.
with open('/home/spiritusspei/Downloads/CV.xml', 'w', encoding='utf-8') as fd:
    output_string.seek(0)  # rewind so the copy starts at the beginning
    copyfileobj(output_string, fd)