# jshcrm
# 1/25/2018 - 8:35 PM

# pdf_generator.py

"""
Usage:
    pdf = PDF(
        pages=[
            ('my_first_page.html', 'first_page_stylesheet.css'),
            ('my_second_page.html', ['second_page_stylesheet_1.css', 'second_page_stylesheet_2.css'])
        ],
        output="app_directory/pdfs/my_pdf.pdf",
        context={
            'template_variable_1': template_variable_1,
            'template_variable_2': template_variable_2
        },
        base_stylesheets="base.css",
        header=('path_to_header.html', 'path_to_header_css.css'),
        footer=('path_to_footer.html', 'path_to_footer_css.css')
    )
    
    pdf.write_pdf()
"""

from weasyprint import HTML
from weasyprint.document import Document, DocumentMetadata

from django.template import loader


def get_page_body(boxes):
    """Return the first box tagged ``'body'`` found in a depth-first search.

    boxes -- iterable of box objects; each must expose an ``element_tag``
        attribute and an ``all_children()`` method returning its child boxes.

    Returns the matching box, or None when no box in ``boxes`` (or in any of
    their descendants) has ``element_tag == 'body'``.
    """
    for box in boxes:
        if box.element_tag == 'body':
            return box

        # Only stop when the subtree actually contains the body; otherwise
        # keep going so that later siblings are searched as well.
        found = get_page_body(box.all_children())
        if found is not None:
            return found

    return None

    
class PDF(object):
    """Render Django templates with WeasyPrint and write them out as one PDF.

    Every entry of ``pages`` is rendered on its own (base stylesheets plus the
    entry's stylesheets), the resulting WeasyPrint pages are concatenated, and
    an optional header and footer are overlaid on every page but the first.
    """

    def __init__(self, pages, output, context=None, base_stylesheets=None, header=None, footer=None):
        """
        pages -- a list of tuples of strings containing the path to the html and the page specific
            stylesheets. Stylesheets can be a string or a list.

            ex. [('path_to_my_html.html', 'path_to_my_stylesheet.css'),
                ('path_to_my_html.html', ['path_to_my_first_stylesheet.css', 'path_to_the_second.css'])]

        output -- string path of the file to write the pdf to

        context -- django context dictionary

        base_stylesheets -- path to base stylesheets to include in all pages. can be a string path to a single
            stylesheet or a list of stylesheet paths.

        header -- tuple of path to html and path to stylesheet

        footer -- tuple of path to html and path to stylesheet
        """

        self.pages = pages
        self.output = output
        self.context = context or {}
        self.header = header
        self.footer = footer

        # Normalise once so the rest of the class can always concatenate lists.
        # (Checking the raw argument here used to turn ``None`` into ``[[]]``.)
        self.base_stylesheets = self._as_list(base_stylesheets)

    @staticmethod
    def _as_list(stylesheets):
        """Return ``stylesheets`` as a new list: empty for falsy input, wrapped if a single value."""
        if not stylesheets:
            return []
        if isinstance(stylesheets, (list, tuple)):
            return list(stylesheets)
        return [stylesheets]

    def html(self, template):
        """Load the Django template at path ``template`` and render it with ``self.context``."""
        html_loader = loader.get_template(template)
        return html_loader.render(self.context)

    def pdf_page(self, template, stylesheets):
        """Render ``template`` with WeasyPrint and return the rendered document's pages.

        template -- path of the django template to render

        stylesheets -- a single stylesheet or a list of stylesheets to apply
        """
        html = self.html(template)
        # render() expects a list; header/footer tuples carry a single path.
        document = HTML(string=html).render(stylesheets=self._as_list(stylesheets))
        return document.pages

    def add_content(self, pages, content):
        """Overlay the body of a rendered header/footer on every page except the first.

        pages -- the already rendered WeasyPrint pages; modified in place

        content -- tuple of (path to html, path to stylesheet), as passed for
            ``header`` / ``footer``
        """
        content_page = self.pdf_page(content[0], content[1])[0]
        # NOTE: ``_page_box`` is a private WeasyPrint attribute; this follows the
        # header/footer recipe from the WeasyPrint documentation.
        content_body = get_page_body(content_page._page_box.all_children())
        # Copy the *body* box (not the page) before its children are shared
        # with the other pages.
        content_body = content_body.copy_with_children(content_body.all_children())

        for i, page in enumerate(pages):
            if not i:
                # The first page is deliberately left without header/footer.
                continue

            page_body = get_page_body(page._page_box.all_children())
            page_body.children += content_body.all_children()

    def write_pdf(self):
        """Render all configured pages, apply header/footer and write the pdf to ``self.output``."""
        pages = []

        for page in self.pages:
            template, stylesheets = page
            pages.extend(self.pdf_page(template, self.base_stylesheets + self._as_list(stylesheets)))

        # Apply the overlays once, after every template has been rendered.
        # Doing it inside the loop stamped earlier pages again for each
        # additional template.
        if self.header is not None:
            self.add_content(pages, self.header)

        if self.footer is not None:
            self.add_content(pages, self.footer)

        # NOTE(review): the Document constructor signature differs between
        # WeasyPrint releases -- confirm it matches the installed version.
        document = Document(pages=pages, metadata=DocumentMetadata(), url_fetcher=None)
        document.write_pdf(self.output)