tidusx18
3/13/2018 - 7:59 PM

PDF Parser

const util = require('util');
const fs = require('fs');
const pdf = require('pdf-parse');

const dataBuffer = fs.readFileSync('./sample.pdf');

// Patterns for the fields pulled out of the letter's extracted text.
const LETTER_PATTERNS = Object.freeze({
    // Capture 1: the text after "for"; `.*` is greedy, so it runs up to the
    // last ";" on that line.
    student: /re: *accommodations {1,4} *for *(.*);/i,
    // Captures: 1 = three non-space characters after "Class:",
    // 2 = four digits, 3 = three non-space characters after "Section:".
    course: /Class: *([\S]{3}) *(\d{4}) *Section: *(\S{3})/i,
    // From the first bullet up to (not including) the last " ADA" on the line.
    // NOTE(review): "." does not cross line breaks, so this assumes the bullets
    // and the "ADA" marker land on a single line of extracted text - confirm
    // against real letters.
    accommodations: /•.*(?= ADA)/i
});

/**
 * Matches `pattern` against `text`, failing loudly when nothing is found.
 *
 * @param {string} text - Text to search.
 * @param {RegExp} pattern - Pattern to apply.
 * @param {string} label - Human-readable name of the field, used in the error.
 * @returns {RegExpMatchArray} The match array (full match plus captures).
 * @throws {Error} When `pattern` does not match `text`.
 */
function requireMatch(text, pattern, label) {
    const match = text.match(pattern);

    if (match === null) {
        throw new Error(`Could not find ${label} in the PDF text`);
    }

    return match;
}

/**
 * Extracts the student name, course and accommodation list from the text of
 * an accommodation letter.
 *
 * @param {string} text - Text extracted from the PDF.
 * @returns {{name: string, course: {subject: string, number: string, section: string}, accommodations: string[]}}
 *     The captured fields; each accommodation is trimmed.
 * @throws {Error} When the student, course or accommodations pattern does not match.
 */
function parseAccommodationLetter(text) {
    const [, name] = requireMatch(text, LETTER_PATTERNS.student, 'the student name');
    const [, subject, number, section] = requireMatch(text, LETTER_PATTERNS.course, 'the class and section');
    const [bulletRun] = requireMatch(text, LETTER_PATTERNS.accommodations, 'the accommodations list');

    const accommodations = bulletRun
        .split('•')
        .map((item) => item.trim())
        // Splitting leaves an empty piece before the first bullet; keep only
        // pieces that contain at least one letter.
        .filter((item) => /[a-z]/i.test(item));

    return { name, course: { subject, number, section }, accommodations };
}

pdf(dataBuffer)
    .then((data) => {
        const results = parseAccommodationLetter(data.text);
        const { name, course } = results;

        console.log(`Student: ${name}`);
        console.log(`Course: ${course.subject}${course.number} ${course.section}`);

        console.log(util.inspect(results, { depth: null, colors: true }));
    })
    .catch((error) => {
        // Report the failure and signal it through the exit status instead of
        // leaving the rejection unhandled.
        console.error('Failed to parse ./sample.pdf:', error);
        process.exitCode = 1;
    });