jackrabbit-d
10/18/2017 - 2:05 PM

Index PDFs

PHP   Edit    Copy   Raw
<?php

// REQUIRES XPDF INSTALLED
// http://www.foolabs.com/xpdf/download.html
// also create pdf.txt with 777 permissions

include 'wp-load.php';

// Fetch every PDF attachment in the media library, whatever its parent or status.
$args = array(
    'post_type'      => 'attachment',
    'numberposts'    => -1,
    'post_status'    => null,
    'post_parent'    => null, // any parent
    'post_mime_type' => 'application/pdf',
);
$attachments = get_posts($args);
if ($attachments) {
    $c = 0; // attachments whose extracted text was saved
    $s = 0; // attachments skipped (already had content, unreadable, or update failed)
    foreach ($attachments as $a) {
        // Only fill in attachments that have no description yet.
        if ((string) $a->post_content !== '') {
            $s++;
            continue;
        }

        // Turn the attachment URL into a path relative to the working directory.
        $f = str_replace('http://your-domain.com/', '', $a->guid);

        $mypdf = '';
        // Empty the scratch file first: if pdftotext fails it leaves the file
        // untouched, and the previous PDF's text would otherwise be read back
        // and stored on this attachment.
        if (file_put_contents('pdf.txt', '') !== false) {
            // escapeshellarg() keeps filenames containing spaces or shell
            // metacharacters from breaking (or injecting into) the command.
            shell_exec('/serverroot/path/to/xpdf/pdftotext ' . escapeshellarg($f) . ' pdf.txt');
            $raw = file_get_contents('pdf.txt');
            if ($raw !== false) {
                $mypdf = $raw;
            }
        }

        // Reduce the text to letters, digits and single spaces. Whitespace is
        // converted to spaces *before* stripping so that words separated only
        // by a line break are not fused together; dashes also become spaces.
        $mypdf = (string) preg_replace('/\s+/', ' ', $mypdf);
        $mypdf = (string) preg_replace('/[^A-Za-z0-9\- ]/', '', $mypdf);
        $mypdf = trim(str_replace('-', ' ', $mypdf));

        if ($mypdf === '') {
            echo "<b>Notice: </b>" . esc_html($a->post_title) . " does not contain any readable text. Please enter text into attachment manually.<br/><br/>";
            $s++;
            continue;
        }

        // wp_update_post() returns 0 on failure; only count real successes.
        $updated = wp_update_post(array(
            'ID'           => $a->ID,
            'post_content' => $mypdf,
        ));
        if ($updated) {
            $c++;
        } else {
            $s++;
        }
    }
    echo "<b>{$c} PDFs successfully indexed.<br/>{$s} skipped.</b>";
}

?>