RPeraltaJr
11/6/2018 - 4:40 PM

XML Job Scrape

Parse XML data and insert into MySQL database using PHP

<?php

# Scrape an XML Feed and dump results into a database

# set headers
// ini_set('display_errors', 1);
// ini_set('display_startup_errors', 1);
// error_reporting(E_ALL);
header('Content-Type: text/xml');


# connect to database
include('db.php');


# XML feed
$url = "https://www.linkup.com/xmlFeed.php?access=";


# declare and truncate existing table
$table_name = "jobs";
$sql = "TRUNCATE TABLE $table_name";
if($db->query($sql)) {
  // echo "table cleaned<br>";
}


# get xml content
$xmlstr = file_get_contents($url);
$xmlcont = new SimpleXMLElement($xmlstr);
// foreach($xmlcont as $job) {
//   echo "{$job->company} - {$job->title} - {$job->city} - {$job->state} \r\n";
// }


# log activity file
date_default_timezone_set("America/New_York");
$file_date = date('Y-m');
$timestamp = date('Y-m-d h:m:s');
$log_file = "logs/{$file_date}_log";


# insert into database
if( !empty($xmlstr) || $xmlstr !== false ):

  $job_counter = 0;
  foreach($xmlcont as $job):

    // METHOD #1: ALL JOBS
    $stmt = $db->prepare( "INSERT INTO $table_name (job_id, company, title, city, state, description, country, url, zip, email, facility, recruiter, shift, type, last_updated, posted) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ");
    $stmt->execute( array( $job->jobid, $job->company, $job->title, $job->city, $job->state, $job->rawdesc, $job->country, $job->url, $job->zip, $job->email, $job->faciltiy, $job->recruiter, $job->shift, $job->type, $job->lastmodifieddate, $job->pubdate ) );
    $job_counter++;

    // // METHOD #2: BY UNIQUE IDS
    // // run query to get count
    // $stmt = $db->prepare("SELECT job_id FROM $table_name WHERE job_id=?");
    // $stmt->execute(array($job->jobid));
    // $row_count = $stmt->rowCount();

    // // if count == 0
    // if( $row_count == 0 ):
    //   $stmt = $db->prepare( "INSERT INTO $table_name (job_id, company, title, city, state, description, country, url, zip, email, facility, recruiter, shift, type, last_updated, posted) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) ");
    //   $stmt->execute( array( $job->jobid, $job->company, $job->title, $job->city, $job->state, $job->rawdesc, $job->country, $job->url, $job->zip, $job->email, $job->faciltiy, $job->recruiter, $job->shift, $job->type, $job->lastmodifieddate, $job->pubdate ) );
    //   $job_counter++;
    // endif;

  endforeach;
  
  echo "$job_counter jobs added!<br>";
  $log = "$job_counter jobs added on " . date('Y-m-d h:m:s') . "\r\n";
  file_put_contents($log_file, $log, FILE_APPEND);

else:

  echo "Feed is empty. No jobs added!";
  $log = "Feed is empty on " . date('Y-m-d h:m:s') . "\r\n";
  file_put_contents($log_file, $log, FILE_APPEND);

endif;
<?php

// * set headers
ini_set('display_errors', 1);
ini_set('display_startup_errors', 1);
error_reporting(E_ALL);

// header('Content-Type: text/xml');

// * connect to database
include('db.php');

// * XML feed
$url = "https://recruiting.adp.com/srccsh/public/ws_req_feed.guid?c=1090041&d=ExternalDoNotUse&f=???&o=true";

// * declare and truncate existing table
$table_name = "jobs";
$sql = "TRUNCATE TABLE $table_name";
if($db->query($sql)) {
  // echo "table cleaned<br>";
}

// * get XML contents
$xmlstr = file_get_contents($url);
$xml = simplexml_load_string($xmlstr, 'SimpleXMLElement', LIBXML_NOCDATA);
$jobs_array = json_decode(json_encode((array)$xml), TRUE);
$jobs = $jobs_array['JOB'];
// foreach($jobs as $job):
//     echo $job['JOBTITLE'];
//     echo "<br><br>";
// endforeach;

// * log activity file
date_default_timezone_set("America/New_York");
$file_date = date('Y-m');
$timestamp = date('Y-m-d h:m:s');
$log_file = "logs/{$file_date}_log";

// * insert into database
if( !empty($xmlstr) || $xmlstr !== false ):

  $job_counter = 0;
  foreach($jobs as $job):

    $stmt = $db->prepare( "INSERT INTO $table_name (req_id, title, category, city, state, location, link) VALUES (?, ?, ?, ?, ?, ?, ?) ");
    $stmt->execute([
        $job['REQNUMBER'], 
        $job['JOBTITLE'],
        "N/A",
        $job['LOCATIONCITY'], 
        $job['LOCATIONSTATE'], 
        $job['JOBLOCATION'], 
        $job['JOBLINK']
    ]);
    $job_counter++;

  endforeach;
  
  echo "$job_counter jobs added!<br>";
  $log = "$job_counter jobs added on " . date('Y-m-d h:m:s') . "\r\n";
  file_put_contents($log_file, $log, FILE_APPEND);

else:

  echo "Feed is empty. No jobs added!";
  $log = "Feed is empty on " . date('Y-m-d h:m:s') . "\r\n";
  file_put_contents($log_file, $log, FILE_APPEND);

endif;