Scraping a JSON feed. source: https://packagist.org/packages/bcncommerce/json-stream
<?php
ini_set('display_errors', 1);
ini_set('display_startup_errors', 1);
error_reporting(E_ALL);
function formatBytes($bytes, $precision = 2) {
$units = array("b", "kb", "mb", "gb", "tb");
$bytes = max($bytes, 0);
$pow = floor(($bytes ? log($bytes) : 0) / log(1024));
$pow = min($pow, count($units) - 1);
$bytes /= (1 << (10 * $pow));
return round($bytes, $precision) . " " . $units[$pow];
}
require_once "vendor/autoload.php";
// uses json streamer to minimize memory usage
use Bcn\Component\Json\Reader;
use Bcn\Component\StreamWrapper\Stream;
// ==========================
// Get job data and save locally
// ==========================
$job_data_file = "job-data.json";
$job_url = "https://boards-api.greenhouse.io/v1/boards/[CLIENT_NAME]/jobs?content=true";
$file = fopen($job_data_file, "w+");
$job_data = file_get_contents($job_url);
$job_data = json_decode($job_data);
$job_data = json_encode($job_data, JSON_PRETTY_PRINT);
fputs($file, $job_data);
// ==========================
// Parse job file and enter into DB
// ==========================
$database_address = "localhost";
$database_name = "database";
$database_user = "root";
$database_pass = "root";
$db = new PDO("mysql:host=$database_address;dbname=$database_name", $database_user, $database_pass, array(
PDO::ATTR_ERRMODE => PDO::ERRMODE_EXCEPTION,
));
$table_name = "jobs";
// first check file isn't empty
if (filesize($job_data_file) > 0) {
// truncate table
$query = $db->prepare("TRUNCATE TABLE $table_name"); // remove all records from a table
$query->execute();
$query->closeCursor();
$fh = fopen($job_data_file, "r"); // open file
$reader = new Reader($fh);
$reader->enter(Reader::TYPE_OBJECT);
$reader->enter("jobs", Reader::TYPE_ARRAY);
while ($job = $reader->read()) {
// echo "<pre>";
// echo var_dump($job);
// echo "</pre>";
// exit();
$job = (object) $job;
$job_id = (int) $job->id;
$job_title = $job->title;
$location = $job->location["name"];
$url = $job->absolute_url;
$department = $job->departments[0]["name"];
$updated_at = $job->updated_at;
// insert into db
$query = $db->prepare("INSERT INTO $table_name (`job_id`, `title`, `location`, `job_url`, `department`, `updated_at`) VALUES ($job_id, :job_title, :location, :url, :department, :updated_at)");
$params = [
":job_title" => $job_title,
":location" => $location,
":url" => $url,
":department" => $department,
":updated_at" => $updated_at
];
$query->execute($params);
$query->closeCursor();
}
$reader->leave();
$reader->leave();
fclose($fh);
} else {
// TODO:
// create email alert
}
echo formatBytes(memory_get_usage()) . "<br>";
SET SQL_MODE = "NO_AUTO_VALUE_ON_ZERO";
SET time_zone = "+00:00";
CREATE TABLE `jobs` (
`id` int(11) NOT NULL,
`job_id` int(11) DEFAULT NULL,
`title` text,
`location` text,
`job_url` text,
`department` text,
`updated_at` timestamp NOT NULL DEFAULT CURRENT_TIMESTAMP ON UPDATE CURRENT_TIMESTAMP
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
ALTER TABLE `jobs`
ADD PRIMARY KEY (`id`);
ALTER TABLE `jobs`
MODIFY `id` int(11) NOT NULL AUTO_INCREMENT;