m4k3r-net
9/28/2018 - 2:23 AM

PHP remote file/URL cache

PHP remote file/URL cache

<?php
/*

    A simple cache for remote files/URLs
    2011 Carlo Alberto Ferraris <cafxx@strayorange.com>
    ===================================================

    The cache function fetches a remote URL at most once every N seconds
    and serves a locally cached copy in between. It also uses the
    If-Modified-Since HTTP header to avoid downloading the resource again
    if it hasn't changed.

    url_get_contents is a convenience wrapper that combines
    file_get_contents and cache.
    
    Usage
    -----

    To request a given $url at most once every $seconds seconds:
        $url_or_filename = cache($url, $seconds);
    
    so, instead of e.g.
        file_get_contents("http://www.example.com");
    you can write, to request the URL at most once per minute (60 seconds):
        file_get_contents(cache("http://www.example.com", 60));
    or, alternatively,
        url_get_contents("http://www.example.com");
        url_get_contents("http://www.example.com", 60);
        
    The value returned by cache can be either a full path to a local file
    (cached) or the requested URL (in case something failed).
    The value returned by url_get_contents is the one returned by a call
    to file_get_contents.

*/

// File-name prefix given to every cache entry stored in CACHE_DIR.
// May be defined by the including script beforehand to override the default.
if (!defined('CACHE_PREFIX')) {
    define('CACHE_PREFIX', 'cache_');
}

// Directory that holds cache entries, lock files and in-progress downloads.
// Defaults to the system temporary directory; may be defined beforehand to
// override it.
if (!defined('CACHE_DIR')) {
    define('CACHE_DIR', sys_get_temp_dir());
}

/**
 * Return a location from which the contents of $url can be read, fetching
 * the remote resource at most once every $min_expiration seconds.
 *
 * Cache entries live in CACHE_DIR and are named CACHE_PREFIX followed by
 * the SHA-256 hex digest of the URL. This function never throws on fetch
 * or filesystem failures: when anything goes wrong it falls back to the
 * stale cached copy (if one exists) or to the original URL.
 *
 * @param string $url            The remote URL to cache.
 * @param int    $min_expiration Minimum age, in seconds, before the URL is
 *                               requested again (values below 1 become 1).
 * @return string Full path of the local cached file, or $url itself when
 *                no cached copy is available.
 */
function cache($url, $min_expiration) {
    $min_expiration = max( intval( $min_expiration ), 1 );
    $cache_file_name = CACHE_PREFIX . hash('sha256', $url);
    $cache_file_fullpath = CACHE_DIR . '/' . $cache_file_name;
    $cache_file_mtime = @filemtime($cache_file_fullpath);

    if ($cache_file_mtime && $cache_file_mtime >= time() - $min_expiration) {
        // we have a cached version less than $min_expiration seconds old - return it
        return $cache_file_fullpath;
    }

    // fallback result: the stale cached version if there is one, the url otherwise
    $url_or_file = $cache_file_mtime ? $cache_file_fullpath : $url;

    // try to acquire a lock for the url without blocking: if another process
    // is already refreshing this entry, just hand back the fallback
    $lockn = $cache_file_fullpath . '.lock';
    $lockp = @fopen($lockn, 'w+');
    if (!$lockp) {
        return $url_or_file;
    }
    if (!@flock($lockp, LOCK_EX|LOCK_NB)) {
        @fclose($lockp);
        return $url_or_file;
    }

    // we have our lock, now fetch the url into a temporary file created in
    // the cache directory, so that the final rename stays on one filesystem
    $result = $url_or_file;
    $fn = @tempnam(CACHE_DIR, CACHE_PREFIX);
    if ($fn) {
        $cs = _cache_fetch($url, $fn, $cache_file_mtime);
        if ($cs === 304 && $cache_file_mtime) {
            // not modified: the cached copy is still valid, so refresh its
            // mtime to keep it "fresh" for another $min_expiration seconds
            @touch($cache_file_fullpath);
            $result = $cache_file_fullpath;
        } elseif ($cs >= 200 && $cs < 300 && @rename($fn, $cache_file_fullpath)) {
            // fetched successfully: the rename replaced the old cached version
            // (if any) with the new one, so the temporary file is gone
            $result = $cache_file_fullpath;
            $fn = false;
        }
        if ($fn) {
            @unlink($fn);
        }
    }

    // release the lock and remove the lock file on every path
    @flock($lockp, LOCK_UN);
    @fclose($lockp);
    @unlink($lockn);

    return $result;
}

/**
 * Download $url into the file $dest using cURL (private helper of cache()).
 *
 * Follows up to 5 redirects. When $since is a non-zero timestamp the
 * request is made conditional (If-Modified-Since).
 *
 * @param string    $url   The URL to download.
 * @param string    $dest  Path of the file that receives the response body.
 * @param int|false $since Timestamp of the existing cached copy, or a falsy
 *                         value when there is none.
 * @return int The final HTTP status code; 304 when the time condition was
 *             not met (resource unchanged); 0 when the transfer could not
 *             be set up or failed.
 */
function _cache_fetch($url, $dest, $since) {
    if (!function_exists('curl_init')) {
        return 0;
    }
    $fp = @fopen($dest, 'w+b');
    if (!$fp) {
        return 0;
    }

    $status = 0;
    $c = @curl_init();
    if ($c) {
        $options = array(
            CURLOPT_URL => $url,
            CURLOPT_FILE => $fp,
            CURLOPT_FOLLOWLOCATION => true,
            CURLOPT_AUTOREFERER => true,
            CURLOPT_MAXREDIRS => 5,
        );
        if ($since) {
            // the time value only takes effect together with a time
            // condition, so request If-Modified-Since explicitly
            $options[CURLOPT_TIMECONDITION] = CURL_TIMECOND_IFMODSINCE;
            $options[CURLOPT_TIMEVALUE] = $since;
        }
        if (@curl_setopt_array($c, $options) && @curl_exec($c)) {
            $status = intval(@curl_getinfo($c, CURLINFO_HTTP_CODE));
            // when the condition is unmet the body is not delivered, even if
            // the reported status is 200: never treat that as fresh content
            if ($since && @curl_getinfo($c, CURLINFO_CONDITION_UNMET)) {
                $status = 304;
            }
        }
        @curl_close($c);
    }

    // close the handle so the body is fully flushed before the caller
    // moves the file into place
    @fclose($fp);

    return $status;
}

/**
 * Read the contents of $url through the cache.
 *
 * Asks cache() where the content should be read from, then reads that
 * location with file_get_contents.
 *
 * @param string $url            The URL to read.
 * @param int    $min_expiration Passed through to cache(); defaults to 60.
 * @return string|false Whatever file_get_contents returns for the location
 *                      handed back by cache().
 */
function url_get_contents($url, $min_expiration=60) {
    $location = cache($url, $min_expiration);
    $contents = file_get_contents($location);
    return $contents;
}

?>