donfanning
8/15/2018 - 2:16 PM

PHP: Codeigniter: IP Geo Scraper

PHP: Codeigniter: IP Geo Scraper

// application/libraries/Scraper.php
<?php  if ( ! defined('BASEPATH')) exit('No direct script access allowed');

/**
 * CodeIgniter Scraper Library
 *
 * A library to provide basic XPath scraping support
 *
 * @package        CodeIgniter
 * @author        Kyle J. Dye | www.kyledye.com | kyle@kyledye.com
 * @copyright    Copyright (c) 2010, Kyle J. Dye.
 * @license        http://codeigniter.com/user_guide/license.html
 * @link            http://kyledye.com
 * @version        Version 0.1
 */

class Scraper {

    /** CodeIgniter super-object, as returned by get_instance(). */
    public $CI;
    /** URL most recently passed to capture_dom(), after prep_url(). */
    public $url;
    /** Value libxml_use_internal_errors() had before the constructor changed it. */
    public $old_setting;
    /** DOMDocument that holds the captured page. */
    public $html;
    /** DOMXPath bound to $html; NULL until capture_dom() has been called. */
    public $xpath;
    /** Raw HTML string most recently passed to capture_dom(). */
    public $raw_file;
    /** Not referenced by any method of this class. */
    public $elements;

    /**
     *  Constructor
     *
     *  Grabs the CodeIgniter instance, loads the URL helper (capture_dom()
     *  relies on prep_url()), switches libxml to internal error handling so
     *  malformed HTML does not emit PHP warnings, and creates the empty
     *  DOMDocument that capture_dom() fills.
     */

    function __construct() {
        $this->CI =& get_instance();
        $this->CI->load->helper('url');
        // Remember the previous setting so cleanup() can restore it.
        $this->old_setting = libxml_use_internal_errors(true);
        libxml_clear_errors();
        $this->html = new DOMDocument();
    }

    /**
     *  Capture DOM function
     *
     *  Loads HTML either from a URL or, when $url is empty, from a raw HTML
     *  string (for example the result of file_get_contents()), and prepares
     *  the XPath object used by find().
     *
     *  @param  string       $url       Address to fetch; passed through prep_url().
     *  @param  string|null  $raw_file  HTML markup, used only when $url is empty.
     *  @return bool  TRUE when the document was loaded; FALSE when both
     *                arguments are empty or the HTML could not be loaded.
     */

    function capture_dom($url = "", $raw_file = null) {
        if (empty($url)) {
            if (empty($raw_file)) {
                return(FALSE);
            }
            $this->raw_file = $raw_file;
            $loaded = $this->html->loadHTML($this->raw_file);
        } else {
            $this->url = prep_url($url);
            $loaded = $this->html->loadHTMLFile($this->url);
        }

        if ($loaded === FALSE) {
            // Start over with an empty document so find() reports "no match"
            // instead of querying whatever an earlier capture left behind.
            $this->html = new DOMDocument();
            $this->xpath = new DOMXPath($this->html);
            return(FALSE);
        }

        $this->xpath = new DOMXPath($this->html);
        return(TRUE);
    }


    /**
     *  Find function
     *
     *  Provides a primitive interface to capture values from the DOM that
     *  was loaded by capture_dom().
     *
     *  Flat query (no 'subqueries' key): returns the text value of the first
     *  node matched by 'query'.
     *
     *  $page_title = $this->scraper->find(array('name' => 'results', 'query' => '//title'));
     *
     *  @returns: $page_title = array('results' => 'This is an example website title');
     *
     *  When nothing matches, the value is NULL: array('results' => NULL).
     *
     *  Query with 'subqueries': the subqueries themselves are NOT evaluated
     *  by this version. The method returns the DOMNodeList matched by
     *  'query' so the caller can iterate over the nodes, or FALSE when the
     *  list is empty.
     *
     *  $table_rows = $this->scraper->find(array(
     *        'name' => 'rows', // optional - defaults to 'results'
     *        'query' => '//table[@id="mytable"]/tbody//tr', // required
     *        'subqueries' => array('first_name' => 'td[1]') // currently ignored
     *  ));
     *
     *  @returns: $table_rows = array('rows' => DOMNodeList);
     *
     *  @param  array  $xpaths  Keys: 'query' (required), 'name' and
     *                          'subqueries' (both optional).
     *  @return array|bool  Result keyed by 'name'; FALSE when 'query' is
     *                      missing, no document has been captured yet, the
     *                      XPath expression is rejected, or a 'subqueries'
     *                      request matches no nodes.
     */

    function find($xpaths = array()) {

        if (!isset($xpaths['query'])) {
            return(FALSE);
        }

        $master_name = isset($xpaths['name']) ? $xpaths['name'] : "results";

        // capture_dom() has not run yet: nothing to query.
        if (!($this->xpath instanceof DOMXPath)) {
            return(FALSE);
        }

        // DOMXPath::query() yields FALSE for a malformed expression.
        $items = $this->xpath->query($xpaths['query']);
        if ($items === FALSE) {
            return(FALSE);
        }

        if (!isset($xpaths['subqueries'])) {
            // item(0) is NULL when the list is empty; do not dereference it.
            $first = $items->item(0);
            return(array($master_name => ($first === null ? null : $first->nodeValue)));
        }

        if ($items->length == 0) {
            return(FALSE);
        }

        return(array($master_name => $items));
    }

    /**
     *  Cleanup function
     *
     *  Should be run after querying: clears the libxml error buffer and
     *  restores the libxml error-handling mode that was active before this
     *  library was constructed.
     */

    public function cleanup() {
        libxml_clear_errors();
        libxml_use_internal_errors($this->old_setting);
    }

}

?>


// Controller/method
/**
 * Scrapes ipinfodb.com's IP locator page for a hard-coded IP address and
 * splits the result list into a $geo_data array (ip_address, country,
 * state, city, zip, latitude, longitude).
 *
 * The method produces no output and returns nothing; un-comment the debug
 * lines at the end to inspect $geo_data. It returns early when the page
 * cannot be captured or the result list is not found.
 */
public function geo()
	{
		$this->load->library('scraper');
		$ip = '174.79.250.40'; //$this->session->userdata('ip_address');
		$url = "http://www.ipinfodb.com/ip_locator.php?ip=" . urlencode($ip);

		if ( ! $this->scraper->capture_dom($url))
			return;

		$scrape = $this->scraper->find(array(
					'name' => 'results',
					'query' => '//div[@class="section"]/ul',
				));

		// find() gives FALSE on failure and a NULL value when nothing matched.
		if ( ! is_array($scrape) || ! isset($scrape['results']))
			return;

		// Clean Data: strip the field labels and every space, leaving
		// ":"-separated values.
		// NOTE(review): removing " " also strips spaces inside values, and
		// label words such as "City" are removed from values too - confirm
		// this is acceptable for the target page.
		$wordlist = array("Country", "State/Province", "City", "Zip or postal code", "Latitude", "Longitude", "Timezone", " ");
		$parts = explode(':', str_replace($wordlist, "", $scrape['results']));

		// Position of each value in the ":"-separated text.
		$fields = array(
			1 => 'ip_address',
			2 => 'country',
			3 => 'state',
			4 => 'city',
			5 => 'zip',
			6 => 'latitude',
			7 => 'longitude',
		);

		$geo_data = array();
		foreach ($fields as $index => $key)
			// A missing piece becomes '' instead of raising an undefined-offset notice.
			$geo_data[$key] = isset($parts[$index]) ? trim($parts[$index]) : '';

		 //echo '<pre>';
		 //print_r($geo_data);
	}