pranayaryal
3/14/2016 - 1:19 AM

webscraper

<?php
namespace Scraper;

use Model\Gender;

/**
 * Scraper for the Maui Jim site configured in $site_url.
 *
 * Product links are collected from the paginated men's and women's sunglasses
 * listings, split into men's / ladies' / unisex groups, and every product page
 * is then scraped into a frame record that is handed to persistFrameData().
 */
class Mauijim extends Base
{
    /** @var string Scheme and host prepended to the relative links found on the site. */
    protected $site_url = "http://www.mauijim.com";

    /** @var bool NOTE(review): how Base interprets this flag is not visible here; confirm it should stay enabled. */
    protected $test_mode = true;

    /**
     * Delegates to the parent setup without adding any scraper-specific steps.
     */
    public function setup()
    {
        parent::setup();
    }

    /**
     * Listing URLs keyed by gender constant.
     *
     * Each URL ends in "page=" so that a page number can be appended directly.
     *
     * @return array
     */
    protected function getUrls()
    {
        $listing = $this->site_url . "/en/Shop/Sunglasses/c/Sunglasses/results?q=%3Aname-asc%3Agender-gender%3A";

        return [
            Gender::GENDER_MENS => $listing . "Men&page=",
            Gender::GENDER_LADIES => $listing . "Women&page=",
        ];
    }

    /**
     * Registers the single brand handled by this scraper.
     */
    protected function setBrands()
    {
        $this->brands = [
            'MJ' => ['id' => 3]
        ];
    }

    /**
     * No login step is performed; always reports success.
     *
     * @return bool
     */
    protected function doLogin()
    {
        return true;
    }

    /**
     * Scrapes every frame detail page and persists one record per frame.
     *
     * Pages carrying the ".error-page" marker are skipped and not counted.
     *
     * @param array $brand Brand data; the 'id', 'directory' and 'url_key' entries are read.
     *                     NOTE(review): setBrands() only defines 'id', so the other two are
     *                     presumably filled in elsewhere; confirm against Base.
     * @param mixed $key   Not used by this implementation.
     * @return void
     * @throws FatalException when the source key or frame name cannot be extracted
     */
    protected function scrapeBrandFrames($brand, $key)
    {
        foreach ($this->combineGenders() as $gender => $hrefs) {
            foreach ($hrefs as $href) {
                $this->loadPage($this->site_url . $href);

                if ($this->hasErrorPageClass()) {
                    continue;
                }

                // Start from an empty record so nothing leaks over from the previous page.
                $frame = [];
                $frame["source_name"] = 'MJ';
                $frame["source_key"] = $this->getSourceKey();
                $frame["name"] = $this->extractFrameName();
                $frame["brand_id"] = $brand['id'];
                $frame["gender_id"] = $gender;
                $frame["type_id"] = 2;
                $frame["directory"] = $brand['directory'];
                $frame["url_key"] = $this->constructFrameUrl($frame, $brand['url_key'], $frame['gender_id']);
                $frame["styles"] = $this->buildStylesArray();
                $frame["rxable"] = $this->isRxable();

                $this->persistFrameData($frame);

                $this->frames_total++;
                $this->frames_processed++;
            }
        }
    }

    /**
     * Reads the frame name from the first <h2> of the currently loaded page.
     *
     * @return string
     * @throws FatalException when the page has no <h2> element
     */
    protected function extractFrameName()
    {
        $heading = $this->html->find('h2', 0);

        if (!$heading) {
            throw new FatalException("Could not extract frame name");
        }

        return $heading->innertext;
    }

    /**
     * Tells whether the currently loaded page contains an ".error-page" element.
     *
     * @return bool
     */
    protected function hasErrorPageClass()
    {
        return (bool) $this->html->find(".error-page", 0);
    }

    /**
     * Extracts the source key from the first ".pr-code" element of the loaded page.
     *
     * @return string
     * @throws FatalException when the element is missing or its text has an unexpected format
     */
    protected function getSourceKey()
    {
        $code_element = $this->html->find(".pr-code", 0);

        if (!$code_element) {
            throw new FatalException("Could not extract source key");
        }

        return $this->extractSourceKey($code_element->innertext);
    }

    /**
     * Returns the first run of digits found in a product code.
     *
     * The code must contain a dash somewhere and at least one digit.
     *
     * @param string $product_code
     * @return string
     * @throws FatalException when the code has no dash or no digits
     */
    protected function extractSourceKey($product_code)
    {
        // strpos() returns 0 for a leading dash, so compare against false explicitly
        // rather than relying on truthiness.
        $has_dash = strpos($product_code, '-') !== false;

        if (!$has_dash || !preg_match('#\d+#', $product_code, $matches)) {
            throw new FatalException("Product code is not in the expected format");
        }

        return $matches[0];
    }

    /**
     * Tells whether the currently loaded page contains a ".pr-avail" element.
     *
     * @return bool
     */
    protected function isRxable()
    {
        return (bool) $this->html->find(".pr-avail", 0);
    }

    /**
     * Collects the product links from the listing pages of one gender.
     *
     * @param int $gender_code Key into getUrls() (a Gender constant).
     * @param int $last_page   Highest page number to load; pages 0..$last_page are visited.
     * @return array List of href values of every "a.product-title" link, in page order.
     */
    public function getFrameHrefs($gender_code, $last_page = 8)
    {
        // The listing URL does not change between pages, so resolve it once.
        $listing_url = $this->getUrls()[$gender_code];
        $links = [];

        for ($page = 0; $page <= $last_page; $page++) {
            $this->loadPage($listing_url . $page);

            foreach ($this->html->find('a.product-title') as $anchor) {
                $links[] = $anchor->href;
            }
        }

        return $links;
    }

    /**
     * Loads each given product page in turn and reads its name and styles.
     *
     * Each page overwrites the result of the previous one, so the returned
     * array describes the LAST link only (or is empty when no links are given).
     *
     * @param array $gender List of relative product links.
     * @return array Array with 'name' and 'styles' keys, or an empty array.
     * @throws FatalException when a page has no <h2> element
     */
    public function getFrameDetails($gender)
    {
        $frame = [];

        foreach ($gender as $href) {
            $this->loadPage($this->site_url . $href);

            $frame["name"] = $this->extractFrameName();
            $frame["styles"] = $this->buildStylesArray();
        }

        return $frame;
    }

    /**
     * Builds the list of styles offered on the currently loaded product page.
     *
     * Every "a.fcw-itemlnk" element carries a JSON document in its
     * "data-fcwvariants" attribute. Elements whose attribute does not decode to
     * an array, and variants identified as replacement lenses, are skipped.
     *
     * @return array List of arrays with the keys color, msrp, wholesale,
     *               image_path, image_code, size and size_vertical.
     */
    public function buildStylesArray()
    {
        $styles = [];

        // Size data comes from the page, not from the variant, so it is identical for
        // every style. It is looked up lazily so pages without usable styles trigger
        // no size lookups at all.
        $size = null;
        $size_vertical = null;

        foreach ($this->html->find('a.fcw-itemlnk') as $link) {
            $variant = json_decode($link->getAttribute('data-fcwvariants'), true);

            // A missing or malformed attribute decodes to null; there is nothing to record.
            if (!is_array($variant)) {
                continue;
            }

            if ($this->isReplacementLens($variant['productCode'])) {
                continue;
            }

            if ($size === null) {
                $size = $this->constructSize();
                $size_vertical = $this->extractSizeVertical();
            }

            // Drop the currency sign and thousands separators before the float conversion,
            // otherwise "1,299.00" would be read as 1.0.
            $msrp = str_replace(['$', ','], '', $variant['price']);

            $styles[] = [
                'color' => $variant['frame'],
                'msrp' => floatval($msrp),
                'wholesale' => 0,
                'image_path' => $this->site_url . $variant['productZoomImage'],
                'image_code' => $this->extractImageCode($variant['productZoomImage']),
                'size' => $size,
                'size_vertical' => $size_vertical,
            ];
        }

        return $styles;
    }

    /**
     * Tells whether a product code starts with "AL-" (compared case-insensitively).
     *
     * @param string $product_code
     * @return bool
     */
    protected function isReplacementLens($product_code)
    {
        return stripos($product_code, 'AL-') === 0;
    }

    /**
     * Returns the file name, without extension, of the path part of an image URL.
     *
     * @param string $image_url
     * @return string
     */
    protected function extractImageCode($image_url)
    {
        $url_parts = parse_url($image_url);

        return pathinfo($url_parts['path'], PATHINFO_FILENAME);
    }

    /**
     * Builds the "width-bridge-temple" size string from the size chart of the loaded page.
     *
     * NOTE(review): the meaning of the "span.hchart-number" positions (0, 1 and 5)
     * is taken from the variable names only; verify against the page markup.
     *
     * @return string
     */
    protected function constructSize()
    {
        $width = round($this->html->find('span.hchart-number', 0)->innertext);
        $bridge = round($this->html->find('span.hchart-number', 1)->innertext);
        $temple = round($this->html->find('span.hchart-number', 5)->innertext);

        return $width . "-" . $bridge . "-" . $temple;
    }

    /**
     * Reads the fourth "span.hchart-number" value (index 3), rounded to two decimals.
     *
     * @return float
     */
    protected function extractSizeVertical()
    {
        return round($this->html->find('span.hchart-number', 3)->innertext, 2);
    }

    /**
     * Returns the frames property of this scraper.
     *
     * @return array
     */
    public function getFrames()
    {
        return $this->frames;
    }

    /**
     * Splits the product links of both listings into three groups.
     *
     * Links found only in the men's listing are filed under GENDER_MENS, links
     * found only in the women's listing under GENDER_LADIES, and links present
     * in both under GENDER_UNISEX.
     *
     * @return array Lists of links keyed by gender constant.
     */
    public function combineGenders()
    {
        $ladies_listing = $this->getFrameHrefs(Gender::GENDER_LADIES);
        $mens_listing = $this->getFrameHrefs(Gender::GENDER_MENS);

        return [
            Gender::GENDER_MENS => array_diff($mens_listing, $ladies_listing),
            Gender::GENDER_LADIES => array_diff($ladies_listing, $mens_listing),
            Gender::GENDER_UNISEX => array_intersect($mens_listing, $ladies_listing),
        ];
    }

    /**
     * Returns the men's listing links followed by the women's listing links.
     *
     * Despite the method name, no unisex filtering is applied: links present in
     * both listings simply appear twice. Use combineGenders() for the actual split.
     *
     * @return array
     */
    public function getUnisex()
    {
        return array_merge(
            $this->getFrameHrefs(Gender::GENDER_MENS),
            $this->getFrameHrefs(Gender::GENDER_LADIES)
        );
    }
}