Kcko
11/1/2019 - 3:13 PM

Parsing Open Graph (og:) meta tags

<?php 


/**
 * Best-effort conversion of a string in an unknown encoding to UTF-8.
 *
 * Detection order:
 *  1. Well-formed UTF-8 (which includes plain ASCII) is returned unchanged.
 *  2. A byte in 0x7F-0x9F, or the byte 0xBC, is taken as a sign of WINDOWS-1250.
 *  3. Anything else is assumed to be ISO-8859-2.
 *
 * @param string $s Raw bytes in UTF-8, WINDOWS-1250 or ISO-8859-2.
 *
 * @return string|false The UTF-8 text. When a conversion is attempted the result
 *                      of iconv() is returned as-is, which is false if iconv fails.
 */
function autoUTF($s)
{
    // With the `u` modifier PCRE validates the whole subject as UTF-8, so an empty
    // pattern matches exactly when $s is well-formed UTF-8. Checking validity rather
    // than looking for characters from a few code-point ranges keeps UTF-8 text in
    // any script (Cyrillic, CJK, emoji, ...) from being re-encoded a second time.
    if (preg_match('//u', $s) === 1) {
        return $s;
    }

    // Byte-wise match (no `u` modifier): these byte values are the WINDOWS-1250 hint.
    $sourceEncoding = preg_match('#[\x7F-\x9F\xBC]#', $s) ? 'WINDOWS-1250' : 'ISO-8859-2';

    return iconv($sourceEncoding, 'UTF-8', $s);
}
 

// Downloads one page and dumps its Open Graph (<meta property="og:...">) values.
$url = 'https://navratdoreality.cz/uzdraveny-fotbalista-8853.html';

// file_get_contents() returns false on failure, and an empty body cannot be parsed,
// so stop here with a clear error instead of handing a non-document to loadHTML().
$html = file_get_contents($url);
if ($html === false || $html === '') {
    throw new RuntimeException("Unable to download HTML from {$url}");
}

// Collect libxml's parse errors internally instead of having loadHTML() raise a
// PHP warning for every markup problem; the collected errors are discarded below.
libxml_use_internal_errors(true);
$doc = new DOMDocument();
$doc->loadHTML($html);
libxml_clear_errors();

$xpath = new DOMXPath($doc);
$query = '//meta[starts-with(@property, "og:")]';
$metas = $xpath->query($query);

// Map of og: property name => content. A property that occurs more than once
// keeps the value of its last occurrence, because the key is simply overwritten.
$rmetas = [];
foreach ($metas as $meta) {
    $property = $meta->getAttribute('property');
    $content = $meta->getAttribute('content');
    $rmetas[$property] = html_entity_decode(autoUTF($content));
}
var_dump($rmetas);



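// Alternative without DOM: regex-based extraction. The pattern only matches tags
// written as <meta property="og:..." content="..."> (double-quoted attributes,
// with property placed directly before content).
// $pattern='/<\s*meta\s+property="og:([^"]+)"\s+content="([^"]*)/i';
// preg_match_all($pattern, $html, $out);
// var_dump(array_combine($out[1], $out[2]));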