// JS web crawler
// Crawler state shared by the functions and top-level statements in this file.
var stopflag = false; // becomes true once loadNext() decides the crawl is over
var offset = 0; // appended to the search URL as the offset= value
var data; // receives the <section class="firstName"> element during setup
var html; // receives the <section class="weber"> element that holds each fetched page
/**
 * Fetches the search-results page at the current global `offset`, scans it
 * with selectNames(), and advances `offset` by the number of profiles found.
 *
 * Sets the global `stopflag` to true when the crawl cannot continue:
 *  - the request failed (xhr() returned undefined), or
 *  - the page contained no profiles, so `offset` would not advance and the
 *    same URL would be requested again forever.
 */
function loadNext() {
  console.log('offset = ', offset);
  const url = "http://website/ru/search.phtml?ia=M&lf=F&af=45&at=47&t=z&s_c=3159_4312_0_0&form=1&offset=" + offset;

  // Check the raw response before touching the DOM: innerHTML always reads
  // back as a string, so a failed request cannot be detected through it.
  const response = xhr(url);
  if (response === undefined) {
    console.info("This is the end");
    stopflag = true;
    return;
  }

  html.innerHTML = response;
  const count = selectNames(html);
  if (count === 0) {
    // Nothing matched on this page, so there is nothing further to page through.
    console.info("This is the end");
    stopflag = true;
    return;
  }
  offset += count;
}
/**
 * Performs a synchronous GET request and returns the response body.
 *
 * @param {string} url - Address to request.
 * @returns {string|undefined} The response text when the status is 200;
 *   undefined when send() throws or the status is anything else. Failures
 *   are reported with console.info.
 */
function xhr(url) {
  const request = new XMLHttpRequest();
  // Passing false as the third argument makes the request synchronous.
  request.open("GET", url, false);
  request.setRequestHeader("X-Requested-With", "XMLHttpRequest");

  try {
    request.send();
  } catch (err) {
    // A synchronous send() throws on network failure; report it the same way
    // as a bad status so callers only have to handle `undefined`.
    console.info('request failed: ' + err + ' url = ', url);
    return undefined;
  }

  if (request.status !== 200) {
    // Handle the error, e.g. "404: Not Found".
    console.info(request.status + ': ' + request.statusText + 'url = ', url);
    return undefined;
  }
  return request.responseText;
}
/**
 * Measures the text that precedes the i-th occurrence of a separator.
 *
 * Splits `str` on `m`, keeps at most the first `i` pieces, glues them back
 * together with `m`, and returns the length of that text. For a string
 * separator this is the index at which the i-th occurrence of `m` starts,
 * or `str.length` when `str` has fewer than `i` occurrences.
 *
 * @param {string} str - Text to search.
 * @param {string} m - Separator to look for.
 * @param {number} i - Which occurrence to locate (1-based).
 * @returns {number} Length of the rejoined leading pieces.
 */
function getPosition(str, m, i) {
  const leadingPieces = str.split(m, i);
  const prefix = leadingPieces.join(m);
  return prefix.length;
}
/**
 * Scans an element's markup for embedded profile records and logs the name
 * and profile URL of every record whose name starts with "та" or "ta"
 * (compared in lower case).
 *
 * @param {{innerHTML: string}} html - Element whose innerHTML is searched.
 * @returns {number} How many records matched the pattern, whether or not
 *   their name was logged.
 */
function selectNames(html) {
  const regex = /data: {"profile":.*"name":"(.*)","age".*"login":"(.*)","state/g;
  // Read the markup once instead of re-reading innerHTML on every iteration.
  const markup = html.innerHTML;
  let count = 0;
  // Declared locally: assigning an undeclared `match` creates a global in
  // sloppy mode and throws a ReferenceError in strict mode.
  let match = regex.exec(markup);
  while (match !== null) {
    const lowername = match[1].toLowerCase();
    if (lowername.startsWith('та') || lowername.startsWith('ta')) {
      console.log(match[1], 'http://www.mamba.ru/' + match[2]);
    }
    // The /g flag makes exec() resume after the previous match.
    match = regex.exec(markup);
    ++count;
  }
  return count;
}
// Set up the two <section> elements the crawler works with. They are created
// on the first run and reused when they are already present in the document.
var weber = document.querySelector('section.weber');
if (weber == null) {
  html = document.createElement('section');
  html.setAttribute('class', 'weber');
  document.body.appendChild(html);
  data = document.createElement('section');
  data.setAttribute('class', 'firstName');
  document.body.appendChild(data);
} else {
  // The sections already exist: point the globals at them so loadNext() has
  // an element to write into.
  html = weber;
  data = document.querySelector('section.firstName');
}

// Crawl page after page until loadNext() raises the stop flag. Every request
// is synchronous, so this loop blocks until the crawl finishes.
while (stopflag === false) {
  loadNext();
}