Crawl Facebook feeds in Node.js with node-simplecrawler
var Crawler = require("simplecrawler");
var Url = require("url");
// Seed URL for the crawl: the Graph API feed of a single page, narrowed by the
// `fields`, `since`, `until` and `limit` query parameters.
// NOTE(review): the access token is committed in source here. Treat it as
// leaked, rotate it, and consider loading it from configuration instead.
var target = "https://graph.facebook.com/ledzeppelin/feed?access_token=1597581200507009%7Ce749be55ea86249f92ae56b081c37b38&fields=from%2Cmessage%2Ccreated_time%2Ctype%2Clink%2Ccomments.summary(true)%2Clikes.summary(true)%2Cshares&since=2016-07-11&until=2016-07-14&limit=10";
var url = Url.parse(target);

// Connection settings are derived from the parsed seed URL rather than being
// hard-coded, so editing `target` (different scheme, explicit port) cannot
// leave the crawler pointed at a mismatching host/port/protocol.
// `hostname` is used instead of `host` because `host` carries ":port" when the
// URL specifies one, and the port is configured separately below.
var crawler = new Crawler(url.hostname);
crawler.initialPath = url.path;
// `url.protocol` includes the trailing colon ("https:"); fall back to https,
// which is what this script previously hard-coded.
crawler.initialProtocol = (url.protocol || "https:").replace(/:$/, "");
// Use the explicit port when present, otherwise the scheme's default
// (443 unless the scheme is plain http).
crawler.initialPort = url.port
    ? parseInt(url.port, 10)
    : (url.protocol === "http:" ? 80 : 443);

// Fetch one resource at a time.
crawler.maxConcurrency = 1;
// NOTE(review): presumably the seed page counts as depth 1, in which case a
// limit of 2 stops after the first `paging.next` page. Confirm this is the
// intended amount of paging.
crawler.maxDepth = 2;
// The query string carries the token and all feed filters, so it must be kept
// on queued URLs.
crawler.stripQuerystring = false;
// Ask the crawler to decode response bodies before handing them to listeners
// (see simplecrawler's `decodeResponses` option).
crawler.decodeResponses = true;
// Graph API responses are JSON; register that MIME type so those responses
// are passed through resource discovery.
crawler.supportedMimeTypes.push(/^application\/json/i);
// On every successful fetch: log the URL, then emit two messages downstream,
// first a human-readable status line and then the response body itself.
crawler.on("fetchcomplete", function onFetchComplete(item, body, res) {
    var fetchedUrl = item.url;
    console.log("Completed fetching resource:", fetchedUrl);

    // Order matters to consumers: status message first, body second.
    ["Completed fetching resource: " + fetchedUrl, body].forEach(function (payload) {
        node.send({payload: payload});
    });
});
// On a failed fetch: log the URL and forward the whole queue item downstream
// under an `error` key so the consumer can inspect what went wrong.
crawler.on("fetcherror", function onFetchError(item, res) {
    var failure = {payload: {"error": item}};
    console.log("Error fetching resource:", item.url);
    node.send(failure);
});
/**
 * Custom link discovery for Graph API responses.
 *
 * Overrides the crawler's `discoverResources` so that the only URL followed
 * from a response is its `paging.next` link.
 *
 * @param {Buffer|string} buffer - Response body, expected to be JSON text.
 * @param {Object} queueItem - Queue entry of the fetched resource (unused).
 * @returns {string[]} A one-element array holding the next-page URL, or an
 *     empty array when the body is not JSON or has no usable `paging.next`.
 */
crawler.discoverResources = function(buffer, queueItem) {
    var data;
    try {
        data = JSON.parse(buffer);
    }
    catch (e) {
        // Body was not valid JSON: report it and stop paging from here.
        console.log(e);
        return [];
    }

    // Valid JSON is not necessarily an object (e.g. `null`), and `next` is
    // only useful as a non-empty string URL. Guard both so that unexpected
    // shapes yield "no further pages" instead of a logged TypeError or a
    // non-URL value being queued.
    var paging = data && data.paging;
    if (paging && typeof paging.next === "string" && paging.next !== "") {
        return [paging.next];
    }
    return [];
};
// Start the crawler using the configuration and handlers registered above.
crawler.start();
// Progress marker printed once start() has returned.
console.log("start crawling...");