crazy4groovy
5/9/2014 - 1:20 PM

CasperJS example of how to log in and scrape an AJAX page.

CasperJS example of how to log in and scrape an AJAX page.

// Filesystem module handle (not referenced by the steps visible in this script).
var fs = require("fs");

// Filenames collected from the photo page; populated by the scraping step.
var images = [];

/**
 * Account settings -- edit these before running.
 */
var scraped_username = "username2"; // profile whose photo page is opened
var login_password = "password";    // value typed into the login form's 'pass' field
var login_username = "username";    // value typed into the login form's 'email' field

// CasperJS instance with verbose output at the 'info' log level.
var casper = require('casper').create({
	logLevel : 'info',
	verbose : true
});

/**
 * Everything starts here: open the mobile Facebook site and submit the login form.
 * The mobile version is used because its DOM is much simpler to scrape.
 * The page is requested over HTTPS (like the photo page opened in the next step)
 * so the login form and the credentials typed into it do not travel over plain HTTP.
 */
casper.start('https://m.facebook.com', function() {

	// The pretty HUGE viewport allows for roughly 1200 images (author's estimate).
	// If you need more, either enlarge the viewport or scroll down to load more DOM (probably the better approach).
	this.viewport(2048,4096);

	// Fill in the credentials; the trailing `true` asks fill() to submit the form.
	this.fill('form#login_form', {
		'email': login_username,
		'pass':  login_password
	}, true);
});

/**
 * Open the photo page of the scraped profile and collect the thumbnail filenames
 * into the script-level `images` array.
 * The username is URL-encoded so unexpected characters cannot alter the request path.
 */
casper.thenOpen("https://m.facebook.com/"+encodeURIComponent(scraped_username)+"?v=photos", function(){
	// We wait four seconds so that the page has loaded (lazy loading is great for real users, less so for bots).
	this.wait(4000,function(){
		// Screenshot, just to be sure we are on the correct page.
		this.capture("photo_index.png");

		// The callback passed to evaluate() runs inside the page, so it must be
		// self-contained: every helper it needs is declared within it.
		var found = this.evaluate(function(){
			var nodes = document.querySelectorAll(".timeline.photos i.img");
			var names = [];
			Array.prototype.forEach.call(nodes, function(node){
				// Accept url(x), url("x") and url('x'); skip elements without a background image
				// instead of throwing on a failed match.
				var match = /url\(\s*["']?([^"')]+)["']?\s*\)/.exec(node.style.backgroundImage || "");
				if (!match) {
					return;
				}
				// Keep only the last path segment (the filename, including any query string).
				var name = match[1].split("/").pop();
				if (name) {
					names.push(name);
				}
			});
			return names;
		});

		// evaluate() may hand back a non-array when the in-page code fails; always keep an array.
		if (Array.isArray(found)) {
			images = found;
		} else {
			this.log("Could not read any image from the photo page", "warning");
			images = [];
		}
	});
});

casper.then(function(){
	// Final step: print one full image URL per scraped filename.
	// TODO: the images are only listed here, not downloaded locally yet.
	var cdn_prefix = "https://fbcdn-photos-a.akamaihd.net/hphotos-ak-ash4/s720x720/";
	this.each(images, function(self, fname) {
		self.echo(cdn_prefix + fname);
	});
});

// Kick off execution of the steps registered above.
casper.run();