tpai
11/10/2014 - 10:57 AM

A web crawler sample based on nightwatch.

A web crawler sample based on nightwatch.

Instruction

  1. Download poe.js, package.json and the latest version of the Selenium server into the same folder.

  2. Install the required modules.

$ npm install
  3. Copy the examples/ and lib/ directories into the current folder (they are directories, so `cp` needs `-r`).
$ cp -r node_modules/nightwatch/examples .
$ cp -r node_modules/nightwatch/lib .
  4. Create the tests/ folder.
$ mkdir tests
  5. Modify the acc and pwd variables in poe.js, then move the file into the tests/ folder.
$ mv poe.js tests/
  6. Launch the Selenium server (Firefox must be installed).
$ java -jar selenium-server-standalone-{VERSION}.jar
  7. Run the command below.
$ nightwatch --test tests/poe
module.exports = {
	tags: ['poe'],
	'Get POE Reward Images' : function (client) {

		var cheerio = require("cheerio") // easy to parse HTML
		var acc = "<your-account>"
		var pwd = "<your-password>"

		client
			.url('http://web.poe.garena.tw/login')
			.waitForElementVisible('body', 1000)
			.assert.title('Garena')
			.assert.visible('#sso_login_form_account')
			.setValue('#sso_login_form_account', acc)
			.setValue('#sso_login_form_password', pwd)
			.waitForElementVisible('#confirm-btn', 1000)
			.click('#confirm-btn')
			.pause(2000)
			.assert.visible('div.tab-links')
			.url("http://web.poe.garena.tw/account/view-profile/"+acc+"/events")
			.source(function(result) { // .source() will dump the target page into text format
				$ = cheerio.load(result.value) // so it needs to be parse
				var images = $("div.reward img")
				for(var i=0;i<images.length;i++) {
					console.log($(images[i]).attr("src"));
				}
			})
			.end();
	}
};
{
  "name": "nightwatch_webcrawler",
  "description": "A web crawler sample based on nightwatch.",
  "version": "0.0.1",
  "author": {
    "name": "tonypai",
    "email": "tony77794@gmail.com"
  },
  "homepage": "http://github.com/tpai",
  "dependencies": {
    "nightwatch": ">=0.5.33",
    "cheerio": ">=0.18.0"
  }
}