Bernardstanislas
6/10/2015 - 6:39 AM

OMDB scraper

OMDB scraper

var http = require('http');
var fs = require('fs');

// Name of the output file; it is created in the same directory as this script.
var filename = 'dump';
// Upper bound on the numeric ids to request (ids at or above it are not fetched).
var maxId = 10000000;
// Number of request chains that are kept running side by side.
var runningSockets = 1000;

// Raise the shared agent's socket cap when it is lower than the number of
// chains, so that every chain can hold its own connection.
if (runningSockets > http.globalAgent.maxSockets) {
    http.globalAgent.maxSockets = runningSockets;
}

// Open the dump file for writing ('w' replaces any previous content).
var file_stream = fs.createWriteStream([__dirname, filename].join('/'), {flags : 'w'});


/**
 * Requests the OMDB record for one id, appends the JSON body to the dump
 * file, then continues the same chain with `index + runningSockets`.
 *
 * A chain ends once its index reaches `maxId`. The process is only shut down
 * after `runningSockets` chains have ended and the dump file has been flushed;
 * this assumes the caller starts exactly `runningSockets` chains.
 *
 * Failed or stalled requests are skipped (their id is not retried) so that a
 * single bad response can neither crash the scrape nor block its shutdown.
 *
 * @param {number} index Numeric part of the id to fetch; it is sent to the
 *     API as `tt` followed by the index zero-padded to 7 digits.
 */
function createRequest(index) {
	// Abort a request that produced no activity for this long.
	var requestTimeoutMs = 30000;

	if (index % (maxId / 100) === 0) {
		console.log((index / (maxId / 100)) + '% done');
	}

	if (index >= maxId) {
		// This chain is finished. Exiting right away would discard responses
		// still in flight on the other chains and data buffered in the write
		// stream, so wait for the last chain and flush the file first.
		createRequest.finishedChains = (createRequest.finishedChains || 0) + 1;
		if (createRequest.finishedChains >= runningSockets) {
			file_stream.end(function() {
				process.exit(0);
			});
		}
		return;
	}

	// Several events (error, end, timeout) can signal the end of one request;
	// the chain must advance exactly once per request.
	var settled = false;
	function next() {
		if (settled) {
			return;
		}
		settled = true;
		createRequest(index + runningSockets);
	}

	var req = http.request({
		host: 'www.omdbapi.com',
		port: 80,
		path: '/?i=tt' + pad(index, 7) + '&plot=short&r=json',
		method: 'GET'
	}, function(res) {
		var msg = '';
		res.setEncoding('utf8');
		res.on('data', function(chunk) {
			msg += chunk;
		});
		res.on('error', next);
		res.on('end', function() {
			if (settled) {
				// The request was already given up on; do not write late data.
				return;
			}
			// Only keep bodies that look like a JSON object.
			if (msg.slice(0, 1) === '{') {
				file_stream.write(msg + '\n');
			}
			next();
		});
	});

	// Without this listener a connection failure (reset, DNS error, ...) is
	// thrown as an unhandled 'error' event and terminates the process.
	req.on('error', next);

	// A request that never completes would otherwise stall its chain forever.
	req.setTimeout(requestTimeoutMs, function() {
		next();
		req.destroy();
	});

	req.end();
}
// Start one request chain per socket; chain j handles the ids
// j, j + runningSockets, j + 2 * runningSockets, ...
// The counter is declared explicitly: assigning to an undeclared `j` creates
// an implicit global and throws in strict mode.
for (var j = 0; j < runningSockets; j++) {
	createRequest(j);
}

/**
 * Left-pads `num` with zeros until it is at least `size` characters long.
 *
 * A value that is already `size` characters or longer is returned unchanged,
 * so leading digits are never cut off.
 *
 * @param {number|string} num Value to pad; it is converted to a string first.
 * @param {number} size Minimum length of the result.
 * @returns {string} The zero-padded string.
 */
function pad(num, size) {
	var s = String(num);
	while (s.length < size) {
		s = '0' + s;
	}
	return s;
}