// LM Image batch downloader
var fs = require('fs');
var process = require('process');
var zlib = require('zlib');
var request = require('request');
var async = require('async');
var fileType = require('file-type');
var sqlite = require('sqlite3');
/**
 * Build the download URL for one image.
 *
 * @param {string} id - Image identifier, appended to the URL path as-is.
 * @returns {string} URL on message.learnmode.net requesting the image with
 *     the query string `size=X`.
 */
function imgurl(id) {
  var endpoint = 'https://message.learnmode.net:5443/image/';
  var query = '?size=X';
  return endpoint + id + query;
}
/**
 * Abbreviate a hash for log output as "<first 6 chars>~<last 4 chars>".
 *
 * Hashes of 10 characters or fewer are returned unchanged: abbreviating them
 * would only repeat characters and produce a longer string than the input.
 *
 * @param {string} hash - Hash to abbreviate; may be empty or undefined.
 * @returns {string} The abbreviated hash, the hash itself when it is already
 *     short, or '' when no hash was given.
 */
function formatHash(hash) {
  if (!hash) return '';
  if (hash.length <= 10) return hash;
  // slice() replaces the deprecated substr(); slice(-4) takes the last four
  // characters without any manual index arithmetic.
  return hash.slice(0, 6) + '~' + hash.slice(-4);
}
var stdout = process.stdout;
// Image ids to download, one per line of imglist.txt. Lines are split on LF or
// CRLF and trimmed, and blank lines are dropped: a trailing newline would
// otherwise yield an empty id, and a stray '\r' would make an id differ from
// the hash stored in the database.
var imglist = fs.readFileSync('imglist.txt', {encoding: 'utf-8'})
  .split(/\r?\n/)
  .map(function(line) { return line.trim(); })
  .filter(function(line) { return line.length > 0; });
// The database is opened read-write only; no create flag is passed.
var db = new sqlite.Database('imgdb.sqlite', sqlite.OPEN_READWRITE);
// Prepared once and reused for every image insert.
var stmt_int = db.prepare('INSERT INTO image (hash, size, content, date, format) VALUES (?,?,?,?,?)');
// Number of images handled so far (stored, skipped or failed).
var cnt = 0;
// Main pipeline: work out where a previous run stopped, then download, gzip
// and store every remaining image, with at most 20 images in flight at once.
async.waterfall([
  // Step 1: fetch the greatest hash already stored in the database.
  // NOTE(review): using it as the resume point assumes imglist.txt is sorted
  // ascending by hash -- confirm against how the list is generated.
  function(async_next) {
    db.get('SELECT hash FROM image ORDER BY hash desc', function(err, row) {
      if (err) return async_next(err);
      if (!row) {
        console.log('No previous data available. Fetching from start...');
        return async_next(null, undefined);
      }
      async_next(null, row.hash);
    });
  },
  // Step 2: translate that hash into a starting index within imglist.
  function(prevHash, async_next) {
    var idx = prevHash ? imglist.indexOf(prevHash) : 0;
    if (idx < 0) {
      // indexOf() returns -1 when the stored hash is not in the list; passing
      // that to slice() would silently keep only the last entry.
      console.log('Previous hash ' + formatHash(prevHash) + ' not found in list. Fetching from start...');
      idx = 0;
    } else if (prevHash) {
      console.log('Fetching from #' + idx + ': ' + formatHash(prevHash) + '...');
    }
    async_next(null, idx);
  },
  // Step 3: process every remaining image.
  function(idx, async_next) {
    // maybe slow here...
    imglist = imglist.slice(idx);
    var work_counter = 0; // number of images currently being processed
    async.eachLimit(imglist, 20, function(v, callback) {
      work_counter++;
      async.waterfall([
        // Download the raw image bytes (encoding: null yields a Buffer).
        function(step_next) {
          request.get(imgurl(v), {encoding: null}, function(err, resp, body) {
            if (err) return step_next(err);
            step_next(null, resp, body);
          });
        },
        // Compress the body for storage.
        function(resp, body, step_next) {
          zlib.gzip(body, function(err, buffer) {
            if (err) return step_next(err);
            step_next(null, resp, body, buffer);
          });
        },
        // Detect the format and insert the row.
        function(resp, body, body_zipped, step_next) {
          var type = fileType(body);
          if (!type) {
            // fileType() yields nothing for unrecognized data (e.g. an error
            // page); report that instead of failing on `.mime` of null.
            return step_next(new Error(
              'Unrecognized image data for ' + v + ' (HTTP ' + resp.statusCode + ')'
            ));
          }
          var mime = type.mime;
          stmt_int.run([
            v,
            body.length,
            body_zipped,
            // toJSON() gives null when the Last-Modified header is missing
            // or unparsable.
            new Date(resp.headers['last-modified']).toJSON(),
            mime
          ], function(err) {
            if (err) {
              // A constraint violation is reported as "skipped" (no body).
              // Every branch returns so the step callback runs exactly once.
              if (err.code === 'SQLITE_CONSTRAINT') return step_next(null, null);
              return step_next(err);
            }
            step_next(null, body, body_zipped, mime);
          });
        }
      ], function(err, body, body_zipped, mime) {
        work_counter--;
        cnt++;
        if (err) {
          console.log(err);
          return callback(err);
        }
        if (!body) {
          // skipped
          console.log(
            formatHash(v), '=== SKIPPED ==='
          );
          // Release the worker slot; without this the queue stalls once
          // 20 images have been skipped.
          return callback(null);
        }
        console.log(
          formatHash(v),
          '[' + mime + ']',
          body.length,
          body_zipped.length,
          'cnt=' + cnt,
          'workers=' + work_counter
        );
        callback(null);
      });
    }, async_next); // finish the outer pipeline once every image is done
  }
], function(err) {
  if (err) {
    // Fail fast, as before, but rethrow the original error so its stack
    // trace is preserved.
    throw err;
  }
  // All images processed: release the prepared statement, then the database.
  stmt_int.finalize(function() {
    db.close();
  });
});