// Scrape Google Images search results
var readdir = require('recursive-readdir');
var fs = require('fs');
var co = require('co');
/*
Simple script that removes trailing "garbage" from the filenames of recognizable image files,
e.g. Google Images files downloaded via `wget --no-check-certificate -nc -T 4 -t 4 -i images.txt`.
*/
var dir = process.argv[2];
if (!dir) {
console.log('Please specify a directory to process - Aborting.');
return;
}
var img1 = /^.*\.(jpe?g|png|gif)$/i;
// Finds the first image extension inside the LAST path segment and captures
// (1) the extension and (2) everything that trails it up to the end of the
// name. Path separators are excluded from the tail, so an extension-like
// fragment in a parent directory (e.g. "old.gifs/") can never be matched.
var img2 = /\.(jpe?g|png|gif)([^\\\/]*)$/i;

/**
 * Strips the trailing cruft that follows an image extension in a file's name,
 * renames the file on disk and logs the old and new paths.
 *
 * The new name keeps everything up to and including the dot in front of the
 * extension and appends "_.<ext>" (e.g. "cat.jpg?w=100" -> "cat._.jpg").
 * When that name is already taken a counter is inserted ("cat._1.jpg",
 * "cat._2.jpg", ...) so an existing file is never overwritten.
 *
 * @param {string} fileName - Path of the file to clean up.
 * @returns {string|undefined} The path the file was renamed to, or undefined
 *   when the name has no image extension or nothing trails the extension.
 * @throws {Error} Whatever fs.renameSync throws (e.g. the file is missing).
 */
function renameFile(fileName) {
  const matched = fileName.match(img2);
  if (!matched) return undefined;

  const type = matched[1];
  const cruft = matched[2];
  // Nothing trails the extension: the name is already clean.
  if (cruft === '') return undefined;

  // matched.index is the position of the dot in front of the extension. Using
  // it (instead of searching the path for the cruft/extension text) guarantees
  // the cut happens at the matched extension, even when the same characters
  // also occur earlier in the path.
  const stem = fileName.slice(0, matched.index + 1);

  // Just make sure it's a unique name: never clobber an existing file.
  let goodName = `${stem}_.${type}`;
  for (let n = 1; fs.existsSync(goodName); n++) {
    goodName = `${stem}_${n}.${type}`;
  }

  fs.renameSync(fileName, goodName);
  console.log(fileName, ' --> ', goodName);
  return goodName;
}
// Entry point: list the files under `dir` with recursive-readdir and clean up
// every name that does not already end in a plain image extension.
co(function* init() {
  var fileNames = yield readdir(dir);
  for (const f of fileNames) {
    if (!img1.test(f)) renameFile(f);
  }
}).catch(function (err) {
  // co() returns a promise; without a handler a failed directory scan or
  // rename would go unreported. Log it and flag a failing exit status.
  console.error(err);
  process.exitCode = 1;
});
/*
 * Browser snippet (presumably meant to be run on a Google Images results
 * page): clicks every `a.rg_l` link in turn, reads the `imgurl` parameter
 * from the links that appear in the preview panel, and lists each distinct
 * URL in a floating overlay. Double-click the overlay to remove it.
 */
!function(document) {
  // Floating overlay that receives the collected URLs.
  const d = document.createElement('div');
  d.style.cssText = 'position: fixed; top: 1em; left: 1em; z-index: 1000; background-color: rgba(188,188,88,.5); padding: 1em; border-radius: 1em; max-height: 50%; max-width: 50%; overflow: scroll;';
  d.ondblclick = function() {
    d.remove();
  };
  document.body.appendChild(d);

  // URLs already listed, so each one is shown only once.
  const seen = new Set();

  // Calls cb for every element matching selector.
  function eachEl(selector, cb) {
    [].forEach.call(document.querySelectorAll(selector), cb);
  }

  // Fall back to the default when the prompt is cancelled, left blank or
  // given something that is not a non-negative number; otherwise every click
  // would fire at once with a 0 / NaN delay.
  const DEFAULT_DELAY_MS = 250;
  const requested = Number.parseFloat(prompt('delay in ms', '250'));
  const delay = Number.isFinite(requested) && requested >= 0 ? requested : DEFAULT_DELAY_MS;

  // Stagger the clicks: the n-th link is opened after n * delay ms.
  eachEl('a.rg_l', function(a, index) {
    setTimeout(function() {
      eachBigImg(a);
    }, index * delay);
  });

  // Opens one result, then harvests the preview-panel links shortly after,
  // before the next result is clicked.
  function eachBigImg(a) {
    a.click();
    setTimeout(function() {
      eachEl('div.irc_bg div.irc_rimask a', eachImg);
    }, delay / 1.5 | 0);
  }

  // Extracts the image URL from one preview link and appends it to the overlay.
  function eachImg(a) {
    // Split the parameter out of the raw href first and decode only its
    // value; decoding the whole href first would turn an encoded "&" inside
    // the image URL into a separator and truncate the URL.
    const match = a.href.match(/imgurl=([^&]+)/);
    if (!match) return;

    let url;
    try {
      url = decodeURIComponent(match[1]);
    } catch (err) {
      // Malformed percent-escape: keep the raw value rather than aborting.
      url = match[1];
    }

    if (seen.has(url)) return;
    seen.add(url);

    // Insert as text, never as HTML: the URL is untrusted page data, and HTML
    // parsing would also mangle "&..." sequences in the displayed URL.
    d.appendChild(document.createTextNode(url));
    d.appendChild(document.createElement('br'));
  }
}(document);