adam-acosta
2/14/2018 - 4:49 PM

website scraper

Complete website scraper with more features; an easy-to-use library; non-JS scraper

/*

https://github.com/website-scraper/node-website-scraper
https://www.npmjs.com/package/website-scraper-2
https://scraper.nepochataya.pp.ua/

npm install website-scraper
*/

// Downloads http://rsworldpi.com/ into the local `rsworldpi` directory using
// the `website-scraper` package, then logs the outcome.
//
// NOTE(review): the option descriptions below are taken from the
// website-scraper README — confirm them against the installed version.
const scraper = require('website-scraper');

scraper({
  // Pages to start scraping from.
  urls: [
    'http://rsworldpi.com/'
  ],
  // Follow hyperlinks found in downloaded pages, but only one level deep.
  recursive: true,
  maxRecursiveDepth: 1,
  // Name saved files after the site's own path layout.
  filenameGenerator: 'bySiteStructure',
  // Output directory for everything that is saved.
  directory: 'rsworldpi',
  // Sort downloaded assets into sub-folders by file extension.
  subdirectories: [
    {directory: 'img', extensions: ['.jpg', '.png', '.svg']},
    {directory: 'js', extensions: ['.js']},
    {directory: 'css', extensions: ['.css']}
  ],
  // Elements (and the attribute holding the URL) whose targets get downloaded.
  sources: [
    {selector: 'img', attr: 'src'},
    {selector: 'link[rel="stylesheet"]', attr: 'href'},
    {selector: 'script', attr: 'src'}
  ],
  // Extra HTTP request options: send a mobile Chrome User-Agent header.
  request: {
    headers: {
      'User-Agent': 'Mozilla/5.0 (Linux; Android 4.2.1; en-us; Nexus 4 Build/JOP40D) AppleWebKit/535.19 (KHTML, like Gecko) Chrome/18.0.1025.166 Mobile Safari/535.19'
    }
  }
})
  .then(() => {
    console.log('hello');
  })
  .catch((err) => {
    // Report failures on stderr and mark the process as failed so callers
    // (shell scripts, CI, cron) can detect that the scrape did not complete.
    console.error(`fail${err}`);
    process.exitCode = 1;
  });