movii
6/28/2017 - 3:07 PM

笔记:「饭否精选·日历」微信小程序制作记录-爬虫部分:6. Sample

笔记:「饭否精选·日历」微信小程序制作记录-爬虫部分:6. Sample

/**
 * Perform an HTTP GET request and resolve with the response body as text.
 *
 * The body is decoded as UTF-8. The HTTP status code is not inspected, so an
 * error page is delivered to the caller exactly like a successful response.
 *
 * @param {string|URL|object} url - Target passed straight to `http.get`.
 * @param {Function} [callback] - Accepted for backward compatibility only; it
 *   is never invoked. Use the returned promise instead.
 * @returns {Promise<string>} Resolves with the full response body; rejects
 *   with the underlying error if the request or the response stream fails.
 */
const request = (url, callback) => {
  return new Promise((resolve, reject) => {
    const req = require('http').get(url, (response) => {
      const chunks = [];
      response.on('data', (chunk) => chunks.push(chunk));
      // Decode once, after every byte has arrived. Joining the Buffers
      // directly would decode each chunk on its own and corrupt any
      // multi-byte UTF-8 character that straddles a chunk boundary.
      response.on('end', () => resolve(Buffer.concat(chunks).toString('utf8')));
      response.on('error', reject);
    });
    req.on('error', reject);
  });
};

module.exports = request;
const request = require('./request');

// Every digest document lives at URL_PERFIX + <name> + URL_SURFIX.
// The identifiers are misspelled ("PERFIX"/"SURFIX" for prefix/suffix) but are
// kept as-is because other code in this file refers to them by these names.
const URL_PERFIX = 'http://blog.fanfou.com/digest/json/';
const URL_SURFIX = '.json';

// Location of the index document, which follows the same naming scheme.
const ENTRIES_URL = `${URL_PERFIX}index${URL_SURFIX}`;

/**
 * Create a promise that is fulfilled (with no value) once a timer of `t`
 * milliseconds has fired.
 *
 * @param {number} t - Timer duration in milliseconds, handed to `setTimeout`.
 * @returns {Promise<undefined>}
 */
function delay(t) {
  return new Promise((wake) => {
    setTimeout(wake, t);
  });
}

/**
 * Download the digest index and return the names of the daily digests.
 *
 * The index is parsed as JSON and is expected to be an array of strings.
 * Each entry has a leading "./json/" and a trailing ".json" stripped
 * (case-insensitively), and only entries whose name contains "daily" are kept.
 *
 * @param {string} [entriesUrl=ENTRIES_URL] - URL of the index document.
 * @returns {Promise<string[]>} Names of the daily digests, in index order.
 *   Rejects if the download fails or the body is not valid JSON.
 */
function loadAllURls(entriesUrl = ENTRIES_URL) {
  return request(entriesUrl)
    .then(data => JSON.parse(data))
    .then(entries => entries
      .map(entry => entry.replace(/^\.\/json\/|\.json$/ig, ''))
      .filter(entry => entry.includes('daily')));
}

/**
 * Download the given digests strictly one after another, waiting one second
 * before each request, and log a numbered progress line per finished download.
 *
 * @param {string[]} URLs - Digest names; each is wrapped in URL_PERFIX and
 *   URL_SURFIX to form the request URL.
 * @returns {Promise<string|undefined>} Resolves with 'done' after the last
 *   download, or with undefined when `URLs` is empty. Rejects as soon as any
 *   download fails; the remaining ones are then not attempted.
 */
function fetchDailyDetail (URLs) {
  let finished = 0;

  // Builds the step that handles one digest: pause, download, report.
  const stepFor = (url) => () =>
    delay(1 * 1000)
      .then(() => request(URL_PERFIX + url + URL_SURFIX))
      .then((data) => {
        finished += 1;
        console.log(`${finished}: ${url} done.`);
        // save data (placeholder: the downloaded body is currently discarded)
      })
      .then(() => 'done');

  // Append each step to a single chain so that downloads never overlap.
  let chain = Promise.resolve();
  URLs.forEach((url) => {
    chain = chain.then(stepFor(url));
  });
  return chain;
}

// Entry point: load the list of daily digests, download them one by one, and
// report completion. Any failure along the way is logged and reflected in the
// process exit code instead of surfacing as an unhandled promise rejection.
loadAllURls(ENTRIES_URL)
  .then(urls => fetchDailyDetail(urls))
  .then(() => {
    console.log('done');
  })
  .catch((err) => {
    console.error(err);
    process.exitCode = 1;
  });