For Node.js
var eventproxy = require('eventproxy'),
superagent = require('superagent'),
cheerio = require('cheerio'),
url = require('url'),
extend = require('node.extend'),
fs = require("fs"),
async = require("async");
// All group pages are done: this handler is registered with eventproxy's
// `after`, using `animalGroupUrls.length` as the number of
// `getAllAnimalUrlDoneEvent` emissions to wait for.
ep.after(events.getAllAnimalUrlDoneEvent, animalGroupUrls.length, function onAllGroupsDone(emitted) {
    console.log("所有物种url获取完成");
    // Disabled alternative kept from the original: collect the second element
    // of every emitted payload into opt.siteUrls.
    // opt.siteUrls.push(...emitted.map(x => { return x[1] }).filter(x => { return x != undefined }));

    // Loose `!= null` matches exactly the same values as `!= undefined`:
    // it drops both null and undefined entries.
    const definedSiteUrls = opt.siteUrls.filter((siteUrl) => siteUrl != null);
    opt.siteUrls = definedSiteUrls;

    // Signal the next stage that the URL list is ready.
    ep.emit(events.loadUrlsDoneEvent);
});
// Throttle the crawl to one request at a time to avoid getting the IP banned.
// For every group URL: fetch the page, report completion to async.mapLimit via
// `callback`, and emit `getAllAnimalUrlDoneEvent` on the event proxy.
async.mapLimit(animalGroupUrls, 1, function (_url, callback) {
    console.log("开始获取目页面 {url}".format({ url: _url }));
    const enurl = _encodeUrl(_url);
    // Fall back to the raw URL when the encoder returns a falsy value.
    superagent.get(enurl || _url).end(function (err, res) {
        if (err) {
            console.log(err);
            // The iteratee callback must be invoked on failure as well:
            // with a concurrency limit of 1, skipping it would leave the
            // queue stuck on this URL forever. No error is passed, so the
            // remaining URLs are still processed; the result for this URL
            // is left undefined.
            callback(null);
            // Still emit the event for this URL, as the original error path did.
            ep.emit(events.getAllAnimalUrlDoneEvent, [_url]);
            return;
        }
        console.log("目页面获取成功 {url}".format({ url: _url }));
        var $ = cheerio.load(res.text);
        // NOTE(review): this local shadows the outer `animalGroupUrls` that
        // is being iterated; it is only a scratch list for the extraction
        // step below (not shown in this snippet).
        var animalGroupUrls = [];
        // Extract every order ("目") URL under the root element into animalGroupUrls.
        // Then release the mapLimit slot and emit the event for this URL.
        callback(null, _url);
        ep.emit(events.getAllAnimalUrlDoneEvent, [_url]);
    });
});
{
"scripts": {
// Start the browser-sync server
"browser": "browser-sync start --server --directory --port 50032 --files \"**/*.css, **/*.html , **/*.js\""
}
}
{
"devDependencies": {
"browser-sync": "^2.18.13", // server
"cheerio": "^1.0.0-rc.2", // jQuery-like API for Node
"connect": "^3.6.5",
"eventproxy": "^1.0.0", // event-based controller; can be used to control concurrency
"superagent": "^3.8.1" // HTTP GET, POST, ...
},
"dependencies": {
"node.extend": "^2.0.0" // object deep copy
}
}