// Downloader for 优品PPT (ypppt.com)
const fs = require('fs');
const http = require('http');
const path = require('path');
const cheerio = require('cheerio');
const fetch = require("node-fetch");
const Unrar = require('unrar');
// Site root, used as the base for the hrefs scraped from listing pages.
// Example category listing: http://www.ypppt.com/gushi
const baseUrl = 'http://www.ypppt.com'
/**
 * Scrape one listing page and start a download for every entry on it.
 *
 * Each `li > a` under `div.wrapper` supplies a title (used as the local file
 * name) and a detail-page link. The detail page is fetched and the href of
 * its download button is handed to `download`.
 *
 * @param {string} url - Listing page URL.
 * @returns {Promise<void>} Resolves once every detail page has been processed
 *   (downloads have been started, not necessarily finished). It never
 *   rejects: every failure is logged with console.error instead.
 */
const doScrap = function(url){
  // Fetch a page as text, treating non-2xx responses as failures so that an
  // error page is never parsed as if it were real content.
  const fetchHtml = (pageUrl) => fetch(pageUrl).then((resp) => {
    if (!resp.ok) {
      throw new Error(`HTTP ${resp.status} for ${pageUrl}`)
    }
    return resp.text()
  })

  // Resolve one listing entry to its download link and start the download.
  // The chain starts from Promise.resolve() so that a malformed href (which
  // makes `new URL` throw) is reported by the .catch below instead of
  // aborting the whole listing.
  const scrapeEntry = (href, fileName) => Promise.resolve()
    .then(() => {
      const eachPage = new URL(href, baseUrl).href
      return fetchHtml(eachPage).then((detailBody) => {
        const $detail = cheerio.load(detailBody)
        const downloadHref = $detail('body > div.wwrapper.clear > div.info > div > div.infoss > div.button > a').attr('href')
        if (!downloadHref) {
          console.error(`no download link found for ${fileName} (${eachPage})`)
          return
        }
        // Accept relative download links by resolving against the detail page.
        const downloadUrl = new URL(downloadHref, eachPage).href
        download(downloadUrl, fileName, (e) => e && console.error(e))
      })
    })
    .catch((err) => console.error(`failed to process ${fileName}: ${err.message}`))

  return fetchHtml(url)
    .then((body) => {
      const $ = cheerio.load(body)
      const tasks = []
      $('body > div.wrapper > ul > li').each((i, item) => {
        const link = $(item).children('a')
        const href = link.attr('href')
        // The title becomes a file name, so strip path separators from it.
        const fileName = link.text().trim().replace(/[\\/]/g, '_')
        if (!href || !fileName) {
          // Not a usable entry (no link or no title); skip it.
          return
        }
        tasks.push(scrapeEntry(href, fileName))
      })
      return Promise.all(tasks)
    })
    .then(() => undefined)
    .catch((err) => console.error(`failed to scrape ${url}: ${err.message}`))
}
/**
 * Download `url` over plain HTTP into the file `dest`, then hand the saved
 * file to `extract`.
 *
 * @param {string} url - Absolute http:// URL of the archive.
 * @param {string} dest - Path of the local file to write.
 * @param {function(string=): void} [cb] - Called exactly once: with no
 *   argument on success, or with an error message on failure.
 */
const download = function(url, dest, cb) {
  console.log(`download ${dest} from ${url}`)
  let file = null
  let settled = false

  // Report the outcome to the caller at most once.
  const finish = (err) => {
    if (settled) return
    settled = true
    if (cb) cb(err ? err.message : undefined)
  }

  // Abort the transfer and remove the partial file before reporting.
  const fail = (err) => {
    if (settled) return
    if (!file) {
      // Nothing was written, so there is nothing to clean up at `dest`.
      finish(err)
      return
    }
    file.destroy()
    // fs.unlink requires a callback; its own error is not interesting here.
    fs.unlink(dest, () => finish(err))
  }

  let request
  try {
    // Parsing first rejects missing/malformed URLs; http.get itself would
    // silently fall back to localhost when given `undefined`.
    request = http.get(new URL(url), function(response) {
      if (response.statusCode !== 200) {
        // Drain the body so the socket is released, and do not save an
        // error/redirect page as if it were the archive.
        response.resume()
        fail(new Error(`unexpected HTTP status ${response.statusCode} for ${url}`))
        return
      }
      file = fs.createWriteStream(dest)
      file.on('error', function(err) {
        response.destroy()
        fail(err)
      })
      response.on('error', fail)
      file.on('finish', function() {
        // close() is async; extract only after the file is fully closed.
        file.close(function(closeErr) {
          if (closeErr) {
            fail(closeErr)
            return
          }
          try {
            extract(dest)
          } catch (extractErr) {
            finish(extractErr)
            return
          }
          finish()
        })
      })
      response.pipe(file)
    })
  } catch (err) {
    // Invalid URL or unsupported protocol: reported through cb, not thrown.
    fail(err)
    return
  }
  request.on('error', fail)
};
/**
 * Unpack the file entries of a RAR archive below the current working
 * directory and delete the archive afterwards.
 *
 * The archive is deleted once, after every entry has been written
 * successfully. If any entry fails or is skipped as unsafe, the archive is
 * kept so no data is lost. A file that cannot be listed as a RAR archive is
 * deleted.
 *
 * @param {string} file - Path of the downloaded archive.
 */
const extract = function(file) {
  console.log(`extracting ${file}...`)

  // fs.unlink passes null on success, so only log real errors.
  const logUnlinkError = (unlinkErr) => {
    if (unlinkErr) console.error(unlinkErr)
  }

  const archive = new Unrar(file);
  archive.list(function(err, entries) {
    if (!entries) {
      // invalid rar...
      if (err) console.error(err)
      console.error(`invalid file ${file}`)
      fs.unlink(file, logUnlinkError)
      return
    }

    const root = process.cwd()
    let failed = 0
    const targets = []
    entries.forEach((entry) => {
      // NOTE(review): assumes the unrar package reports folders with
      // type === 'Directory'; they have no content to stream.
      if (entry.type === 'Directory') return
      const destPath = path.resolve(root, entry.name)
      const relative = path.relative(root, destPath)
      const escapesRoot = relative === '' || relative === '..' ||
        relative.startsWith('..' + path.sep) || path.isAbsolute(relative)
      if (escapesRoot) {
        // Entry names come from the archive; never write outside cwd.
        console.error(`skip unsafe entry ${entry.name} in ${file}`)
        failed += 1
        return
      }
      targets.push({ name: entry.name, destPath })
    })

    let pending = targets.length
    // Runs after the last entry settles; removes the archive only when
    // everything was extracted.
    const allDone = () => {
      if (failed > 0) {
        console.error(`keep ${file}: ${failed} entries were not extracted`)
        return
      }
      console.log(`delete ${file}...`)
      //delete rar file
      fs.unlink(file, logUnlinkError)
    }
    if (pending === 0) {
      allDone()
      return
    }

    targets.forEach(({ name, destPath }) => {
      let entrySettled = false
      // Count each entry exactly once, whether it succeeded or failed.
      const settleEntry = (entryErr) => {
        if (entrySettled) return
        entrySettled = true
        if (entryErr) {
          console.error(entryErr)
          failed += 1
        }
        pending -= 1
        if (pending === 0) allDone()
      }

      try {
        // Entries may live in sub-folders that do not exist yet.
        fs.mkdirSync(path.dirname(destPath), { recursive: true })
      } catch (mkdirErr) {
        settleEntry(mkdirErr)
        return
      }

      const stream = archive.stream(name);
      const destFile = fs.createWriteStream(destPath)
      stream.on('error', (streamErr) => {
        // pipe() does not end the destination on a source error.
        destFile.destroy()
        settleEntry(streamErr)
      });
      destFile.on('error', settleEntry)
      // 'finish' fires once all piped data has been flushed to disk.
      destFile.on('finish', () => settleEntry())
      stream.pipe(destFile);
    })
  });
}
// Kick off scraping for listing pages 2 through 34 (33 pages in total).
Array.from({ length: 33 }, (_, offset) => offset + 2).forEach((page) => {
  const url = `http://www.ypppt.com/gushi/list-${page}.html`
  try {
    doScrap(url)
  } catch (err) {
    // Only synchronous failures land here; log and move on to the next page.
    console.error(err)
  }
})