crawler by userAccount
{
"name": "crawler-t",
"main": "crawler.js",
"scripts": {
"test": "echo \"Error: no test specified\" && exit 1"
},
"dependencies": {
"bluebird": "3.5.1",
"lodash": "4.17.4",
"twitter-crawler": "1.0.4",
"winston": "2.4.0"
}
}
var fs = require('fs');
var _ = require('lodash');
/**
 * Serialize `obj` as indented JSON and write it to `output/<filename>` inside
 * this script's directory.
 *
 * The write is asynchronous and fire-and-forget from the caller's point of
 * view. The `output` directory is not created here; it must already exist.
 *
 * @param {*} obj - Any JSON-serializable value.
 * @param {string} filename - File name (relative to the `output` directory).
 */
function saveOutput(obj, filename) {
  const target = `${__dirname}/output/${filename}`;
  // fs.writeFile requires a callback on current Node versions; without one the
  // call itself throws and write failures would otherwise go unnoticed.
  fs.writeFile(target, JSON.stringify(obj, null, ' '), function (err) {
    // Same policy as the readFile handler in this script: fail loudly.
    if (err) throw err;
  });
}
/**
 * Reduce one crawled tweet object to the three fields kept in result.json.
 *
 * @param {Object} tweet - A tweet as stored in DataCamp_tweets.json.
 * @returns {{text: *, mediaUrl: Array, mediaUrlVid: Array}}
 *   `text` is the tweet's `text` property; `mediaUrl` holds the `media_url`
 *   of every item in `entities.media`; `mediaUrlVid` holds the `url` of every
 *   `video_info.variants` entry found in `extended_entities.media`.
 *   Both URL fields are always arrays (empty when the tweet has no such data).
 */
function summarizeTweet(tweet) {
  // _.get tolerates a missing/null `entities` or `extended_entities`, and
  // _.map / _.flatMap turn a missing list into [], so a tweet without
  // extended entities no longer wipes the image URLs already collected.
  const mediaUrl = _.map(_.get(tweet, 'entities.media'), 'media_url');
  const mediaUrlVid = _.flatMap(
    _.get(tweet, 'extended_entities.media'),
    (media) => _.map(_.get(media, 'video_info.variants'), 'url')
  );
  return { text: tweet.text, mediaUrl: mediaUrl, mediaUrlVid: mediaUrlVid };
}

// Parsed contents of DataCamp_tweets.json; assigned once the read completes.
var obj;
// Read the crawled tweets, log how many there are, and write the reduced
// per-tweet summaries to output/result.json.
fs.readFile(`${__dirname}/output/DataCamp_tweets.json`, 'utf8', function (err, data) {
  if (err) throw err;
  obj = JSON.parse(data);
  console.log(obj.length);
  const result = obj.map((tweet) => summarizeTweet(tweet));
  saveOutput(result, `result.json`);
});
// Create Docker
// Run Docker
// Grab Twitter
// Save to Firebase
// Send link
// Shut it down
const TwitterCrawler = require('twitter-crawler');
const fs = require('fs');
const log = require('winston');
const Promise = require('bluebird');
/**
 * Read extra Twitter credentials from the TWITTER_CREDENTIALS environment
 * variable.
 *
 * @returns {*} The JSON-parsed value of the variable, or an empty array when
 *   the variable is unset or empty.
 * @throws {SyntaxError} When the variable is set but is not valid JSON.
 */
const getEnvCredentials = () => {
  const raw = process.env.TWITTER_CREDENTIALS;
  if (!raw) {
    return [];
  }
  return JSON.parse(raw);
};
// Credential entry defined in the file itself; the '...' values are
// placeholders to be filled in.
const builtInCredential = {
  consumer_key: '...',
  consumer_secret: '...',
  access_token_key: '...',
  access_token_secret: '...',
  enabled: true
};
// Full credential list: the built-in entry followed by whatever
// getEnvCredentials() returns.
const credentials = [builtInCredential].concat(getEnvCredentials());
/**
 * Return `object[method]` bound to `object`, so it can be passed around as a
 * plain callback without losing its `this`.
 *
 * @param {Object} object - Owner of the method.
 * @param {string} method - Property name of the method on `object`.
 * @returns {Function} The bound method.
 */
const bind = (object, method) => {
  const fn = object[method];
  return fn.bind(object);
};
/**
 * Serialize `obj` as indented JSON and write it to `output/<filename>` inside
 * this module's directory.
 *
 * The write is asynchronous and is not awaited by callers. The `output`
 * directory is not created here; it must already exist.
 *
 * @param {*} obj - Any JSON-serializable value.
 * @param {string} filename - File name (relative to the `output` directory).
 */
function saveOutput(obj, filename) {
  const target = `${__dirname}/output/${filename}`;
  // fs.writeFile requires a callback on current Node versions; without one the
  // call itself throws and write failures would otherwise go unnoticed.
  fs.writeFile(target, JSON.stringify(obj, null, ' '), (err) => {
    if (err) {
      // Report through the module's logger, like the crawl errors are.
      log.error(`Could not write ${target}: ${err.message}`);
    }
  });
}
const crawler = new TwitterCrawler(credentials);
const crawlList = ["DataCamp"];

/**
 * Log and store the tweets obtained for one handle.
 *
 * @param {string} twitterHandle - Handle the tweets belong to.
 * @param {Object} user - User object previously returned by the crawler.
 * @param {Array} tweets - Tweets returned by the crawler.
 */
function storeTweets(twitterHandle, user, tweets) {
  const tweetsFile = `${twitterHandle}_tweets.json`;
  log.info(
    `Obtained ${tweets.length} tweets for user ${user.name} (${user.id}). Storing in output/${tweetsFile}`
  );
  saveOutput(tweets, tweetsFile);
  log.info('Crawling finished.');
}

/**
 * Crawl a single handle: fetch and store the user, then fetch and store the
 * tweets. Any rejection along the way is passed to `log.error`.
 *
 * @param {string} twitterHandle - Handle to crawl.
 * @returns {Promise} Promise chain for this handle.
 */
function crawlHandle(twitterHandle) {
  // Get user
  log.info(`Obtaining user with id ${twitterHandle}...`);

  const onUser = (user) => {
    const userFile = `${twitterHandle}_user.json`;
    log.info(
      `Obtained info for user ${user.name} (${user.id}). Storing in output/${userFile}`
    );
    saveOutput(user, userFile);

    // Crawl tweets
    log.info('Obtaining tweets...');
    const onTweets = (tweets) => {
      storeTweets(twitterHandle, user, tweets);
    };
    return crawler.getTweets(twitterHandle, {}).then(onTweets);
  };

  return crawler.getUser(twitterHandle)
    .then(onUser)
    .catch(bind(log, 'error'));
}

// The module's export is a promise that settles once every handle in
// crawlList has been processed.
module.exports = Promise.all(crawlList.map((twitterHandle) => crawlHandle(twitterHandle)));