donfanning
8/15/2018 - 12:05 PM

crawler by userAccount

crawler by userAccount

{
  "name": "crawler-t",
  "main": "crawler.js",
  "scripts": {
    "test": "echo \"Error: no test specified\" && exit 1"
  },
  "dependencies": {
    "bluebird": "3.5.1",
    "lodash": "4.17.4",
    "twitter-crawler": "1.0.4",
    "winston": "2.4.0"
  }
}
var fs = require('fs');
var _ = require('lodash');

/**
 * Writes `obj` as pretty-printed JSON (two-space indent) to
 * `output/<filename>` inside this script's directory.
 *
 * The write is asynchronous and this function returns before it completes.
 * The `output` directory is not created here, so it must already exist.
 *
 * @param {*} obj - Value to serialise with JSON.stringify.
 * @param {string} filename - File name inside the `output` directory.
 */
function saveOutput(obj, filename) {
    var target = `${__dirname}/output/${filename}`;
    var payload = JSON.stringify(obj, null, '  ');
    // fs.writeFile needs a completion callback: omitting it is deprecated and
    // throws on newer Node releases, and it is the only place a failed write
    // (e.g. missing output directory) can be noticed.
    fs.writeFile(target, payload, function (err) {
        if (err) throw err;
    });
}

var obj;

/**
 * Reduces one tweet object to its text and the media URLs it references.
 *
 * `mediaUrl` collects `media_url` from every entry of `tweet.entities.media`;
 * `mediaUrlVid` collects the `url` of every variant listed under
 * `video_info` in `tweet.extended_entities.media`. Both are always arrays
 * (empty when the tweet carries no such data), and a missing
 * `extended_entities` no longer wipes the URLs gathered from `entities`.
 *
 * @param {Object} tweet - A tweet as stored in the crawled JSON file.
 * @returns {{text: *, mediaUrl: Array, mediaUrlVid: Array}}
 */
function extractTweetMedia(tweet) {
  var mediaUrl = [];
  var mediaUrlVid = [];

  var media = tweet.entities && tweet.entities.media;
  if (media) {
    media.forEach((item) => {
      mediaUrl.push(item.media_url);
    });
  }

  var extendedMedia = tweet.extended_entities && tweet.extended_entities.media;
  if (extendedMedia) {
    extendedMedia.forEach((item) => {
      // Only entries carrying video_info contribute video URLs.
      if (item.video_info) {
        (item.video_info.variants || []).forEach((variant) => {
          mediaUrlVid.push(variant.url);
        });
      }
    });
  }

  return {'text': tweet.text, 'mediaUrl': mediaUrl, 'mediaUrlVid': mediaUrlVid};
}

// Load the crawled tweets, log how many there are, and store the reduced
// text/media summary as output/result.json.
fs.readFile(`${__dirname}/output/DataCamp_tweets.json`, 'utf8', function (err, data) {
  if (err) throw err;
  obj = JSON.parse(data);
  console.log(obj.length);
  var result = obj.map((tweet) => extractTweetMedia(tweet));
  saveOutput(result, `result.json`);
});
// Create Docker
// Run Docker 
// Grab Twitter
// save to firebase 
// send link 
// shut it down 

const TwitterCrawler = require('twitter-crawler');
const fs = require('fs');
const log = require('winston');
const Promise = require('bluebird');

/**
 * Reads additional credentials from the TWITTER_CREDENTIALS environment
 * variable.
 *
 * @returns {*} The JSON-parsed value of the variable, or an empty array when
 *   the variable is unset or empty.
 * @throws {SyntaxError} When the variable is set but is not valid JSON.
 */
const getEnvCredentials = () => {
  if (!process.env.TWITTER_CREDENTIALS) {
    return [];
  }
  return JSON.parse(process.env.TWITTER_CREDENTIALS);
};

// Credential list handed to the crawler: one inline entry followed by whatever
// getEnvCredentials() returns.
// NOTE(review): the '...' values look like redacted placeholders — confirm
// they are replaced (or supplied via TWITTER_CREDENTIALS) before running.
const credentials = [].concat(
  [{
    consumer_key: '...',
    consumer_secret: '...',
    access_token_key: '...',
    access_token_secret: '...',
    enabled: true
  }],
  getEnvCredentials()
);


/**
 * Looks up `object[method]` and returns it bound to `object`, so it can be
 * passed around as a plain callback without losing its `this`.
 *
 * @param {Object} object - Owner of the method.
 * @param {string} method - Property name of the function to bind.
 * @returns {Function} The bound function.
 */
const bind = (object, method) => {
  const fn = object[method];
  return fn.bind(object);
};

/**
 * Writes `obj` as pretty-printed JSON (two-space indent) to
 * `output/<filename>` inside this script's directory.
 *
 * The write is asynchronous and this function returns before it completes.
 * The `output` directory is not created here, so it must already exist.
 *
 * @param {*} obj - Value to serialise with JSON.stringify.
 * @param {string} filename - File name inside the `output` directory.
 */
function saveOutput(obj, filename) {
  const target = `${__dirname}/output/${filename}`;
  const payload = JSON.stringify(obj, null, '  ');
  // fs.writeFile needs a completion callback: omitting it is deprecated and
  // throws on newer Node releases. Failures go to the same logger the crawl
  // chain uses for its own errors.
  fs.writeFile(target, payload, (err) => {
    if (err) {
      log.error(err);
    }
  });
}

const crawler = new TwitterCrawler(credentials);
const crawlList = ["DataCamp"];

module.exports = Promise.all(crawlList.map((twitterHandle) => {
  // Get user
  log.info(`Obtaining user with id ${twitterHandle}...`);
  return crawler.getUser(twitterHandle)
    .then((user) => {
      log.info(
        `Obtained info for user ${user.name} (${user.id}). ` +
        `Storing in output/${twitterHandle}_user.json`
      );
      saveOutput(user, `${twitterHandle}_user.json`);
      // Crawl tweets
      log.info('Obtaining tweets...');
      return crawler.getTweets(twitterHandle, {})
        .then((tweets) => {
          log.info(
            `Obtained ${tweets.length} tweets for user ${user.name} (${user.id}). ` +
            `Storing in output/${twitterHandle}_tweets.json`
          );

          saveOutput(tweets, `${twitterHandle}_tweets.json`);
          log.info('Crawling finished.');
        });
    })
    .catch(bind(log, 'error'));
}));