MattSandy
10/27/2016 - 4:43 PM

Reddit Front Page Monitor

Reddit Front Page Monitor

var http = require('http');
var https = require('https');
var fs = require('fs');
// IDs of posts already written to posts.csv; scrape_hot consults this to skip duplicates.
var post_array = [];
// Usernames already handed to scrape_user; used there to look each author up only once.
var user_array = [];

// Start every run with fresh CSV files that contain only the header row.
fs.writeFile('posts.csv', 'Author,ID,Post Date,Comments,Score,Stickied,Pull,Subreddit\n', function (err) {
    if (err) {
        // Do not claim success when the file could not be written.
        console.log('Could not clear posts.csv: ', err);
        return;
    }
    console.log('Cleared posts.csv');
});
fs.writeFile('users.csv', 'Author,Author Date\n', function (err) {
    if (err) {
        console.log('Could not clear users.csv: ', err);
        return;
    }
    console.log('Cleared users.csv');
});

// Subreddits to monitor. Declared with var so it is no longer an implicit global.
var subreddits = ["all"];
for (var i = 0; i < subreddits.length; i++) {
    // setTimeout forwards its extra argument to the callback. Receiving it as a
    // parameter gives every timer its own subreddit; closing over a loop
    // variable declared with var would make all timers see the last entry.
    // The one second delay is kept from the original start-up sequence.
    setTimeout(function (subreddit) { scrape_hot("", 1, subreddit); }, 1000, subreddits[i]);
}
/**
 * Fetch one page of a subreddit's listing JSON, append every not-yet-seen post
 * to posts.csv, and request author details through scrape_user.
 *
 * After a page is processed the function either requests the next page or,
 * once 40 pages have been read (or the listing reports no further page),
 * schedules a fresh sweep from page 1 twenty minutes later. The function
 * therefore keeps itself running indefinitely.
 *
 * @param {string} after    Pagination cursor placed in the `after` query
 *                          parameter; "" requests the first page.
 * @param {number} page     1-based number of the page being requested.
 * @param {string} subreddit Subreddit name used in the request URL.
 */
function scrape_hot(after,page,subreddit) {
    var MAX_PAGES = 40;
    var SWEEP_INTERVAL_MS = 1000*60*20;
    // Pause before repeating a failed request so that a persistent failure
    // (network down, error page instead of JSON) does not become a tight
    // request loop.
    var RETRY_DELAY_MS = 5000;

    // Log the failure and repeat the same page request after a short pause.
    function retry(reason) {
        console.log(reason);
        setTimeout(function(){ scrape_hot(after,page,subreddit); }, RETRY_DELAY_MS);
    }

    var url = "https://www.reddit.com/r/" + subreddit + "/.json?after=" + after;
    https.get(url, function(res){
        var body = '';

        res.on('data', function(chunk){
            body += chunk;
        });

        res.on('end', function(){
            try {
                var response = JSON.parse(body);
                var children = response.data.children;
                for(var i=0;i<children.length;i++) {
                    var post = children[i].data;
                    // Skip posts that were already logged.
                    if(post_array.indexOf(post.id) !== -1) {
                        continue;
                    }
                    post_array.push(post.id);
                    // CSV row in the column order of the posts.csv header.
                    var line = post.author + ',' + post.id + ',' +
                        format_date(post.created) + ',' + post.num_comments + ',' +
                        post.score + ',' + post.stickied + ',' +
                        'hot,' + post.subreddit + "\n";
                    console.log(line);
                    // Look up and record the author's details.
                    scrape_user(post.author);
                    fs.appendFile('posts.csv', line, function (err) {
                        if(err) {
                            console.log('Could not append to posts.csv: ', err);
                        }
                    });
                }
                var next_after = response.data.after;
                // A missing cursor is taken to mean the listing has no further
                // page; requesting "after=null" would not advance the sweep.
                if(page<MAX_PAGES && next_after) {
                    scrape_hot(next_after,page+1,subreddit);
                } else {
                    setTimeout(function(){ scrape_hot("",1,subreddit); }, SWEEP_INTERVAL_MS);
                }
            } catch (err) {
                // Unparseable body or unexpected shape: try this page again.
                retry(err);
            }
        });
    }).on('error', function(e){
        retry(e);
    });
}
/**
 * Record a user's account creation date in users.csv, at most once per user.
 *
 * The user is added to user_array on the first call, and later calls for the
 * same name return immediately. The lookup itself is delegated to
 * fetch_user_about so that a retry is not blocked by this de-duplication guard.
 *
 * @param {string} user Username whose about.json should be fetched.
 */
function scrape_user(user) {
    if(user_array.indexOf(user) !== -1) {
        return;
    }
    user_array.push(user);
    fetch_user_about(user, 3);
}

/**
 * Fetch /user/<user>/about.json and append "user,date" to users.csv.
 *
 * On a network error, an unparseable body, or a response without the expected
 * `data.created` path, the request is repeated after a pause until
 * attempts_left is used up. The bound keeps a permanently failing lookup from
 * being retried forever.
 *
 * @param {string} user          Username to look up.
 * @param {number} attempts_left Number of attempts still allowed, including this one.
 */
function fetch_user_about(user, attempts_left) {
    var RETRY_DELAY_MS = 5000;

    // Log the failure and schedule another attempt if any remain.
    function retry(reason) {
        console.log("Got an error: ", reason);
        if(attempts_left > 1) {
            setTimeout(function(){ fetch_user_about(user, attempts_left - 1); }, RETRY_DELAY_MS);
        }
    }

    var url = "https://www.reddit.com/user/" + user + "/about.json";
    https.get(url, function(res){
        var body = '';

        res.on('data', function(chunk){
            body += chunk;
        });

        res.on('end', function(){
            var line;
            try {
                var response = JSON.parse(body);
                line = user + "," + format_date(response.data.created) + "\n";
            } catch (err) {
                retry(err);
                return;
            }
            fs.appendFile('users.csv', line, function (err) {
                if(err) {
                    console.log('Could not append to users.csv: ', err);
                }
            });
        });
    }).on('error', retry);
}
/**
 * Turn a Unix timestamp given in seconds into a "YYYY-MM-DD" string.
 *
 * The calendar fields are read with the local-time getters of Date, so the
 * result reflects the time zone of the machine running the script.
 *
 * @param {number} date Seconds since the Unix epoch.
 * @returns {string} Year, zero-padded month and zero-padded day joined by "-".
 */
function format_date(date) {
    // Prefix a single-character field with "0"; longer text is returned as is.
    function two_digits(value) {
        var text = value.toString();
        if (text.length < 2) {
            return "0" + text;
        }
        return text;
    }

    var moment = new Date(date * 1000); // Date expects milliseconds
    var parts = [
        moment.getFullYear().toString(),
        two_digits(moment.getMonth() + 1), // getMonth() counts from zero
        two_digits(moment.getDate())
    ];
    return parts.join("-");
}