code-vagabond
1/20/2014 - 5:40 PM

scrape facebook from R. Based off

scrape facebook from R. Based off

###############################################################################################
##                                                                                           ##
##           Setup                                                                           ##
##                                                                                           ##
###############################################################################################

# install.packages("Rfacebook")  # from CRAN
# install.packages("Rook")  # from CRAN
# install.packages("igraph") # from CRAN

setwd("//fs-home/home$/jab254/Desktop/Facebook R")

library(Rfacebook)
library(Rook)
library(igraph)
library(gtools)
library(reshape)
library(lubridate)
library(ggplot2)
library(scales)

#from https://developers.facebook.com/apps - setup an app and copy the stuff below
  #fb_oauth <- fbOAuth(app_id="APP ID", app_secret="APP SECRET")

#now we have our fb_oauth connection
#so we will just save them to be able to use them later
  #save(fb_oauth, file="fb_oauth")
 
#so if you want to connect to Facebook again you just have to call
  load("fb_oauth")

#the getUsers function return public information about one or more Facebook user
  me <- getUsers("me", token=fb_oauth)

  me$name # my name

  my_friends <- getFriends(token=fb_oauth, simplify=TRUE)
  head(my_friends, n=10) #first 10 friends - ID is the order they joined FB
  nrow(my_friends) # number of friends

###############################################################################################
##                                                                                           ##
##           Download info                                                                   ##
##                                                                                           ##
###############################################################################################

#Too many friends in one group gets rejected by FB API. 
#Break into groups of 80.
split_my_friends_1 <-  my_friends[1:80,]
split_my_friends_2 <-  my_friends[81:160,] 
split_my_friends_3 <-  my_friends[161:240,] 
split_my_friends_4 <-  my_friends[241:320,] 
split_my_friends_5 <-  my_friends[321:400,] 
split_my_friends_6 <-  my_friends[401:480,] 
split_my_friends_7 <-  my_friends[481:nrow(my_friends),] 

my_friends_info_1 <- getUsers(split_my_friends_1$id, token=fb_oauth, private_info=TRUE)
my_friends_info_2 <- getUsers(split_my_friends_2$id, token=fb_oauth, private_info=TRUE)
my_friends_info_3 <- getUsers(split_my_friends_3$id, token=fb_oauth, private_info=TRUE)
my_friends_info_4 <- getUsers(split_my_friends_4$id, token=fb_oauth, private_info=TRUE)
my_friends_info_5 <- getUsers(split_my_friends_5$id, token=fb_oauth, private_info=TRUE)
my_friends_info_6 <- getUsers(split_my_friends_6$id, token=fb_oauth, private_info=TRUE)
my_friends_info_7 <- getUsers(split_my_friends_7$id, token=fb_oauth, private_info=TRUE)

my_friends_info<- rbind.fill(my_friends_info_1,my_friends_info_2,
                               my_friends_info_3,my_friends_info_4,
                               my_friends_info_5,my_friends_info_6,
                               my_friends_info_7)

#SAVE IT!
  save(my_friends_info,file="my_friends_info.Rda")
  write.table(my_friends_info, "my_friends_info.txt", sep="\t") 

###############################################################################################
##                                                                                           ##
##           Take a gander                                                                   ##
##                                                                                           ##
###############################################################################################

#Summary
  table(my_friends_info$relationship_status)
  table(my_friends_info$gender)
  table(my_friends_info$location)
  table(my_friends_info$hometown)

#Age histogram
  # format year of birth
  my_friends_info$year <- mdy(my_friends_info$birthday)
    x <- my_friends_info$year
  pdf('bornhist.pdf')
      h<-hist(x, breaks=15, freq=T,
              col="blue", xlab="Year born (according to FB)",
              main="")
  dev.off()

# Not suprisingly it's the least mature/youngest people I know on FB with these statuses...
print(
  my_friends_info[
    which(my_friends_info$relationship_status == "It's complicated") ,
    c("name")
    ] )

print(
  my_friends_info[
    which(my_friends_info$relationship_status == "In an open relationship") ,
    c("name")
    ] )

###############################################################################################
##                                                                                           ##
##           prepare net                                                                     ##
##                                                                                           ##
###############################################################################################

# Download who is friends with who (within those friends with me!)
  my_network <- getNetwork(fb_oauth, format="adj.matrix")
# friends who are friends with me alone
  singletons <- rowSums(my_network)==0 

# remove singletons
  my_graph <- graph.adjacency(my_network[!singletons,!singletons])

# make connections one way
  my_graph_simple <- simplify(my_graph)

# set up plot
  #actual model
  layout <- layout.drl(my_graph_simple,options=list(simmer.attraction=0))

  #styling of plot
  E(my_graph_simple)$color <- rgb(.5, .5, 0, 0.15)
  E(my_graph_simple)$width <- 0.0001


#Plot 0 - leaving it on auto
pdf('0_auto.pdf')
  plot(my_graph_simple, layout=layout.auto,
     vertex.label=NA,)
dev.off()


#Plot 1 - with names
  pdf('1_default.pdf')
    plot(my_graph_simple, vertex.size=2, 
         #vertex.label=NA, 
         vertex.label.cex=0.2,
         edge.arrow.size=0, edge.curved=TRUE,layout=layout)
  dev.off()

#Plot 2 - no names
  pdf('2_default.pdf')
    plot(my_graph_simple, vertex.size=2, 
       vertex.label=NA, 
       vertex.label.cex=0.2,
       edge.arrow.size=0, edge.curved=TRUE,layout=layout)
  dev.off()

# Let's colour based on connectedness.
  #this is the number of "shortest paths" going 
  #through a particular individual
  hc4 <- heat.colors(10)
  g.bet <- betweenness(my_graph_simple)
  vcolors <- factor(cut(g.bet, quantile(g.bet), include.lowest = TRUE))
  vcolors <- quantcut(g.bet, q=seq(0,1,by=0.1))
  vcolors2 <- hc4[vcolors]

#Plot 3 - no names
pdf('3_coloured.pdf')
  plot(my_graph_simple, vertex.size=2, vertex.color=vcolors2,
     vertex.label=NA, 
     vertex.label.cex=2,
     edge.arrow.size=0, edge.curved=TRUE,layout=layout)
dev.off()

#Plot 4 - with names
pdf('4_coloured_withnames.pdf')
plot(my_graph_simple, vertex.size=2, vertex.color=vcolors2,
     #vertex.label=NA, 
     vertex.label.cex=0.5,
     edge.arrow.size=0, edge.curved=TRUE,layout=layout)
dev.off()

###############################################################################################
##                                                                                           ##
##           A facebook page                                                                 ##
##                                                                                           ##
###############################################################################################

# Our MCR social events page
  mcrents <- getPage("jesusmcrents", fb_oauth, n = 5000) 
# The pub on my street
  brewhouse <- getPage("TheCambridgeBrewHouse", fb_oauth, n = 5000)
# A brilliant cartoon
  archer <- getPage("ArcherFX", fb_oauth, n = 5000)


#number of likes
  mcrents[which.max(mcrents$likes_count), ] # most popular post ever had 3 likes and 0 comments
  brewhouse[which.max(brewhouse$likes_count), ] # most popular post ever had 16 likes and 2 comments
  archer[which.max(archer$likes_count), ] # most popular post ever had 73,013 likes and 2146 comments,
                                          # and 9315 shares and was this video 
                                          # https://www.facebook.com/photo.php?v=10200194370090771


# convert Facebook date format to R date format
  format.facebook.date <- function(datestring) {
    date <- as.POSIXct(datestring, format = "%Y-%m-%dT%H:%M:%S+0000", tz = "GMT")
  }
# aggregate metric counts over month
  aggregate.metric <- function(metric) {
    m <- aggregate(archer[[paste0(metric, "_count")]], list(month = archer$month), 
                   mean)
    m$month <- as.Date(paste0(m$month, "-15"))
    m$metric <- metric
    return(m)
  }
# create data frame with average metric counts per month
  archer$datetime <- format.facebook.date(archer$created_time)
  archer$month <- format(archer$datetime, "%Y-%m")
    df.list <- lapply(c("likes", "comments", "shares"), aggregate.metric)
    df <- do.call(rbind, df.list)
# visualize evolution in metric

#Plot last - archer page
pdf('archer.pdf')
  ggplot(df, aes(x = month, y = x, group = metric)) + geom_line(aes(color = metric)) + 
    scale_x_date(breaks = "years", labels = date_format("%Y")) + 
    scale_y_log10("Average count per post", 
                  breaks = c(10, 100, 1000, 10000, 50000)) + 
    theme_bw() + theme(axis.title.x = element_blank())
dev.off()

#########################################################################
#pull photos from facebook

# download small profile picture of each friend
dir.create("photos")
for (i in 1:length(friends.id))
  download.file(paste(friends.pic[i]),mode="wb",
                destfile=paste("photos/",friends.id[i],".jpg",sep=""))