Btibert3
11/3/2010 - 12:54 AM

Follow along with the results for the race

Follow along with the results for the race

# Parse MA Gubernatorial Race 2010

#############################
# Purpose:  use R to scrape data and plot results in real-time

# NOTE:  I am still new to R, so this is very basic!

# source of code snippet as help for this:
# http://stackoverflow.com/questions/1395528/scraping-html-tables-into-r-data-frames-using-the-xml-package
#############################

# set constants
# address of the boston.com results page for the 2010 governor's race
URL <- "http://www.boston.com/news/special/politics/2010/governor/results/"

# load packages
library(XML)
library(stringr)

# grab the tables from the results page (one list element per HTML table)
tables <- readHTMLTable(URL)

# count the rows of every table, keeping exactly one count per list element.
# An element without rows (e.g. a NULL entry for a table that could not be
# converted) counts as 0, so the positions in n.rows always line up with the
# positions in tables.
n.rows <- vapply(tables, function(tbl) {
	n <- nrow(tbl)
	if (is.null(n)) 0L else as.integer(n)
}, integer(1))

# stop with a readable message instead of an obscure subscript error
if (length(n.rows) == 0 || max(n.rows) == 0) {
	stop("no usable HTML table found at ", URL, call. = FALSE)
}

# take only the largest one
results.temp <- tables[[which.max(n.rows)]]

# take a peek
#str(results.temp)
#View(head(results.temp,n=25))

# let's clean up the data a little: give the six columns short names
names(results.temp) <- c("city", "pctreport", "baker", "cahill", "patrick", "stein")

# columns 3 to 6 hold the vote counts; strip the comma that gets pulled from
# the site (e.g. "1,234") and convert each of those columns to numeric
results.temp[3:6] <- lapply(results.temp[3:6], function(votes) {
	as.numeric(gsub(",", "", votes))
})

# make sure the city/town name is stored as character
results.temp$city <- as.character(results.temp$city)


# create a city/town detail dataset: every row except the last one, which
# holds the totals. head(x, -1) drops the last row explicitly instead of
# relying on index arithmetic (`1:n-1` is `(1:n) - 1`, not `1:(n-1)`).
results.detail <- head(results.temp, -1)

# create a "dataset" that has the totals added up for you (the last row)
totals <- tail(results.temp, 1)
totals

# a basic plot
# pull the four candidate totals (columns 3 to 6) out as a plain vector
plot.data <- unlist(totals[, 3:6], use.names = FALSE)
names(plot.data) <- c("Baker", "Cahill", "Patrick", "Stein")

# barplot() returns the x coordinate of the centre of each bar; keep it so
# the labels below can be lined up with the bars
race.plot <- barplot(plot.data,
		main="2010 Gubernatorial Results using R",
		xlab="Candidate")

# write each vote count just above its own bar: x comes from the bar centres
# and y from the bar heights, so the labels follow the data instead of fixed
# coordinates. xpd = NA keeps the label of the tallest bar from being clipped
# at the edge of the plot region.
text(x = race.plot, y = plot.data, labels = plot.data, pos = 3, xpd = NA)