# Btibert3
# 11/13/2011 - 7:27 PM
#
# Kiplinger's 2011/12 Best Value Rankings
#
# Kiplinger's 2011/12 Best Value Rankings

## Reference: scraping HTML tables into R data frames using the XML package
# http://stackoverflow.com/questions/1395528/scraping-html-tables-into-r-data-frames-using-the-xml-package

### load the libraries
library(RCurl)
library(XML)

## URL
# Pages to scrape, in order: the "prv_univ" table first, then the "lib_arts"
# table. The individual addresses stay available as URL.1 and URL.2.
URL <- c(
  "http://www.kiplinger.com/tools/privatecolleges/index.php?table=prv_univ",
  "http://www.kiplinger.com/tools/privatecolleges/index.php?table=lib_arts"
)
URL.1 <- URL[1]
URL.2 <- URL[2]



## scrape the data

# Row count of every element of `tables` (the list returned by
# readHTMLTable), one count per element and in the same order.
#
# NROW() yields 0 for a NULL entry, so the result always has the same length
# as `tables`. The earlier `unlist(lapply(tables, function(t) dim(t)[1]))`
# dropped NULL entries, which shifted positions and could make which.max()
# point at the wrong element of `tables`.
count_table_rows <- function(tables) {
  vapply(tables, NROW, integer(1))
}

# For each page, keep the table with the most rows; this assumes the rankings
# table is the largest table on the page.
theurl <- URL[1]
tables <- readHTMLTable(theurl)
n.rows <- count_table_rows(tables)
priv <- tables[[which.max(n.rows)]]

theurl <- URL[2]
tables <- readHTMLTable(theurl)
n.rows <- count_table_rows(tables)
la <- tables[[which.max(n.rows)]]


## clean up the data

# Quick look at the raw scraped tables before reshaping them.
dim(priv)
head(priv)
dim(la)
head(la)

# Column names applied to both tables, in column order.
# NOTE: "4y_grad_rate" is not a syntactic R name, so it has to be accessed
# with backticks or [["4y_grad_rate"]]. It is kept unchanged so the columns of
# the resulting data frame stay the same.
cnames <- c("rank", "name", "state", "admit_rate", "stud_per_faculty", "4y_grad_rate",
            "coa", "avg_need_aid", "avg_noneed_aid", "pct_noneed_aid", "avg_grad_debt")
length(cnames)

# Drop the first row of `df`, rename its columns to `col_names`, and add a
# constant `type` column holding the label `type`.
#
# The first row is presumably a header/label row picked up by the scrape --
# TODO confirm against the page. A negative index is used instead of
# `2:nrow(df)` because that sequence counts backwards (2, 1) when the table
# has fewer than two rows and would return junk rows instead of an empty
# table.
prepare_rankings <- function(df, col_names, type) {
  df <- df[-1, , drop = FALSE]
  names(df) <- col_names
  # rep() keeps the assignment valid even when no rows are left.
  df$type <- rep(type, nrow(df))
  df
}

priv <- prepare_rankings(priv, cnames, "Private")
la <- prepare_rankings(la, cnames, "Liberal Arts")


## put the two together
kiplingers_201112 <- rbind(priv, la)
head(kiplingers_201112)