Btibert3
1/31/2013 - 4:28 AM

US News National Rankings - Yield List

US News National Rankings - Yield List

###############################################################################
## Use R to scrape the US News College List and look at yield distribution
## @brocktibert
## jan 2013
###############################################################################


## load the necessary packages
require(XML)
require(RCurl)


## script inputs
## address of the page that holds the rankings table
URL <- "http://goo.gl/39SFG"

## benchmark yield rate to compare against the US News schools
## (a proportion between 0 and 1)
BM <- 0.32

## pull every HTML table found at URL; when the script was written the page
## held a single table, so the first element is the one we want
tables <- readHTMLTable(URL)


## shape the rankings into a basic data frame:
## column 1 is kept as text, columns 2 and 3 have their commas stripped
## and are converted to numbers
df <- tables[[1]]
df[[1]] <- as.character(df[[1]])
df[2:3] <- lapply(df[2:3], function(column) {
  as.numeric(gsub(",", "", as.character(column)))
})
names(df) <- c("school", "admits", "enroll", "yield")

## yield = enrolled / admitted; this replaces the contents of the column
## that was just named "yield"
df[["yield"]] <- df[["enroll"]] / df[["admits"]]

## how many schools are listed (the number of rows in df)
## NOTE: this is a bare expression, so the value is only shown when
## top-level results are auto-printed
nrow(df)

## summarise each column of df (school, admits, enroll, yield)
summary(df)


## histogram of the yield values, using 0.01-wide bins that span 0 to 1
hist(
  x = df$yield,
  breaks = seq(0, 1, by = 0.01),
  col = "red",
  main = "Yield Distribution of US News National",
  xlab = "Yield %",
  ylab = "# US News Schools"
)

## vertical reference line at the benchmark yield
abline(v = BM)

## what is the percentile rank of your school
## function adapted from http://goo.gl/2aiZZ
##
## perc.rank(x, xo)
##   x  : numeric vector of observed values
##   xo : the single value to rank against x
##
## Returns the percentage (0-100) of the non-missing values in x that are
## less than or equal to xo.
##
## Missing values in x are left out of both the count and the denominator.
## The earlier form, length(x[x <= xo]) / length(x), counted every NA as if
## it were <= xo, because subsetting with an NA index keeps an NA element.
## Results for NA-free input are unchanged.
## If x has no non-missing values the result is NaN (0 / 0); if xo is NA the
## result is NA.
perc.rank <- function(x, xo) {
  observed <- x[!is.na(x)]
  sum(observed <= xo) / length(observed) * 100
}
## percentile rank of the benchmark among the schools' yields
RANKR <- perc.rank(df$yield, BM)

## build the annotation and write it on the current plot at (0.8, 10)
LB <- paste0("Percentile Rank from Input is : ", round(RANKR, 0))
text(x = 0.8, y = 10, labels = LB)