Btibert3
6/24/2011 - 12:19 AM

Parse the 2010-11 Player Stats

Parse the 2010-11 Player Stats

#===================================================================
# load the libraries
#===================================================================
library(XML)
library(plyr)

#===================================================================
# grab the first page of data
#===================================================================
# URL <- "http://www.nhl.com/ice/app?service=page&page=playerstats&fetchKey=20113ALLAASAll&viewName=summary&sort=points&pg=1"
# tables <- readHTMLTable(URL)
# names(tables)
# sapply(tables, length )
# tables[[4]]


#===================================================================
# set some basics before the loop
#===================================================================
# Base URL of the player stats summary view, sorted by points; the page
# number is appended to the trailing "pg=" parameter.
U <- "http://www.nhl.com/ice/app?service=page&page=playerstats&fetchKey=20113ALLAASAll&viewName=summary&sort=points&pg="

# Number of result pages to request.
n_pages <- 12

# Collect each page in a preallocated list and bind once after the loop,
# rather than re-copying a growing data frame on every iteration.
pages <- vector("list", n_pages)

# loop and grab the data
for (p in seq_len(n_pages)) {

  # build the URL
  URL <- paste0(U, p)

  # fetch the page; the stats are taken from the 4th table that
  # readHTMLTable() returns for it
  temp <- readHTMLTable(URL)[[4]]

  # store via list() so that a NULL result leaves the slot in place instead
  # of deleting the element and shifting later pages
  pages[p] <- list(temp)
  print(paste0("parsed and joined page ", p))

  # pause the code between requests
  Sys.sleep(2)

}

# row bind all pages at once; rbind.fill() accepts a list of data frames and
# fills columns missing from a page with NA
stats2011 <- rbind.fill(pages)

#===================================================================
# clean up the data -- could have kept the first row as the col names
#===================================================================
# head(stats2011)
# Drop rows carrying the literal "Team" in column V3 (presumably the header
# row repeated in each page's table -- confirm against the raw data).
# %in% never yields NA, so a row whose V3 is missing is kept as-is instead of
# being turned into an all-NA row by logical indexing.
stats2011 <- stats2011[!(stats2011$V3 %in% "Team"), , drop = FALSE]

# remove the two columns that have no counterpart in the names assigned below
stats2011$V1 <- NULL
stats2011$V14 <- NULL
# names(stats2011)
cnames <- c("player", "team", "pos", "gp", "g", "a", "p", "plusmin", "pim",
            "pp", "sh", "gw", "ot", "s", "spct", "toig", "shftg", "fo")

# fail loudly if the scraped layout no longer matches the expected columns;
# assigning too few names would otherwise leave NA column names silently
stopifnot(length(cnames) == length(names(stats2011)))
names(stats2011) <- cnames
# rm(cnames)
# head(stats2011)