## Btibert3
## 6/12/2013 - 12:45 AM
##
## Cluster 2012-13 NHL Teams on Regular Season

###############################################################################
## Cluster the 2012/13 NHL teams on their regular-season summary statistics
###############################################################################

## load the basics
## A bare `setwd()` call stops the script with
## 'argument "dir" is missing, with no default'. The visible script reads its
## data from a URL and writes no files, so no working directory is required.
## Uncomment and fill in a path only if local file I/O is added.
# setwd("path/to/project")

## load the packages
library(XML)
library(RCurl)

## Address of the NHL.com team-stats summary page to scrape
U <- "http://www.nhl.com/ice/teamstats.htm?fetchKey=20132ALLSAAALL&viewName=summary"

## Parse every HTML table on the page and keep the fifth one.
## NOTE(review): index 5 is presumably the team statistics table; confirm
## against the page layout, since the position is hard-coded.
tables <- readHTMLTable(U)
table <- tables[[5]]

## Discard the first column, keeping columns 2 through the last
table <- table[, seq.int(2, ncol(table))]

## Tidy the headers: strip embedded newlines, then lower-case the result
colnames(table) <- tolower(gsub("\n", "", colnames(table)))

## Build the clustering input from columns 3 onward of `table`.
## Each column goes through character first so that factor columns yield
## their printed values rather than their internal level codes.
df <- do.call(
  data.frame,
  lapply(
    table[, seq.int(3, ncol(table))],
    function(column) as.numeric(as.character(column))
  )
)

## Label each row with its team so the labels carry through to the plots
rownames(df) <- table$team

## Centre and scale every column; note `df` is a numeric matrix from here on
df <- scale(df)

## Pairwise distances between teams on the scaled statistics
## (Euclidean is the default metric of dist(); spelled out for clarity)
dmat <- dist(df, method = "euclidean")

## Show the labels attached to the distance object
labels(dmat)

## cluster the teams
## Three agglomerative clusterings of the same distance matrix, differing
## only in linkage method.
hc1 <- hclust(dmat)                      # default linkage ("complete")
## "ward" was renamed to "ward.D" in R 3.1.0; the old spelling still runs but
## prints a rename message on every call. "ward.D" builds the same tree.
## NOTE(review): on unsquared distances "ward.D" is not Ward's minimum
## variance criterion; "ward.D2" is, but switching would change the result.
hc2 <- hclust(dmat, method = "ward.D")
hc3 <- hclust(dmat, method = "average")

## plot the clustering
## Draw the three dendrograms side by side, each titled with its linkage
## method. par() returns the previous settings, which are restored afterwards
## so that later plots do not inherit the 1x3 layout.
old_par <- par(mfrow = c(1, 3))
plot(hc1, xlab = "Team", main = hc1$method)
plot(hc2, xlab = "Team", main = hc2$method)
plot(hc3, xlab = "Team", main = hc3$method)
par(old_par)