mschecht
12/7/2018 - 10:41 AM

cophenetic distance clustering test

Test which clustering algorithm gives the best cophenetic correlation coefficient.

# Pearson correlation of pufm_agg_v3_wide turned into a dissimilarity
# (1 - r) and stored as a "dist" object, ready to be clustered.
pufm_cor <- as.dist(1 - cor(pufm_agg_v3_wide, method = "pearson"))

# Agglomeration methods to compare; each one is handed to hclust().
hc_methods <- c(
  "ward.D", "ward.D2", "single", "complete",
  "average", "mcquitty", "median", "centroid"
)

#' Cophenetic correlation for one hierarchical clustering method
#'
#' Clusters `d` with `hclust()` using `hc_method`, then correlates the
#' cophenetic distances of the resulting tree with the original distances.
#'
#' @param hc_method Agglomeration method passed to `hclust()`.
#' @param d Dissimilarities to cluster, as accepted by `hclust()`. Required:
#'   the former default `d = d` referred to itself and could never be
#'   evaluated, so omitting `d` has always been an error.
#' @param dist_method Label describing how `d` was computed; stored unchanged
#'   in the output.
#' @return A one-row tibble with columns `hc_method`, `dist_method` and
#'   `coph` (the cophenetic correlation).
coph <- function(hc_method, d, dist_method) {
  hc <- hclust(d, method = hc_method)
  # Local name differs from the function name to avoid shadowing `coph`.
  coph_cor <- cor(cophenetic(hc), d)
  # tibble() replaces the deprecated data_frame(); the result is the last
  # expression, so it is returned visibly.
  tibble(hc_method = hc_method, dist_method = dist_method, coph = coph_cor)
}

# Run coph() once per linkage method on the correlation-based distances;
# ldply() combines the per-method results into a single data frame.
pufm_cor_coph <- plyr::ldply(
  .data = hc_methods,
  .fun = coph,
  d = pufm_cor,
  dist_method = "cor"
)

# Show the methods ranked from highest to lowest cophenetic correlation.
arrange(pufm_cor_coph, desc(coph))

# Bar chart of the cophenetic correlation obtained with each clustering
# method, with one panel per distance measure.
ggplot(pufm_cor_coph, aes(x = hc_method, y = coph)) +
  # geom_col() is the direct equivalent of geom_bar(stat = "identity").
  geom_col() +
  facet_wrap(~dist_method) +
  ylab("Cophenetic correlation coefficient") +
  xlab("Hierarchical clustering method") +
  theme_bw() +
  # Rotate the method names so they do not overlap.
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))