Test which clustering algorithm gives you the best cophenetic distance correlation.
# Convert the pairwise Pearson correlations between the columns of
# `pufm_agg_v3_wide` into a dissimilarity object (1 - r).
pufm_cor <- as.dist(1 - cor(pufm_agg_v3_wide, method = "pearson"))

# Agglomeration methods for hclust() whose cophenetic correlation is compared.
hc_methods <- c(
  "ward.D", "ward.D2", "single", "complete",
  "average", "mcquitty", "median", "centroid"
)
#' Cophenetic correlation for one hierarchical clustering method
#'
#' Clusters `d` with `hclust()` using `hc_method`, then correlates the
#' cophenetic distances of the resulting tree with the original
#' dissimilarities in `d`.
#'
#' @param hc_method Agglomeration method name, passed to
#'   `hclust(method = )`.
#' @param d Dissimilarity object accepted by `hclust()`, e.g. the result of
#'   `dist()` or `as.dist()`. Required: the previous default `d = d` referred
#'   to itself and could never be evaluated.
#' @param dist_method Label describing how `d` was computed; copied verbatim
#'   into the result.
#' @return A one-row tibble with columns `hc_method`, `dist_method` and
#'   `coph` (the cophenetic correlation coefficient).
coph <- function(hc_method, d, dist_method) {
  hc <- hclust(d, method = hc_method)
  coph_cor <- cor(cophenetic(hc), d)
  # tibble() replaces the deprecated data_frame(); ending on the value rather
  # than on an assignment makes the result visible to the caller.
  tibble::tibble(
    hc_method = hc_method,
    dist_method = dist_method,
    coph = coph_cor
  )
}
# Apply coph() to every agglomeration method on the correlation-based
# dissimilarities and combine the results into a single data frame.
pufm_cor_coph <- plyr::ldply(
  hc_methods,
  coph,
  d = pufm_cor,
  dist_method = "cor"
)

# Print the methods ordered from highest to lowest cophenetic correlation.
arrange(pufm_cor_coph, desc(coph))
# Bar chart of the cophenetic correlation coefficient for each clustering
# method, with one panel per distance method.
ggplot(pufm_cor_coph, aes(hc_method, coph)) +
  geom_bar(stat = "identity") +
  facet_wrap(~dist_method) +
  ylab("Cophenetic correlation coefficient") +
  # Axis label spelling corrected ("Hierarchichal" -> "Hierarchical").
  xlab("Hierarchical clustering method") +
  theme_bw() +
  # Rotate the x-axis tick labels by 45 degrees.
  theme(axis.text.x = element_text(angle = 45, vjust = 1, hjust = 1))