genomewalker
4/12/2017 - 8:06 AM

explore graphs

explore graphs

load("~/Downloads/module_graphs_2011.RData")

test_graph <- module_graphs_2011$`2`

# Use the absolute r value for the weights
test_graph <- set.edge.attribute(test_graph, name = "weight", value = abs(E(test_graph)$cor))

trim_components <- function(G, n) {
  cls <- igraph::clusters(G)
  smallcls <- which(cls$csize >= n)
  ids_to_remove <- which(cls$membership %in% smallcls)
  graph <- delete.vertices(G,ids_to_remove)
  n_comp <- count_components(G)
  return(list(graph = graph, n_comp = n_comp))
}

explore_graphs <- function(X, graph = graph){
  g <- graph
  graph <- delete_edges(graph, which(E(graph)$weight <= X))
  graph <- delete_vertices(graph, V(graph)[degree(graph) == 0])
  c2 <- trim_components(graph, 2)
  graph_comp_2 <- c2$n_comp - count_components(c2$graph)
  c3 <- trim_components(graph, 3)
  graph_comp_3 <- c3$n_comp - count_components(c3$graph)
  c5 <- trim_components(graph, 5)
  graph_comp_5 <- c5$n_comp - count_components(c5$graph)
  c10 <- trim_components(graph, 10)
  graph_comp_10 <- c10$n_comp - count_components(c10$graph)
  c25 <- trim_components(graph, 25)
  graph_comp_25 <- c25$n_comp - count_components(c25$graph)
  c30 <- trim_components(graph, 30)
  graph_comp_30 <- c30$n_comp - count_components(c30$graph)
  c40 <- trim_components(graph, 40)
  graph_comp_40 <- c40$n_comp - count_components(c40$graph)
  c50 <- trim_components(graph, 50)
  graph_comp_50 <- c50$n_comp - count_components(c50$graph)
  c60 <- trim_components(graph, 60)
  graph_comp_60 <- c60$n_comp - count_components(c60$graph)
  c100 <- trim_components(graph, 100)
  graph_comp_100 <- c100$n_comp - count_components(c100$graph)
  c200 <- trim_components(graph, 100)
  graph_comp_200 <- c200$n_comp - count_components(c200$graph)
  c300 <- trim_components(graph, 100)
  graph_comp_300 <- c300$n_comp - count_components(c300$graph)
  c400 <- trim_components(graph, 100)
  graph_comp_400 <- c400$n_comp - count_components(c400$graph)
  c500 <- trim_components(graph, 500)
  graph_comp_500 <- c500$n_comp - count_components(c500$graph)
  
  result <- data.frame(t = X, vertices = vcount(graph)/vcount(g), 
                       edges = ecount(graph), density = graph.density(graph), 
                       n_comp = count_components(graph),
                       comp_2 = graph_comp_2, comp_3 = graph_comp_3,
                       comp_5 = graph_comp_5, comp_10 = graph_comp_10,
                       comp_25 = graph_comp_25, comp_30 = graph_comp_30, 
                       comp_40 = graph_comp_40, comp_50 = graph_comp_50, 
                       comp_60 = graph_comp_60, comp_100 = graph_comp_100, 
                       comp_200 = graph_comp_200, comp_300 = graph_comp_300,
                       comp_500 = graph_comp_400, comp_400 = graph_comp_500)
  return(result)
}

w <- (seq(0,1, by = 0.01))
l <- bind_rows(plyr::llply(w, explore_graphs, graph = test_graph, .progress = "text", .parallel = F ))
l[is.na(l)] <- 0


l1 <- l %>%
  dplyr::select(t, vertices, density) %>%
  gather(variable, value, -t) %>%
  tbl_df()

v <- ggplot(l1, aes(x = t, y = value, color = variable)) +
  geom_line() +
  geom_point(shape = 19) +
  ylab("Proportion") +
  xlab("") + theme_bw() + scale_y_continuous(labels = scales::comma) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title.y = element_text(size = 8),
        legend.justification = c(0, 0), 
        legend.position = c(0, 0),
        legend.background = element_blank()) +
  guides(color=guide_legend(nrow=1, title = "")) 

nc <- ggplot(l, aes(x = t, y = n_comp)) +
  geom_line() +
  geom_point(shape = 19) +
  ylab("# components") +
  xlab("") + theme_bw() + scale_y_continuous(labels = scales::comma) +
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title = element_text(size = 8), 
        axis.text.y = element_text(size = 8))

d <- ggplot(l, aes(x = t, y = density)) +
  geom_line() +
  geom_point(shape = 19) +
  xlab("") +
  ylab("Density") + theme_bw() + scale_y_continuous(labels = scales::comma) + 
  theme(axis.text.x = element_blank(),
        axis.ticks.x = element_blank(),
        axis.title = element_text(size = 8),
        axis.text.y = element_text(size = 8))

coms <- l %>% dplyr::select(t, comp_2, comp_3, comp_5, comp_10, comp_25, comp_30, comp_40, comp_50, comp_60, comp_100, comp_200, comp_300, comp_400, comp_500) %>% gather(t) %>% set_colnames(c("t", "com", "value"))

coms$com <- factor(coms$com, levels = rev(c("comp_2", "comp_3", "comp_5", "comp_10", "comp_25", "comp_30", "comp_40", "comp_50", "comp_60", "comp_100", "comp_200", "comp_300", "comp_400","comp_500")))

c <- ggplot(coms, aes(x = t, y = value, fill = com)) +
  geom_area() +
  xlab("sparcc correlation abs(r)") +
  ylab("# components >= component size") + 
  theme_bw() +
  theme(legend.position = "bottom", 
        axis.title = element_text(size = 8),
        axis.text.y = element_text(size = 8),
        axis.text.x = element_text(size = 8), 
        legend.text = element_text(size = 8)) +
  guides(fill=guide_legend(nrow=1)) +
  scale_fill_discrete("Component size", 
                      breaks = rev(c("comp_2", "comp_3", "comp_5", "comp_10", "comp_25", "comp_30", "comp_40", "comp_50", "comp_60", "comp_100", "comp_200", "comp_300", "comp_400","comp_500")), 
                      labels = c(500, 400, 300, 200, 100, 60, 50, 40, 30, 25, 10, 5, 3, 2))

grid.draw(egg::ggarrange(v, nc, c, ncol = 1))

l %>% filter(n_comp == 1, t > 0.5) %>% .$t