explore graphs
load("~/Downloads/module_graphs_2011.RData")
test_graph <- module_graphs_2011$`2`
# Use the absolute r value for the weights
test_graph <- set.edge.attribute(test_graph, name = "weight", value = abs(E(test_graph)$cor))
trim_components <- function(G, n) {
cls <- igraph::clusters(G)
smallcls <- which(cls$csize >= n)
ids_to_remove <- which(cls$membership %in% smallcls)
graph <- delete.vertices(G,ids_to_remove)
n_comp <- count_components(G)
return(list(graph = graph, n_comp = n_comp))
}
explore_graphs <- function(X, graph = graph){
g <- graph
graph <- delete_edges(graph, which(E(graph)$weight <= X))
graph <- delete_vertices(graph, V(graph)[degree(graph) == 0])
c2 <- trim_components(graph, 2)
graph_comp_2 <- c2$n_comp - count_components(c2$graph)
c3 <- trim_components(graph, 3)
graph_comp_3 <- c3$n_comp - count_components(c3$graph)
c5 <- trim_components(graph, 5)
graph_comp_5 <- c5$n_comp - count_components(c5$graph)
c10 <- trim_components(graph, 10)
graph_comp_10 <- c10$n_comp - count_components(c10$graph)
c25 <- trim_components(graph, 25)
graph_comp_25 <- c25$n_comp - count_components(c25$graph)
c30 <- trim_components(graph, 30)
graph_comp_30 <- c30$n_comp - count_components(c30$graph)
c40 <- trim_components(graph, 40)
graph_comp_40 <- c40$n_comp - count_components(c40$graph)
c50 <- trim_components(graph, 50)
graph_comp_50 <- c50$n_comp - count_components(c50$graph)
c60 <- trim_components(graph, 60)
graph_comp_60 <- c60$n_comp - count_components(c60$graph)
c100 <- trim_components(graph, 100)
graph_comp_100 <- c100$n_comp - count_components(c100$graph)
c200 <- trim_components(graph, 100)
graph_comp_200 <- c200$n_comp - count_components(c200$graph)
c300 <- trim_components(graph, 100)
graph_comp_300 <- c300$n_comp - count_components(c300$graph)
c400 <- trim_components(graph, 100)
graph_comp_400 <- c400$n_comp - count_components(c400$graph)
c500 <- trim_components(graph, 500)
graph_comp_500 <- c500$n_comp - count_components(c500$graph)
result <- data.frame(t = X, vertices = vcount(graph)/vcount(g),
edges = ecount(graph), density = graph.density(graph),
n_comp = count_components(graph),
comp_2 = graph_comp_2, comp_3 = graph_comp_3,
comp_5 = graph_comp_5, comp_10 = graph_comp_10,
comp_25 = graph_comp_25, comp_30 = graph_comp_30,
comp_40 = graph_comp_40, comp_50 = graph_comp_50,
comp_60 = graph_comp_60, comp_100 = graph_comp_100,
comp_200 = graph_comp_200, comp_300 = graph_comp_300,
comp_500 = graph_comp_400, comp_400 = graph_comp_500)
return(result)
}
w <- (seq(0,1, by = 0.01))
l <- bind_rows(plyr::llply(w, explore_graphs, graph = test_graph, .progress = "text", .parallel = F ))
l[is.na(l)] <- 0
l1 <- l %>%
dplyr::select(t, vertices, density) %>%
gather(variable, value, -t) %>%
tbl_df()
v <- ggplot(l1, aes(x = t, y = value, color = variable)) +
geom_line() +
geom_point(shape = 19) +
ylab("Proportion") +
xlab("") + theme_bw() + scale_y_continuous(labels = scales::comma) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title.y = element_text(size = 8),
legend.justification = c(0, 0),
legend.position = c(0, 0),
legend.background = element_blank()) +
guides(color=guide_legend(nrow=1, title = ""))
nc <- ggplot(l, aes(x = t, y = n_comp)) +
geom_line() +
geom_point(shape = 19) +
ylab("# components") +
xlab("") + theme_bw() + scale_y_continuous(labels = scales::comma) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title = element_text(size = 8),
axis.text.y = element_text(size = 8))
d <- ggplot(l, aes(x = t, y = density)) +
geom_line() +
geom_point(shape = 19) +
xlab("") +
ylab("Density") + theme_bw() + scale_y_continuous(labels = scales::comma) +
theme(axis.text.x = element_blank(),
axis.ticks.x = element_blank(),
axis.title = element_text(size = 8),
axis.text.y = element_text(size = 8))
coms <- l %>% dplyr::select(t, comp_2, comp_3, comp_5, comp_10, comp_25, comp_30, comp_40, comp_50, comp_60, comp_100, comp_200, comp_300, comp_400, comp_500) %>% gather(t) %>% set_colnames(c("t", "com", "value"))
coms$com <- factor(coms$com, levels = rev(c("comp_2", "comp_3", "comp_5", "comp_10", "comp_25", "comp_30", "comp_40", "comp_50", "comp_60", "comp_100", "comp_200", "comp_300", "comp_400","comp_500")))
c <- ggplot(coms, aes(x = t, y = value, fill = com)) +
geom_area() +
xlab("sparcc correlation abs(r)") +
ylab("# components >= component size") +
theme_bw() +
theme(legend.position = "bottom",
axis.title = element_text(size = 8),
axis.text.y = element_text(size = 8),
axis.text.x = element_text(size = 8),
legend.text = element_text(size = 8)) +
guides(fill=guide_legend(nrow=1)) +
scale_fill_discrete("Component size",
breaks = rev(c("comp_2", "comp_3", "comp_5", "comp_10", "comp_25", "comp_30", "comp_40", "comp_50", "comp_60", "comp_100", "comp_200", "comp_300", "comp_400","comp_500")),
labels = c(500, 400, 300, 200, 100, 60, 50, 40, 30, 25, 10, 5, 3, 2))
grid.draw(egg::ggarrange(v, nc, c, ncol = 1))
l %>% filter(n_comp == 1, t > 0.5) %>% .$t