robertness
8/2/2015 - 7:29 PM

Download MAPK pathway phosphorylations into an igraph object in R

Download MAPK pathway phosphorylations into an igraph object in R

# Download the KGML description of KEGG pathway "04010" for organism "hsa"
# (the MAPK pathway, per this file's title) and parse it with
# KEGGgraph::parseKGML2Graph().  The result is presumably a graphNEL, since it
# is later handed to igraph.from.graphNEL().
#
# The work runs inside an immediately-invoked function so that the temporary
# path stays out of the workspace, and on.exit() deletes the downloaded file
# whether or not the download or the parse succeeds.
g_nell <- (function() {
  kgml_path <- tempfile()
  on.exit(unlink(kgml_path), add = TRUE)
  KEGGgraph::retrieveKGML(
    "04010",
    organism = "hsa",
    destfile = kgml_path,
    method = "curl",
    quiet = TRUE
  )
  # expandGenes = FALSE is forwarded to the parser unchanged.
  KEGGgraph::parseKGML2Graph(kgml_path, expandGenes = FALSE)
})()
# Build a vertex table with one row per KEGG node held in g_nell:
#   kegg  - first element of the node's `name` slot
#   label - text before the first comma of the node's graphics name
# vapply() guarantees exactly one string per node for each column, and
# stringsAsFactors is spelled out as FALSE (`F` can be reassigned).
vertex_list <- KEGGgraph::getKEGGnodeData(g_nell) %>%
  {data.frame(
    kegg = vapply(., function(item) item@name[1], character(1)),
    label = vapply(
      .,
      function(item) strsplit(item@graphics@name, ",")[[1]][1],
      character(1)
    ),
    stringsAsFactors = FALSE
  )}
# Convert the parsed graph to igraph and rename its vertices to the KEGG ids.
# NOTE(review): the ids are assigned by position, which assumes
# getKEGGnodeData() returns nodes in the same order as the igraph vertices --
# confirm.
g_init <- igraph.from.graphNEL(g_nell)
V(g_init)$name <- vertex_list$kegg
# Only after the positional rename above, keep the first row for each KEGG id.
vertex_list <- dplyr::filter(vertex_list, !duplicated(kegg))
# Build the edge table: the endpoints come from g_init's edge list and the
# `type` column from the KEGG edge data stored in g_nell; only rows whose type
# is "phosphorylation" are kept.
# NOTE(review): types are attached to edges by position, which assumes
# getKEGGedgeData() and get.edgelist(g_init) enumerate the edges in the same
# order and number -- confirm.
edge_list <- KEGGgraph::getKEGGedgeData(g_nell) %>%
  vapply(function(item) {
    subtype_info <- item@subtype
    # Edges without any subtype get a missing type; filter() drops them below.
    if (length(subtype_info) == 0) {
      return(NA_character_)
    }
    # KEGG uses a hierarchy of terms for describing edge types.  For example,
    # the first subtype may be "activation" and the second "phosphorylation",
    # where phosphorylation is a kind of activation.  The second term is more
    # specific than the first, so when it is provided, use it in lieu of the
    # first.  Elements are picked by position rather than by list name.
    idx <- if (length(subtype_info) > 1) 2L else 1L
    subtype_info[[idx]]@name
  }, character(1)) %>%
  {cbind(get.edgelist(g_init), type = .)} %>%
  # Keep the columns as character regardless of the R version's default.
  data.frame(stringsAsFactors = FALSE) %>%
  {dplyr::filter(., type == "phosphorylation")}
# Drop repeated rows from the edge table.
edge_list <- unique(as.data.frame(edge_list))
# Drop repeated rows from the vertex table, then keep the first row for each
# KEGG id.
vertex_list <- dplyr::filter(unique(vertex_list), !duplicated(kegg))
# Assemble the directed graph from the edge table, attaching the vertex table
# as vertex attributes.
g <- graph.data.frame(d = edge_list, directed = TRUE, vertices = vertex_list)
# Preserve the current vertex names in `kid`, then show the `label` attribute
# as the vertex name instead.
V(g)$kid <- V(g)$name
V(g)$name <- V(g)$label
# Remove vertices that have no incident edges.
g <- g - V(g)[igraph::degree(g) == 0]
# Clear the intermediate objects; only `g` is left behind.
rm(vertex_list, edge_list, g_init, g_nell)