Question

Cannabis and KEGGgraph

0

Entering edit mode

fernanda.backsouza ▴ 30

@68c324c6

Last seen 11 months ago

Brazil

Hey.

I've been working on comparative genomics between hop and cannabis, and I am trying to make some graphs using KEGGgraph. None of my attempts with any type of plant work, but I saw that the package supports plants in general. What am I doing wrong?

My script at least:

library(KEGGgraph)

tmp <- tempfile()

retrieveKGML("00900", organism="csav", destfile=tmp, method="auto", quiet=TRUE)

mapkKGML <- system.file("extdata", "csav00900.xml", package = "KEGGgraph")

mapkG <- parseKGML2Graph(mapkKGML,expandGenes=TRUE)

mapkG

I'm sure this pathway exists in KEGG.

Thank you, Fernanda.

```

KEGGREST GenomicDistributionsData KEGGgraph KEGG • 661 views

ADD COMMENT • link updated 11 months ago by Guido Hooiveld ★ 4.1k • written 11 months ago by fernanda.backsouza ▴ 30

2

Entering edit mode

To access KEGG data, the developer of OmnipathR proposed an alternative way in the post below this one, but regarding your question on KEGGgraph: I would download the KGML file to a 'normal' folder first (and thus not use a TMP folder/file, nor save it in an R library system file [location])! Then load that downloaded file, and continue with the things you would like to do.

> ## load library
> library(KEGGgraph)
> 
> ## IMPORTANT: set working directory
> setwd("xx:\\MyFolder")
> 
> ## download KGML file
> retrieveKGML("00900", organism="csav", destfile="csav00900.kgml", method="auto")
trying URL 'https://rest.kegg.jp/get/csav00900/kgml'
downloaded 48 KB

> 
> ## load downloaded file into R
> toyGraph <- parseKGML2Graph("csav00900.kgml", genesOnly=FALSE)
> 
> ## check
> toyGraph
A graphNEL graph with directed edges
Number of Nodes = 129 
Number of Edges = 273 
> 
> nodes(toyGraph)
  [1] "cpd:C05859"     "cpd:C06081"     "path:csav00010" "path:csav00900"
  [5] "cpd:C00022"     "cpd:C00024"     "cpd:C00332"     "cpd:C00118"    
  [9] "cpd:C11434"     "cpd:C11435"     "cpd:C11436"     "cpd:C11453"    
 [13] "cpd:C11811"     "csav:115712335" "csav:115722576" "csav:115725622"
 [17] "csav:115709371" "csav:115709372" "csav:115714928" "csav:115721136"
 [21] "csav:115716460" "csav:115720893" "csav:115719471" "csav:115699135"
 [25] "csav:115707261" "csav:115716237" "ko:K00054"      "csav:115702836"
 [29] "csav:115711722" "cpd:C00356"     "cpd:C00418"     "cpd:C01107"    
 [33] "cpd:C01143"     "cpd:C02321"     "cpd:C00129"     "cpd:C00235"    
 [37] "cpd:C16521"     "cpd:C00341"     "cpd:C11437"     "cpd:C16826"    
 [41] "cpd:C04574"     "cpd:C17432"     "cpd:C00448"     "cpd:C04216"    
 [45] "cpd:C04146"     "cpd:C04145"     "cpd:C01230"     "cpd:C00353"    
 [49] "cpd:C05427"     "ko:K12742"      "csav:115703163" "ko:K01823"     
 [53] "csav:115705753" "csav:115716624" "csav:115697919" "csav:115700182"
 [57] "csav:115707243" "csav:115714435" "csav:115721984" "csav:115725388"
 [61] "csav:115725517" "ko:K12503"      "ko:K00806"      "path:csav00902"
 [65] "path:csav00909" "path:csav00906" "path:csav00904" "path:csav00130"
 [69] "csav:115709045" "csav:115718586" "ko:K00805"      "ko:K24873"     
 [73] "csav:115698215" "csav:115702614" "csav:115719484" "csav:115724518"
 [77] "ko:K12504"      "ko:K12505"      "path:csav00510" "ko:K02523"     
 [81] "path:csav00908" "ko:K05355"      "ko:K21274"      "ko:K21275"     
 [85] "ko:K21268"      "csav:115706724" "ko:K14215"      "cpd:C18321"    
 [89] "ko:K21273"      "ko:K15887"      "ko:K15888"      "csav:115701154"
 [93] "csav:115722947" "csav:115708811" "csav:115725416" "csav:115700498"
 [97] "csav:115701416" "csav:115697848" "ko:K15890"      "csav:115719607"
[101] "ko:K15793"      "cpd:C19852"     "cpd:C19853"     "cpd:C04506"    
[105] "cpd:C19760"     "cpd:C19691"     "cpd:C03461"     "cpd:C01126"    
[109] "cpd:C04748"     "cpd:C20120"     "csav:115706415" "cpd:C20121"    
[113] "path:csav00100" "ko:K17942"      "cpd:C20345"     "csav:115698781"
[117] "path:map00403"  "ko:K18689"      "cpd:C20847"     "ko:K18690"     
[121] "cpd:C20848"     "csav:115695988" "ko:K22813"      "cpd:C22453"    
[125] "ko:K25518"      "ko:K03186"      "ko:K25517"      "ko:K09128"     
[129] "path:csav00270"
> 
> 
> ## repeat, but with genesOnly=TRUE
> toyGraph2 <- parseKGML2Graph("csav00900.kgml", genesOnly=TRUE)
> 
> nodes(toyGraph2)
 [1] "csav:115712335" "csav:115722576" "csav:115725622" "csav:115709371"
 [5] "csav:115709372" "csav:115714928" "csav:115721136" "csav:115716460"
 [9] "csav:115720893" "csav:115719471" "csav:115699135" "csav:115707261"
[13] "csav:115716237" "csav:115702836" "csav:115711722" "csav:115703163"
[17] "csav:115705753" "csav:115716624" "csav:115697919" "csav:115700182"
[21] "csav:115707243" "csav:115714435" "csav:115721984" "csav:115725388"
[25] "csav:115725517" "csav:115709045" "csav:115718586" "csav:115698215"
[29] "csav:115702614" "csav:115719484" "csav:115724518" "csav:115706724"
[33] "csav:115701154" "csav:115722947" "csav:115708811" "csav:115725416"
[37] "csav:115700498" "csav:115701416" "csav:115697848" "csav:115719607"
[41] "csav:115706415" "csav:115698781" "csav:115695988"
> 
> plot(toyGraph2)
> 
>

enter image description here

ADD REPLY • link 11 months ago Guido Hooiveld ★ 4.1k

score 2 · Answer 1 · 2024-04-19

Hello, I'm a developer of the OmnipathR package, and I wrote a function to access certain KEGG data:

library(OmnipathR)

csav00900 <- kegg_pathway_download('csav00900', process = FALSE)

csav00900

$entries
# A tibble: 113 × 3
   kgml_id kegg_id        genesymbol                           
   <chr>   <chr>          <chr>                                
 1 65      cpd:C05859     C05859                               
 2 66      cpd:C06081     C06081                               
 3 67      path:csav00010 Glycolysis / Gluconeogenesis         
 4 68      path:csav00900 TITLE:Terpenoid backbone biosynthesis
 5 69      cpd:C00022     C00022                               
 6 70      cpd:C00024     C00024                               
 7 71      cpd:C00332     C00332                               
 8 72      cpd:C00118     C00118                               
 9 73      cpd:C11434     C11434                               
10 74      cpd:C11435     C11435                               
# ℹ 103 more rows
# ℹ Use `print(n = ...)` to see more rows

$relations
# A tibble: 88 × 6
   source target type    effect   arrow relation_id 
   <chr>  <chr>  <chr>   <chr>    <chr> <chr>       
 1 67     85     maplink compound 70    csav00900:1 
 2 85     86     ECrel   compound 71    csav00900:2 
 3 67     86     maplink compound 70    csav00900:3 
 4 86     89     ECrel   compound 90    csav00900:4 
 5 89     84     ECrel   compound 91    csav00900:5 
 6 84     115    ECrel   compound 92    csav00900:6 
 7 115    114    ECrel   compound 93    csav00900:7 
 8 78     79     ECrel   compound 99    csav00900:8 
 9 67     78     maplink compound 72    csav00900:9 
10 79     80     ECrel   compound 73    csav00900:10
# ℹ 78 more rows
# ℹ Use `print(n = ...)` to see more rows

I also recommend installing OmnipathR directly from GitHub, because it is a large package for which we release updates and bug fixes more often and faster than we manage to publish them on Bioconductor. That said, the BioC 3.19 version should also work fine.

library(remotes)
install_github('saezlab/OmnipathR')

I haven't checked whether the returned data is correct and suitable for use, as I'm not familiar with plants. I do know that with process = TRUE an empty data frame is returned, probably because we attempt to translate identifiers and apparently our UniProt-based ID translation doesn't work for C. sativa; or maybe because we are not able to translate metabolite IDs correctly? If you see what should be translated here and how, please let me know.

There is also this function:

kegg_info('csav00900')
$id
[1] "csav00900"

$name
[1] "Terpenoid backbone biosynthesis - Cannabis sativa (hemp)"

$desc
[1] "Terpenoids, also known as isoprenoids, are a large class of natural products consisting of isoprene (C5) units. There are two biosynthetic pathways, the mevalonate pathway [MD:M00095] and the non-mevalonate pathway or the MEP/DOXP pathway [MD:M00096], for the terpenoid building blocks: isopentenyl diphosphate (IPP) and dimethylallyl diphosphate (DMAPP). The action of prenyltransferases then generates higher-order building blocks: geranyl diphosphate (GPP), farsenyl diphosphate (FPP), and geranylgeranyl diphosphate (GGPP), which are the precursors of monoterpenoids (C10), sesquiterpenoids (C15), and diterpenoids (C20), respectively. Condensation of these building blocks gives rise to the precursors of sterols (C30) and carotenoids (C40). The MEP/DOXP pathway is absent in higher animals and fungi, but in green plants the MEP/DOXP and mevalonate pathways co-exist in separate cellular compartments. The MEP/DOXP pathway, operating in the plastids, is responsible for the formation of essential oil monoterpenes and linalyl acetate, some sesquiterpenes, diterpenes, and carotenoids and phytol. The mevalonate pathway, operating in the cytosol, gives rise to triterpenes, sterols, and most sesquiterpenes."

$pubmed
[1] "12777052" "16262699" "9858571"  "24375100"

$diseases
NULL

$rel_pathways
 [1] "Glycolysis / Gluconeogenesis"                        "Steroid biosynthesis"                                "Ubiquinone and other terpenoid-quinone biosynthesis" "Cysteine and methionine metabolism"
 [5] "N-Glycan biosynthesis"                               "Monoterpenoid biosynthesis"                          "Diterpenoid biosynthesis"                            "Carotenoid biosynthesis"
 [9] "Zeatin biosynthesis"                                 "Sesquiterpenoid and triterpenoid biosynthesis"

$module
NULL

Meanwhile, I realized most of the IDs in the genesymbol column are KEGG compound IDs, for example C05859. At the moment there is no function in OmnipathR that is able to translate these to other IDs, such as PubChem. A simple but slow solution:

library(magrittr)
library(purrr)
library(stringr)
library(rvest)
library(dplyr)

# URL template for a KEGG compound entry page served from genome.jp's
# dbget-bin endpoint; `%s` is filled in with the compound ID via sprintf().
dbget_url <- 'https://www.genome.jp/dbget-bin/www_bget?compound+%s'

# Look up the PubChem identifier that the KEGG DBGET page lists for one
# KEGG compound ID.
#
# @param kegg_cid A single value. Strings matching '^C\\d+$' are treated as
#   KEGG compound IDs and looked up online; anything else (including NA) is
#   returned unchanged, so the function can be mapped over a column that
#   mixes compound IDs with other labels.
# @return Always a length-one value, as purrr::map_chr() requires: the
#   PubChem ID as a string, NA_character_ when the page lists none, or the
#   unmodified input for non-compound values.
#
# One HTTP request is made per compound ID. Errors raised while fetching or
# parsing the page are not caught here and propagate to the caller.
compound_kegg2pubchem <- function(kegg_cid) {
    stopifnot(length(kegg_cid) == 1L)

    # isTRUE() sends NA down the pass-through branch instead of letting an
    # NA condition abort the call.
    if (!isTRUE(str_detect(kegg_cid, '^C\\d+$'))) {
        return(kegg_cid)
    }

    hits <-
        sprintf(dbget_url, kegg_cid) %>%
        read_html %>%
        html_elements('table.w1') %>%
        keep(~str_detect(html_text2(.x), 'PubChem')) %>%
        html_element('a') %>%
        html_text2

    # Drop missing/empty link texts and duplicates before deciding what to
    # return.
    hits <- unique(hits[!is.na(hits) & nzchar(hits)])

    # No PubChem link on the page: return a typed NA rather than
    # character(0), which map_chr() would reject.
    if (length(hits) == 0L) {
        return(NA_character_)
    }

    # NOTE(review): when several distinct link texts survive, the first one
    # is assumed to be the relevant PubChem ID -- confirm against the page
    # layout.
    if (length(hits) > 1L) {
        warning(
            'Multiple PubChem IDs found for ', kegg_cid,
            '; using the first.',
            call. = FALSE
        )
    }

    hits[[1L]]
}

# Download the csav00900 pathway with process = FALSE, then add a `pubchem`
# column by running the KEGG -> PubChem lookup on every `genesymbol` value
# (one web request per compound ID, hence slow).
csav00900 <- kegg_pathway_download('csav00900', process = FALSE)
csav00900$entries <- mutate(
    csav00900$entries,
    pubchem = map_chr(genesymbol, compound_kegg2pubchem)
)