Hello
I am doing a de novo study using Trinity and RStudio and I am very new to all of this. I have a problem with the output of goseq. I get many GO terms in each category instead of getting one go term per category which is what I see in every paper. I believe that it might have something to do with the geneID2GO mapping file that I have constructed using Trinotate. I paste my code and some results and I would be very grateful if someone could explain what is wrong. This is the first time I am trying to do this kind of thing.
Many thanks in advance.
Juan
Code in R:
dataGLA_final
dataGLA_final <- read.table(file=" dataGLA_final.txt", sep="\t", header = TRUE)
dataGLA_final as shown below has the gene ids, the length of each gene and the third column is a vector of 0/1: 1 gene is differentially express, 0 not
geneID |
Length |
vec01 |
TRINITY_DN10003_c0_g1 |
363 |
0 |
TRINITY_DN10006_c0_g1 |
862 |
1 |
TRINITY_DN10007_c0_g1 |
978 |
0 |
TRINITY_DN10009_c1_g1 |
520 |
0 |
TRINITY_DN10014_c1_g1 |
1042 |
1 |
TRINITY_DN10016_c0_g1 |
806 |
0 |
# weighing function
pwf=nullp( dataGLA_final[,3],bias.data = dataGLA_final[,2])
rownames(pwf) <- dataGLA_final[,1]
geneID2GO <- read.table(file="geneID2GO.txt", sep="\t", header = TRUE)
geneID2GO:
geneID |
GOID |
TRINITY_DN10003_c0_g1 |
"GO:0005575" "GO:0005730" "GO:0006139" "GO:0006364" "GO:0006396" "GO:0006725" "GO:0006807" "GO:0008150" "GO:0008152" "GO:0009987" "GO:0016070" "GO:0016072" "GO:0030529" "GO:0032991" "GO:0034470" "GO:0034641" "GO:0034660" "GO:0043170" "GO:0043226" "GO:0043228" "GO:0043229" "GO:0043232" "GO:0044237" "GO:0044238" "GO:0044260" "GO:0044422" "GO:0044424" "GO:0044428" "GO:0044446" "GO:0044464" "GO:0046483" "GO:0071704" "GO:0090304" "GO:1901360" "GO:1990904" |
TRINITY_DN10006_c0_g1 |
"GO:0003674" "GO:0003774" "GO:0003824" "GO:0005575" "GO:0005737" "GO:0005875" "GO:0005929" "GO:0008150" "GO:0009987" "GO:0016043" "GO:0016462" "GO:0016787" "GO:0016817" "GO:0016818" "GO:0017111" "GO:0030030" "GO:0030286" "GO:0031514" "GO:0032991" "GO:0042995" "GO:0043226" "GO:0043234" "GO:0044422" "GO:0044424" "GO:0044430" "GO:0044446" "GO:0044464" "GO:0044699" "GO:0044763" "GO:0071840" "GO:0120025" "GO:1902494" |
TRINITY_DN10007_c0_g1 |
"GO:0003674" "GO:0005488" "GO:0005509" "GO:0005575" "GO:0005886" "GO:0007155" "GO:0007156" "GO:0008150" "GO:0016020" "GO:0016021" "GO:0022610" "GO:0031224" "GO:0043167" "GO:0043169" "GO:0044425" "GO:0044464" "GO:0046872" "GO:0098609" "GO:0098742" |
TRINITY_DN10009_c1_g1 |
"GO:0005575" "GO:0005576" |
TRINITY_DN10014_c1_g1 |
"GO:0001510" "GO:0002128" "GO:0002181" "GO:0003674" "GO:0003824" "GO:0005575" "GO:0005737" "GO:0006139" "GO:0006396" "GO:0006399" "GO:0006400" "GO:0006412" "GO:0006518" "GO:0006725" "GO:0006807" "GO:0008033" "GO:0008150" "GO:0008152" "GO:0008168" "GO:0008173" "GO:0008175" "GO:0009058" "GO:0009059" "GO:0009451" "GO:0009987" "GO:0016070" "GO:0016740" "GO:0016741" "GO:0019538" "GO:0030488" "GO:0032259" "GO:0034470" "GO:0034641" "GO:0034645" "GO:0034660" "GO:0043043" "GO:0043170" "GO:0043412" "GO:0043414" "GO:0043603" "GO:0043604" "GO:0044237" "GO:0044238" "GO:0044249" "GO:0044260" "GO:0044267" "GO:0044271" "GO:0044424" "GO:0044464" "GO:0046483" "GO:0071704" "GO:0090304" "GO:1901360" "GO:1901564" "GO:1901566" "GO:1901576" |
TRINITY_DN10016_c0_g1 |
"GO:0000280" "GO:0003674" "GO:0003824" "GO:0004721" "GO:0004722" "GO:0004723" "GO:0005488" "GO:0005515" "GO:0005516" "GO:0005575" "GO:0005955" "GO:0006464" "GO:0006470" "GO:0006793" "GO:0006796" "GO:0006807" "GO:0006996" "GO:0007049" "GO:0007126" "GO:0007143" "GO:0007444" "GO:0008150" "GO:0008152" "GO:0008287" "GO:0009888" "GO:0009987" "GO:0016043" "GO:0016311" "GO:0016787" "GO:0016788" "GO:0016791" "GO:0019538" "GO:0022402" "GO:0022414" "GO:0030431" "GO:0032501" "GO:0032502" "GO:0032991" "GO:0033192" "GO:0035220" "GO:0035295" "GO:0036211" "GO:0042578" "GO:0043167" "GO:0043169" "GO:0043170" "GO:0043412" "GO:0044237" "GO:0044238" "GO:0044260" "GO:0044267" "GO:0044424" "GO:0044464" "GO:0044699" "GO:0044702" "GO:0044707" "GO:0044763" "GO:0044767" "GO:0046872" "GO:0048285" "GO:0048513" "GO:0048856" "GO:0060429" "GO:0071704" "GO:0071840" "GO:0140013" "GO:1901564" "GO:1902494" "GO:1903046" "GO:1903293" |
GO.wall=goseq(pwf, gene2cat = geneID2GO, test.cats=c("GO:CC", "GO:BP", "GO:MF"), method = "Wallenius", use_genes_without_cat=FALSE)
k <- as.data.frame(GO.wall) # enriched GOs
k$bh_adjust <- p.adjust(k$over_represented_pvalue,method="BH") #add adjusted p-values
enr <- subset(k, k$bh_adjust < 0.05) #get enriched GO categories
enr:
category |
over_represented_pvalue |
under_represented_pvalue |
numDEInCat |
numInCat |
bh_adjust |
|
|
|
|
|
|
"GO:0000166" "GO:0001882" "GO:0001883" "GO:0003674" "GO:0003824" "GO:0003924" "GO:0005198" "GO:0005200" "GO:0005488" "GO:0005525" "GO:0005575" "GO:0005737" "GO:0005874" "GO:0006461" "GO:0007017" "GO:0008150" "GO:0009987" "GO:0016043" "GO:0016462" "GO:0016787" "GO:0016817" "GO:0016818" "GO:0017076" "GO:0017111" "GO:0019001" "GO:0022607" "GO:0032549" "GO:0032550" "GO:0032553" "GO:0032555" "GO:0032561" "GO:0034622" "GO:0035639" "GO:0036094" "GO:0043167" "GO:0043168" "GO:0043623" "GO:0043933" "GO:0044422" "GO:0044424" "GO:0044430" "GO:0044446" "GO:0044464" "GO:0051258" "GO:0065003" "GO:0071822" "GO:0071840" "GO:0097159" "GO:0097367" "GO:0099080" "GO:0099081" "GO:0099512" "GO:0099513" "GO:1901265" "GO:1901363" |
4.17E-07 |
1 |
9 |
30 |
0.0022099 |
"GO:0003674" "GO:0003824" "GO:0005488" "GO:0008168" "GO:0016740" "GO:0016741" "GO:0043167" "GO:0043169" "GO:0046872" |
1.72E-05 |
0.9999991 |
6 |
16 |
0.0454186
|