EGSEA - failing to write report
1
0
Entering edit mode
mforde84 ▴ 20
@mforde84-12150
Last seen 7.3 years ago

Hi,

I'm having some difficulty with the ESGEA package. The esgea() command is unable to write it's report to disk. I've tried on three separate computers with and without sudoer privledges and I get the same error.

> gsa <- egsea(voom.results=dge,
+  contrasts=contrasts,
+  gs.annots=gene_idx,
+  baseGSEAs=egsea.base(),
+  egsea.dir="~/Desktop/custom_egsea",
+  sort.by="avg.rank",
+  num.threads=2,
+  report=TRUE)
[1] "EGSEA analysis has started"
[1] "Log fold changes are estimated using limma package ... "
[1] "EGSEA is running on the provided data and custom gene sets"
.....camera*...roast*..safe*....gage*...plage*..padog*....zscore*...gsva*....globaltest*...fry*ssgsea*...ora*
[1] "The top gene sets for contrast groups1 - (groups2 + groups3)/2 are:"
                              ID         p.adj
IMMUNE_ESTIMATE          custom2 3.888136e-184
PD1_REACTOME            custom65  1.353047e-69
IMMUNE_RESP_GO_BP       custom59 5.924559e-150
JAK_STAT_KEGG           custom32  2.778769e-46
TGFB_1                  custom10  3.449230e-39
STROMAL_ESTIMATE         custom1 7.961888e-157
MATRIX_REMODEL_REACTOME custom13 6.235033e-111
TGFB_2                  custom11  3.954068e-45
KEGG_CELL_CYCLE         custom83  4.727950e-40
IMMUNE_MDSC_CERWENKA    custom80  1.049228e-34
Writing out the top-ranked gene sets for each contrast ..CUSTOM gene sets
Error in file(file, ifelse(append, "a", "w")) :
  cannot open the connection
> sessionInfo()

R version 3.3.2 (2016-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 16.04.1 LTS

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8   
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats4    parallel  stats     graphics  grDevices utils     datasets
[8] methods   base     

other attached packages:
 [1] EGSEA_1.2.0          pathview_1.14.0      topGO_2.26.0        
 [4] SparseM_1.7          GO.db_3.4.0          gage_2.24.0         
 [7] edgeR_3.16.5         rlecuyer_0.3-4       snow_0.4-2          
[10] GSVAdata_1.10.0      hgu95a.db_3.2.3      org.Hs.eg.db_3.4.0  
[13] GSEABase_1.36.0      graph_1.52.0         annotate_1.52.1     
[16] XML_3.98-1.5         AnnotationDbi_1.36.2 IRanges_2.8.1       
[19] S4Vectors_0.12.1     Biobase_2.34.0       BiocGenerics_0.20.0
[22] GSVA_1.22.4          limma_3.30.9        

loaded via a namespace (and not attached):
 [1] httr_1.2.1               org.Rn.eg.db_3.4.0       splines_3.3.2           
 [4] foreach_1.4.3            gtools_3.5.0             hgu133a.db_3.2.3        
 [7] assertthat_0.1           HTMLUtils_0.1.7          doRNG_1.6               
[10] globaltest_5.28.0        RSQLite_1.1-2            lattice_0.20-34         
[13] digest_0.6.12            RColorBrewer_1.1-2       XVector_0.14.0          
[16] R2HTML_2.3.2             EGSEAdata_1.2.0          PADOG_1.16.0            
[19] colorspace_1.3-2         Matrix_1.2-7.1           plyr_1.8.4              
[22] safe_3.14.0              org.Mm.eg.db_3.4.0       zlibbioc_1.20.0         
[25] xtable_1.8-2             KEGG.db_3.2.3            scales_0.4.1            
[28] gdata_2.17.0             KEGGdzPathwaysGEO_1.12.0 tibble_1.2              
[31] KEGGREST_1.14.0          pkgmaker_0.22            ggplot2_2.2.1           
[34] lazyeval_0.2.0           survival_2.40-1          magrittr_1.5            
[37] memoise_1.0.0            KEGGgraph_1.32.0         nlme_3.1-128            
[40] gplots_3.0.1             GSA_1.03                 hwriter_1.3.2           
[43] tools_3.3.2              registry_0.3             matrixStats_0.51.0      
[46] stringr_1.1.0            munsell_0.4.3            locfit_1.5-9.1          
[49] rngtools_1.2.4           Biostrings_2.42.1        caTools_1.17.1          
[52] grid_3.3.2               RCurl_1.95-4.8           iterators_1.0.8         
[55] bitops_1.0-6             gtable_0.2.0             codetools_0.2-15        
[58] DBI_0.5-1                R6_2.2.0                 hgu133plus2.db_3.2.3    
[61] metap_0.8                KernSmooth_2.23-15       Rgraphviz_2.18.0        
[64] stringi_1.1.2            Rcpp_0.12.9              png_0.1-7
EGSEA write • 2.0k views
ADD COMMENT
0
Entering edit mode

I was thinking that maybe it has something to do with the contrasts I'm setting up (in bold). I'm going to trying forming the contrasts without white space and see if this helps.

#pipeline for RNAseq GSVA analysis

##install libraries
#source("https://bioconductor.org/biocLite.R")
#biocLite("GSVA")
##

#load libraries
library(limma)
library(EGSEA)
library(org.Hs.eg.db)
library(GSEABase)
library(snow)
library(rlecuyer)
library(edgeR)
 
##make_unique(df)
#usage: analysis_ready_data <- make_unique(data_frame)
#converts ensembl to entrez id and collapses redundant ids by greatest IQR
make_unique <- function(input.data) {
 #annotate entrez id
 input.data <- data.frame(cbind(input.data, entrez=as.numeric(mapIds(org.Hs.eg.db, keys=gsub("\\..*","",rownames(input.data)), column="ENTREZID", keytype="ENSEMBL",multiVals="first"))))
 #remove NA
 input.data <- input.data[!is.na(input.data$entrez),]
 #parse duplicated entrez id
 id.dup <- input.data[duplicated(input.data$entrez) | duplicated(input.data$entrez, fromLast = TRUE),ncol(input.data)]
 data.dup <- as.matrix(input.data[duplicated(input.data$entrez) | duplicated(input.data$entrez, fromLast = TRUE),])
 ezid.dup <- unique(id.dup)
 data.unique <- input.data[!input.data$entrez %in% id.dup,]
 rownames(data.unique) <- data.unique$entrez
 data.unique$entrez = NULL
 #assess IQR for duplicates
 data.dup2 <- lapply(ezid.dup, function(x) {
  expr <- data.dup[id.dup == x,]
  if(is.matrix(expr)){
   sd.values <- apply(expr[,1:ncol(input.data)-1], 1, sd)
   mean.values <- apply (expr[,1:ncol(input.data)-1], 1, mean)
   CV.values <- sd.values/mean.values
   CV.values <- gsub("NaN","0",CV.values)
   expr <- expr[which(CV.values == max(CV.values))[[1]],]
  } else {
   expr
  }
 })
 #merge data
 merger <- data.frame(do.call(rbind, data.dup2))
 rownames(merger) <- merger$entrez
 merger$entrez = NULL
 eset <- as.matrix(rbind(data.unique, merger))
 return(eset)
}

setwd("~/Desktop")
#load RNAseq sets
counts.gene <- get(load("CRC_93_samples_RNASeq_expression_data.RData")[1])
counts.gene = make_unique(counts.gene)

#load omics
ftable <- read.delim("crc-omics.csv", header=TRUE,sep="\t")
factor.table <- read.delim("crc-omics.csv", header=TRUE,sep="\t")
groups <- factor(factor.table$group)
institution <- factor(factor.table$Institution)
lane <- factor(factor.table$Lane)
group.contrasts <- model.matrix(~0 + groups + institution + lane)
contrasts <- makeContrasts(
groups1 - (groups2 + groups3) / 2,
groups2 - (groups1 + groups3) / 2,
groups3 - (groups1 + groups2) / 2, levels=group.contrasts)

#input custom gene sets
gmt <- read.csv("geneset.gmt.csv",header=FALSE, row.names=1, sep="\t")
gmt$V2=NULL
gset=NULL
for (i in 1:dim(gmt)[[1]]){
 hold=NULL
 hold <- list(gmt[i,gmt[i,]!=""])
 hold <- as.character(mapIds(org.Hs.eg.db,
  keys=as.character(as.matrix(hold[[1]])),
  column="ENTREZID",
  keytype="SYMBOL",
  multiVals="first"))
 hold <- hold[!is.na(hold)]
 gset[[i]] <- hold
}
names(gset) <- rownames(gmt)

#Lei's method for voom generation
y <- DGEList(counts = counts.gene, group = ftable$factors)
table(rowSums(y$counts == 0) == ncol(counts.gene))
y <- y[!rowSums(y$counts == 0) == ncol(counts.gene),]
y2 <- calcNormFactors(y, method = "TMM")
logCPM <- cpm(y2, log=TRUE)
min.sample.size <- min(as.vector(table(ftable$factors)))
keep <- rowSums(cpm(y) > 1) >= min.sample.size
y2 <- y[keep, keep.lib.sizes=FALSE]
dge <- voomWithQualityWeights(y2,
 design=group.contrasts,
 normalization="quantile",
 plot=FALSE)
 
#generate custom geneset for analysis
gene_idx <- buildCustomIdx(entrezIDs=rownames(dge$E),
 gset,
 label="custom",
 name="Consensus_gene_sets",
 species="Human",
 min.size=1)

#egsea analysis
gsa <- egsea(voom.results=dge,
 contrasts=contrasts,
 gs.annots=gene_idx,
 baseGSEAs=egsea.base(),
 egsea.dir="~/Desktop/custom_egsea",
 sort.by="avg.rank",
 num.threads=2,
 report=TRUE)

 

ADD REPLY
2
Entering edit mode
@monther-alhamdoosh-10001
Last seen 5.4 years ago
Australia/Melbourne/CSL Limited

Hi,

Please rename your contrasts and remove the "/" character. I think this is the cause of your problem.

Cheers,

Monther 

ADD COMMENT
0
Entering edit mode

Yea, I figured this was the problem. However how do I remove the / from the contrast, without losing the contrast? e.g., grp1-(grp2+grp3) is not the same as grp1-(grp2+grp3)/2
 

ADD REPLY
0
Entering edit mode

You just need to the remove it from the column names of the contrast matrix as follows

colnames(contrasts) = c("contrast1", "contrast2", "contrast3")

ADD REPLY
0
Entering edit mode

Ahhh, ok let me try real quick.

ADD REPLY
0
Entering edit mode

Removing the forward slash doesn't help either.

> contrasts <- makeContrasts(
+ groups1-(groups2+groups3),
+ groups2-(groups1+groups3),
+ groups3-(groups1+groups2), levels=group.contrasts)
> gsa <- egsea(voom.results=dge,
+  contrasts=contrasts,
+  gs.annots=gene_idx,
+  baseGSEAs=egsea.base(),
+  egsea.dir="~/Desktop/custom_egsea",
+  sort.by="avg.rank",
+  num.threads=2,
+  report=TRUE)
[1] "EGSEA analysis has started"
[1] "Log fold changes are estimated using limma package ... "
[1] "EGSEA is running on the provided data and custom gene sets"
The EGSEA results have been loaded from
~/Desktop/custom_egsea/custom-fisher-egsea-results.rda
If you want to re-run the EGSEA test, please remove this
file or change the egsea.dir value.
[1] "The top gene sets for contrast groups1 - (groups2 + groups3)/2 are:"
                              ID         p.adj
IMMUNE_ESTIMATE          custom2 3.888136e-184
PD1_REACTOME            custom65  1.353047e-69
IMMUNE_RESP_GO_BP       custom59 5.924559e-150
JAK_STAT_KEGG           custom32  2.778769e-46
TGFB_1                  custom10  3.449230e-39
STROMAL_ESTIMATE         custom1 7.961888e-157
MATRIX_REMODEL_REACTOME custom13 6.235033e-111
TGFB_2                  custom11  3.954068e-45
KEGG_CELL_CYCLE         custom83  4.727950e-40
IMMUNE_MDSC_CERWENKA    custom80  1.049228e-34
Writing out the top-ranked gene sets for each contrast ..CUSTOM gene sets
Error in file(file, ifelse(append, "a", "w")) :
  cannot open the connection
ADD REPLY
0
Entering edit mode

Sorry forgot to mention. You should remove brackets as well from the column names. The reason behind is that EGSEA uses the contrast names as part of the file names. So, the contrast name should not have invalid characters. Please try renaming your contrast names following my previous reply. 

ADD REPLY

Login before adding your answer.

Traffic: 327 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6