T.fit function of maSigPro returns error message
0
0
Entering edit mode
hanna.g • 0
@95a206e5
Last seen 13 months ago
Germany

I was trying to recreate the code from the user guide (https://www.bioconductor.org/packages/devel/bioc/vignettes/maSigPro/inst/doc/maSigProUsersGuide.pdf) but for my own data. Everything goes fine until I reach the T.fit function. This does not happen for the example data set, so the problem must be with my data. I am not sure how or if my data structure differs from the example data set or what exactly causes the problem and how to solve it. Has anybody experienced similar problems? Hopefully, I provided enough information to understand the issue, at some places I had to leave out some of the output.

> CAM <- ifelse(samples_tf$Factor..Photosynthesis.mode. == "CAM", 1, 0)
> reC3 <- ifelse(samples_tf$Factor..Photosynthesis.mode. == "reC3", 1, 0)
> Time <- as.integer(samples_tf$Factor..timepoint.ZT.)
> CAM <- as.integer(CAM)
> reC3 <- as.integer(reC3)
> Replicate <- as.integer(c(1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 1, 1))
> edesign.drought <- matrix(c(Time, Replicate, CAM, reC3), ncol = 4, nrow=24)
> colnames(edesign.drought) <- c("Time", "Replicate", "CAM", "reC3")
> typeof(edesign.drought)
[1] "integer"
> class(edesign.drought)
[1] "matrix" "array" 
> Sample.Name.3 <- samples_tf$Sample.Name.3
> rownames(edesign.drought) <- Sample.Name.3
> selected_columns <- c("target_id", names(data.kallisto)[startsWith(names(data.kallisto), "DB_")])
> data.drought <- data.kallisto[, selected_columns]
> rownames(data.drought) <- data.drought$target_id
> data.drought <- data.drought[, -1]
> rownames(edesign.drought)
 [1] "DB_103_rna" "DB_111_rna" "DB_113_rna" "DB_115_rna" "DB_121_rna"
 [6] "DB_123_rna" "DB_125_rna" "DB_137_rna" "DB_139_rna" "DB_143_rna"
[11] "DB_161_rna" "DB_163_rna" "DB_165_rna" "DB_173_rna" "DB_175_rna"
[16] "DB_177_rna" "DB_185_rna" "DB_189_rna" "DB_191_rna" "DB_201_rna"
[21] "DB_203_rna" "DB_205_rna" "DB_97_rna"  "DB_99_rna" 
> colnames(edesign.drought)
[1] "Time"      "Replicate" "CAM"       "reC3"     
> rownames(data.drought)
   [1] "Tf_contig_001_000001" "Tf_contig_001_000002" "Tf_contig_001_000003"
   [4] "Tf_contig_001_000004" "Tf_contig_001_000005" "Tf_contig_001_000006"
   [7] "Tf_contig_001_000007" "Tf_contig_001_000008" "Tf_contig_001_000686"

 [ reached getOption("max.print") -- omitted 40306 entries ]
> colnames(data.drought)
 [1] "DB_103_rna" "DB_111_rna" "DB_113_rna" "DB_115_rna" "DB_121_rna"
 [6] "DB_123_rna" "DB_125_rna" "DB_137_rna" "DB_139_rna" "DB_143_rna"
[11] "DB_161_rna" "DB_163_rna" "DB_165_rna" "DB_173_rna" "DB_175_rna"
[16] "DB_177_rna" "DB_185_rna" "DB_189_rna" "DB_191_rna" "DB_201_rna"
[21] "DB_203_rna" "DB_205_rna" "DB_97_rna"  "DB_99_rna" 
> dim(edesign.drought)
[1] 24  4
> str(edesign.drought)
 int [1:24, 1:4] 6 12 12 12 18 18 18 24 24 24 ...
 - attr(*, "dimnames")=List of 2
  ..$ : chr [1:24] "DB_103_rna" "DB_111_rna" "DB_113_rna" "DB_115_rna" ...
  ..$ : chr [1:4] "Time" "Replicate" "CAM" "reC3"



> design.drought <- make.design.matrix(edesign.drought, degree = 2)
> design.drought$groups.vector
[1] "reC3vsCAM" "CAM"       "reC3vsCAM" "CAM"       "reC3vsCAM" "CAM"      
[7] "reC3vsCAM"

> # 3.2 finding significant genes
> fit_drought <- p.vector(data.drought, design.drought, Q = 0.01, MT.adjust = "BH", theta = 10)
[1] "fitting  gene 100 out of 33342"
[1] "fitting  gene 200 out of 33342"
[1] "fitting  gene 300 out of 33342"
[1] "fitting  gene 400 out of 33342"
[1] "fitting  gene 500 out of 33342"
[1] "fitting  gene 600 out of 33342"
[1] "fitting  gene 700 out of 33342"
[1] "fitting  gene 800 out of 33342"
[1] "fitting  gene 900 out of 33342"

[1] "fitting  gene 33300 out of 33342"
> fit_drought$i # returns the number of significant genes
[1] 20680
> fit_drought$alfa # gives p-value at the Q false discovery control level
NULL
> fit_drought$SELEC # is a matrix with the significant genes and their expression values
                      DB_103_rna  DB_111_rna  DB_113_rna  DB_115_rna
Tf_contig_001_000001 1.07416e+03 1.38325e+03 1.34822e+03 1.35860e+03
Tf_contig_001_000002 1.07416e+03 1.38325e+03 1.34822e+03 1.35860e+03
Tf_contig_001_000003 2.85941e+01 3.08301e+01 3.57738e+01 3.87580e+01
Tf_contig_001_000004 3.39362e+02 4.22439e+02 4.88476e+02 4.56101e+02
Tf_contig_001_000005 1.65506e+01 0.00000e+00 7.11780e+00 2.63698e+01

 [ reached getOption("max.print") -- omitted 20639 rows ]


> tstep.drought <- T.fit(fit_drought, step.method = "two.ways.backward", min.obs = 10, alfa = 0.05)
[1] "fitting  gene 100 out of 20680"
[1] "fitting  gene 200 out of 20680"
[1] "fitting  gene 300 out of 20680"
[1] "fitting  gene 400 out of 20680"
[1] "fitting  gene 500 out of 20680"
[1] "fitting  gene 600 out of 20680"
[1] "fitting  gene 700 out of 20680"
[1] "fitting  gene 800 out of 20680"
[1] "fitting  gene 900 out of 20680"
[1] "fitting  gene 1000 out of 20680"
[1] "fitting  gene 1100 out of 20680"
[1] "fitting  gene 1200 out of 20680"
[1] "fitting  gene 1300 out of 20680"
[1] "fitting  gene 1400 out of 20680"
[1] "fitting  gene 1500 out of 20680"
[1] "fitting  gene 1600 out of 20680"
[1] "fitting  gene 1700 out of 20680"
Error in colnames(OUT)[x] <- colnames(d)[pos] : 
  replacement has length zero
In addition: Warning messages:
1: In a == vari :
  longer object length is not a multiple of shorter object length
2: In a == vari :
  longer object length is not a multiple of shorter object length
3: In a == vari :
  longer object length is not a multiple of shorter object length
4: In a == vari :
  longer object length is not a multiple of shorter object length



sessionInfo( )

R version 4.3.0 (2023-04-21)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.5.2

Matrix products: default
BLAS:   /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib 
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib;  LAPACK version 3.11.0

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

time zone: Europe/Berlin
tzcode source: internal

attached base packages:
[1] stats4    stats     graphics  grDevices utils     datasets  methods  
[8] base     

other attached packages:
 [1] BiocManager_1.30.22         maSigPro_1.73.0            
 [3] EDASeq_2.35.0               ShortRead_1.59.0           
 [5] GenomicAlignments_1.37.0    SummarizedExperiment_1.31.1
 [7] MatrixGenerics_1.13.1       matrixStats_1.0.0          
 [9] Rsamtools_2.17.0            GenomicRanges_1.53.1       
[11] Biostrings_2.69.2           GenomeInfoDb_1.37.4        
[13] XVector_0.41.1              IRanges_2.35.2             
[15] S4Vectors_0.39.1            BiocParallel_1.35.4        
[17] Biobase_2.61.0              BiocGenerics_0.47.0        

loaded via a namespace (and not attached):
 [1] DBI_1.1.3               bitops_1.0-7            deldir_1.0-9           
 [4] biomaRt_2.57.1          rlang_1.1.1             magrittr_2.0.3         
 [7] compiler_4.3.0          RSQLite_2.3.1           GenomicFeatures_1.53.2 
[10] venn_1.11               png_0.1-8               vctrs_0.6.3            
[13] stringr_1.5.0           pkgconfig_2.0.3         crayon_1.5.2           
[16] fastmap_1.1.1           dbplyr_2.3.3            utf8_1.2.3             
[19] bit_4.0.5               zlibbioc_1.47.0         cachem_1.0.8           
[22] progress_1.2.2          blob_1.2.4              DelayedArray_0.27.10   
[25] jpeg_0.1-10             parallel_4.3.0          prettyunits_1.1.1      
[28] R6_2.5.1                stringi_1.7.12          RColorBrewer_1.1-3     
[31] rtracklayer_1.61.1      Rcpp_1.0.11             R.utils_2.12.2         
[34] Matrix_1.6-1            tidyselect_1.2.0        rstudioapi_0.15.0      
[37] abind_1.4-5             yaml_2.3.7              codetools_0.2-19       
[40] hwriter_1.3.2.1         curl_5.0.2              admisc_0.33            
[43] lattice_0.21-8          tibble_3.2.1            KEGGREST_1.41.0        
[46] BiocFileCache_2.9.1     xml2_1.3.5              mclust_6.0.0           
[49] pillar_1.9.0            filelock_1.0.2          generics_0.1.3         
[52] RCurl_1.98-1.12         hms_1.1.3               ggplot2_3.4.3          
[55] munsell_0.5.0           scales_1.2.1            glue_1.6.2             
[58] tools_4.3.0             interp_1.1-4            BiocIO_1.11.0          
[61] locfit_1.5-9.8          XML_3.99-0.14           grid_4.3.0             
[64] latticeExtra_0.6-30     AnnotationDbi_1.63.2    colorspace_2.1-0       
[67] GenomeInfoDbData_1.2.10 restfulr_0.0.15         cli_3.6.1              
[70] rappdirs_0.3.3          fansi_1.0.4             S4Arrays_1.1.6         
[73] dplyr_1.1.3             gtable_0.3.4            R.methodsS3_1.8.2      
[76] DESeq2_1.41.2           digest_0.6.33           aroma.light_3.31.1     
[79] SparseArray_1.1.12      rjson_0.2.21            memoise_2.0.1          
[82] R.oo_1.25.0             lifecycle_1.0.3         httr_1.4.7             
[85] MASS_7.3-60
maSigPro • 566 views
ADD COMMENT
0
Entering edit mode

Addition to the issue above: the T.fit function works only when choosing the step.method = "forward", for whatever reason. Maybe this clue can help solving this mystery.

ADD REPLY

Login before adding your answer.

Traffic: 959 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6