Entering edit mode
I was trying to recreate the code from the user guide (https://www.bioconductor.org/packages/devel/bioc/vignettes/maSigPro/inst/doc/maSigProUsersGuide.pdf) but for my own data. Everything goes fine until I reach the T.fit function. This does not happen for the example data set, so the problem must be with my data. I am not sure how or if my data structure differs from the example data set or what exactly causes the problem and how to solve it. Has anybody experienced similar problems? Hopefully, I provided enough information to understand the issue, at some places I had to leave out some of the output.
> CAM <- ifelse(samples_tf$Factor..Photosynthesis.mode. == "CAM", 1, 0)
> reC3 <- ifelse(samples_tf$Factor..Photosynthesis.mode. == "reC3", 1, 0)
> Time <- as.integer(samples_tf$Factor..timepoint.ZT.)
> CAM <- as.integer(CAM)
> reC3 <- as.integer(reC3)
> Replicate <- as.integer(c(1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7, 8, 8, 8, 1, 1))
> edesign.drought <- matrix(c(Time, Replicate, CAM, reC3), ncol = 4, nrow=24)
> colnames(edesign.drought) <- c("Time", "Replicate", "CAM", "reC3")
> typeof(edesign.drought)
[1] "integer"
> class(edesign.drought)
[1] "matrix" "array"
> Sample.Name.3 <- samples_tf$Sample.Name.3
> rownames(edesign.drought) <- Sample.Name.3
> selected_columns <- c("target_id", names(data.kallisto)[startsWith(names(data.kallisto), "DB_")])
> data.drought <- data.kallisto[, selected_columns]
> rownames(data.drought) <- data.drought$target_id
> data.drought <- data.drought[, -1]
> rownames(edesign.drought)
[1] "DB_103_rna" "DB_111_rna" "DB_113_rna" "DB_115_rna" "DB_121_rna"
[6] "DB_123_rna" "DB_125_rna" "DB_137_rna" "DB_139_rna" "DB_143_rna"
[11] "DB_161_rna" "DB_163_rna" "DB_165_rna" "DB_173_rna" "DB_175_rna"
[16] "DB_177_rna" "DB_185_rna" "DB_189_rna" "DB_191_rna" "DB_201_rna"
[21] "DB_203_rna" "DB_205_rna" "DB_97_rna" "DB_99_rna"
> colnames(edesign.drought)
[1] "Time" "Replicate" "CAM" "reC3"
> rownames(data.drought)
[1] "Tf_contig_001_000001" "Tf_contig_001_000002" "Tf_contig_001_000003"
[4] "Tf_contig_001_000004" "Tf_contig_001_000005" "Tf_contig_001_000006"
[7] "Tf_contig_001_000007" "Tf_contig_001_000008" "Tf_contig_001_000686"
[ reached getOption("max.print") -- omitted 40306 entries ]
> colnames(data.drought)
[1] "DB_103_rna" "DB_111_rna" "DB_113_rna" "DB_115_rna" "DB_121_rna"
[6] "DB_123_rna" "DB_125_rna" "DB_137_rna" "DB_139_rna" "DB_143_rna"
[11] "DB_161_rna" "DB_163_rna" "DB_165_rna" "DB_173_rna" "DB_175_rna"
[16] "DB_177_rna" "DB_185_rna" "DB_189_rna" "DB_191_rna" "DB_201_rna"
[21] "DB_203_rna" "DB_205_rna" "DB_97_rna" "DB_99_rna"
> dim(edesign.drought)
[1] 24 4
> str(edesign.drought)
int [1:24, 1:4] 6 12 12 12 18 18 18 24 24 24 ...
- attr(*, "dimnames")=List of 2
..$ : chr [1:24] "DB_103_rna" "DB_111_rna" "DB_113_rna" "DB_115_rna" ...
..$ : chr [1:4] "Time" "Replicate" "CAM" "reC3"
> design.drought <- make.design.matrix(edesign.drought, degree = 2)
> design.drought$groups.vector
[1] "reC3vsCAM" "CAM" "reC3vsCAM" "CAM" "reC3vsCAM" "CAM"
[7] "reC3vsCAM"
> # 3.2 finding significant genes
> fit_drought <- p.vector(data.drought, design.drought, Q = 0.01, MT.adjust = "BH", theta = 10)
[1] "fitting gene 100 out of 33342"
[1] "fitting gene 200 out of 33342"
[1] "fitting gene 300 out of 33342"
[1] "fitting gene 400 out of 33342"
[1] "fitting gene 500 out of 33342"
[1] "fitting gene 600 out of 33342"
[1] "fitting gene 700 out of 33342"
[1] "fitting gene 800 out of 33342"
[1] "fitting gene 900 out of 33342"
[1] "fitting gene 33300 out of 33342"
> fit_drought$i # returns the number of significant genes
[1] 20680
> fit_drought$alfa # gives p-value at the Q false discovery control level
NULL
> fit_drought$SELEC # is a matrix with the significant genes and their expression values
DB_103_rna DB_111_rna DB_113_rna DB_115_rna
Tf_contig_001_000001 1.07416e+03 1.38325e+03 1.34822e+03 1.35860e+03
Tf_contig_001_000002 1.07416e+03 1.38325e+03 1.34822e+03 1.35860e+03
Tf_contig_001_000003 2.85941e+01 3.08301e+01 3.57738e+01 3.87580e+01
Tf_contig_001_000004 3.39362e+02 4.22439e+02 4.88476e+02 4.56101e+02
Tf_contig_001_000005 1.65506e+01 0.00000e+00 7.11780e+00 2.63698e+01
[ reached getOption("max.print") -- omitted 20639 rows ]
> tstep.drought <- T.fit(fit_drought, step.method = "two.ways.backward", min.obs = 10, alfa = 0.05)
[1] "fitting gene 100 out of 20680"
[1] "fitting gene 200 out of 20680"
[1] "fitting gene 300 out of 20680"
[1] "fitting gene 400 out of 20680"
[1] "fitting gene 500 out of 20680"
[1] "fitting gene 600 out of 20680"
[1] "fitting gene 700 out of 20680"
[1] "fitting gene 800 out of 20680"
[1] "fitting gene 900 out of 20680"
[1] "fitting gene 1000 out of 20680"
[1] "fitting gene 1100 out of 20680"
[1] "fitting gene 1200 out of 20680"
[1] "fitting gene 1300 out of 20680"
[1] "fitting gene 1400 out of 20680"
[1] "fitting gene 1500 out of 20680"
[1] "fitting gene 1600 out of 20680"
[1] "fitting gene 1700 out of 20680"
Error in colnames(OUT)[x] <- colnames(d)[pos] :
replacement has length zero
In addition: Warning messages:
1: In a == vari :
longer object length is not a multiple of shorter object length
2: In a == vari :
longer object length is not a multiple of shorter object length
3: In a == vari :
longer object length is not a multiple of shorter object length
4: In a == vari :
longer object length is not a multiple of shorter object length
sessionInfo( )
R version 4.3.0 (2023-04-21)
Platform: x86_64-apple-darwin20 (64-bit)
Running under: macOS Ventura 13.5.2
Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/4.3-x86_64/Resources/lib/libRlapack.dylib; LAPACK version 3.11.0
locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
time zone: Europe/Berlin
tzcode source: internal
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] BiocManager_1.30.22 maSigPro_1.73.0
[3] EDASeq_2.35.0 ShortRead_1.59.0
[5] GenomicAlignments_1.37.0 SummarizedExperiment_1.31.1
[7] MatrixGenerics_1.13.1 matrixStats_1.0.0
[9] Rsamtools_2.17.0 GenomicRanges_1.53.1
[11] Biostrings_2.69.2 GenomeInfoDb_1.37.4
[13] XVector_0.41.1 IRanges_2.35.2
[15] S4Vectors_0.39.1 BiocParallel_1.35.4
[17] Biobase_2.61.0 BiocGenerics_0.47.0
loaded via a namespace (and not attached):
[1] DBI_1.1.3 bitops_1.0-7 deldir_1.0-9
[4] biomaRt_2.57.1 rlang_1.1.1 magrittr_2.0.3
[7] compiler_4.3.0 RSQLite_2.3.1 GenomicFeatures_1.53.2
[10] venn_1.11 png_0.1-8 vctrs_0.6.3
[13] stringr_1.5.0 pkgconfig_2.0.3 crayon_1.5.2
[16] fastmap_1.1.1 dbplyr_2.3.3 utf8_1.2.3
[19] bit_4.0.5 zlibbioc_1.47.0 cachem_1.0.8
[22] progress_1.2.2 blob_1.2.4 DelayedArray_0.27.10
[25] jpeg_0.1-10 parallel_4.3.0 prettyunits_1.1.1
[28] R6_2.5.1 stringi_1.7.12 RColorBrewer_1.1-3
[31] rtracklayer_1.61.1 Rcpp_1.0.11 R.utils_2.12.2
[34] Matrix_1.6-1 tidyselect_1.2.0 rstudioapi_0.15.0
[37] abind_1.4-5 yaml_2.3.7 codetools_0.2-19
[40] hwriter_1.3.2.1 curl_5.0.2 admisc_0.33
[43] lattice_0.21-8 tibble_3.2.1 KEGGREST_1.41.0
[46] BiocFileCache_2.9.1 xml2_1.3.5 mclust_6.0.0
[49] pillar_1.9.0 filelock_1.0.2 generics_0.1.3
[52] RCurl_1.98-1.12 hms_1.1.3 ggplot2_3.4.3
[55] munsell_0.5.0 scales_1.2.1 glue_1.6.2
[58] tools_4.3.0 interp_1.1-4 BiocIO_1.11.0
[61] locfit_1.5-9.8 XML_3.99-0.14 grid_4.3.0
[64] latticeExtra_0.6-30 AnnotationDbi_1.63.2 colorspace_2.1-0
[67] GenomeInfoDbData_1.2.10 restfulr_0.0.15 cli_3.6.1
[70] rappdirs_0.3.3 fansi_1.0.4 S4Arrays_1.1.6
[73] dplyr_1.1.3 gtable_0.3.4 R.methodsS3_1.8.2
[76] DESeq2_1.41.2 digest_0.6.33 aroma.light_3.31.1
[79] SparseArray_1.1.12 rjson_0.2.21 memoise_2.0.1
[82] R.oo_1.25.0 lifecycle_1.0.3 httr_1.4.7
[85] MASS_7.3-60
Addition to the issue above: the T.fit function works only when choosing the step.method = "forward", for whatever reason. Maybe this clue can help solving this mystery.