What is the difference between:
A) running voom() + duplicateCorrelation() once or;
B) running voom() + duplicateCorrelation() + voom() + duplicateCorrelation(), twice each?
I'm getting more shrunken log2FC (shrunken towards 0) when running case B (see code below).
About the experiment
I have an unbalanced design in which I need to make both between-patient comparison (Form of disease) and within-patient comparison (factor: no. months receveing treat. dose, repeated measurements).
This is my formula and design matrix:
Code
design <- model.matrix(~libSize + Class + Sex + Doses, data = pdat)
design <- structure(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0), .Dim = c(61L, 10L), .Dimnames = list(c("1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "11", "12", "13", "14", "15", "16", "17", "18", "19", "20", "21", "22", "23", "24", "25", "26", "27", "28", "29", "30", "31", "32", "33", "34", "35", "36", "37", "38", "39", "40", "41", "42", "43", "44", "45", "46", "47", "48", "49", "50", "51", "52", "53", "54", "55", "56", "57", "58", "59", "60", "61"), c("(Intercept)", "libSize(17,32]", "ClassLL", "SexM", "Doses3", "Doses4", "Doses6", "Doses9", "Doses10", "Doses12" )), assign = c(0L, 1L, 2L, 3L, 4L, 4L, 4L, 4L, 4L, 4L), contrasts = list( libSize = "contr.treatment", Class = "contr.treatment", Sex = "contr.treatment", Doses = "contr.treatment"))
Case A)
y <- DGEList(txi$counts) y <- y[filterByExpr(y), , keep.lib.sizes = FALSE] y <- calcNormFactors(y) design <- model.matrix(~libSize + Class + Sex + Doses, data = pdat) v <- voom(y, design, plot = TRUE, normalize.method = "quantile") corfit <- duplicateCorrelation(v, design, block = pdat$Patient) fit <- lmFit(v, design, block = pdat$Patient, correlation = corfit$con) fit2 <- eBayes(fit, robust = TRUE) res <- topTable(fit2, coef = 3, number = Inf, adjust.method = "BH", confint = TRUE) hist(res$P.Value) summary(decideTests(fit2, adjust.method = "fdr", p.value = 0.1, lfc = 1))
case B)
y <- DGEList(txi$counts) y <- y[filterByExpr(y), , keep.lib.sizes = FALSE] y <- calcNormFactors(y) design <- model.matrix(~libSize + Class + Sex + Doses, data = pdat) v <- voom(y, design, plot = TRUE, normalize.method = "quantile") corfit <- duplicateCorrelation(v, design, block = pdat$Patient) v <- voom(v, design, plot = TRUE, block = pdat$Patient, correlation = corfit$consensus.correlation) corfit <- duplicateCorrelation(v, design, block = pdat$Patient) fit <- lmFit(v, design, block = pdat$Patient, correlation = corfit$con) fit2 <- eBayes(fit, robust = TRUE) res <- topTable(fit2, coef = 3, number = Inf, adjust.method = "BH", confint = TRUE) hist(res$P.Value) summary(decideTests(fit2, adjust.method = "fdr", p.value = 0.1, lfc = 1))
Number of DE according to:
summary(decideTests(fit2, adjust.method = "fdr", p.value = 0.1, lfc = 1))
A)
(Intercept) libSize(17,32] ClassLL SexM Doses3 Doses4 Doses6 Down 0 37 9 14 0 1 1 NotSig 800 14275 14298 14271 14235 14318 14126 Up 13519 7 12 34 84 0 192 Doses9 Doses10 Doses12 Down 7 115 1 NotSig 14309 13982 14318 Up 3 222 0
B)
(Intercept) libSize(17,32] ClassLL SexM Doses3 Doses4 Doses6 Down 0 0 1 1 1 0 0 NotSig 0 14242 14241 14235 14239 14241 14235 Up 14242 0 0 6 2 1 7 Doses9 Doses10 Doses12 Down 10 10 0 NotSig 14213 14197 14242 Up 19 35 0
Thank you.
Regards,
Thyago
------------------------------ > sessionInfo() R version 3.5.1 (2018-07-02) Platform: x86_64-pc-linux-gnu (64-bit) Running under: Ubuntu 18.04.1 LTS Matrix products: default BLAS: /usr/lib/x86_64-linux-gnu/blas/libblas.so.3.7.1 LAPACK: /usr/lib/x86_64-linux-gnu/lapack/liblapack.so.3.7.1 locale: [1] LC_CTYPE=pt_BR.UTF-8 LC_NUMERIC=C [3] LC_TIME=pt_BR.UTF-8 LC_COLLATE=en_US.UTF-8 [5] LC_MONETARY=pt_BR.UTF-8 LC_MESSAGES=en_US.UTF-8 [7] LC_PAPER=pt_BR.UTF-8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] parallel stats4 stats graphics grDevices utils datasets [8] methods base other attached packages: [1] bindrcpp_0.2.2 pheatmap_1.0.10 [3] WriteXLS_4.0.0 cowplot_0.9.3 [5] ggplot2_3.1.0 dplyr_0.7.7 [7] DESeq2_1.20.0 SummarizedExperiment_1.10.1 [9] DelayedArray_0.6.6 BiocParallel_1.14.2 [11] matrixStats_0.54.0 Biobase_2.40.0 [13] GenomicRanges_1.32.7 GenomeInfoDb_1.16.0 [15] IRanges_2.14.12 S4Vectors_0.18.3 [17] BiocGenerics_0.26.0 readxl_1.1.0 [19] edgeR_3.22.5 limma_3.36.5 loaded via a namespace (and not attached): [1] bit64_0.9-7 splines_3.5.1 Formula_1.2-3 [4] assertthat_0.2.0 statmod_1.4.30 latticeExtra_0.6-28 [7] blob_1.1.1 GenomeInfoDbData_1.1.0 cellranger_1.1.0 [10] yaml_2.2.0 pillar_1.3.0 RSQLite_2.1.1 [13] backports_1.1.2 lattice_0.20-35 glue_1.3.0 [16] digest_0.6.18 RColorBrewer_1.1-2 XVector_0.20.0 [19] checkmate_1.8.5 colorspace_1.3-2 htmltools_0.3.6 [22] Matrix_1.2-14 plyr_1.8.4 XML_3.98-1.16 [25] pkgconfig_2.0.2 genefilter_1.62.0 zlibbioc_1.26.0 [28] purrr_0.2.5 xtable_1.8-3 scales_1.0.0 [31] htmlTable_1.12 tibble_1.4.2 annotate_1.58.0 [34] withr_2.1.2 nnet_7.3-12 lazyeval_0.2.1 [37] cli_1.0.1 survival_2.42-6 magrittr_1.5 [40] crayon_1.3.4 memoise_1.1.0 fansi_0.4.0 [43] foreign_0.8-71 tools_3.5.1 data.table_1.11.8 [46] stringr_1.3.1 munsell_0.5.0 locfit_1.5-9.1 [49] cluster_2.0.7-1 AnnotationDbi_1.42.1 compiler_3.5.1 [52] rlang_0.3.0.1 grid_3.5.1 RCurl_1.95-4.11 [55] rstudioapi_0.8 htmlwidgets_1.3 bitops_1.0-6 [58] base64enc_0.1-3 gtable_0.2.0 DBI_1.0.0 [61] R6_2.3.0 gridExtra_2.3 knitr_1.20 [64] utf8_1.1.4 bit_1.1-14 bindr_0.1.1 [67] Hmisc_4.1-1 stringi_1.2.4 Rcpp_0.12.19 [70] geneplotter_1.58.0 rpart_4.1-13 acepack_1.4.1 [73] tidyselect_0.2.5
Thank you, Aaron.