Hello. I am running deseq2 to do differential expression analysis on aggregated scRNAseq data. I know that the lfcSE is supposed to reflect the log2FC value, but be shrunk towards the fit line (at least that's what I'm gathering from my reading), but I'm only getting positive outputs for the lfcSE value. Our collaborator is insisting that the sign (positive or negative) of the lfcSE should be the same as the log2Fold change, yet my outputs ALWAYS show all lfcSE values as positive for every gene no matter how or on what I run the analysis. Am I misunderstanding something about these values or have I messed something up with respect to the way I'm coding this or outputting the data? Notably, I get a mixture of positive and negative values in my MA plots--so I think that the lfcSE is not reflecting what's in the MA plots.
I think that I may be wrong about what exactly the lfcSE is, because when I export the results to excel before performing the shrinkage calculation, I still get log2FC values and LFCSE values populated in those columns, which means that doing the shrinkage calculation is not the function that directly generates lfcSE. If it were, that column would be empty in the excel file if I exported before performing the calculation. The 2014 paper does not explicitly have the words lfcSE, so it is difficult for me to understand exactly where this term is explained. I read some of the other deseq2 help posts related to lfcSE and the deseq2 paper, but am still confused.
Thanks!
counts_ls_age <- list()
for(i in 1:length(age_names)) {
column_idx <- which(tstrsplit(colnames(aggr_counts), "_")[[2]] == age_names[i])
## Store corresponding sub-matrix as one element of a list
counts_ls_age[[i]] <- aggr_counts[, column_idx] #aggregating all the counts for a group.
names(counts_ls_age)[i] <- age_names[i]
}
counts_ls_age ###I'm pasting a subset from just one timepoint
$`120`
25432 x 6 sparse Matrix of class "dgCMatrix"
120-1-CTL-library-GRO2404A3_120_ctrl 120-1-DMUT-library-GRO2404A4_120_dmut 120-2CTL-LIBRARY-GRO2455A7_120_ctrl
ENSDARG00000009657 956 1494 1859
ENSDARG00000096156 1 2 7
ENSDARG00000076160 . 2 .
ENSDARG00000117163 72 74 92
ENSDARG00000096187 . 3 1
ENSDARG00000076014 484 618 730
ENSDARG00000098876 19 29 38
ENSDARG00000031930 2 2 2
ENSDARG00000103471 10 13 9
ENSDARG00000093665 4 2 4
ENSDARG00000098144 . . .
ENSDARG00000093658 222 448 497
metadata_ls_age_2 <- list()
for(i in 1:length(counts_ls_age)){
## Initiate a data frame for group i with one row per sample (matching column names in the counts matrix)
df <- data.frame(group_sample_id = colnames(counts_ls_age[[i]]))
## Use tstrsplit() to separate group and sample IDs, and add factor variables.
df$sample_name <- tstrsplit(df$group_sample_id, "_")[[1]]
df$group <- tstrsplit(df$group_sample_id, "_")[[3]]
df$age <- tstrsplit(df$group_sample_id, "_")[[2]]
rownames(df) <- df$group_sample_id
## Store complete metadata for group i in list
metadata_ls_age_2[[i]] <- df
names(metadata_ls_age_2)[i] <- unique(df$age)
str(metadata_ls_age_2)
}
#append in sample_number and ind.n data for each timepoint. 12-22-23. Specify these as factors.
metadata_ls_age_2$`120`$sample_number <- factor(c(1,1,2,2,3,3))
metadata_ls_age_2$`120`$ind.n <- factor(c(1,1,2,2,3,3))
metadata_ls_age_2$`120`$ind.s <- factor(c(1,2,3,4,5,6))
metadata_ls_age_2$`72`$sample_number <- factor(c(4,4,5,5))
metadata_ls_age_2$`72`$ind.n <- factor(c(1,1,2,2))
metadata_ls_age_2$`72`$ind.s <- factor(c(7,8,9,10))
metadata_ls_age_2$`48`$sample_number <- factor(c(6,6,7,7))
metadata_ls_age_2$`48`$ind.n <- factor(c(1,1,2,2))
metadata_ls_age_2$`48`$ind.s <- factor(c(11,12,13,14))
metadata_ls_age_2$`36`$sample_number <- factor(c(8,8,9,9))
metadata_ls_age_2$`36`$ind.n <- factor(c(1,1,2,2))
metadata_ls_age_2$`36`$ind.s <- factor(c(15,16,17,18))
metadata_ls_age_2
$`36`
group_sample_id sample_name group age sample_number ind.n ind.s
36-1CTL-LIBRARY-GRO2455A5_36_ctrl 36-1CTL-LIBRARY-GRO2455A5_36_ctrl 36-1CTL-LIBRARY-GRO2455A5 ctrl 36 8 1 15
36-1DMUT-LIBRARY-GRO2455A6_36_dmut 36-1DMUT-LIBRARY-GRO2455A6_36_dmut 36-1DMUT-LIBRARY-GRO2455A6 dmut 36 8 1 16
36-2-CTL-Library-GRO2554A1_36_ctrl 36-2-CTL-Library-GRO2554A1_36_ctrl 36-2-CTL-Library-GRO2554A1 ctrl 36 9 2 17
36-2-DMUT-Library-GRO2554A2_36_dmut 36-2-DMUT-Library-GRO2554A2_36_dmut 36-2-DMUT-Library-GRO2554A2 dmut 36 9 2 18
$`48`
group_sample_id sample_name group age sample_number ind.n ind.s
C48-1-GRO1775A15_48_ctrl C48-1-GRO1775A15_48_ctrl C48-1-GRO1775A15 ctrl 48 6 1 11
D48-1-GRO1775A16_48_dmut D48-1-GRO1775A16_48_dmut D48-1-GRO1775A16 dmut 48 6 1 12
C48-2-GRO1775A17_48_ctrl C48-2-GRO1775A17_48_ctrl C48-2-GRO1775A17 ctrl 48 7 2 13
D48-2-GRO1775A18_48_dmut D48-2-GRO1775A18_48_dmut D48-2-GRO1775A18 dmut 48 7 2 14
$`72`
group_sample_id sample_name group age sample_number ind.n ind.s
C72-1-GRO1775A19_72_ctrl C72-1-GRO1775A19_72_ctrl C72-1-GRO1775A19 ctrl 72 4 1 7
D72-1-GRO1775A20_72_dmut D72-1-GRO1775A20_72_dmut D72-1-GRO1775A20 dmut 72 4 1 8
72-2-CTL-Library-GRO2554A3_72_ctrl 72-2-CTL-Library-GRO2554A3_72_ctrl 72-2-CTL-Library-GRO2554A3 ctrl 72 5 2 9
72-2-DMUT-Library-GRO2554A4_72_dmut 72-2-DMUT-Library-GRO2554A4_72_dmut 72-2-DMUT-Library-GRO2554A4 dmut 72 5 2 10
$`120`
group_sample_id sample_name group age sample_number ind.n
120-1-CTL-library-GRO2404A3_120_ctrl 120-1-CTL-library-GRO2404A3_120_ctrl 120-1-CTL-library-GRO2404A3 ctrl 120 1 1
120-1-DMUT-library-GRO2404A4_120_dmut 120-1-DMUT-library-GRO2404A4_120_dmut 120-1-DMUT-library-GRO2404A4 dmut 120 1 1
120-2CTL-LIBRARY-GRO2455A7_120_ctrl 120-2CTL-LIBRARY-GRO2455A7_120_ctrl 120-2CTL-LIBRARY-GRO2455A7 ctrl 120 2 2
120-2DMUT-LIBRARY-GRO2455A8_120_dmut 120-2DMUT-LIBRARY-GRO2455A8_120_dmut 120-2DMUT-LIBRARY-GRO2455A8 dmut 120 2 2
120-3-CTL-Library-GRO2554A5_120_ctrl 120-3-CTL-Library-GRO2554A5_120_ctrl 120-3-CTL-Library-GRO2554A5 ctrl 120 3 3
120-3-DMUT-Library-GRO2554A6_120_dmut 120-3-DMUT-Library-GRO2554A6_120_dmut 120-3-DMUT-Library-GRO2554A6 dmut 120 3 3
ind.s
120-1-CTL-library-GRO2404A3_120_ctrl 1
120-1-DMUT-library-GRO2404A4_120_dmut 2
120-2CTL-LIBRARY-GRO2455A7_120_ctrl 3
120-2DMUT-LIBRARY-GRO2455A8_120_dmut 4
120-3-CTL-Library-GRO2554A5_120_ctrl 5
120-3-DMUT-Library-GRO2554A6_120_dmut 6
For each "age" (36,48,72,120), I set i-age and ran this whole script (ie separate deseq2 experiments for samples of each age).
idx <- which(names(counts_ls_age) == i)
age_counts <- counts_ls_age[[idx]]
age_metadata <- metadata_ls_age_2[[idx]]
#check that group_count and group_metadata capture information related to the same group. Make sure that columns of count matrix match the rows of the metadata. # Check contents of extracted objects
age_counts[1:6, 1:6]
head(age_metadata)
# Double-check that both lists have same names
all(colnames(age_counts)==rownames(age_metadata))
#[1] TRUE
# Create DESeq2 object. We want to measure the effect of group at each specific age. They did cell type and then designed by group. So we'll do age and then group. To look at sibCTL vs dmut at each age.
dds <- DESeqDataSetFromMatrix(age_counts,
colData = age_metadata,
design = ~ind.n + group)
set <- toString(paste0("dds",i))
assign(set,dds)
object<-.Last.value
# Transform counts for data visualization
rld <- rlog(object, blind=TRUE)
# Plot PCA
g <- DESeq2::plotPCA(rld, ntop = 500, intgroup = "sample_name")
save_file = paste0(sp,i,"_samplePCA.png")
ggsave(save_file,plot=g)
# Extract the rlog matrix from the object and compute pairwise correlation values
rld_mat <- assay(rld)
rld_cor <- cor(rld_mat)
# Plot heatmap
h <- pheatmap(rld_cor)#gives relationships between different samples.
save_file = paste0(sp,i,"_sample_relationship_heatmap.png")
ggsave(save_file, plot=h)
# Run DESeq2 differential expression analysis
dds <- DESeq(object)
# Check the coefficients for the comparison
resultsNames(dds)
# Generate results object
res <- results(dds,
name = "group_dmut_vs_ctrl",
alpha = 0.05)
#MA plot for data without shrinkage.... save manually
j <- DESeq2::plotMA(res, ylim=c(-10,10))
# Shrink the log2 fold changes to be more appropriate using the apeglm method - should cite [paper]() when using this method
res <- lfcShrink(dds,
coef = "group_dmut_vs_ctrl",
res=res,
type = "apeglm")
#MA plots for data with shrinkage.... save manually
k <- DESeq2::plotMA(res, ylim=c(-10,10))
# Turn the DESeq2 results object into a tibble for use with tidyverse functions
res_tbl <- res %>%
data.frame() %>%
rownames_to_column(var = "gene") %>%
as_tibble() %>%
dplyr::filter(padj<=0.05)
write_xlsx(res_tbl, path=paste0('path/TEST2_ind.n+group_lessstringent_Ensembl_res_tbl_',i,'.xlsx'))
### Resulting table is the typical deseq2 output, and the lowest value for lfcSE is 0.135
# include your problematic code here with any corresponding output
# please also include the results of running the following in an R session
sessionInfo( )
Thank you very much.
If this answers your question, it helps others to see that the question is answered by clicking the "checkmark" next to the answer.