Hi, i did DESeq2 analysis and before i run the DESeq, i did RUVg because i have spike-ins (ERCC) for negative control but i had problem with the results(some ERCC appear in important genes). So i tried to do EstimateSizeFactor before doing the DESeq, but the important DEGs that occured were few and my GO results too. Should i run my code without the RUVg, take only the raw count data and the ERCC_control_genes in my EstimateSizeFactor?
#Construct DESEQDataSet Object -converting counts to integer mode
dds<- DESeqDataSetFromMatrix(countData = combined_matrix, colData = coldata_df,design = ~Groups)
#let's see what this object looks like
dds
#Pre-filtering
patients_number<-ncol(dds)
keep <- apply(counts(dds),1,function(x) length(x[x>0])>=26)
dds<- dds[keep,]
dds
#factor levels
levels(dds$Groups)
dds$Groups <- relevel(dds$Groups, ref = "EMM_NEG")
levels(dds$Groups)
# Identify gene and spike rows
genes <- rownames(dds)[grep("^ENS", rownames(dds))]
spikes <- rownames(dds)[grep("^ERCC", rownames(dds))
#Create and Normalize newSeqExpressionSet
## Extract the colData from dds
pheno_data <- as.data.frame(colData(dds))
# Create the newSeqExpressionSet object
set <- newSeqExpressionSet(counts(dds),phenoData = pheno_data)
set <- betweenLaneNormalization(set, which="upper")
# Apply RUVg normalization
set1 <- RUVg(set,spikes, k=1)
pData(set1)
#Construct DESEQDataSet Object with normalized counts -converting counts to integer mode
dds_new<- DESeqDataSetFromMatrix(countData = counts(set1), colData = pData(set1),design = ~W_1 +Groups)
#let's see what this object looks like
dds_new
ercc_genes <- grepl("^ERCC", rownames(dds_new))
#estimatesizefactor->estimates the size factors using the "median ratio method"
dds_normcounts <- estimateSizeFactors(dds_new,controlGenes=ercc_genes ); #i have control genes!!
#Extract Normalized Counts
dds_counts<-counts(dds_normcounts, normalized=TRUE)
#heatmap after estimatesizefactor and before deseq
pheatmap(dds_counts[spikes, ],
cluster_rows = TRUE,
cluster_cols = TRUE,
show_rownames = TRUE,
show_colnames = TRUE,
color = colors2,
main = "Heatmap of ERCC Genes after estimateSizefactors (before DESEQ)")
# Perform the differential expression analysis
#Genes differentially expressed between conditions
dds_normcounts <- DESeq(dds_normcounts)
# Extract results
res<-results(dds_normcounts)
res
#build a results table
res <- results(dds_normcounts, contrast=c("Groups","MA_POS","EMM_NEG"),alpha = 0.05,filterFun=ihw)
res
res1<-results(dds_normcounts, contrast=c("Groups","MA_NEG","EMM_NEG"),alpha = 0.05,filterFun=ihw)
res1
res2<-results(dds_normcounts, contrast=c("Groups","E_NEG","EMM_NEG"),alpha = 0.05,filterFun=ihw)
res2