I find some funny stuff when I check out the DEG number by two ways in limma.
It's multiply time points study. TP1, TP2, TP3......
The first way is very conventional as:
# Create a design matrix
timecourse_lev <- c("Group1","Group2","Group3","Group4","Group5","Group6","Group7","Group8","Group9")
f_timecourse <- factor(design_info$Group)
timecourse_design <- model.matrix(~ 0 + f_timecourse)
colnames(timecourse_design) <- timecourse_lev
# Fit the linear model
timecourse_fit <- lmFit(normEset_cusCDF, timecourse_design)
# Define a contrast matrix
timecourse_contrast_matrix <- makeContrasts(TP1 = "Group2-Group1",TP2 = "Group3-Group1",
TP3 = "Group4-Group1",TP4 = "Group5-Group1",
TP5 = "Group6-Group1",TP6 = "Group7-Group1",
TP7 = "Group8-Group1",TP8 = "Group9-Group1",
levels = timecourse_design)
# Extract the linear model fit for the contrasts
timecourse_fit2 <- contrasts.fit(timecourse_fit ,timecourse_contrast_matrix)
# # eBay's estimation
timecourse_fit2eb <- eBayes(timecourse_fit2)
# list all genes in topTable
timecourse_DEG_TP1<-topTable(timecourse_fit2eb,adjust.method="BH",p.value=0.01,lfc=log2(1.5),number=Inf,coef=c("TP1"))
then TP2, TP3......
The second way is when I want to manually create a Volcano plot, I have to code as below:
for (i in 1:ncol(timecourse_fit2eb$coeff))
{
difference <- timecourse_fit2eb$coeff[,i]
pvalue <- timecourse_fit2eb$F.p.value
p_adj <- p.adjust(pvalue,method="BH")
lodd_adj <- -log10(p_adj)
# set threshold and seperate data into 3 parts by FC
nd_up <- (difference > log2(1.5))
nd_down <- (difference < -log2(1.5))
nd_null <- (difference <= log2(1.5) & difference >= -log2(1.5))
# Navigate position of each element
nd_up_site <- data.frame(which(nd_up =="TRUE"))
colnames(nd_up_site) <- c("Position")
nd_down_site <- data.frame(which(nd_down =="TRUE"))
colnames(nd_down_site) <- c("Position")
nd_null_site <- data.frame(which(nd_null =="TRUE"))
colnames(nd_null_site) <- c("Position")
# set p vaule to 2 paret by adj-p value 0.05
np_small <- p.adjust(pvalue,method="BH") < 0.01
np_large <- p.adjust(pvalue,method="BH") >= 0.01
# site of element in p value
np_small_site <- data.frame(which(np_small =="TRUE"))
colnames(np_small_site) <- c("Position")
np_large_site <- data.frame(which(np_large =="TRUE"))
colnames(np_large_site) <- c("Position")
other <- union(nd_null_site$Position,np_large_site$Position)
left <- intersect(nd_down_site$Position,np_small_site$Position)
right <- intersect(nd_up_site$Position,np_small_site$Position)
# Construct a volcano plot
plot(difference[other],lodd_adj[other],xlim=c(-5,5),ylim=range(lodd_adj),
pch=20,cex=0.5,xlab=c("log2(Fold Change)"),ylab=c("-log10(Adjust P-value)"),
main=paste("Differential Genes at",colnames(YFV_timecourse_fit2eb$coefficients)[i],"(FC = 1.5 and adjP = 0.01)"))
points(difference[left],lodd_adj[left],col="blue",pch=20)
points(difference[right],lodd_adj[right],col="red",pch=20)
abline(v=log2(1.5),lwd=1.5,col="darkgreen")
abline(v=-log2(1.5),lwd=1.5,col="darkgreen")
abline(h=-log10(0.01),lwd=1.5,col="darkgreen")
text(-3,-log10(0.01)+0.3,"Benjamini Hochberg Cut-Off")
text(-4,-log10(0.01)-0.3,"-log10(0.01)")
text(2,max(lodd_adj)-1,paste(length(right),"Up-regulation"),col="red")
text(-2,max(lodd_adj)-1,paste(length(left),"Down-regulation"),col="blue")
text(-1.5,-0.4,"-log2(1.5)")
text(1.5,-0.4,"log2(1.5)")
}
dev.off()
However, when I got the Volcano plot, I find the up- and down-regulated DEG number for each time point is not same as the number got from topTable(), slightly higher than topTable().
I wonder what had happened there, which result is correct?