I have conducted DESeq2 analyses in my MacBookPro (M1 processor) and in a Rocky Linux HPC and the results obtained, despite both systems using slightly different R and DESeq2 versions, are surprisingly VERY different. Here after number of differentially expressed (padj < 0.05) obtained with the exact same script and datasets in both systems.
Computer: MacBookPro M1 processor R version: 4.2.2 DESeq2 version: 1.38.3
719 kidney_ctrl_vs_DMV_11dpi_q0.05.tsv 421 kidney_ctrl_vs_DMV_4dpi_q0.05.tsv 393 kidney_ctrl_vs_Mass_11dpi_q0.05.tsv 596 kidney_ctrl_vs_Mass_4dpi_q0.05.tsv 1888 ovary_ctrl_vs_DMV_11dpi_q0.05.tsv 926 ovary_ctrl_vs_DMV_4dpi_q0.05.tsv 444 ovary_ctrl_vs_Mass_11dpi_q0.05.tsv 309 ovary_ctrl_vs_Mass_4dpi_q0.05.tsv 2606 oviduct_ctrl_vs_DMV_11dpi_q0.05.tsv 3943 oviduct_ctrl_vs_DMV_4dpi_q0.05.tsv 1865 oviduct_ctrl_vs_Mass_11dpi_q0.05.tsv 2066 oviduct_ctrl_vs_Mass_4dpi_q0.05.tsv
Rocky Linux HPC R version: 4.2.0 DESeq2 version: 1.36
737 kidney_ctrl_vs_DMV_11dpi_q0.05.tsv 483 kidney_ctrl_vs_DMV_4dpi_q0.05.tsv 451 kidney_ctrl_vs_Mass_11dpi_q0.05.tsv 681 kidney_ctrl_vs_Mass_4dpi_q0.05.tsv 1911 ovary_ctrl_vs_DMV_11dpi_q0.05.tsv 975 ovary_ctrl_vs_DMV_4dpi_q0.05.tsv 511 ovary_ctrl_vs_Mass_11dpi_q0.05.tsv 393 ovary_ctrl_vs_Mass_4dpi_q0.05.tsv 2613 oviduct_ctrl_vs_DMV_11dpi_q0.05.tsv 3964 oviduct_ctrl_vs_DMV_4dpi_q0.05.tsv 1899 oviduct_ctrl_vs_Mass_11dpi_q0.05.tsv 2111 oviduct_ctrl_vs_Mass_4dpi_q0.05.tsv
Here my script (sorry I cannot post the data because it does not belong to me)
library(DESeq2)
library(RColorBrewer)
library(ggplot2)
library(gplots)
library(pheatmap)
library(biomaRt)
library(EnhancedVolcano)
# If you wanna set specific size and resolution of plots,
# use this example:
# png(filename = "myplot.png", width = 800, height = 600, res = 300)
# 800x600 pixels at 300 dpi (dots per inch)
setwd('/Users/juanjovel/jj/data_analysis/mohammad_saleh/mRNA_by_tissue_virus_timePoint_230816/reTest')
# If you need a directory different from pwd, set it below
#setwd('/Users/juanjovel/jj/data_analysis/mohammad_saleh/')
data_file = 'Avian_mRNA_counts_10up.tsv'
metadata_file = 'Avian_mRNA_meta.tsv'
data = read.table(data_file, sep = '\t', header = T, row.names = 1)
metadata = read.table(metadata_file, sep = '\t', header = T, row.names = 1)
# Filter low abundance features
# Calculate the average expression for each gene across all samples
avgExpression <- rowMeans(data)
# Calculate the proportion of samples with non-zero expression for each gene
nonZeroProportion <- rowSums(data > 0) / ncol(data)
# Filter genes based on the criteria
selectedGenes <- avgExpression > 10 & nonZeroProportion >= 0.25
# Subset the data
filteredData <- data[selectedGenes, ]
data <- filteredData
metadata$infection_timePoint <- paste(metadata$infection, metadata$time_point, sep = "_")
head(metadata)
is_kidney <- metadata$tissue=="Kidney"
is_ovary <- metadata$tissue=="Ovary"
is_oviduct<- metadata$tissue=="Oviduct"
#create three sub data frames, one per tissue
kidney_df <- data[is_kidney]
ovary_df <- data[is_ovary]
oviduct_df <- data[is_oviduct]
#create three metadata tables corresponding to the three sub data frames
kidney_metadata <- subset(metadata, metadata$tissue =="Kidney")
ovary_metadata <- subset(metadata, metadata$tissue =="Ovary")
oviduct_metadata <- subset(metadata, metadata$tissue =="Oviduct")
# Function to create HC heatmap
kidney_data_matrix <- as.matrix(round(kidney_df, digits=0))
ovary_data_matrix <- as.matrix(round(ovary_df, digits=0))
oviduct_data_matrix <- as.matrix(round(oviduct_df, digits=0))
#Import data matrics into a DESeq2 object
dds_kidney<-DESeqDataSetFromMatrix(countData = kidney_data_matrix,
colData = kidney_metadata,
design = ~ infection_timePoint)
dds_ovary<-DESeqDataSetFromMatrix(countData = ovary_data_matrix,
colData = ovary_metadata,
design = ~ infection_timePoint)
dds_oviduct<-DESeqDataSetFromMatrix(countData = oviduct_data_matrix,
colData = oviduct_metadata,
design = ~ infection_timePoint)
list_dds <- list(dds_kidney, dds_ovary, dds_oviduct)
names(list_dds) <- c("dds_kidney", "dds_ovary", "dds_oviduct")
#calculate size factors
dds_kidney <- estimateSizeFactors(dds_kidney)
dds_ovary <- estimateSizeFactors(dds_ovary)
dds_oviduct <- estimateSizeFactors(dds_oviduct)
#normalize data by regularized logaritmic transformation
rld_kidney <- rlogTransformation(dds_kidney, fitType='local' )
rld_ovary <- rlogTransformation(dds_ovary, fitType='local' )
rld_oviduct <- rlogTransformation(dds_oviduct, fitType='local' )
#variance stabilizing transformation
#vsd <-vst(dds)
makeHCheatmap <- function(rld, metadata){
dist = dist(t(assay(rld)))
dist_matrix <- as.matrix(dist)
row.names(dist_matrix) <- metadata$group
# Retrieve the name of the dataframe
df_name <- deparse(substitute(metadata))
# Use gsub to create the new name
file_name <- gsub("_metadata$", "_HC_plot.png", df_name)
png(file_name)
pheatmap(dist_matrix,
color=colorRampPalette(brewer.pal(n=9,name = "RdYlBu"))(255),
clustering_distance_cols = dist, clustering_distance_rows = dist
)
dev.off()
}
makePCA <- function(rld, metadata){
df_name <- deparse(substitute(metadata))
prefix <- gsub("_metadata$", "", df_name)
# Use gsub to create the new name
file_name <- gsub("_metadata$", "_PCA_plot.png", df_name)
data <- plotPCA(rld, intgroup=c("infection"), returnData=T)
percentVar= round(100 *attr(data,"percentVar"))
PCA_EucDist = ggplot(data, aes(x=data$PC1, y=data$PC2, color = metadata$infection)) +
xlab(paste("PC1 :", percentVar[1], "% variance")) +
ylab(paste("PC2 :", percentVar[2], "% variance")) +
ggtitle(paste(prefix, "PCA", sep = ' '))
# Add the points (dots) layer, and define shape and size of dots
png(file_name)
print(PCA_EucDist + geom_point(shape = 20, size = 7) + theme_bw() +
geom_text(aes(label=rownames(metadata)), hjust=0.5, vjust=2, color="black"))
dev.off()
}
makeHCheatmap(rld_kidney, kidney_metadata)
makeHCheatmap(rld_ovary, ovary_metadata)
makeHCheatmap(rld_oviduct, oviduct_metadata)
makePCA(rld_kidney, kidney_metadata)
makePCA(rld_ovary, ovary_metadata)
makePCA(rld_oviduct, oviduct_metadata)
#
contrasts <- list(
ctrl_vs_DMV_4dpi = c(group = "infection_timePoint", "DMV_4dpi", "Cont_4dpi"),
ctrl_vs_DMV_11dpi = c(group = "infection_timePoint", "DMV_11dpi", "Cont_11dpi"),
ctrl_vs_Mass_4dpi = c(group = "infection_timePoint", "Mass_4dpi", "Cont_4dpi"),
ctrl_vs_Mass_11dpi = c(group = "infection_timePoint", "Mass_11dpi", "Cont_11dpi")
)
### biomaRt
library(biomaRt)
my_mart = useMart("ensembl", dataset = 'ggallus_gene_ensembl')
my_attributes = c("ensembl_transcript_id",
"ensembl_gene_id",
"entrezgene_id",
"external_gene_name",
"wikigene_description",
"name_1006",
"definition_1006",
"namespace_1003")
pullRecords <- function(attributes, mart, filter_values){
records <- getBM(attributes = attributes, filters = "ensembl_transcript_id",
values = filter_values, mart = mart)
return(records)
}
# Extract first hit only
getFirstMatch <- function(records, transcripts) {
first_annotation_df <- data.frame()
for (transcript in transcripts) {
hits <- which(records$ensembl_transcript_id == transcript)
if (length(hits) > 0) {
first <- hits[1]
first_annotation <- records[first,]
first_annotation_df <- rbind(first_annotation_df, first_annotation)
}
}
return(first_annotation_df)
}
renameColumns <- function(df){
colnames(df)[6] <- "GO_group"
colnames(df)[7] <- "GO_definition"
colnames(df)[8] <- "Ontology"
return(df)
}
makeVolcanoPlot <- function(df, vp_file){
keyvals <- ifelse(
df$log2FoldChange < -1, 'forestgreen',
ifelse(df$log2FoldChange > 1, 'firebrick1',
'dodgerblue1'))
keyvals[is.na(keyvals)] <- 'black'
names(keyvals)[keyvals == 'firebrick1'] <- 'High'
names(keyvals)[keyvals == 'dodgerblue1'] <- 'small FC'
names(keyvals)[keyvals == 'forestgreen'] <- 'Low'
evp <- EnhancedVolcano(df,
lab = df$external_gene_name,
x = 'log2FoldChange',
y = 'pvalue',
pCutoff = 0.01,
FCcutoff = 1,
colCustom = keyvals,
title = NULL,
subtitle = NULL,
colAlpha = 0.85,
shape = 20,
pointSize = 2,
labSize = 3)
png(vp_file)
print(evp)
dev.off()
}
exportTable <- function(df, filename){
write.table(df, filename, quote = F, sep='\t', row.names = F)
}
for (dds_name in names(list_dds)) {
tissue <- gsub("dds_", "", dds_name)
# Extract the object using its name from the list
object <- list_dds[[dds_name]]
object <- DESeq(object, fitType = 'local')
for (contrast_name in names(contrasts)) {
my_results <- paste(tissue, contrast_name, "res", sep = "_")
contrast_value <- contrasts[[contrast_name]] # Use double square brackets to access the inner list
my_result <- results(object, contrast = contrast_value)
resdata <- merge(as.data.frame(my_result), as.data.frame(counts(object, normalized=TRUE)), by="row.names", sort=FALSE)
names(resdata)[1] <- "transcript"
transcripts <- resdata$transcript
transcripts_clean <- gsub("\\.\\d+$", "", transcripts)
records <- pullRecords(my_attributes, my_mart, transcripts_clean)
annotation_df <- getFirstMatch(records, transcripts_clean)
annotation_df <- renameColumns(annotation_df)
results_annotated <- cbind(resdata, annotation_df)
outfile_norm_name <- paste(tissue, contrast_name, "allData.tsv", sep = "_")
exportTable(results_annotated, outfile_norm_name)
sign_results <- subset(results_annotated, padj < 0.05)
outfile_name <- paste(tissue, contrast_name, "q0.05.tsv", sep = "_")
exportTable(sign_results, outfile_name)
vp_file <- paste(tissue, contrast_name, "volcano_plot.png", sep = "_")
makeVolcanoPlot(sign_results_annotated, vp_file)
}
}
What you have provided is not particularly useful. That's like 200 lines of code using data that nobody else can use. Instead, as the FAQ states, you need to provide a reproducible example. I don't use a Mac, so can't say anything about that, but I do have a Windows box. Here is a very simple example.
Now on Linux
Now copy the linux file to Windows and check
Which indicates that the results on Windows and Linux vary less than
sqrt(.Machine$double.eps)
, which is 1.5e-8 on my Windows box.You have a much more complicated data set, and there may well be differences. But unless you can create a reproducible example using
makeExampleDESeqDataSet
to show the differences that other people can then corroborate, it's far more likely that you have made a coding mistake somewhere rather than there being an obvious problem withDESeq2
.