Hi. I'm very new to coding, but am trying to run the DESeq2 package for an expression analysis and am having trouble that I think should be relatively easily dealt with.
The design is as such: We have two types of tissue - Graves' (G) and normal (N). 10 Graves' samples, 9 normal. For each tissue sample, there are two treatments - ATRA or DMSO.
I would like to see the ATRAvsDMSO comparison for Graves tissue alone, and for normal tissue alone. The way I currently have it set up, my only options are for tissueGvsN, txATRAvsDMSO, or tissueG.txATRA. The way I understand it, this is comparing all the tissue types, regardless of treatment, all treatment regardless of tissue type, or a comparison within a comparison of the two factors.
I'm hoping someone can help me compare treatments within each tissue type separately. One thing I want to keep in mind is pairing - is that considered in the script? the treatments are paired, but the tissue samples are not.
Thanks in advance for any help
OTSCountTable <- read.delim("counts_raw.txt", header = TRUE ,row.names = 1, sep="\t")
head(OTSCountTable)
#Classify by txs
tx = factor( c( "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA"))
tissue = factor( c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "N", "N", "N", "N", "G", "G", "G", "G", "N", "N", "N", "N", "G", "G", "G", "G"))
#Normalization
library("DESeq")
library("DESeq2")
normalized = newCountDataSet(OTSCountTable, tx)
head(normalized)
normalized = estimateSizeFactors(normalized)
sizeFactors(normalized)
head(counts(normalized, normalized=TRUE))
normalized = estimateDispersions(normalized)
Normalized_coldata= data.frame(
row.names = colnames(normalized),
tissue = c("N", "N", "N", "N", "N", "N", "N", "N", "N", "N", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "G", "N", "N", "N", "N", "G", "G", "G", "G", "N", "N", "N", "N", "G", "G", "G", "G"),
tx = c("DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA", "DMSO", "ATRA")
)
Normalized_coldata
write.csv(normalized, file = "Normalized_Counts")
#set DMSO to untreated, reference group
library("magrittr")
Normalized_coldata$tx <- relevel(Normalized_coldata$tx, "DMSO")
Normalized_coldata$tx
#set N to reference group
Normalized_coldata$tissue <- relevel(Normalized_coldata$tissue, "N")
Normalized_coldata$tissue
#so now, OTSCountTable is a table with the fragment counts and Normalized_coldata is a table with information about the samples
ddsMat = DESeqDataSetFromMatrix(countData = OTSCountTable,
colData = Normalized_coldata,
design = ~tissue + tx + tissue:tx)
design(ddsMat) <- ~ tissue + tx + tissue:tx
dds = DESeq(ddsMat)
# find things to compare
resultsNames(dds)
[1] "Intercept" "tissue_G_vs_N" "tx_ATRA_vs_DMSO" "tissueG.txATRA"