Basically I want to run DEseq to analyze TCGA paired data. say in certain cancer type, there are 107 patients, 214 samples(paired). I used following code(pretty much default). By setting like this, I expect DEseq2 giving me a result using the pairing information as well as 107 patient as biological replicates, is this appropriate? I feel not confident because I use 107 different values for the factor-- patient.
library('DESeq2')
directory<-"tBRCA"
sampleFiles <-c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30","31","32","33","34","35","36","37","38","39","40","41","42","43","44","45","46","47","48","49","50","51","52","53","54","55","56","57","58","59","60","61","62","63","64","65","66","67","68","69","70","71","72","73","74","75","76","77","78","79","80","81","82","83","84","85","86","87","88","89","90","91","92","93","94","95","96","97","98","99","100","101","102","103","104","105","106","107" ....to "214")
#Above I use "1""2" to "214" as file name for 214 samples
samplePatient=rep(c("1","2","3","4","5","6","7","8","9","10","11","12","13","14","15","16","17","18","19","20","21","22","23","24","25","26","27","28","29","30","31","32","33","34","35","36","37","38","39","40","41","42","43","44","45","46","47","48","49","50","51","52","53","54","55","56","57","58","59","60","61","62","63","64","65","66","67","68","69","70","71","72","73","74","75","76","77","78","79","80","81","82","83","84","85","86","87","88","89","90","91","92","93","94","95","96","97","98","99","100","101","102","103","104","105","106","107"),2)
#Above I use "1" "2"... to "107" as patient name for 107 patients
sampleCondition<-rep(c("untreated","treated"),each=107)
sampleTable<-data.frame(sampleName=sampleFiles, fileName=sampleFiles, condition=sampleCondition,patient=samplePatient)
ddsHTSeq<-DESeqDataSetFromHTSeqCount(sampleTable=sampleTable, directory=directory,design=~patient+condition)
colData(ddsHTSeq)$condition<-factor(colData(ddsHTSeq)$condition, levels=c("untreated","treated"))
dds<-DESeq(ddsHTSeq)
res<-results(dds)
res<-res[order(res$padj),]
write.table(res, file = 'myfile.txt',sep='\t')
plotMA(dds,ylim=c(-2,2),main="DESeq2")
Just a tip to make your life easier. You could declare your
sampleFiles
andsamplePatient
variables like so:Many thanks for your tip!!!!