Entering edit mode
Hi,
I found different DEGs for the legacy and the harmonized datasets. I would like to know whether my scripts are correct:
# ---- Harmonized dataset (HTSeq counts) ----
# Query, download and load the count data for the requested barcodes.
# `CancerProject`, `samplesTN` and `samplesTP` must already exist in the session.
queryDown <- GDCquery(
  project = CancerProject,
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - Counts",
  barcode = c(samplesTN$Sample.ID, samplesTP$V1)
)
GDCdownload(query = queryDown)
dataPrep <- GDCprepare(query = queryDown)

# Preprocess with a 0.6 correlation cut-off, normalise by gene length, then
# apply the quantile filter with a 0.25 cut.
dataProcessing <- TCGAanalyze_Preprocessing(
  object = dataPrep,
  cor.cut = 0.6,
  datatype = "HTSeq - Counts"
)
dataNorm <- TCGAanalyze_Normalization(
  tabDF = dataProcessing,
  geneInfo = geneInfoHT,
  method = "geneLength"
)
dataFilt <- TCGAanalyze_Filtering(
  tabDF = dataNorm,
  method = "quantile",
  qnt.cut = 0.25
)

# NOTE(review): dataPrep_raw is computed here but the DEA call below runs on
# dataFilt, not on dataPrep_raw -- confirm which matrix is intended.
dataPrep_raw <- UseRaw_afterFilter(dataPrep, dataFilt)

# Take barcode and sample type from the same object (dataPrep) so positions
# always line up, then keep only barcodes that are still columns of dataFilt:
# the preprocessing step may have dropped samples, and indexing dataFilt with
# a dropped barcode fails with "subscript out of bounds".
keptBarcodes <- colnames(dataFilt)
datasmTP <- intersect(
  dataPrep$barcode[grep("TP", dataPrep$shortLetterCode)],
  keptBarcodes
)
datasmTN <- intersect(
  dataPrep$barcode[grep("NT", dataPrep$shortLetterCode)],
  keptBarcodes
)
stopifnot(length(datasmTP) > 0, length(datasmTN) > 0)

# Differential expression: normal (condition 1) versus tumour (condition 2).
datadownDEGs <- TCGAanalyze_DEA(
  mat1 = dataFilt[, datasmTN],
  mat2 = dataFilt[, datasmTP],
  Cond1type = "Normal",
  Cond2type = "Tumor",
  fdr.cut = 0.01,
  logFC.cut = 1,
  method = "glmLRT"
)
# ---- Legacy dataset (Illumina HiSeq "results" files) ----
# Query, download and load the legacy TCGA-BRCA RNA-Seq data for the requested
# barcodes. `listSamples_tumor_modelo_1` and `listSamples_normal_modelo_1`
# must already exist in the session.
query_modelo_1 <- GDCquery(
  project = "TCGA-BRCA",
  data.category = "Gene expression",
  data.type = "Gene expression quantification",
  experimental.strategy = "RNA-Seq",
  # sample.type = c("Primary Tumor", "Solid Tissue Normal"),
  platform = "Illumina HiSeq",
  file.type = "results",
  barcode = c(
    listSamples_tumor_modelo_1$Sample.ID,
    listSamples_normal_modelo_1$V1
  ),
  legacy = TRUE
)
GDCdownload(query_modelo_1)
BRCARnaseqSE_modelo_1 <- GDCprepare(query_modelo_1)

# Preprocess with a 0.6 correlation cut-off, normalise by gene length, then
# apply the quantile filter with a 0.25 cut.
BRCARnaseq_CorOutliers_modelo_1 <- TCGAanalyze_Preprocessing(
  BRCARnaseqSE_modelo_1,
  cor.cut = 0.6
)
dataNorm_modelo_1 <- TCGAanalyze_Normalization(
  tabDF = BRCARnaseq_CorOutliers_modelo_1,
  method = "geneLength",
  geneInfo = geneInfo
)
dataFilt_modelo_1 <- TCGAanalyze_Filtering(
  tabDF = dataNorm_modelo_1,
  method = "quantile",
  qnt.cut = 0.25
)

# Keep only barcodes that are still columns of the filtered matrix: the
# preprocessing step may have dropped samples, and indexing with a dropped
# barcode fails with "subscript out of bounds".
keptBarcodes_modelo_1 <- colnames(dataFilt_modelo_1)
samplesNT_modelo_1 <- intersect(
  BRCARnaseqSE_modelo_1$barcode[grep("NT", BRCARnaseqSE_modelo_1$shortLetterCode)],
  keptBarcodes_modelo_1
)
samplesTP_modelo_1 <- intersect(
  BRCARnaseqSE_modelo_1$barcode[grep("TP", BRCARnaseqSE_modelo_1$shortLetterCode)],
  keptBarcodes_modelo_1
)
stopifnot(length(samplesNT_modelo_1) > 0, length(samplesTP_modelo_1) > 0)

# Diff.expr.analysis (DEA): normal (condition 1) versus tumour (condition 2).
dataDEGs_modelo_1 <- TCGAanalyze_DEA(
  mat1 = dataFilt_modelo_1[, samplesNT_modelo_1],
  mat2 = dataFilt_modelo_1[, samplesTP_modelo_1],
  Cond1type = "Normal",
  Cond2type = "Tumor",
  fdr.cut = 0.01,
  logFC.cut = 1,
  method = "glmLRT"
)

# Expression-level table for the DEGs. Arguments are named so that each label
# is paired with its own matrix: "Tumor" with the TP samples and "Normal" with
# the NT samples (the positional call had the two tables swapped).
dataDEGsFiltLevel <- TCGAanalyze_LevelTab(
  FC_FDR_table_mRNA = dataDEGs_modelo_1,
  typeCond1 = "Tumor",
  typeCond2 = "Normal",
  TableCond1 = dataFilt_modelo_1[, samplesTP_modelo_1],
  TableCond2 = dataFilt_modelo_1[, samplesNT_modelo_1]
)

sessionInfo()
Thanks