DEGs for legacy and harmonized dataset
0
0
Entering edit mode
Gisele • 0
@df6db70c
Last seen 3.4 years ago
Brazil

Hi,

I found different DEGs for the legacy and harmonized datasets. I would like to know whether my scripts are correct:

# ---- Harmonized dataset -------------------------------------------------
# Differential expression (Normal vs Tumor) from HTSeq raw counts.
# `CancerProject`, `samplesTN` and `samplesTP` must be defined beforehand.

# Query, download and load the count files for the selected barcodes.
queryDown <- GDCquery(
  project = CancerProject,
  data.category = "Transcriptome Profiling",
  data.type = "Gene Expression Quantification",
  workflow.type = "HTSeq - Counts",
  barcode = c(samplesTN$Sample.ID, samplesTP$V1)
)
GDCdownload(query = queryDown)
dataPrep <- GDCprepare(query = queryDown)

# Sample-by-sample correlation check. With cor.cut > 0 this step can drop
# low-correlation samples, so the returned matrix may have fewer columns
# than `dataPrep`.
dataProcessing <- TCGAanalyze_Preprocessing(
  object = dataPrep,
  cor.cut = 0.6,
  datatype = "HTSeq - Counts"
)

# Gene-length normalization; `geneInfoHT` is the annotation table used for
# the harmonized pipeline.
dataNorm <- TCGAanalyze_Normalization(
  tabDF = dataProcessing,
  geneInfo = geneInfoHT,
  method = "geneLength"
)

# Quantile filter with qnt.cut = 0.25.
dataFilt <- TCGAanalyze_Filtering(
  tabDF = dataNorm,
  method = "quantile",
  qnt.cut = 0.25
)

# Raw (un-normalized) data restricted to the genes kept in `dataFilt`.
dataPrep_raw <- UseRaw_afterFilter(dataPrep, dataFilt)

# Barcodes per sample type. Exact matching on `shortLetterCode` replaces
# grep(), and the result is limited to samples that are still present in
# `dataFilt`; otherwise a sample removed by the correlation cut makes the
# column subsetting below fail with "subscript out of bounds".
keptSamples <- colnames(dataFilt)
datasmTP <- intersect(
  dataPrep$barcode[dataPrep$shortLetterCode %in% "TP"],
  keptSamples
)
datasmTN <- intersect(
  dataPrep$barcode[dataPrep$shortLetterCode %in% "NT"],
  keptSamples
)

# Normal (mat1) vs Tumor (mat2) differential expression.
datadownDEGs <- TCGAanalyze_DEA(
  mat1 = dataFilt[, datasmTN],
  mat2 = dataFilt[, datasmTP],
  Cond1type = "Normal",
  Cond2type = "Tumor",
  fdr.cut = 0.01,
  logFC.cut = 1,
  method = "glmLRT"
)


# ---- Legacy dataset -----------------------------------------------------
# Differential expression (Normal vs Tumor) from the legacy TCGA-BRCA
# RNA-Seq "results" files. `listSamples_tumor_modelo_1` and
# `listSamples_normal_modelo_1` must be defined beforehand.

# Query, download and load the legacy files for the selected barcodes.
query_modelo_1 <- GDCquery(
  project = "TCGA-BRCA",
  data.category = "Gene expression",
  data.type = "Gene expression quantification",
  experimental.strategy = "RNA-Seq",
  # sample.type = c("Primary Tumor", "Solid Tissue Normal"),
  platform = "Illumina HiSeq",
  file.type = "results",
  barcode = c(
    listSamples_tumor_modelo_1$Sample.ID,
    listSamples_normal_modelo_1$V1
  ),
  legacy = TRUE
)
GDCdownload(query_modelo_1)
BRCARnaseqSE_modelo_1 <- GDCprepare(query_modelo_1)

# Sample-by-sample correlation check. With cor.cut > 0 this step can drop
# low-correlation samples, so the returned matrix may have fewer columns
# than the SummarizedExperiment.
BRCARnaseq_CorOutliers_modelo_1 <- TCGAanalyze_Preprocessing(
  BRCARnaseqSE_modelo_1,
  cor.cut = 0.6
)

# Gene-length normalization; `geneInfo` is the annotation table used for
# the legacy pipeline.
dataNorm_modelo_1 <- TCGAanalyze_Normalization(
  tabDF = BRCARnaseq_CorOutliers_modelo_1,
  method = "geneLength",
  geneInfo = geneInfo
)

# Quantile filter with qnt.cut = 0.25.
dataFilt_modelo_1 <- TCGAanalyze_Filtering(
  tabDF = dataNorm_modelo_1,
  method = "quantile",
  qnt.cut = 0.25
)

# Barcodes per sample type, limited to samples still present after the
# correlation cut so the column subsetting below cannot go out of bounds.
keptSamples_modelo_1 <- colnames(dataFilt_modelo_1)
sampleType_modelo_1 <- BRCARnaseqSE_modelo_1$shortLetterCode
samplesNT_modelo_1 <- intersect(
  BRCARnaseqSE_modelo_1$barcode[sampleType_modelo_1 %in% "NT"],
  keptSamples_modelo_1
)
samplesTP_modelo_1 <- intersect(
  BRCARnaseqSE_modelo_1$barcode[sampleType_modelo_1 %in% "TP"],
  keptSamples_modelo_1
)

# Diff.expr.analysis (DEA): Normal (mat1) vs Tumor (mat2).
dataDEGs_modelo_1 <- TCGAanalyze_DEA(
  mat1 = dataFilt_modelo_1[, samplesNT_modelo_1],
  mat2 = dataFilt_modelo_1[, samplesTP_modelo_1],
  Cond1type = "Normal",
  Cond2type = "Tumor",
  fdr.cut = 0.01,
  logFC.cut = 1,
  method = "glmLRT"
)

# Add per-condition expression levels to the DEG table. Each condition
# label must be paired with the matrix of that same condition: "Tumor"
# with the TP samples and "Normal" with the NT samples. The original
# positional call passed the NT matrix for "Tumor" and vice versa.
dataDEGsFiltLevel <- TCGAanalyze_LevelTab(
  FC_FDR_table_mRNA = dataDEGs_modelo_1,
  typeCond1 = "Tumor",
  typeCond2 = "Normal",
  TableCond1 = dataFilt_modelo_1[, samplesTP_modelo_1],
  TableCond2 = dataFilt_modelo_1[, samplesNT_modelo_1]
)

# Record R and package versions for reproducibility.
sessionInfo()

Thanks

#DEGs #legacydataset #harmonizeddataset TCGAbiolinks • 952 views
ADD COMMENT

Login before adding your answer.

Traffic: 365 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6