I need to calculate the survival rate of TCGA databases
0
0
Entering edit mode
tirordep • 0
@a2675c82
Last seen 8 months ago
Brazil

I'm doing a study of two types of cancer using TCGA database. I have already used some scripts in R to find the upregulated and downregulated genes of each cancer and the both intersection.

Now I need to calculate de survival rate, and I'll use SPSS software. Then I need to do a table with the individual code, normalized dosage of genes, survival time and death.

But I don't know how I'll obtain this table.

Somebody could help me, please?

TCGAbiolinks Survival • 574 views
ADD COMMENT
0
Entering edit mode

Welcome to the site. This might not be the best place to get SPSS help, as the site is dedicated to support Bioconductor R pacakges. In any case it would help if you provide what have you tried: code attempted, pages visited that helped you, ...

ADD REPLY
0
Entering edit mode

Hello, I want to use R packages to create the table that I will use on SPSS. I found this code on Youtube, but there are some things that I didn't understand.

library(DESeq2)
library(TCGAbiolinks)
library(survminer)
library(survival)
library(SummarizedExperiment)
library(tidyverse)


clinical_HNSC <- GDCquery_clinic("TCGA-HNSC")
any(colnames(clinical_HNSC) %in% c("vital_status", "days_to_last_follow_up", "days_to_death"))
which(colnames(clinical_HNSC) %in% c("vital_status", "days_to_last_follow_up", "days_to_death"))
clinical_HNSC[,c(9,43,48)]

table (clinical_HNSC$vital_status)

clinical_HNSC$deceased <- ifelse(clinical_HNSC$vital_status =="Alive",FALSE,TRUE)

clinical_HNSC$overall_survival <- ifelse(clinical_HNSC$vital_status == "Alive",
                                         clinical_HNSC$days_to_last_follow_up,
                                         clinical_HNSC$days_to_death)
write.csv(clinical_HNSC, "tabela.csv", row.names = FALSE)

query_HNSC_all <- GDCquery(
                      project = "TCGA-HNSC", 
                      data.category = "Transcriptome Profiling",
                      experimental.strategy = "RNA-Seq",
                      workflow.type = "STAR - Counts",
                      data.type = "Gene Expression Quantification",
                      sample.type = "Primary Tumor",
                      access="open"
                      )

output_HNSC <- getResults(query_HNSC_all)
tumor <- output_HNSC$cases

query_HNSC_all <- GDCquery(
  project = "TCGA-HNSC", 
  data.category = "Transcriptome Profiling",
  experimental.strategy = "RNA-Seq",
  workflow.type = "STAR - Counts",
  data.type = "Gene Expression Quantification",
  sample.type = "Primary Tumor",
  access="open",
  barcode=tumor
)

GDCdownload (query_HNSC_all)
tcga_hnsc_data <- GDCprepare(query_HNSC_all,summarizedExperiment = TRUE)
hnsc_matrix <- assay(tcga_hnsc_data, "unstranded")
hnsc_matrix[1:10,1:10]

gene_metadata <- as.data.frame(rowData(tcga_hnsc_data))
coldata <- as.data.frame (colData(tcga_hnsc_data))

dds <- DESeqDataSetFromMatrix(countData=hnsc_matrix, colData = coldata, design = ~1)

keep <- rowSums(counts(dds)) >=10
dds <- dds[keep,]

vsd <- vst(dds,blind=FALSE)
hnsc_matrix_vst <- assay (vsd)

hnsc_tp53 <- hnsc_matrix_vst %>% 
  as.data.frame() %>% 
  rownames_to_column(var = 'gene_id') %>% 
  gather(key = 'case_id', value = 'counts', -gene_id) %>% 
  left_join(., gene_metadata, by = "gene_id") %>% 
  filter(gene_name == "TP53")

median_value <- median(hnsc_tp53$counts)

hnsc_tp53$strata <- ifelse(hnsc_tp53$counts >= median_value, "HIGH", "LOW")

hnsc_tp53$case_id <- gsub('-01.*', '', hnsc_tp53$case_id)
hnsc_tp53 <- merge(hnsc_tp53, clinical_HNSC, by.x = 'case_id', by.y = 'submitter_id')


fit <- survfit(Surv(overall_survival, deceased) ~ strata, data = hnsc_tp53)
fit
ggsurvplot(fit,
           data = hnsc_tp53,
           pval = T,
           risk.table = T)


fit2 <- survdiff(Surv(overall_survival, deceased) ~ strata, data = hnsc_tp53)
ADD REPLY
0
Entering edit mode

Edit the question to add the relevant code and specify what do you not understand. Good luck!

ADD REPLY
0
Entering edit mode

I have already used this code to find what genes I will focus in

library(TCGAbiolinks)
library(SummarizedExperiment)
library(plyr)
library(dplyr)
library(biomaRt)
library(DT)
library(limma)
library(biomaRt)


listSamples <- c("TCGA-2W-A8YY", "TCGA-BI-A0VR", "TCGA-BI-A0VS", "TCGA-C5-A1BE", "TCGA-C5-A1BF", "TCGA-C5-A1BI", "TCGA-C5-A1BJ", "TCGA-C5-A1BL", "TCGA-C5-A1BM", "TCGA-C5-A1BN", "TCGA-C5-A1BQ", "TCGA-C5-A1M7", "TCGA-C5-A1ME", "TCGA-C5-A1MF", "TCGA-C5-A1MH", "TCGA-C5-A1MI", "TCGA-C5-A1ML", "TCGA-C5-A1MN", "TCGA-C5-A2LS", "TCGA-C5-A2LT", "TCGA-C5-A2LV", "TCGA-C5-A2LX", "TCGA-C5-A2LZ", "TCGA-C5-A2M2", "TCGA-C5-A3HD", "TCGA-C5-A3HF", "TCGA-C5-A7CJ", "TCGA-C5-A7CK", "TCGA-C5-A7CL", "TCGA-C5-A7CM", "TCGA-C5-A7CO", "TCGA-C5-A7UE", "TCGA-C5-A7UI", "TCGA-C5-A7X8", "TCGA-C5-A7XC", "TCGA-C5-A8XH", "TCGA-C5-A8XI", "TCGA-C5-A8YQ", "TCGA-C5-A8YT", "TCGA-C5-A8ZZ", "TCGA-C5-A901", "TCGA-C5-A902", "TCGA-C5-A907", "TCGA-DG-A2KH", "TCGA-DG-A2KK", "TCGA-DG-A2KL", "TCGA-DR-A0ZM", "TCGA-DS-A0VK", "TCGA-DS-A1O9", "TCGA-DS-A1OC", 
                 "TCGA-DS-A1OD", "TCGA-DS-A3LQ", "TCGA-DS-A5RQ", "TCGA-DS-A7WF", "TCGA-EA-A556", "TCGA-EK-A2RA", "TCGA-EK-A2RB", "TCGA-EX-A1H5", "TCGA-EX-A69M", "TCGA-FU-A2QG", "TCGA-FU-A3EO", "TCGA-FU-A3HY", "TCGA-FU-A3NI", "TCGA-FU-A3TQ", "TCGA-FU-A3TX", "TCGA-FU-A3WB", "TCGA-FU-A3YQ", "TCGA-FU-A5XV", "TCGA-FU-A770", "TCGA-GH-A9DA", "TCGA-HG-A2PA", "TCGA-HM-A3JK", "TCGA-HM-A4S6", "TCGA-HM-A6W2", "TCGA-HM-A6W2", "TCGA-IR-A3L7", "TCGA-IR-A3LB", "TCGA-IR-A3LH", "TCGA-JW-A5VI", "TCGA-JW-A5VK", "TCGA-JW-AAVH", "TCGA-JX-A3Q0", "TCGA-MA-AA3W", "TCGA-MA-AA3X", "TCGA-MA-AA41", "TCGA-MA-AA43", "TCGA-MU-A5YI", "TCGA-MU-A8JM", "TCGA-MY-A913", "TCGA-Q1-A6DW", "TCGA-Q1-A73Q", "TCGA-Q1-A73S", "TCGA-RA-A741", "TCGA-UC-A7PD", "TCGA-UC-A7PF", "TCGA-UC-A7PG", "TCGA-UC-A7PG", "TCGA-VS-A8EB", "TCGA-VS-A8EH", "TCGA-VS-A8QC", "TCGA-VS-A8QF", "TCGA-VS-A8QM", "TCGA-VS-A94Y", "TCGA-VS-A94Z", "TCGA-VS-A9UP", "TCGA-VS-A9UU", "TCGA-VS-A9V0", "TCGA-VS-A9V1", "TCGA-VS-A9V4", "TCGA-WL-A834", "TCGA-XS-A8TJ", "TCGA-ZJ-A8QQ", "TCGA-ZJ-A8QR", "TCGA-ZJ-AAX4", "TCGA-ZJ-AAXA", "TCGA-ZJ-AAXN", "TCGA-ZJ-AAXT", "TCGA-ZJ-AAXU", "TCGA-ZJ-AB0I", "TCGA-4J-AA1J", "TCGA-C5-A0TN", "TCGA-C5-A1BK", "TCGA-C5-A1M5", "TCGA-C5-A1M6", "TCGA-C5-A1M8", "TCGA-C5-A1M9", "TCGA-C5-A1MJ", "TCGA-C5-A1MK", "TCGA-C5-A1MP", "TCGA-C5-A1MQ", "TCGA-C5-A2LY", "TCGA-C5-A2M1", "TCGA-C5-A3HE", "TCGA-C5-A3HL", "TCGA-C5-A7CG", "TCGA-C5-A7CH", "TCGA-C5-A7UH", "TCGA-C5-A7X3", "TCGA-C5-A8XJ", "TCGA-C5-A8YR", "TCGA-DG-A2KM", "TCGA-DR-A0ZL", "TCGA-DS-A0VL", "TCGA-DS-A0VM", "TCGA-DS-A0VN", "TCGA-DS-A1OA", "TCGA-DS-A7WH", "TCGA-DS-A7WI", "TCGA-EA-A1QS", "TCGA-EA-A1QT", "TCGA-EA-A3HQ", "TCGA-EA-A3HR", "TCGA-EA-A3HS", "TCGA-EA-A3HT", "TCGA-EA-A3HU", "TCGA-EA-A3QD", "TCGA-EA-A3QE", "TCGA-EA-A3Y4", "TCGA-EA-A410", "TCGA-EA-A411", "TCGA-EA-A439", "TCGA-EA-A43B", "TCGA-EA-A44S", "TCGA-EA-A4BA", "TCGA-EA-A50E", "TCGA-EA-A5FO", "TCGA-EA-A5O9", "TCGA-EA-A5ZD", "TCGA-EA-A5ZE", "TCGA-EA-A5ZF", "TCGA-EA-A6QX", "TCGA-EA-A78R", "TCGA-EA-A97N", "TCGA-EK-A3GM", "TCGA-EX-A1H6", "TCGA-EX-A3L1", "TCGA-EX-A69L", "TCGA-EX-A8YF", "TCGA-FU-A23K", "TCGA-FU-A23L", "TCGA-FU-A3HZ", "TCGA-FU-A40J", "TCGA-FU-A57G", "TCGA-HG-A9SC", "TCGA-IR-A3LA", "TCGA-IR-A3LC", "TCGA-IR-A3LF", "TCGA-IR-A3LI", "TCGA-IR-A3LK", "TCGA-IR-A3LL", "TCGA-JW-A5VG", "TCGA-JW-A5VH", "TCGA-JW-A5VJ", "TCGA-JW-A5VL", "TCGA-JW-A69B", "TCGA-JW-A852", "TCGA-JX-A3PZ", "TCGA-JX-A3Q8", "TCGA-JX-A5QV", "TCGA-LP-A4AU", "TCGA-LP-A4AV", "TCGA-LP-A4AW", "TCGA-LP-A4AX", "TCGA-LP-A5U2", "TCGA-LP-A5U3", "TCGA-LP-A7HU", "TCGA-MA-AA3Y", "TCGA-MA-AA3Z", "TCGA-MA-AA42", "TCGA-MU-A51Y", "TCGA-MY-A5BD", "TCGA-MY-A5BE", "TCGA-MY-A5BF", "TCGA-Q1-A5R1", "TCGA-Q1-A5R2", "TCGA-Q1-A5R3", "TCGA-Q1-A6DT", "TCGA-Q1-A6DV", "TCGA-Q1-A73O", "TCGA-Q1-A73P", "TCGA-Q1-A73R", "TCGA-R2-A69V", "TCGA-UC-A7PI", "TCGA-VS-A8EG", "TCGA-VS-A8EI", "TCGA-VS-A8EJ", "TCGA-VS-A8EL", "TCGA-VS-A8Q8", "TCGA-VS-A8Q9", "TCGA-VS-A8QA", "TCGA-VS-A8QH", "TCGA-VS-A94W", "TCGA-VS-A94X", "TCGA-VS-A950", "TCGA-VS-A952", "TCGA-VS-A953", "TCGA-VS-A954", "TCGA-VS-A957", "TCGA-VS-A958", "TCGA-VS-A959", "TCGA-VS-A9U5", "TCGA-VS-A9U6", "TCGA-VS-A9U7", "TCGA-VS-A9UA", "TCGA-VS-A9UB", "TCGA-VS-A9UC", "TCGA-VS-A9UD", "TCGA-VS-A9UH", "TCGA-VS-A9UI", "TCGA-VS-A9UJ", "TCGA-VS-A9UL", "TCGA-VS-A9UM", "TCGA-VS-A9UO", "TCGA-VS-A9UQ", "TCGA-VS-A9UR", "TCGA-VS-A9UT", "TCGA-VS-A9UV", "TCGA-VS-A9UY", "TCGA-VS-A9UZ", "TCGA-VS-A9V2", "TCGA-VS-A9V3", "TCGA-VS-A9V5", "TCGA-VS-AA62", "TCGA-ZJ-AAXB", "TCGA-ZX-AA5X")

query.exp <- GDCquery(project = "TCGA-CESC", 
                      legacy = TRUE,
                      data.category = "Gene expression",
                      data.type = "Gene expression quantification",
                      platform = "Illumina HiSeq", 
                      file.type = "results",
                      barcode = listSamples,
                      experimental.strategy = "RNA-Seq",
                      sample.type = c("Primary Tumor","Solid Tissue Normal"))
GDCdownload(query.exp)

CESChabits.exp <- GDCprepare(query = query.exp, save = TRUE,
                             save.filename = "CESC_habitsExp.rda")
dataSubt <- TCGAquery_subtype(tumor = "CESC")
# get clinical data 
dataClin <- GDCquery_clinic(project = "TCGA-CESC","clinical") 
# Which samples are Primary Tumor
dataSmTP <- TCGAquery_SampleTypes(getResults(query.exp,cols="cases"),"TP") 
# which samples are solid tissue normal
dataSmNT <- TCGAquery_SampleTypes(getResults(query.exp,cols="cases"),"NT")
dataPrep <- TCGAanalyze_Preprocessing(object = CESChabits.exp, cor.cut = 0.6)                      
dataNorm <- TCGAanalyze_Normalization(tabDF = dataPrep, geneInfo = geneInfo, method = "gcContent") 
dataFilt <- TCGAanalyze_Filtering(tabDF = dataNorm,method = "quantile", qnt.cut =  0.25)   
dataDEGs <- TCGAanalyze_DEA(mat1 = dataFilt[,dataSmNT],mat2 = dataFilt[,dataSmTP], Cond1type = "Normal", Cond2type = "Tumor", fdr.cut = 0.05 ,logFC.cut = 1,  method = "glmLRT")  
write.table(dataDEGs, "habits_CESC_selected.txt", sep="\t")
TCGAVisualize_volcano(x = dataDEGs$logFC,
                      y = dataDEGs$FDR,
                      filename = "CESC_habits_selected_volcanoexp.png",
                      x.cut = 6,
                      y.cut = 5*10^-5,
                      names = rownames(dataDEGs),
                      color = c("black","red","darkgreen"),
                      names.size = 2,
                      xlab = " Gene expression fold change (Log2)",
                      legend = "State",
                      title = "Volcano plot (CIMP-high vs CIMP-low)",
                      width = 10)
ADD REPLY

Login before adding your answer.

Traffic: 577 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6