My experiment details for RNAseq data acquisition was this. This is human PBMC samples for 98 participants and two days day0 and day7 (corresponding to pre and post vaccination respectively) - in total 196 samples. 54 out of 98 were high responders and 44 were low responders. So i have two biological variable visit(day0 and day7) and response status(high/low responder)
I need to analyze data for following comparisons:
1: day0 : high versus low reponders
2: high responders ; See the difference at day7 in comparison to day0
3: low responders ; See the difference at day7 in comparison to day0
my code looks something like this and i wnat to know if there is a better approach and i am on right track or what?
#1. day0 : high versus low reponders
# Create DESeqDataSet object
dds <- DESeqDataSetFromMatrix(countData = readCount, colData = metadata, design = ~ visit + response_status)
#filtering by expression counts
#dds <- dds[rowSums(counts(dds))>min(table(metadata$visit, metadata$response_status)),]
# Calculate CPM values
cpm <- cpm(counts(dds), normalized.lib.sizes = TRUE)
# Count the number of samples with > 0.5 CPM for each gene
num_samples <- apply(cpm > 0.5, 1, sum)
# Filter out genes with fewer than x(smallest group) samples meeting the criterion
dds <- dds[num_samples >= 44, ]
# Filter data for day0 visit and LR vs HR comparison
dds_day0 <- dds[dds$visit == "day0" & dds$response_status %in% c("HR", "LR"), ]
# Convert response_status to factor
dds_day0$response_status <- factor(dds_day0$response_status)
# Set reference level for response_status
dds_day0$response_status <- relevel(dds_day0$response_status, ref = "LR")
# Run DESeq analysis
dds_day0 <- DESeq(dds_day0, test = "Wald")
# Get results for LR vs HR comparison
res_day0 <- results(dds_day0)
dim(subset(res_day0, pvalue < 0.05))
dim(subset(res_day0, padj < 0.05))
write.table(res_day0,"D0_responsestatus_PBMC.tsv", sep = "\t", row.names = TRUE)
# for 2 and 3 it will be like this ?
# Create DESeqDataSet object
dds <- DESeqDataSetFromMatrix(countData = readCount, colData = metadata, design = ~ visit + response_status)
#filtering by expression counts
# Calculate CPM values
cpm <- cpm(counts(dds), normalized.lib.sizes = TRUE)
# Count the number of samples with > 0.5 CPM for each gene
num_samples <- apply(cpm > 0.5, 1, sum)
# Filter out genes with fewer than five samples meeting the criterion
dds <- dds[num_samples >= 44, ]
# Filter data for day7 compared to day 0 in HR (high responder group)
dds_hr_day7day0 <- dds[dds$response_status %in% c("HR"), ]
# Convert visit to factor
dds_hr_day7day0 $visit <- factor(dds_hr_day7day0 $visit)
# Set reference level for response_status
dds_hr_day7day0 $visit <- relevel(dds_hr_day7day0 $visit, ref = "day0")
# Run DESeq analysis
dds_hr_day7day0 <- DESeq(dds_hr_day7day0 , test = "Wald")
# Get results for LR vs HR comparison
dds_hr_day7day0 <- results(dds_hr_day7day0 )