DESeq2 unpaired RNAseq analysis
1
0
Entering edit mode
adeler001 • 0
@adeler001-21743
Last seen 2.8 years ago
Canada

Hello I'm new to coding on Rstudio. I'm doing a RNA seq analysis to test for differential gene expression using deseq2, by using unpaired samples. I just wanted to know if I altered the following script template correctly to indicate the following : 1) the analysis will be using unpaired samples 2) the order of variable comparison will be affected versus unaffected

please see below the R studio script I used: # Import count table countdata <- read.table("family1301revisedRNA-seq.counts_fixed.txt", header=TRUE, row.names=1)

Remove .bam or .sam from filenames

colnames(countdata) <- gsub("\.[sb]am$", "", colnames(countdata))

Convert to matrix

countdata <- as.matrix(countdata) head(countdata)

Assign condition (affected versus unaffected)

condition <- factor(c("affected","affected","affected","unaffected","unaffected","unaffected"),levels=c("affected","unaffected")) condition <- relevel(condition, ref = "unaffected")

load DESeq2

library(DESeq2)

Create a coldata frame and instantiate the DESeqDataSet

coldata <- data.frame(row.names=colnames(countdata),condition)

dds <- DESeqDataSetFromMatrix(countData=countdata, colData=coldata, design = ~ condition) dds

pre-filtering to keep only rows that have at least 1 reads total

keep <- rowSums(counts(dds)) > 1 dds <- dds[keep,]

Run the DESeq

dds <- DESeq(dds)

Regularized log transformation for clustering/heatmaps

rld <- rlogTransformation(dds) head(assay(rld)) hist(assay(rld))

Colors for plots below

library(RColorBrewer) (mycols <- brewer.pal(8, "Dark2")[1:length(unique(condition))])

Sample distance heatmap

sampleDists <- as.matrix(dist(t(assay(rld)))) library(gplots) png("qc-heatmap_baker.png", w=1000, h=1000, pointsize=20) heatmap.2(as.matrix(sampleDists), key=F, trace="none", col=colorpanel(100, "black", "white"), ColSideColors=mycols[condition], RowSideColors=mycols[condition], margin=c(10, 10), main="Sample Distance Matrix") dev.off()

Principal components analysis

rldpca <- function (rld, intgroup = "condition", ntop = 500, colors=NULL, legendpos="bottomleft", main="PCA Biplot", textcx=1, ...) { require(genefilter) require(calibrate) require(RColorBrewer) rv = rowVars(assay(rld)) select = order(rv, decreasing = TRUE)[seqlen(min(ntop, length(rv)))] pca = prcomp(t(assay(rld)[select, ])) fac = factor(apply(as.data.frame(colData(rld)[, intgroup, drop = FALSE]), 1, paste, collapse = " : ")) if (is.null(colors)) { if (nlevels(fac) >= 3) { colors = brewer.pal(nlevels(fac), "Paired") } else { colors = c("black", "red") } } pc1var <- round(summary(pca)$importance[2,1]100, digits=1) pc2var <- round(summary(pca)$importance[2,2]100, digits=1) pc1lab <- paste0("PC1 (",as.character(pc1var),"%)") pc2lab <- paste0("PC2 (",as.character(pc2var),"%)") plot(PC2~PC1, data=as.data.frame(pca$x), bg=colors[fac], pch=21, xlab=pc1lab, ylab=pc2lab, main=main, ...) with(as.data.frame(pca$x), textxy(PC1, PC2, labs=rownames(as.data.frame(pca$x)), cex=textcx)) legend(legendpos, legend=levels(fac), col=colors, pch=20) # rldyplot(PC2 ~ PC1, groups = fac, data = as.data.frame(pca$rld), # pch = 16, cerld = 2, aspect = "iso", col = colours, main = draw.key(key = list(rect = list(col = colours), # terldt = list(levels(fac)), rep = FALSE))) } png("qc-pca.png", 1000, 1000, pointsize=20) rld_pca(rld, colors=mycols, intgroup="condition", xlim=c(-30, 30)) dev.off()

Get differential expression results

res <- results(dds) table(res$padj<0.05)

Order by adjusted p-value

res <- res[order(res$padj), ]

Merge with normalized count data

resdata <- merge(as.data.frame(res), as.data.frame(counts(dds, normalized=TRUE)), by="row.names", sort=FALSE) names(resdata)[1] <- "Gene" head(resdata)

get significant results (FDR<0.05)

Write results

write.csv(resdata, file="sig_diffexpr-results.csv")

MA plot

DESeq2::plotMA(dds, ylim=c(-1,1), cex=1)

Volcano plot with significant DE genes

volcanoplot <- function (res, lfcthresh=2, sigthresh=0.05, main="Volcano Plot", legendpos="bottomright", labelsig=TRUE, textcx=1, ...) { with(res, plot(log2FoldChange, -log10(padj), pch=20, main=main, ...)) with(subset(res, padj<sigthresh ),="" points(log2foldchange,="" -log10(padj),="" pch="20," col="red" ,="" ...))="" with(subset(res,="" abs(log2foldchange)&gt;lfcthresh),="" points(log2foldchange,="" -log10(padj),="" pch="20," col="orange" ,="" ...))="" with(subset(res,="" padj<sigthresh="" &amp;="" abs(log2foldchange)&gt;lfcthresh),="" points(log2foldchange,="" -log10(padj),="" pch="20," col="green" ,="" ...))="" if="" (labelsig)="" {="" require(calibrate)="" with(subset(res,="" padj<sigthresh="" &amp;="" abs(log2foldchange)&gt;lfcthresh),="" textxy(log2foldchange,="" -log10(padj),="" labs="Gene," cex="textcx," ...))="" }="" legend(legendpos,="" xjust="1," yjust="1," legend="c(paste("FDR&lt;",sigthresh,sep="")," paste("|logfc|&gt;",lfcthresh,sep="" ),="" "both"),="" pch="20," col="c("red","orange","green"))" }="" png("diffexpr-volcanoplot2.png",="" 1200,="" 1000,="" pointsize="20)" volcanoplot(resdata,="" lfcthresh="1," sigthresh="0.05," textcx=".5," xlim="c(-10.5," 14))="" dev.off()<="" p="">

deseq2 rnaseq • 1.6k views
ADD COMMENT
0
Entering edit mode
ADD REPLY
0
Entering edit mode
@mikelove
Last seen 3 days ago
United States

Yes, unpaired analysis requires ~condition, which you've specified.

ADD COMMENT
0
Entering edit mode

ok thanks for answering

ADD REPLY
0
Entering edit mode

ok thanks for answering

ADD REPLY

Login before adding your answer.

Traffic: 583 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6