I want to use the CODEX pipeline to analyze the copy number variants (CNV) of a sample of whole exon sequencing (WES) data.
- I installed the packages into R-Studio by the following commands:
source("https://bioconductor.org/biocLite.R") biocLite("CODEX")
-
I created a BED File (PDL1_exons.bed) as following:
chr9 5357966 5358360 uc003zjd.3_exon_0_0_chr9_5357967_r 0 - chr9 5361077 5361187 uc003zjd.3_exon_1_0_chr9_5361078_r 0 - chr9 5361757 5361888 uc003zjd.3_exon_2_0_chr9_5361758_r 0 - chr9 5431896 5431983 uc003zjd.3_exon_3_0_chr9_5431897_r 0 - chr9 5436568 5436694 uc003zjd.3_exon_4_0_chr9_5436569_r 0 - chr9 5439142 5439266 uc003zjd.3_exon_5_0_chr9_5439143_r 0 - chr9 5524535 5524629 uc003zjd.3_exon_6_0_chr9_5524536_r 0 - chr9 5629566 5630071 uc003zjd.3_exon_7_0_chr9_5629567_r 0 -
- And I executed the CODEX pipeline (here) adapted to my case with 3 samples:
library(CODEX) dirPath <- "/Users/njagiella/Documents/Projects/PID" bamFile <- list.files(dirPath, pattern = "*.bam$") bamdir <- file.path(dirPath, bamFile) sampname <- as.matrix(bamFile) sampname bedFile <- file.path(dirPath, "PDL1_exons.bed") chr <- "chr9" bambedObj <- getbambed(bamdir = bamdir, bedFile = bedFile, sampname = sampname, projectname = "TEST_WES", chr) bamdir <- bambedObj$bamdir; sampname <- bambedObj$sampname ref <- bambedObj$ref; projectname <- bambedObj$projectname; chr <- bambedObj$chr coverageObj <- getcoverage(bambedObj, mapqthres = 20) Y <- coverageObj$Y; readlength <- coverageObj$readlength gc <- getgc(chr, ref) mapp <- getmapp(chr, ref) qcObj <- qc(Y, sampname, chr, ref, mapp, gc, cov_thresh = c(20, 4000), length_thresh = c(20, 2000), mapp_thresh = 0.9, gc_thresh = c(20, 80))
The script stops with the following output:
> library(CODEX)
> dirPath <- "/Users/njagiella/Documents/Projects/PID"
> bamFile <- list.files(dirPath, pattern = "*.bam$")
> bamdir <- file.path(dirPath, bamFile)
> sampname <- as.matrix(bamFile)
> sampname
[,1]
[1,] "21.sorted.bam"
[2,] "24.sorted.bam"
[3,] "28.sorted.bam"
> bedFile <- file.path(dirPath, "PDL1_exons.bed")
> chr <- "chr9"
> bambedObj <- getbambed(bamdir = bamdir, bedFile = bedFile, sampname = sampname, projectname = "TEST_WES", chr)
> bamdir <- bambedObj$bamdir; sampname <- bambedObj$sampname
> ref <- bambedObj$ref; projectname <- bambedObj$projectname; chr <- bambedObj$chr
> coverageObj <- getcoverage(bambedObj, mapqthres = 20)
Getting coverage for sample 21.sorted.bam: read length 100.
Getting coverage for sample 24.sorted.bam: read length 100.
Getting coverage for sample 28.sorted.bam: read length 100.
> Y <- coverageObj$Y; readlength <- coverageObj$readlength
> gc <- getgc(chr, ref)
> mapp <- getmapp(chr, ref)
> qcObj <- qc(Y, sampname, chr, ref, mapp, gc, cov_thresh = c(20, 4000), length_thresh = c(20, 2000), mapp_thresh = 0.9, gc_thresh = c(20, 80))
Excluded NA exons due to extreme coverage.
Excluded 0 exons due to extreme exonic length.
Excluded 0 exons due to extreme mappability.
Excluded 0 exons due to extreme GC content.
After taking union, excluded NA out of 8 exons in QC.
Error in NSBS(i, x, exact = exact, upperBoundIsStrict = !allow.append) :
subscript contains NAs
13 stop("subscript contains NAs")
12 NSBS(i, x, exact = exact, upperBoundIsStrict = !allow.append)
11 NSBS(i, x, exact = exact, upperBoundIsStrict = !allow.append)
10 normalizeSingleBracketSubscript(i, x, as.NSBS = TRUE)
9 extractROWS(x, i)
8 extractROWS(x, i)
7 .nextMethod(x, i)
6 eval(expr, envir, enclos)
5 eval(call, callEnv)
4 callNextMethod(x, i)
3 ref[binfilter]
2 ref[binfilter] at qc.R#32
1 qc(Y, sampname, chr, ref, mapp, gc, cov_thresh = c(20, 4000),
length_thresh = c(20, 2000), mapp_thresh = 0.9, gc_thresh = c(20,
80))