Dear Gunter,
I am using the R code below for running cn.mops in parallel for EACH chromosome in a GERMLINE - TUMOR pair (it is inspired by one of your scripts on github). Please could you let me know if it looks ok, or if anything is missing. We are very much impressed by the speed : it takes 3-5 minutes per chromosome ;) thanks a lot !
#!/usr/bin/env Rscript library(cn.mops) library(parallel) library(snow) library(RUnit) library(BiocParallel) # registered() # register(MulticoreParam(workers = 1)) #### initially, we would need to enter the TUMOR FILE, the GERMLINE file, and the CHROMOSOME to run the analysis ON. args <- commandArgs(TRUE) TUMOR <- args[1] GERMLINE <- args[2] CHR <- args[3] ############### ## collecting the name of the file that is TUMOR ## collecting the name of the file that is GERMLINE ############### tumor <- getReadCountsFromBAM(TUMOR, refSeqName=CHR, WL=10000, mode="paired", parallel=12) normal <- getReadCountsFromBAM(GERMLINE, refSeqName=CHR, WL=10000, mode="paired", parallel=12) #### We need a special normalization i.e. POISSON because the tumor has made large CNVs X <- tumor values(X) <- cbind(values(tumor),values(normal)) #### NORMALIZE the DATA .. X.mode <- normalizeGenome(X, normType="poisson") ### Parameter settings for tumor: ### - norm=0, because we already have normalized ### - integer copy numbers higher than 8 allowed ### - DNAcopy as segmentation algorithm. ## the position 1 is for TUMOR, the position 2 is for NORMAL. ref_analysis_norm0 <- referencecn.mops(X.mode[,1], X.mode[,2], norm=0, I = c(0.025, 0.5, 1, 1.5, 2, 2.5, 3, 3.5, 4, 8, 16, 32, 64), classes = c("CN0", "CN1", "CN2", "CN3", "CN4", "CN5", "CN6", "CN7","CN8","CN16","CN32","CN64","CN128"), segAlgorithm="DNAcopy") resCNMOPS <- calcIntegerCopyNumbers(ref_analysis_norm0) resCNMOPS <- cn.mops:::.replaceNames(resCNMOPS, basename(TUMOR) ,"TUMOR") ############## ## name_file <- paste(args[1], args[2], args[3], ".png", sep="_") ## name_file_display <- paste("z.cn.mops.seqplot.", basename(TUMOR), ".", basename(GERMLINE), ".", basename(CHR), ".png") name_file_display <- paste("z.cn.mops.seqplot", basename(TUMOR), basename(GERMLINE), basename(CHR), "png", sep=".") png(name_file_display) #segplot(resCNMOPS) segplot(resCNMOPS, seqnames=CHR, zlcol="blue") dev.off() ############### ## cnvs(resCNMOPS) # look at individual CNV regions ## cnvr(resCNMOPS) # look at REGIONS ############### SAVING THE RESULTS : results.segm <- as.data.frame(segmentation(resCNMOPS)) results.CNVs <- as.data.frame(cnvs(resCNMOPS)) results.CNVRegions <- as.data.frame(cnvr(resCNMOPS)) ############### NAMING THE FILES : name_file_results_segm <- paste("z.cn.mops.segments.normalization.POISSON", basename(TUMOR), basename(GERMLINE), basename(CHR), "txt", sep=".") name_file_results.CNVs <- paste("z.cn.mops.cnvs_shorts.normalization.POISSON", basename(TUMOR), basename(GERMLINE), basename(CHR), "txt", sep=".") name_file_results.CNVRegions <- paste("z.cn.mops.cnvs_regions.normalization.POISSON", basename(TUMOR), basename(GERMLINE), basename(CHR), "txt", sep=".") ############### WRITING THE TABLES : write.table(results.segm, file=name_file_results_segm, sep="\t", row.names=F) write.table(results.CNVs, file=name_file_results.CNVs, sep="\t", row.names=F) write.table(results.CNVRegions, file=name_file_results.CNVRegions, sep="\t", row.names=F)
Also, if I may add a little question, for downstream analyses, shall I use :
results.CNVs <- as.data.frame(cnvs(resCNMOPS))
or
results.CNVRegions <- as.data.frame(cnvr(resCNMOPS))
thank you !
It's better if you use the CODE highlight for your code, rather than yellow highlighting, which just makes it unreadable.
Thanks James for your suggestion, yes, it looks good ;)