Entering edit mode
hi ,
When running the search for all promoters sequences in mice , with 1000 bp up /downstream, parameter the getPromoterSeq works fine, but increasing the up/downstream parameter to say 5000 or 2000 it errors (log bellow):
Any advice appreciated .
Branko
-------------------------------------------
library(GenomicFeatures) library(BSgenome.Mmusculus.UCSC.mm10) library(TxDb.Mmusculus.UCSC.mm10.knownGene) chr.loc = transcriptsBy (TxDb.Mmusculus.UCSC.mm10.knownGene, by = "gene") system.time ( prom.all <- getPromoterSeq(chr.loc, Mmusculus, upstream=2000, downstream=1000)) Error in loadFUN(x, seqname, ranges) : trying to load regions beyond the boundaries of non-circular sequence "chr4_JH584294_random" In addition: Warning messages: 1: In valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE) : GRanges object contains 1 out-of-bound range located on sequence chr4_JH584294_random. Note that only ranges located on a non-circular ........ -------------------- 14: stop("trying to load regions beyond the boundaries ", "of non-circular sequence \"", seqname, "\"") 13: loadFUN(x, seqname, ranges) 12: loadFUN(x, seqname, ranges) 11: loadSubseqsFromStrandedSequence(x@single_sequences, seqname, ranges(gr), strand(gr), is_circular) 10: FUN(1:35[[27L]], ...) 9: lapply(seq_len(length(grl)), function(i) { gr <- grl[[i]] if (length(gr) == 0L) return(DNAStringSet()) seqlevel <- grl_seqlevels[i] is_circular <- isCircular(x)[[seqlevel]] idx <- match(seqlevel, x@user_seqnames) if (is.na(idx)) stop("invalid sequence name: ", seqlevel) seqname <- names(x@user_seqnames)[[idx]] if (is.null(snplocs(x, seqname))) { subject <- try(get(seqname, envir = x@.seqs_cache, inherits = FALSE), silent = TRUE) if (is(subject, "try-error")) { ans <- loadSubseqsFromStrandedSequence(x@single_sequences, seqname, ranges(gr), strand(gr), is_circular) return(ans) } .linkToCachedObject(subject) <- .newLinkToCachedObject(seqname, x@.seqs_cache, x@.link_counts) } else { subject <- x[[seqlevel]] } masks(subject) <- NULL loadSubseqsFromStrandedSequence(subject, seqlevel, ranges(gr), strand(gr), is_circular) }) 8: lapply(seq_len(length(grl)), function(i) { gr <- grl[[i]] if (length(gr) == 0L) return(DNAStringSet()) seqlevel <- grl_seqlevels[i] is_circular <- isCircular(x)[[seqlevel]] idx <- match(seqlevel, x@user_seqnames) if (is.na(idx)) stop("invalid sequence name: ", seqlevel) seqname <- names(x@user_seqnames)[[idx]] if (is.null(snplocs(x, seqname))) { subject <- try(get(seqname, envir = x@.seqs_cache, inherits = FALSE), silent = TRUE) if (is(subject, "try-error")) { ans <- loadSubseqsFromStrandedSequence(x@single_sequences, seqname, ranges(gr), strand(gr), is_circular) return(ans) } .linkToCachedObject(subject) <- .newLinkToCachedObject(seqname, x@.seqs_cache, x@.link_counts) } else { subject <- x[[seqlevel]] } masks(subject) <- NULL loadSubseqsFromStrandedSequence(subject, seqlevel, ranges(gr), strand(gr), is_circular) }) 7: .extractFromBSgenomeSingleSequences(x, sseq_args$names, sseq_args$start, sseq_args$end, sseq_args$width, sseq_args$strand) 6: .local(x, ...) 5: getSeq(subject, promoter.granges) 4: getSeq(subject, promoter.granges) 3: getPromoterSeq(chr.loc, Mmusculus, upstream = 2000, downstream = 1000) 2: getPromoterSeq(chr.loc, Mmusculus, upstream = 2000, downstream = 1000) 1: system.time(prom.all <- getPromoterSeq(chr.loc, Mmusculus, upstream = 2000, downstream = 1000)) --------------------------------------------- > sessionInfo() R version 3.1.1 (2014-07-10) Platform: x86_64-pc-linux-gnu (64-bit) locale: [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8 [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 [7] LC_PAPER=en_US.UTF-8 LC_NAME=C [9] LC_ADDRESS=C LC_TELEPHONE=C [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C attached base packages: [1] stats4 parallel stats graphics grDevices utils datasets [8] methods base other attached packages: [1] TxDb.Mmusculus.UCSC.mm10.knownGene_3.0.0 [2] BSgenome.Mmusculus.UCSC.mm10_1.4.0 [3] BSgenome_1.34.0 [4] rtracklayer_1.26.2 [5] Biostrings_2.34.0 [6] XVector_0.6.0 [7] org.Mm.eg.db_3.0.0 [8] RSQLite_1.0.0 [9] DBI_0.3.1 [10] GenomicFeatures_1.18.2 [11] AnnotationDbi_1.28.1 [12] Biobase_2.26.0 [13] GenomicRanges_1.18.1 [14] GenomeInfoDb_1.2.3 [15] IRanges_2.0.0 [16] S4Vectors_0.4.0 [17] BiocGenerics_0.12.1 [18] BiocInstaller_1.16.1 loaded via a namespace (and not attached): [1] base64enc_0.1-2 BatchJobs_1.5 BBmisc_1.8 [4] BiocParallel_1.0.0 biomaRt_2.22.0 bitops_1.0-6 [7] brew_1.0-6 checkmate_1.5.0 codetools_0.2-9 [10] digest_0.6.4 fail_1.2 foreach_1.4.2 [13] GenomicAlignments_1.2.1 iterators_1.0.7 RCurl_1.95-4.3 [16] Rsamtools_1.18.2 sendmailR_1.2-1 stringr_0.6.2 [19] tools_3.1.1 XML_3.98-1.1 zlibbioc_1.12.0
thank you , also for the lesson about unplaced ones ...