GenomicFeatures::getPromoterSeq error when selecting all promoters sequences (in mice)
1
0
Entering edit mode
@branislav-misovic-4248
Last seen 5.6 years ago
Netherlands/Amsterdam

hi ,

  When running the search for all promoters sequences  in mice , with  1000 bp  up /downstream, parameter the getPromoterSeq  works fine,  but increasing the up/downstream parameter to say 5000  or 2000 it errors  (log bellow):

Any advice appreciated .
Branko
-------------------------------------------

library(GenomicFeatures)
library(BSgenome.Mmusculus.UCSC.mm10)
library(TxDb.Mmusculus.UCSC.mm10.knownGene)  

chr.loc = transcriptsBy (TxDb.Mmusculus.UCSC.mm10.knownGene, by = "gene")
system.time ( prom.all <- getPromoterSeq(chr.loc, Mmusculus, upstream=2000, downstream=1000))

Error in loadFUN(x, seqname, ranges) :
  trying to load regions beyond the boundaries of non-circular sequence "chr4_JH584294_random"

In addition: Warning messages:
1: In valid.GenomicRanges.seqinfo(x, suggest.trim = TRUE) :
  GRanges object contains 1 out-of-bound range located on sequence
  chr4_JH584294_random. Note that only ranges located on a non-circular

........
--------------------

14: stop("trying to load regions beyond the boundaries ", "of non-circular sequence \"",
        seqname, "\"")
13: loadFUN(x, seqname, ranges)
12: loadFUN(x, seqname, ranges)
11: loadSubseqsFromStrandedSequence(x@single_sequences, seqname,
        ranges(gr), strand(gr), is_circular)
10: FUN(1:35[[27L]], ...)
9: lapply(seq_len(length(grl)), function(i) {
       gr <- grl[[i]]
       if (length(gr) == 0L)
           return(DNAStringSet())
       seqlevel <- grl_seqlevels[i]
       is_circular <- isCircular(x)[[seqlevel]]
       idx <- match(seqlevel, x@user_seqnames)
       if (is.na(idx))
           stop("invalid sequence name: ", seqlevel)
       seqname <- names(x@user_seqnames)[[idx]]
       if (is.null(snplocs(x, seqname))) {
           subject <- try(get(seqname, envir = x@.seqs_cache, inherits = FALSE),
               silent = TRUE)
           if (is(subject, "try-error")) {
               ans <- loadSubseqsFromStrandedSequence(x@single_sequences,
                   seqname, ranges(gr), strand(gr), is_circular)
               return(ans)
           }
           .linkToCachedObject(subject) <- .newLinkToCachedObject(seqname,
               x@.seqs_cache, x@.link_counts)
       }
       else {
           subject <- x[[seqlevel]]
       }
       masks(subject) <- NULL
       loadSubseqsFromStrandedSequence(subject, seqlevel, ranges(gr),
           strand(gr), is_circular)
   })
8: lapply(seq_len(length(grl)), function(i) {
       gr <- grl[[i]]
       if (length(gr) == 0L)
           return(DNAStringSet())
       seqlevel <- grl_seqlevels[i]
       is_circular <- isCircular(x)[[seqlevel]]
       idx <- match(seqlevel, x@user_seqnames)
       if (is.na(idx))
           stop("invalid sequence name: ", seqlevel)
       seqname <- names(x@user_seqnames)[[idx]]
       if (is.null(snplocs(x, seqname))) {
           subject <- try(get(seqname, envir = x@.seqs_cache, inherits = FALSE),
               silent = TRUE)
           if (is(subject, "try-error")) {
               ans <- loadSubseqsFromStrandedSequence(x@single_sequences,
                   seqname, ranges(gr), strand(gr), is_circular)
               return(ans)
           }
           .linkToCachedObject(subject) <- .newLinkToCachedObject(seqname,
               x@.seqs_cache, x@.link_counts)
       }
       else {
           subject <- x[[seqlevel]]
       }
       masks(subject) <- NULL
       loadSubseqsFromStrandedSequence(subject, seqlevel, ranges(gr),
           strand(gr), is_circular)
   })
7: .extractFromBSgenomeSingleSequences(x, sseq_args$names, sseq_args$start,
       sseq_args$end, sseq_args$width, sseq_args$strand)
6: .local(x, ...)
5: getSeq(subject, promoter.granges)
4: getSeq(subject, promoter.granges)
3: getPromoterSeq(chr.loc, Mmusculus, upstream = 2000, downstream = 1000)
2: getPromoterSeq(chr.loc, Mmusculus, upstream = 2000, downstream = 1000)
1: system.time(prom.all <- getPromoterSeq(chr.loc, Mmusculus, upstream = 2000,
       downstream = 1000))


---------------------------------------------

> sessionInfo()
R version 3.1.1 (2014-07-10)
Platform: x86_64-pc-linux-gnu (64-bit)

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=en_US.UTF-8
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C
 [9] LC_ADDRESS=C               LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C

attached base packages:
[1] stats4    parallel  stats     graphics  grDevices utils     datasets
[8] methods   base

other attached packages:
 [1] TxDb.Mmusculus.UCSC.mm10.knownGene_3.0.0
 [2] BSgenome.Mmusculus.UCSC.mm10_1.4.0
 [3] BSgenome_1.34.0
 [4] rtracklayer_1.26.2
 [5] Biostrings_2.34.0
 [6] XVector_0.6.0
 [7] org.Mm.eg.db_3.0.0
 [8] RSQLite_1.0.0
 [9] DBI_0.3.1
[10] GenomicFeatures_1.18.2
[11] AnnotationDbi_1.28.1
[12] Biobase_2.26.0
[13] GenomicRanges_1.18.1
[14] GenomeInfoDb_1.2.3
[15] IRanges_2.0.0
[16] S4Vectors_0.4.0
[17] BiocGenerics_0.12.1
[18] BiocInstaller_1.16.1

loaded via a namespace (and not attached):
 [1] base64enc_0.1-2         BatchJobs_1.5           BBmisc_1.8
 [4] BiocParallel_1.0.0      biomaRt_2.22.0          bitops_1.0-6
 [7] brew_1.0-6              checkmate_1.5.0         codetools_0.2-9
[10] digest_0.6.4            fail_1.2                foreach_1.4.2
[13] GenomicAlignments_1.2.1 iterators_1.0.7         RCurl_1.95-4.3
[16] Rsamtools_1.18.2        sendmailR_1.2-1         stringr_0.6.2
[19] tools_3.1.1             XML_3.98-1.1            zlibbioc_1.12.0

 

GenomicFeatures getPromoterSeq • 2.9k views
ADD COMMENT
1
Entering edit mode
@james-w-macdonald-5106
Last seen 13 hours ago
United States

It depends on if you care about all the unplaced haplotypes. If not, just nuke them and go forward.

> trs <- transcriptsBy(TxDb.Mmusculus.UCSC.mm10.knownGene, by = "gene")
> trs <- keepStandardChromosomes(trs)

> prom <- getPromoterSeq(trs, Mmusculus, upstream = 2e3, downstream=1e3)

> prom
DNAStringSetList of length 23696
[["100009600"]] CTTCTTCTTTTTCGAGACAGGGTTTCTCTGTGTAGCCCTGGCTATCAACTCAGAAATCCGC...
[["100009609"]] CACCTCACACCTGTCAGAATGGCTAGGATCAAAAATTCAGGTGACAGCAGATGCTGGTGAG...
[["100009614"]] TGGGTGTCTCATTAGCTCTTTGTATAGCTTCTGAGAGTCATAAAGCCTATATGTCAAAACT...
[["100009664"]] CACCTACGGGACTAAATAAAGAGTCAGCACTTTTTGGCAAAAGCAGCCTAAGTCCCTGTTT...
[["100012"]] GTTGTCTTACTAAAGCAAACACTAGGAAGATTGTTTTCCTGAGGCAGGCCAAGGTGGAAAAAGT...
[["100017"]] TGCCTGCCCCTCTTTGACTTATTGATGAGGCCTGCTTCTGGGGAGCAAGATAAGGATGTGGACA...
[["100019"]] AGAAAGTAAAATTAAAGAATGAGTAAGCTGAAAAATCAGGTCAAGAAACTCCCAAAAGGAACCC...
[["100033459"]] AATTTCTGTATCCATTCTTCTGTCATGGGACATCTGGGTTGTTTCCAGCTTTGGGCTATCA...
[["100034251"]] GTCTGAGTGAGGGACACTATAAGAAGTGCACATCCAATCAGCAATGAGCTAATGAGCAGAG...
[["100034361"]] TGCAGGATCTAAATCAGCACTTACACTGGATCCTGTAATATTGAGCCCAGTCTACTCCTCC...
...
<23686 more elements>

 

ADD COMMENT
0
Entering edit mode

thank you ,  also for the lesson about unplaced ones ...

ADD REPLY

Login before adding your answer.

Traffic: 850 users visited in the last hour
Help About
FAQ
Access RSS
API
Stats

Use of this site constitutes acceptance of our User Agreement and Privacy Policy.

Powered by the version 2.3.6