I have been struggling with Branchpointer for a few hours, using either a .fa file or the BSgenome option. Below are the code and the errors reported:
require(data.table)
library(dplyr)
library(branchpointer)
library(BSgenome.Hsapiens.UCSC.hg38)
g <- BSgenome.Hsapiens.UCSC.hg38::BSgenome.Hsapiens.UCSC.hg38
>
exons <- gtfToExons("//home/data1/Genome/gencode.v38.primary_assembly.annotation.gtf")
setwd("/home/alex/RNAseq_data_et_analyses/Analyse_HeLa_v2/analyse1")
head(exons)
GRanges object with 6 ranges and 6 metadata columns:
seqnames ranges strand | gene_id gene_type transcript_id
<Rle> <IRanges> <Rle> | <character> <character> <character>
[1] chr1 11869-12227 + | ENSG00000223972.5 transcribed_unproces.. ENST00000456328.2
[2] chr1 12613-12721 + | ENSG00000223972.5 transcribed_unproces.. ENST00000456328.2
[3] chr1 13221-14409 + | ENSG00000223972.5 transcribed_unproces.. ENST00000456328.2
[4] chr1 12010-12057 + | ENSG00000223972.5 transcribed_unproces.. ENST00000450305.2
[5] chr1 12179-12227 + | ENSG00000223972.5 transcribed_unproces.. ENST00000450305.2
[6] chr1 12613-12697 + | ENSG00000223972.5 transcribed_unproces.. ENST00000450305.2
transcript_type exon_id exon_number
<character> <character> <character>
[1] processed_transcript ENSE00002234944.1 1
[2] processed_transcript ENSE00003582793.1 2
[3] processed_transcript ENSE00002312635.1 3
[4] transcribed_unproces.. ENSE00001948541.1 1
[5] transcribed_unproces.. ENSE00001671638.2 2
[6] transcribed_unproces.. ENSE00001758273.2 3
seqinfo: 47 sequences from an unspecified genome; no seqlengths
>
getwd()
[1] "/home/alex/RNAseq_data_et_analyses/Analyse_HeLa_v2/analyse1"
queryIntron <- readQueryFile("exC_bp1",
queryType = "region",
exons = exons)
>
head(queryIntron)
GRanges object with 6 ranges and 6 metadata columns:
seqnames ranges strand | id to_3prime to_5prime same_gene
<Rle> <IRanges> <Rle> | <character> <numeric> <numeric> <logical>
[1] chr17 66688907-66688933 + | chr17:66688951-66689.. 18 497 TRUE
[2] chr3 75432888-75432914 - | chr3:75432808-75432870 18 1837 TRUE
[3] chr3 48580362-48580388 - | chr3:48580300-48580344 18 219 TRUE
[4] chr4 103147664-103147690 - | chr4:103147356-10314.. 18 1180 TRUE
[5] chr7 100627992-100628018 - | chr7:100627907-10062.. 18 81 TRUE
[6] chr11 65183084-65183110 + | chr11:65183128-65183.. 18 142 TRUE
exon_3prime exon_5prime
<character> <character>
[1] ENSE00003551970.1 ENSE00003465619.1
[2] ENSE00002371856.1 ENSE00001958820.1
[3] ENSE00003548991.1 ENSE00003598438.1
[4] ENSE00000970107.1 ENSE00000970106.1
[5] ENSE00003651039.1 ENSE00003463524.1
[6] ENSE00003483377.1 ENSE00002150254.1
seqinfo: 25 sequences from an unspecified genome; no seqlengths
branchpointPredictionsIntron <- predictBranchpoints(queryIntron,
queryType = "region",
genome="//home/data1/Genome/GRCh38.primary_assembly.genome.fa",
bedtoolsLocation = "/usr/bin/bedtools" )
Error in getListElement(x, i, ...) :
GRanges objects don't support [[, as.list(), lapply(), or unlist() at the moment
>
branchpointPredictionsIntron <- predictBranchpoints(queryIntron,
queryType = "region",
rmChr = TRUE,
BSgenome = g)
Error in getBranchpointSequence(query, uniqueId = uniqueId, queryType = queryType, :
Chromosome names of query and genome do not match
head(BSgenome,100)
1 function (organism, common_name, genome, provider, provider_version,
2 release_date, release_name, source_url, seqnames, circ_seqs = NA,
3 mseqnames, seqs_pkgname, seqs_dirpath, species = NA_character_)
4 {
5 single_sequences <- OnDiskNamedSequences(seqs_dirpath, seqnames = seqnames)
6 if (missing(genome))
7 genome <- provider_version
8 seqinfo <- .make_BSgenome_seqinfo(single_sequences, circ_seqs,
9 genome, seqnames)
10 seqnames <- seqnames(seqinfo)
11 if (missing(common_name))
12 common_name <- species
13 metadata <- list(organism = organism, common_name = common_name,
14 genome = genome, provider = provider, release_date = release_date,
15 source_url = source_url)
16 if (is.null(mseqnames))
17 mseqnames <- character(0)
18 multiple_sequences <- RdaCollection(seqs_dirpath, mseqnames)
19 names(user_seqnames) <- user_seqnames <- seqnames
20 new("BSgenome", metadata = metadata, pkgname = seqs_pkgname,
21 single_sequences = single_sequences, multiple_sequences = multiple_sequences,
22 seqinfo = seqinfo, user_seqnames = user_seqnames, .seqs_cache = new.env(parent = emptyenv()),
23 .link_counts = new.env(parent = emptyenv()))
24 }
sessionInfo()
R version 4.1.2 (2021-11-01)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 20.04.3 LTS
Matrix products: default
BLAS: /usr/local/lib/R/lib/libRblas.so
LAPACK: /usr/local/lib/R/lib/libRlapack.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8
[4] LC_COLLATE=en_US.UTF-8 LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C LC_ADDRESS=C
[10] LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] BiocManager_1.30.16 BSgenome.Hsapiens.UCSC.hg38_1.4.4 BSgenome_1.62.0
[4] rtracklayer_1.54.0 Biostrings_2.62.0 XVector_0.34.0
[7] GenomicRanges_1.46.1 GenomeInfoDb_1.30.1 IRanges_2.28.0
[10] S4Vectors_0.32.3 BiocGenerics_0.40.0 branchpointer_1.20.0
[13] caret_6.0-90 lattice_0.20-45 ggplot2_3.3.5
[16] devtools_2.4.3 usethis_2.1.5 dplyr_1.0.8
[19] data.table_1.14.2
loaded via a namespace (and not attached):
[1] colorspace_2.0-2 rjson_0.2.21 ellipsis_0.3.2
[4] class_7.3-19 rprojroot_2.0.2 fs_1.5.2
[7] listenv_0.8.0 remotes_2.4.2 bit64_4.0.5
[10] AnnotationDbi_1.56.2 prodlim_2019.11.13 fansi_1.0.2
[13] lubridate_1.8.0 xml2_1.3.3 codetools_0.2-18
[16] splines_4.1.2 cachem_1.0.6 pkgload_1.2.4
[19] Rsamtools_2.10.0 pROC_1.18.0 kernlab_0.9-29
[22] dbplyr_2.1.1 png_0.1-7 compiler_4.1.2
[25] httr_1.4.2 assertthat_0.2.1 Matrix_1.3-4
[28] fastmap_1.1.0 cli_3.1.1 prettyunits_1.1.1
[31] tools_4.1.2 gtable_0.3.0 glue_1.6.1
[34] GenomeInfoDbData_1.2.7 reshape2_1.4.4 rappdirs_0.3.3
[37] Rcpp_1.0.8 Biobase_2.54.0 vctrs_0.3.8
[40] nlme_3.1-153 iterators_1.0.14 timeDate_3043.102
[43] gower_1.0.0 stringr_1.4.0 globals_0.14.0
[46] ps_1.6.0 brio_1.1.3 testthat_3.1.2
[49] lifecycle_1.0.1 restfulr_0.0.13 XML_3.99-0.8
[52] future_1.23.0 zlibbioc_1.40.0 MASS_7.3-55
[55] scales_1.1.1 ipred_0.9-12 MatrixGenerics_1.6.0
[58] hms_1.1.1 SummarizedExperiment_1.24.0 parallel_4.1.2
[61] yaml_2.2.2 curl_4.3.2 memoise_2.0.1
[64] biomaRt_2.50.3 rpart_4.1-15 stringi_1.7.6
[67] RSQLite_2.2.9 BiocIO_1.4.0 desc_1.4.0
[70] foreach_1.5.2 filelock_1.0.2 BiocParallel_1.28.3
[73] pkgbuild_1.3.1 lava_1.6.10 matrixStats_0.61.0
[76] rlang_1.0.1 pkgconfig_2.0.3 bitops_1.0-7
[79] purrr_0.3.4 GenomicAlignments_1.30.0 recipes_0.1.17
[82] cowplot_1.1.1 bit_4.0.4 processx_3.5.2
[85] tidyselect_1.1.1 parallelly_1.30.0 gbm_2.1.8
[88] plyr_1.8.6 magrittr_2.0.2 R6_2.5.1
[91] generics_0.1.2 DelayedArray_0.20.0 DBI_1.1.2
[94] pillar_1.7.0 withr_2.4.3 survival_3.2-13
[97] KEGGREST_1.34.0 RCurl_1.98-1.5 nnet_7.3-16
[100] tibble_3.1.6 future.apply_1.8.1 crayon_1.4.2
[103] utf8_1.2.2 BiocFileCache_2.2.1 progress_1.2.2
[106] grid_4.1.2 blob_1.2.2 callr_3.7.0
[109] ModelMetrics_1.2.2.2 digest_0.6.29 munsell_0.5.0
[112] sessioninfo_1.2.2
Hi alexandre.maucuer, I am also facing the getListElement(x, i, ...) error while using Branchpointer. Did you manage to solve it? If so, could you let me know how? Thanks!