How to get synonymous SNPs from biomaRt before timing out?
I am currently doing a study where we would like to focus on the synonymous SNPs of a set of 35 genes. I have their Ensembl gene IDs, but when I query them, the timeout is reached. This happens even if I query them one at a time.

I need gene ID, chromosome name, position, and consequence type (to get synonymous SNPs). The refSNP ID is helpful as well.

What can I do differently to get this information? Or what am I doing wrong?

Thank you,



# Retrieve SNP annotations for a set of Ensembl gene IDs from the GRCh37
# Ensembl variation mart, querying one gene at a time and combining the
# per-gene results into a single data frame (`snp_annotate`).
library(biomaRt)

# GRCh = 37 selects the GRCh37 archive; the empty `host` placeholder is dropped.
grch37_snp <- useEnsembl(
  biomart = "ENSEMBL_MART_SNP",
  dataset = "hsapiens_snp",
  GRCh = 37
)

# Columns requested for every variant (7 attributes).
snp_attributes <- c(
  "ensembl_gene_stable_id", "chr_name", "chrom_start", "chrom_end",
  "consequence_type_tv", "refsnp_id", "ensembl_peptide_allele"
)

# etc. there are more, just a few here for example
engids <- c(
  "ENSG00000142208", "ENSG00000149311", "ENSG00000138376", "ENSG00000012048",
  "ENSG00000139618", "ENSG00000158019", "ENSG00000136492"
)

# Collect one result per gene in a preallocated list and bind once at the end,
# instead of growing a data frame with rbind() on every iteration. This also
# removes the empty 8-column accumulator that did not match the 7 attributes.
per_gene <- vector("list", length(engids))
for (k in seq_along(engids)) {
  print(engids[[k]]) # progress marker: shows which gene is being queried
  per_gene[[k]] <- getBM(
    attributes = snp_attributes,
    filters = "ensembl_gene",
    values = engids[[k]],
    mart = grch37_snp
  )
}
snp_annotate <- do.call(rbind, per_gene)

[1] "ENSG00000142208"
[1] "ENSG00000149311"
[1] "ENSG00000138376"

Error in curl::curl_fetch_memory(url, handle = handle) : 
  Timeout was reached: [] Operation timed out after 300000 milliseconds with 311013 bytes received

sessionInfo( )

R version 4.2.1 (2022-06-23)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Ubuntu 22.04.2 LTS

Matrix products: default
BLAS:   /usr/lib/x86_64-linux-gnu/blas/
LAPACK: /usr/lib/x86_64-linux-gnu/lapack/

 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=sv_SE.UTF-8        LC_COLLATE=en_US.UTF-8     LC_MONETARY=sv_SE.UTF-8   
 [6] LC_MESSAGES=en_US.UTF-8    LC_PAPER=sv_SE.UTF-8       LC_NAME=C                  LC_ADDRESS=C               LC_TELEPHONE=C            

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] biomaRt_2.54.0

loaded via a namespace (and not attached):
 [1] KEGGREST_1.38.0        progress_1.2.2         tidyselect_1.2.0       purrr_1.0.1            vctrs_0.5.2            generics_0.1.3        
 [7] stats4_4.2.1           BiocFileCache_2.6.1    utf8_1.2.3             blob_1.2.3             XML_3.99-0.13          rlang_1.0.6           
[13] pillar_1.8.1           withr_2.5.0            glue_1.6.2             DBI_1.1.3              BiocParallel_1.32.5    rappdirs_0.3.3        
[19] BiocGenerics_0.44.0    bit64_4.0.5            dbplyr_2.3.1           GenomeInfoDbData_1.2.9 lifecycle_1.0.3        stringr_1.5.0         
[25] zlibbioc_1.44.0        Biostrings_2.66.0      codetools_0.2-19       memoise_2.0.1          Biobase_2.58.0         IRanges_2.32.0        
[31] fastmap_1.1.1          GenomeInfoDb_1.34.9    parallel_4.2.1         curl_5.0.0             AnnotationDbi_1.60.2   fansi_1.0.4           
[37] Rcpp_1.0.10            filelock_1.0.2         cachem_1.0.7           S4Vectors_0.36.2       XVector_0.38.0         bit_4.0.5             
[43] hms_1.1.2              png_0.1-8              digest_0.6.31          stringi_1.7.12         dplyr_1.1.0            cli_3.6.0             
[49] tools_4.2.1            bitops_1.0-7           magrittr_2.0.3         RCurl_1.98-1.10        RSQLite_2.3.0          tibble_3.2.0          
[55] crayon_1.5.2           pkgconfig_2.0.3        ellipsis_0.3.2         xml2_1.3.3             prettyunits_1.1.1      httr_1.4.5            
[61] rstudioapi_0.14        R6_2.5.1               compiler_4.2.1
biomaRt SNP
Asking for multiple things at once will tend to blow out your results, as you will get every possible combination.

> options(timeout = 5000)
> z <- getBM(snp_attributes, "ensembl_gene", "ENSG00000138376", grch37_snp)
> dim(z)
[1] 56433     7

Even just asking for the variant type and RSID is big

> z2 <- getBM(snp_attributes[5:6], "ensembl_gene", "ENSG00000142208", grch37_snp)
> dim(z2)
[1] 14341     2
> table(table(z2[,2]))

   1    2    3    4    5 
4024 1790 1847  294    4

Although trimming down to just synonymous variants makes it tractable.

> zsmall <- subset(z, consequence_type_tv %in% "synonymous_variant")
> head(zsmall)
    ensembl_gene_stable_id chr_name chrom_start chrom_end consequence_type_tv
62         ENSG00000138376        2   215632256 215632256  synonymous_variant
71         ENSG00000138376        2   215645545 215645545  synonymous_variant
119        ENSG00000138376        2   215645464 215645464  synonymous_variant
737        ENSG00000138376        2   215593469 215593469  synonymous_variant
741        ENSG00000138376        2   215593520 215593520  synonymous_variant
943        ENSG00000138376        2   215645989 215645989  synonymous_variant
     refsnp_id ensembl_peptide_allele
62   rs2070093                      H
71   rs2070096                      T
119  rs2229571                      R
737 rs13389324                      V
741 rs13389415                      I
943 rs28997574                      G
> dim(zsmall)
[1] 422   7
Thank you for your response.

How can I ask biomaRt to retrieve only synonymous variants? It does not appear to be an option for filters.


> options(timeout = 1e6)
> z <- getBM(snp_attributes, c("ensembl_gene","so_mini_parent_name"), list("ENSG00000138376", "synonymous_variant"), grch37_snp)
> dim(z)
[1] 427   7
> head(z)
  ensembl_gene_stable_id chr_name chrom_start chrom_end consequence_type_tv
1        ENSG00000138376        2   215632256 215632256  synonymous_variant
2        ENSG00000138376        2   215645545 215645545  synonymous_variant
3        ENSG00000138376        2   215645464 215645464  synonymous_variant
4        ENSG00000138376        2   215593469 215593469  synonymous_variant
5        ENSG00000138376        2   215593520 215593520  synonymous_variant
6        ENSG00000138376        2   215645989 215645989  synonymous_variant
   refsnp_id ensembl_peptide_allele
1  rs2070093                      H
2  rs2070096                      T
3  rs2229571                      R
4 rs13389324                      V
5 rs13389415                      I
6 rs28997574                      G
> table(z$consequence_type_tv)

stop_retained_variant    synonymous_variant 
                    5                   422

