Entering edit mode
miyouduniu
•
0
@miyouduniu-17211
Last seen 5.7 years ago
Hi, I am trying to convert some VCFs to GDS and convert them back, but had some errors. Could you please get me some suggestions? Thanks a lot!
library(SeqArray)
library(VariantAnnotation)
## both vcfs have 5 metadata columns
vcf1 <- readVcf('sample1.vcf','hg38')
vcf1
# class: CollapsedVCF
# dim: 950 1
# rowRanges(vcf):
# GRanges with 5 metadata columns: paramRangeID, REF, ALT, QUAL, FILTER
vcf2 <- readVcf('sample2.vcf','hg38')
vcf2
# class: CollapsedVCF
# dim: 950 1
# rowRanges(vcf):
# GRanges with 5 metadata columns: paramRangeID, REF, ALT, QUAL, FILTER
## convert to gds
seqVCF2GDS("sample1.vcf", "sample1.gds", storage.option="ZIP_RA")
seqVCF2GDS("sample2.vcf", "sample2.gds", storage.option="ZIP_RA")
## after converting, can not convert back to vcf
gds1 <- seqOpen("sample1.gds")
seqAsVCF(gds1)
# Error in dimnames(v) <- listsample.id, variant.id) :
# length of 'dimnames' [1] not equal to array extent
gds2 <- seqOpen("sample2.gds")
seqAsVCF(gds2)
# Error in dimnames(v) <- listsample.id, variant.id) :
# length of 'dimnames' [1] not equal to array extent
## can somehow get around this problem by subsetting info and fmt, but now the rowRanges have duplicated metadata columns
seqExport(gds1, "slim_sample1.gds", info.var=c("SVTYPE","SVLEN","END"), fmt.var=c("DP","GQ","AO","AP"))
seqClose(gds1)
slim1 <- seqOpen("slim_sample1.gds")
seqAsVCF(slim1)
# class: CollapsedVCF
# dim: 950 1
# rowRanges(vcf):
# GRanges with 9 metadata columns: ID, REF, ALT, QUAL, FILTER, REF, ALT, QUAL, FILTER
seqExport(gds2, "slim_sample2.gds", info.var=c("SVTYPE","SVLEN","END"), fmt.var=c("DP","GQ","AO","AP"))
seqClose(gds2)
slim2 <- seqOpen("slim_sample2.gds")
seqAsVCF(slim2)
# class: CollapsedVCF
# dim: 950 1
# rowRanges(vcf):
# GRanges with 9 metadata columns: ID, REF, ALT, QUAL, FILTER, REF, ALT, QUAL, FILTER
seqClose(slim1)
seqClose(slim2)
## can merge the two gds, but can not convert the merged to vcf
seqMerge(c("slim_sample1.gds", "slim_sample2.gds"), "merged.gds", storage.option="ZIP_RA")
merged <- seqOpen("merged.gds")
seqAsVCF(merged)
# Error in .Call2("solve_user_SEW0", start, end, width, PACKAGE = "IRanges") :
# solving row 1: range cannot be determined from the supplied arguments (too many NAs)
sessionInfo()
# R version 3.4.3 (2017-11-30)
# Platform: x86_64-pc-linux-gnu (64-bit)
# Running under: Red Hat Enterprise Linux Server release 6.6 (Santiago)
# Matrix products: default
# BLAS: /gnet/is2/p01/apps/R/3.4.3-20171201-stablerc/x86_64-linux-2.6-rhel6/lib64/R/lib/libRblas.so
# LAPACK: /gnet/is2/p01/apps/R/3.4.3-20171201-stablerc/x86_64-linux-2.6-rhel6/lib64/R/lib/libRlapack.so
# locale:
# [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
# [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
# [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
# [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
# [9] LC_ADDRESS=C LC_TELEPHONE=C
# [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
# attached base packages:
# [1] stats4 parallel stats graphics grDevices utils datasets
# [8] methods base
# other attached packages:
# [1] SeqArray_1.18.2 gdsfmt_1.14.1
# [3] VariantAnnotation_1.24.5 Rsamtools_1.30.0
# [5] Biostrings_2.46.0 XVector_0.18.0
# [7] SummarizedExperiment_1.8.1 DelayedArray_0.4.1
# [9] matrixStats_0.53.0 Biobase_2.38.0
# [11] GenomicRanges_1.30.1 GenomeInfoDb_1.14.0
# [13] IRanges_2.12.0 S4Vectors_0.16.0
# [15] BiocGenerics_0.24.0
A few comments/questions:
readVcf
)?