I have a list of sequences and want to identify gRNAs, and their CFD and on-target efficacy using offTargetAnalysis(). However, I am facing an error.
script:
library(CRISPRseek) # 1.34
library(BSgenome.Hsapiens.UCSC.hg19)
library(TxDb.Hsapiens.UCSC.hg19.knownGene)
library(org.Hs.eg.db)

# Search test.fasta for gRNAs (findgRNAs = TRUE) and analyse their
# off-targets on chr21 of hg19, scoring with the "CFDscore" method.
# Results are written to the current directory (outputDir = ".").
offTargetAnalysis(
  inputFilePath = "test.fasta",
  format = "fasta",
  header = FALSE,
  exportAllgRNAs = "fasta",
  findgRNAs = TRUE,
  findgRNAsWithREcutOnly = FALSE,
  findPairedgRNAOnly = FALSE,
  annotatePaired = FALSE,
  annotateExon = TRUE,
  scoring.method = "CFDscore",
  min.score = 0,
  topN = 100,
  topN.OfftargetTotalScore = 10,
  # TRUE is spelled out: `T` is an ordinary variable that can be reassigned.
  calculategRNAefficacyForOfftargets = TRUE,
  PAM = "NGG",
  PAM.pattern = "NNG$|NGN$",
  PAM.location = "3prime",
  allowed.mismatch.PAM = 1,
  PAM.size = 3,
  gRNA.size = 20,
  baseBeforegRNA = 4,
  baseAfterPAM = 3,
  rule.set = "Root_RuleSet2_2016",
  chromToSearch = c("chr21"),
  max.mismatch = 4,
  BSgenomeName = BSgenome.Hsapiens.UCSC.hg19,
  txdb = TxDb.Hsapiens.UCSC.hg19.knownGene,
  orgAnn = org.Hs.egSYMBOL,
  # NOTE(review): multicore is enabled but capped at one core; confirm
  # that this combination is intended.
  enable.multicore = TRUE,
  n.cores.max = 1,
  outputDir = ".",
  overwrite = TRUE
)
The input "test.fasta" contains 2 fake 100 bp sequences. The first one does not contain any PAM, while the second one contains just 1 PAM, so CRISPRseek identifies just 1 sgRNA there:
>KLHL17_chr1_896281_896380
ACTGTTGATGTCTTGACTCATGTGCTGAGCTGTGTCTGAACTGAGTATGTTACACAAACGCGACACGCGCGAACATGACGCGACTAACGCTGCTGTAACG
>KLHL17_chr1_896331_896430
TACACAAACGCGACACGCGCGAACAAGACGCGACTAACGCTGCAGTAACGAGAAAGCAGCTAAAGACGGAGAAGAGCTGAGCTCGTAGAAGCGACAAGAA
log:
> library(CRISPRseek) # v. 1.34.0
Loading required package: BiocGenerics
Loading required package: parallel
Attaching package: ‘BiocGenerics’
The following objects are masked from ‘package:parallel’:
clusterApply, clusterApplyLB, clusterCall, clusterEvalQ, clusterExport, clusterMap, parApply,
parCapply, parLapply, parLapplyLB, parRapply, parSapply, parSapplyLB
The following objects are masked from ‘package:stats’:
IQR, mad, sd, var, xtabs
The following objects are masked from ‘package:base’:
anyDuplicated, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval,
evalq, Filter, Find, get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget, order,
paste, pmax, pmax.int, pmin, pmin.int, Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
table, tapply, union, unique, unsplit, which, which.max, which.min
Loading required package: Biostrings
Loading required package: S4Vectors
Loading required package: stats4
Attaching package: ‘S4Vectors’
The following object is masked from ‘package:base’:
expand.grid
Loading required package: IRanges
Loading required package: XVector
Attaching package: ‘Biostrings’
The following object is masked from ‘package:base’:
strsplit
> library(BSgenome.Hsapiens.UCSC.hg19)
Loading required package: BSgenome
Loading required package: GenomeInfoDb
Loading required package: GenomicRanges
Loading required package: rtracklayer
> library(TxDb.Hsapiens.UCSC.hg19.knownGene)
Loading required package: GenomicFeatures
Loading required package: AnnotationDbi
Loading required package: Biobase
Welcome to Bioconductor
Vignettes contain introductory material; view with 'browseVignettes()'. To cite Bioconductor, see
'citation("Biobase")', and for packages 'citation("pkgname")'.
> library(org.Hs.eg.db)
>
> offTargetAnalysis(inputFilePath = "test.fasta",
+ format = "fasta",
+ header = FALSE,
+ exportAllgRNAs = "fasta",
+ findgRNAs = TRUE,
+ findgRNAsWithREcutOnly = FALSE,
+ findPairedgRNAOnly = FALSE,
+ annotatePaired = FALSE,
+ annotateExon = TRUE,
+ scoring.method = "CFDscore",
+ min.score = 0,
+ topN = 100,
+ topN.OfftargetTotalScore = 10,
+ calculategRNAefficacyForOfftargets = T,
+ PAM = "NGG",
+ PAM.pattern = "NNG$|NGN$",
+ PAM.location = "3prime",
+ allowed.mismatch.PAM = 1,
+ PAM.size = 3,
+ gRNA.size = 20,
+ baseBeforegRNA = 4,
+ baseAfterPAM = 3,
+ rule.set = "Root_RuleSet2_2016",
+ chromToSearch = c("chr21"),
+ max.mismatch = 4,
+ BSgenomeName = BSgenome.Hsapiens.UCSC.hg19,
+ txdb = TxDb.Hsapiens.UCSC.hg19.knownGene,
+ orgAnn = org.Hs.egSYMBOL,
+ enable.multicore = TRUE,
+ n.cores.max = 1,
+ outputDir = ".",
+ overwrite = T)
Validating input ...
Searching for gRNAs ...
No gRNAs found in the input sequence KLHL17_chr1_896281_896380>>> Finding all hits in sequence chr21 ...
>>> DONE searching
Building feature vectors for scoring ...
Calculating scores ...
Annotating, filtering and generating reports ...
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Calculates on-target scores for sgRNAs with NGG PAM only.
Error in data.frame(..., check.names = FALSE) :
arguments imply differing number of rows: 34, 4
And it only outputs the sgRNA found:
>KLHL17_chr1_896331_896430_gR63f
AACGAGAAAGCAGCTAAAGACGG
and OfftargetAnalysis.xls (with no efficacy score)
name gRNAPlusPAM OffTargetSequence score n.mismatch mismatch.distance2PAM alignment NGG forViewInUCSC strand chrom chromStart chromEnd
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AATGAGCCAGCAGCTAAAAAAGG 0.092076 4 18,14,13,2 ..T...CC..........A. 1 chr21:25307616-25307638 - chr21 25307616 25307638
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AACGAAAAAGCAGCTATACACGG 0.05977 3 15,4,2 .....A..........T.C. 1 chr21:39939357-39939379 + chr21 39939357 39939379
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAGGAGAAAGAAACAAAAGAGAG 0.051957 4 18,10,8,6 ..G.......A.A.A..... 0 chr21:39091970-39091992 - chr21 39091970 39091992
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAAGAGCAAGCATCTAGAGAAGG 0.020074 4 18,14,8,4 ..A...C.....T...G... 1 chr21:41140041-41140063 + chr21 41140041 41140063
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAAGAGAAAGAAGCTAAGTAGCG 0.018571 4 18,10,3,2 ..A.......A......GT. 0 chr21:18242330-18242352 + chr21 18242330 18242352
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAGGAGAAAGCAGCTAACTAAAG 0.016461 3 18,3,2 ..G..............CT. 0 chr21:14410985-14411007 - chr21 14410985 14411007
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAGGAGAATGCAGATAATGACAG 0.013611 4 18,12,7,3 ..G.....T....A...T.. 0 chr21:18794777-18794799 + chr21 18794777 18794799
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AACAAGAAAGAAGATAAAGGAGA 0.012546 4 17,10,7,1 ...A......A..A.....G 0 chr21:46790270-46790292 - chr21 46790270 46790292
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAGAAGAAAGCAACAAAAGACTG 0.00937 4 18,17,8,6 ..GA........A.A..... 0 chr21:9769132-9769154 + chr21 9769132 9769154
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG TAAGAGAAAGCAGCAGAAGAAGA 0.006701 4 20,18,6,5 T.A...........AG.... 0 chr21:23268009-23268031 - chr21 23268009 23268031
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG CATGAGAAAACAGCCAAAGAGTG 0.005844 4 20,18,11,6 C.T......A....C..... 0 chr21:43529622-43529644 - chr21 43529622 43529644
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AACAATAAAGCAGATAAAAAATG 0.005844 4 17,15,7,2 ...A.T.......A....A. 0 chr21:15705064-15705086 - chr21 15705064 15705086
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AATGAGAAAGAAGCAAAAGCGGA 0.004711 4 18,10,6,1 ..T.......A...A....C 0 chr21:31861017-31861039 + chr21 31861017 31861039
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAGCAGTAAGCAGCAAAAGATGA 0.004656 4 18,17,14,6 ..GC..T.......A..... 0 chr21:24682610-24682632 - chr21 24682610 24682632
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAGGAGAAAGTAGTAAAAGAGGA 0.004536 4 18,10,7,6 ..G.......T..TA..... 0 chr21:14793644-14793666 - chr21 14793644 14793666
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AACAAGAAAAGAGCTAAAAAAGC 0.003333 4 17,11,10,2 ...A.....AG.......A. 0 chr21:42374829-42374851 - chr21 42374829 42374851
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAGGCGAAAGCAGCTAATTACTG 0.003247 4 18,16,3,2 ..G.C............TT. 0 chr21:34287796-34287818 + chr21 34287796 34287818
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG ATCCAGAAAGGAGCTAAACAGGA 0.002996 4 19,17,10,2 .T.C......G.......C. 0 chr21:24218022-24218044 - chr21 24218022 24218044
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAAGAGAAAGAAGAGAAAGAAAG 0.002949 4 18,10,7,6 ..A.......A..AG..... 0 chr21:19738033-19738055 - chr21 19738033 19738055
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAAGAGAAAGCAGATGGAGACAG 0.002669 4 18,7,5,4 ..A..........A.GG... 0 chr21:36289576-36289598 + chr21 36289576 36289598
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AACGAGGAAGCAGAGACAGAAGG 0.00218 4 14,7,6,4 ......G......AG.C... 1 chr21:47533895-47533917 - chr21 47533895 47533917
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AACCAAAAAAGAGCTAAAGATGT 0.001992 4 17,15,11,10 ...C.A...AG......... 0 chr21:17712259-17712281 - chr21 17712259 17712281
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAGGAGAAAATAGCAAAAGATGC 0.001847 4 18,11,10,6 ..G......AT...A..... 0 chr21:24503199-24503221 - chr21 24503199 24503221
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG GACTAGAAACCAGCTAGAGATGA 0.001783 4 20,17,11,4 G..T.....C......G... 0 chr21:31991329-31991351 - chr21 31991329 31991351
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAAGAGAAAGCATCAAAATATGT 0.001619 4 18,8,6,2 ..A.........T.A...T. 0 chr21:28564510-28564532 + chr21 28564510 28564532
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AACGCAAAAGCAGCTATAGCTGA 0.001052 4 16,15,4,1 ....CA..........T..C 0 chr21:17150758-17150780 + chr21 17150758 17150780
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AACAAGAAACCAGCTAGAGATGT 0.001025 3 17,11,4 ...A.....C......G... 0 chr21:46170832-46170854 - chr21 46170832 46170854
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAAGAGAAAGAAGAGAAAGAAGA 0.00079 4 18,10,7,6 ..A.......A..AG..... 0 chr21:32995118-32995140 - chr21 32995118 32995140
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG GACGAGGAAGCAGCCAGAGAGGC 0.000755 4 20,14,6,4 G.....G.......C.G... 0 chr21:27281134-27281156 + chr21 27281134 27281156
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAAGAGAAAGCAGAAAGAGAAGT 0.0005 4 18,7,6,4 ..A..........AA.G... 0 chr21:14756568-14756590 - chr21 14756568 14756590
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAACAGAAAGCAGCTGGAGAGGC 0.000346 4 18,17,5,4 ..AC...........GG... 0 chr21:40940321-40940343 - chr21 40940321 40940343
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG ATCGAGAAAGAAGGCAAAGACCG 0 4 19,10,7,6 .T........A..GC..... 0 chr21:14733282-14733304 + chr21 14733282 14733304
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AAAGAGAAAGCAATTTAAGAGTG 0 4 18,8,7,5 ..A.........AT.T.... 0 chr21:26276443-26276465 + chr21 26276443 26276465
KLHL17_chr1_896331_896430_gR63f AACGAGAAAGCAGCTAAAGANGG AATGAGCAAGCAGCTCAGGACTG 0 4 18,14,5,3 ..T...C........C.G.. 0 chr21:44004290-44004312 - chr21 44004290 44004312
But it does not create a Summary.xls.
I've noticed that there are 34 off-targets in OfftargetAnalysis.xls, while the log stops after printing "Calculates on-target scores for sgRNAs with NGG PAM only." 30 times. The error is "differing number of rows: 34, 4", so maybe that is a hint: 34 matches the total number of off-targets, and 4 matches the number of off-targets whose NGG column is 1.
I'm using Python 2.7.5. The same error occurs with both R 3.6.0 and R 4.1.1.
Second question below
Also, if I choose to not calculate the on-target efficacy for offtarget sequences, but still calculate it for the actual gRNAs (I think this is what the following command does), i.e.
and even if test.fasta consists of a real sequence with potential gRNAs:
it identifies the gRNAs, and creates both the OfftargetAnalysis.xls and Summary.xls with no error returned, but the on.target info is empty:
and the Summary.xls does not have the efficacy score (or efficiency) column for the found gRNAs, and it even says "perfect match not found" for all of them, while they all come from a real sequence:
Thanks a lot for your help,
Miguel