Entering edit mode
marisa.e.miller
•
0
@marisaemiller-13344
Last seen 6.1 years ago
I am trying to build a custom OrgDb package (using AnnotationForge's makeOrgPackage) for use with clusterProfiler, and I am encountering the following error:
> nc29_genes = read.csv("nc29.genelist.txt", sep="\t", header=TRUE) > nc29_genes$GID <- as.character(nc29_genes$GID) > nc29_go = read.csv("nc29.genes.go.txt", sep="\t", header=TRUE) > nc29_go$GID <- as.character(nc29_go$GID) > makeOrgPackage(gene_info=nc29_genes, go=nc29_go, + version="0.1", + maintainer="Some One <so@someplace.org>", + author="Some One <so@someplace.org>", + outputDir = ".", + tax_id="860303", + genus="Puccinia", + species="coronata_avenae_12NC29", + goTable="go") Populating genes table: genes table filled Populating gene_info table: Error in rsqlite_send_query(conn@ptr, statement) : near "(": syntax error In addition: Warning messages: 1: In rsqlite_fetch(res@ptr, n = n) : Don't need to call dbFetch() for statements, only for queries 2: In rsqlite_fetch(res@ptr, n = n) : Don't need to call dbFetch() for statements, only for queries 3: In rsqlite_fetch(res@ptr, n = n) : Don't need to call dbFetch() for statements, only for queries 4: In rsqlite_fetch(res@ptr, n = n) : Don't need to call dbFetch() for statements, only for queries 5: Factors converted to character 6: In rsqlite_fetch(res@ptr, n = n) : Don't need to call dbFetch() for statements, only for queries 7: In rsqlite_fetch(res@ptr, n = n) : Don't need to call dbFetch() for statements, only for queries 8: In rsqlite_fetch(res@ptr, n = n) : Don't need to call dbFetch() for statements, only for queries
Here are the input dataframes:
> head(nc29_genes) GID 1 PCANC_00210 2 PCANC_00211 3 PCANC_00212 4 PCANC_00213 5 PCANC_00214 6 PCANC_00215 > head(nc29_go) GID GO 1 PCANC_19536 GO:0006418 2 PCANC_19536 GO:0000166 3 PCANC_19536 GO:0004812 4 PCANC_19536 GO:0005524 5 PCANC_19536 GO:0005737 6 PCANC_19536 GO:0004815
Here is the traceback:
> traceback() 23: stop(list(message = "near \"(\": syntax error", call = rsqlite_send_query(conn@ptr, statement), cppstack = list(file = "", line = -1L, stack = c("1 RSQLite.so 0x000000010d29566c _ZN4Rcpp9exceptionC2EPKcb + 188", "2 RSQLite.so 0x000000010d2953e0 _ZN4Rcpp4stopERKNSt3__112basic_stringIcNS0_11char_traitsIcEENS0_9allocatorIcEEEE + 48", "3 RSQLite.so 0x000000010d2a5a04 _ZN16SqliteResultImpl22raise_sqlite_exceptionEP7sqlite3 + 68", "4 RSQLite.so 0x000000010d2a5286 _ZN16SqliteResultImplC2EP7sqlite3RKNSt3__112basic_stringIcNS2_11char_traitsIcEENS2_9allocatorIcEEEE + 454", "5 RSQLite.so 0x000000010d2a472e _ZN12SqliteResultC2ERKN5boost10shared_ptrI16SqliteConnectionEERKNSt3__112basic_stringIcNS6_11char_traitsIcEENS6_9allocatorIcEEEE + 78", "6 RSQLite.so 0x000000010d2a8bbc _Z18rsqlite_send_queryRKN4Rcpp4XPtrIN5boost10shared_ptrI16SqliteConnectionEENS_15PreserveStorageEXadL_ZNS_25standard_delete_finalizerIS4_EEvPT_EELb0EEERKNSt3__112basic_stringIcNSC_11char_traitsIcEENSC_9allocatorIcEEEE + 60", "7 RSQLite.so 0x000000010d299801 RSQLite_rsqlite_send_query + 193", this continues until line 99 but I edited it to fit length guidelines... )))) 22: .Call(RSQLite_rsqlite_send_query, con, sql) 21: rsqlite_send_query(conn@ptr, statement) 20: initialize(value, ...) 19: initialize(value, ...) 18: new("SQLiteResult", sql = statement, ptr = rsqlite_send_query(conn@ptr, statement), conn = conn) 17: .local(conn, statement, ...) 16: dbSendQuery(conn, statement, ...) 15: dbSendQuery(conn, statement, ...) 14: .local(conn, statement, ...) 
13: dbGetQuery(con, sql) 12: dbGetQuery(con, sql) 11: .makeEmptySimpleTable(con, table, tableFieldLines) 10: (function (data, table, con, fieldNameLens = 25) { indFields <- c(names(data)[!(names(data) %in% "GID")], "_id") message(paste("Populating", table, "table:")) tableFieldLines <- paste(paste(names(data)[-1], " VARCHAR(", fieldNameLens, ") NOT NULL, -- data"), collapse = "\n ") if (dim(data)[1] == 0) { warning(paste("no values found for table ", table, " in this data chunk.", sep = "")) .makeEmptySimpleTable(con, table, tableFieldLines) return() } else { dbWriteTable(con, "temp", data, row.names = FALSE) .makeEmptySimpleTable(con, table, tableFieldLines) selFieldLines <- paste(paste("t.", names(data)[-1], sep = ""), collapse = ",") sql <- paste0("\n INSERT INTO ", table, "\n SELECT g._id as _id, ", selFieldLines, "\n FROM genes AS g, temp AS t\n WHERE g.GID=t.GID\n ORDER BY g._id;") dbGetQuery(con, sql) for (i in seq_len(length(indFields))) { dbGetQuery(con, paste0("CREATE INDEX IF NOT EXISTS ", table, "_", indFields[i], "_ind ON ", table, " (", indFields[i], ");")) } dbGetQuery(con, "DROP TABLE temp;") } message(paste(table, "table filled")) })(dots[[1L]][[1L]], dots[[2L]][[1L]], con = <S4 object of class "SQLiteConnection">) 9: .Method(..., FUN = FUN, MoreArgs = MoreArgs, SIMPLIFY = SIMPLIFY, USE.NAMES = USE.NAMES) 8: eval(.dotsCall, env) 7: eval(.dotsCall, env) 6: eval(.dotsCall, env) 5: standardGeneric("mapply") 4: mapply(FUN = .makeTable, data, names(data), MoreArgs = list(con = con)) 3: makeOrgDbFromDataFrames(data, tax_id, genus, species, dbFileName, goTable) 2: .makeOrgPackage(data, version = version, maintainer = maintainer, author = author, outputDir = outputDir, tax_id = tax_id, genus = genus, species = species, goTable = goTable, verbose = verbose) 1: makeOrgPackage(gene_info = nc29_genes, go = nc29_go, version = "0.1", maintainer = "Some One <so@someplace.org>", author = "Some One <so@someplace.org>", outputDir = ".", tax_id = "860303", genus = 
"Puccinia", species = "coronata_avenae_12NC29", goTable = "go")
And here is the sessionInfo:
R version 3.4.0 (2017-04-21) Platform: x86_64-apple-darwin15.6.0 (64-bit) Running under: macOS Sierra 10.12.5 Matrix products: default BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib locale: [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8 attached base packages: [1] stats4 parallel stats graphics grDevices utils datasets methods base other attached packages: [1] AnnotationForge_1.18.0 AnnotationDbi_1.38.1 IRanges_2.10.2 S4Vectors_0.14.3 Biobase_2.36.2 [6] BiocGenerics_0.22.0 loaded via a namespace (and not attached): [1] Rcpp_0.12.11 XML_3.98-1.9 digest_0.6.12 bitops_1.0-6 DBI_0.7 RSQLite_2.0 rlang_0.1.1 blob_1.1.0 [9] tools_3.4.0 bit64_0.9-7 RCurl_1.95-4.8 bit_1.1-12 compiler_3.4.0 pkgconfig_2.0.1 memoise_1.1.0 tibble_1.3.3
Any ideas as to what could cause the issue?
Thank you,
Marisa
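I figured out my issues: 1) the GO table needs a column titled "EVIDENCE", and 2) the gene_info table apparently needs at least one column besides GID. With only a GID column, names(data)[-1] is empty, so the generated CREATE TABLE statement has no field name before "VARCHAR(", which would explain the 'near "(": syntax error'. To work around this, I put a duplicate column in the genelist table so that the data frame now has two columns.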
This is the output I saw after running makeOrgPackage and I think it shows a successful DB creation: