--Drey_of_Squirrels_842_000
Content-Type: TEXT/plain; charset=us-ascii
Content-MD5: fRwXNAsOzYzHtK+veM9YRw==
I have fixed the bug in the script
(AnnBuilder/inst/scripts/buildDPkg.R) and the
revised version of the script is attached. Make sure pkgPath is a
valid existing
directory.
A bug in makeGOByNum.R was also fixed and the change will be reflected
by
tomorrow.
>From: Reinhold Koch <reinhold.koch@unibas.ch>
>To: bioconductor <bioconductor@stat.math.ethz.ch>
>MIME-version: 1.0
>Content-transfer-encoding: 7bit
>X-Virus-Scanned: by amavisd-milter (
http://amavis.org/)
>X-Virus-Scanned: by amavisd-milter (
http://amavis.org/)
>X-Spam-Status: No, hits=0.5 required=5.0
tests=SPAM_PHRASE_00_01,TO_LOCALPART_EQ_REAL version=2.43
>X-Spam-Level:
>Subject: [BioC] AnnBuilder script buildDPkg.R
>X-BeenThere: bioconductor@stat.math.ethz.ch
>X-Mailman-Version: 2.0.13
>List-Help: <mailto:bioconductor- request@stat.math.ethz.ch?subject="help">
>List-Post: <mailto:bioconductor@stat.math.ethz.ch>
>List-Subscribe:
<http: www.stat.math.ethz.ch="" mailman="" listinfo="" bioconductor="">,
<mailto:bioconductor-request@stat.math.ethz.ch?subject=subscribe>
>List-Id: The Bioconductor Project Mailing List
<bioconductor.stat.math.ethz.ch>
>List-Unsubscribe:
<http: www.stat.math.ethz.ch="" mailman="" listinfo="" bioconductor="">,
<mailto:bioconductor-request@stat.math.ethz.ch?subject=unsubscribe>
>List-Archive: <http: www.stat.math.ethz.ch="" pipermail="" bioconductor=""/>
>Date: Tue, 21 Jan 2003 17:08:52 +0100
>
>Hello,
>
>I tried to update the annotations for Affymetrix' rgu34a chip using
the
>buildDPkg.R. After some modifications I get as far as the flat file
>"rgu34aLL", but then fileToXML bails out with
>
>"Error in lapply(fileRead[, i], doMultValue, multSep, i) :
> subscript out of bounds
>Execution halted"
>
>Has anybody a version around that performs better?
>
>Cheers
>Reinhold Koch
>
>_______________________________________________
>Bioconductor mailing list
>Bioconductor@stat.math.ethz.ch
>
http://www.stat.math.ethz.ch/mailman/listinfo/bioconductor
--Drey_of_Squirrels_842_000
Content-Type: TEXT/plain; name="buildDPkg.R"; charset=us-ascii; x
-unix-mode=0640
Content-Description: buildDPkg.R
Content-MD5: DF87skt6NzDOsWm6gjzsPA==
# A script to build annotation data packages.
# pkgName - a character string for the name of the data package to be
# built (e. g. hgu95a, rgu34a)
# organism - a character string for the name of the organism of
# concern (now can only be "human", "mouse", or "rat")
# pkgPath - a character string for the full path of an existing
# directory where the built backage will be stored
# version - a character string for the version number
#
# Write to jzhang@jimmy.harvard.edu for questions about this script.
buildDPkg <- function(pkgName, organism = "human",
pkgPath = "temp", version = "1.1.0"){
library(AnnBuilder)
path <- .path.package("AnnBuilder")
# Download data files from sources
llSource <- fileFetcher(getSrcUrl(src = "ll", organism = organism))
ugSource <- fileFetcher(getSrcUrl(src = "ug", organism = organism))
if(tolower(organism) == "human" || tolower(organism) == "mouse"){
linkSource <- fileFetcher( paste(getSrcUrl(src = "UCSC", organism
= organism),
"refLink.txt.gz", sep = ""))
geneSource <- fileFetcher(paste(getSrcUrl(src = "UCSC", organism =
organism),
"refGene.txt.gz", sep = ""))
}
goSource <- fileFetcher(getSrcUrl(src = "go", organism = organism))
# Get the unified mapping between a given id to LocusLink id
llParser <- file.path(path, "data", "llAffyLLParser")
ugParser <- file.path(path, "data", "ugAffyLLParser")
umichMapping <- file.path(path, "data", paste(pkgName, "_UMich", sep =
""))
dchipMapping <- file.path(path, "data", paste(pkgName, "_Cheng", sep =
""))
toMap <- list(c(llSource, llParser), c(ugSource, ugParser),
c(umichMapping, NA), c(dchipMapping, NA))
baseF <- file.path(path, "data", paste(pkgName, "id", sep = ""))
unified <- file.path(path, "temp", paste(pkgName, "unified", sep =
""))
unifier(baseFile = baseF, sources = toMap, trusted = 0, sep = "\t",
header = FALSE, baseCols = c("id", "acc"), byID =
"id",
asFile = TRUE, outName = unified, verbose = TRUE)
# Parse the LocusLink data file using the unified mapping and a base
file
ll <- file.path(path, "temp", paste(pkgName, "LL", sep = ""))
llParser <- file.path(path, "data", "llParser")
fileMuncher(outName = ll, baseFile = unified, dataFile =
llSource, parser = llParser, isDir = FALSE)
# Parse chromosomal location and orientation data
if(tolower(organism) == "human" || tolower(organism) == "mouse"){
testLink <- file.path(path, "temp","buildLink")
linkParser <- file.path(path, "data", "refLinkParser")
fileMuncher(outName = testLink, baseFile = unified, dataFile =
linkSource, parser = linkParser, isDir = FALSE)
testGene <- file.path(path, "temp", "buildGene")
geneParser <- file.path(path, "data", "refGeneParser")
fileMuncher(outName = testGene, baseFile = testLink, dataFile =
geneSource, parser = geneParser, isDir = FALSE)
## Some gene location data may end up with no chromosome number if
## only chromosome number data from LocusLink are used. Get the
## from the refGene file also.
geneChr <- file.path(path, "temp", "geneChr")
geneParser <- file.path(path, "data", "refGeneChrParser")
fileMuncher(outName = geneChr, baseFile = testLink, dataFile =
geneSource, parser = geneParser, isDir = FALSE)
# merge the two parsed files
f1Col <- c("affy", "acc", "locusid", "unigeneid", "name",
"symbol",
"chrom", "cyto", "pmid", "grif","sumfun", "go")
f2Col <- c("affy", "chrlocation", "chrorientation")
testmerged <- file.path(path, "temp", paste(pkgName, "Merged", sep
= ""))
mergeFiles(file1 = ll, file2 = testGene, file1Col = f1Col,
file2Col = f2Col, idCol = "affy", outName = testmerged,
sep = "\t", header = FALSE, isFile = TRUE)
}else{
testmerged <- ll
}
pathData <- file.path(path, "temp", "humanPath")
getPathway(file.path(path, "temp", "humanPath"), organism = organism)
mappedPath <- file.path(path, "temp", "buildPath")
mapPathway(mappedPath, geneFile = unified, pathFile = pathData,
geneColNames = c("affy", "acc", "ll", "counts"), pathColNames =
c("ll", "path","enzyme"), mapId = "ll", colToKeep = c("affy", "path",
"enzyme"),
geneSep = "\t", pathSep = "\t")
# Merge pathway data with previously parsed file
if(tolower(organism) == "human" || tolower(organism) == "mouse"){
f1Col <- c("affy", "acc", "locusid", "unigeneid", "name",
"symbol","chrom", "cyto", "pmid", "grif","sumfun",
"go",
"chrolocation","chroorientation")
}else{
f1Col <- c("affy", "acc", "locusid", "unigeneid", "name",
"symbol",
"chrom", "cyto", "pmid", "grif","sumfun", "go")
}
f2Col <- c("affy", "path", "enzyme")
mergeFiles(file1 = testmerged, file2 = mappedPath, file1Col = f1Col,
file2Col = f2Col, idCol = "affy", outName = testmerged,
sep = "\t", header = FALSE, isFile = TRUE)
# Produce the XM file (testXML1) containing gene information
testXML1 <- file.path(path, "temp", paste(pkgName, ".xml", sep = ""))
if(tolower(organism) == "human" || tolower(organism) == "mouse")
colNames <- c("AFFY", "ACCNUM", "LOCUSID", "UNIGENE", "GENENAME",
"SYMBOL","CHR", "MAP", "PMID", "GRIF", "SUMFUNC",
"GO", "CHRLOC", "CHRORI", "PATH", "ENZYME")
else
colNames <- c("AFFY", "ACCNUM", "LOCUSID", "UNIGENE", "GENENAME",
"SYMBOL","CHR", "MAP", "PMID", "GRIF", "SUMFUNC",
"GO", "PATH", "ENZYME")
if(tolower(organism) == "human" || tolower(organism) == "mouse"){
multC <- c("PMID", "GO", "CHRLOC", "CHRORI", "PATH", "ENZYME",
"CHR","MAP")
}else{
multC <- c("PMID", "GO", "PATH", "ENZYME", "CHR","MAP")
}
typeC <- c("GENENAME", "SYMBOL")
multS <- ";"
typeS <- ";"
fileToXML(targetName = pkgName, outName = testXML1, inName
=testmerged,
idColName = "AFFY", multColNames = multC, typeColNames = typeC,
colNames = colNames)
xml2DataPkg(fileName = testXML1, pkgName = pkgName, path = pkgPath,
organism = organism)
# Get the reverse mapping between PubMed, pathway, enzyme and Affy ids
# pmid <- file2Env(testmerged, colNames, keyColName = "PMID",
# valueColName = "AFFY")
pathway <- file2Env(testmerged, colNames, keyColName =
"PATH", valueColName = "AFFY")
enzyme <- file2Env(testmerged, colNames, keyColName =
"ENZYME", valueColName = "AFFY")
# assign(paste(pkgName, "PMID2AFFY", sep = ""), pmid, parent.frame())
assign(paste(pkgName, "PATH2AFFY", sep = ""), pathway, parent.frame())
assign(paste(pkgName, "ENZYME2AFFY", sep = ""), enzyme,
parent.frame())
# makeRdaFile("PMID2AFFY", pkgName, pkgPath, envir = parent.frame())
makeRdaFile("PATH2AFFY", pkgName, pkgPath, envir = parent.frame())
makeRdaFile("ENZYME2AFFY", pkgName, pkgPath, envir = parent.frame())
# Process the GO data file
GOXMLParser(dbName = "annbuilder", tName = "abtestgoorig", fileName =
goSource)
goNAffy <- file.path(path, "temp", "testgoNaffy")
goParser <- file.path(path, "data", "affyGOParser")
fileMuncher(outName = goNAffy, baseFile=unified, dataFile =
llSource, parser = goParser, isDir = FALSE)
go2GeneMapper(dbName = "annbuilder", tName = "abtestgo", fileName =
goNAffy,
goTName = "abtestgoorig", exclude = "GO:0003673")
testXMLByNum <- file.path(path, "temp", paste(pkgName, "ByNum.xml",
sep = ""))
makeGOByNum(dbName = "annbuilder", tName = "abtestgo", outName =
testXMLByNum,tColNames = c("go",
"geneid","genes","total"), fNames =
c("GO","GO2AFFY", "GO2ALLAFFY", "AFFYCOUNTS"),multCol =
c("GO2AFFY","GO2ALLAFFY"))
xml2DataPkg(fileName = testXMLByNum, pkgName = pkgName, path =
pkgPath, organism = organism, rdaOnly = TRUE)
--Drey_of_Squirrels_842_000--