Entering edit mode
Seth Falcon
★
7.4k
@seth-falcon-992
Last seen 10.2 years ago
## Count probes mapping to top-node offspring
##
## probes - a character vector of probe IDs
##
## probeType - the annotation package type (no trailing ".db") of the
## probe IDs
##
## ontology - one of "all", "BP", "CC", or "MF".
##
## topNodes - a list named by the GO ontologies ("BP", "CC", "MF")
where
## each element is a character vector of GO IDs to be used as a top-
node
## for that ontology. If not specified, a default set of top-nodes
are
## used consisting of the top-level ontology nodes and their direct
## children.
##
## The return value is a data.frame with columns GO, Count, and
## Ontology. Each row corresponds to a GO top-node with at least one
## probe ID having a mapping to one of the top-nodes offspring.
##
goCompare <- function(probes, probeType,
ontology = c("all", "BP", "CC", "MF"),
topNodes = defaultTopNodes())
{
counters <- makeTopNodeCounters(topNodes)
probeToGOIDs <- probeToGO(probes, probeType, ontology)
for (goids in probeToGOIDs) {
countTopNodes(goids, counters)
}
dfs <- lapply(counters, function(counterEnv) {
counts <- Filter(function(x) x > 0, as.list(counterEnv))
data.frame(GO=names(counts), Count=as.integer(counts),
stringsAsFactors = FALSE)
})
ontCounts <- sapply(dfs, nrow)
ans <- do.call(rbind, dfs)
ans[["Ontology"]] <- rep(names(dfs), ontCounts)
row.names(ans) <- NULL
ans[order(ans[["Count"]], decreasing = TRUE), ]
}
## Return a list named by probes. Each element is a list named by GO
## ontology (CC, BP, MF). Each element will have 1, 2, or 3
components
## depending on how many GO ontologies the probe maps to in the
## annotation and the value of the ontology argument.
##
## Probe IDs that do not map to GO are removed and a warning is
issued.
## If ontology is not all, additional probes may be removed that do
not
## map to the specified GO ontology. An additional warning will be
## issued in this case.
##
probeToGO <- function(probes, probeType = "operon",
ontology = c("all", "BP", "CC", "MF"))
{
if (probeType == "operon") {
stop("operon is not supperted")
}
ontology <- match.arg(ontology)
pkg <- paste(probeType, ".db", sep = "")
ok <- require(pkg, character.only = TRUE, quietly = TRUE)
if (!ok) stop("unable to load ", pkg)
GOenv <- get(paste(probeType, "GO", sep = "")) # specify pkg env?
godata <- mget(probes, env = GOenv)
## remove probe IDs that do not map to GO and warn
nogo <- sapply(godata, function(x) !is.list(x))
if (any(nogo)) {
warning("removing ", sum(nogo),
" probe IDs with no mapping to GO")
godata <- godata[!nogo]
}
## return a list named by probe ID containing ontology-specific
## sublists of GO IDs
ans <- lapply(godata, function(gl) {
ids <- sapply(gl, function(x) x[["GOID"]])
ont <- sapply(gl, function(x) x[["Ontology"]])
if (ontology != "all") {
ontFilter <- ont == ontology
ids <- ids[ontFilter]
ont <- ont[ontFilter]
}
split(as.character(ids), as.character(ont))
})
if (ontology != "all") {
## check for probes that don't map in the selected
## ontology
noids <- sapply(ans, function(x) length(x) == 0)
if (any(noids)) {
warning("removing ", sum(noids),
" probe IDs with no mapping to GO ", ontology)
ans <- ans[!noids]
}
}
ans
}
## Given a list mapping GO ontologies to GO IDs, increment top-node
counts
##
## goids - a list named by one or more GO ontologies (BP, CC, MF) with
## each element being a character vector of GO IDs.
##
## topNodeCounters - a list of environments as returned by
## makeTopNodeCounters.
##
## This function is called for its side-effect of incrementing the
## top-node counters in topNodeCounters. Working separately with each
## ontology given in goids, all ancestors for the set of GO IDs are
## determined using the ontology-specific ancestor map, for example,
## GOBPANCESTOR. For each unique GO ID that is an ancestor and a
## top-node, corresponding entry in topNodeCounters is incremented by
## one.
##
countTopNodes <- function(goids, topNodeCounters)
{
for (ont in names(goids)) {
ancestors <- get(paste("GO", ont, "ANCESTOR", sep = ""))
ids <- goids[[ont]]
counters <- topNodeCounters[[ont]]
allParents <- unique(unlist(mget(ids, ancestors)))
for (p in allParents) {
v <- counters[[p]]
if (!is.null(v)) counters[[p]] <- v + 1L
}
}
}
## return a three element list named by GO ontology
## each element is an environment keyed by top-node GO ID
## initialized to zero count.
##
## The list of environments is used for counting occurances of probe
IDs
## that map to GO IDs that are offspring of the top-nodes that appear
as
## keys in the environments.
##
## nodes - a three element list named by GO ontologies BP, CC, MF.
Each
## element should be a character vector of GO IDs to be used as
## top-nodes.
##
makeTopNodeCounters <- function(nodes = defaultTopNodes())
{
ans <- structure(vector(mode = "list", length = length(nodes)),
names = names(nodes))
for (ont in names(nodes)) {
ontEnv <- new.env(parent = emptyenv(), hash = TRUE)
for (goid in nodes[[ont]]) {
ontEnv[[goid]] <- 0L
}
ans[[ont]] <- ontEnv
}
ans
}
## Return default top-nodes
##
## Default top-nodes are the three top-level ontology nodes and their
## direct children.
##
defaultTopNodes <- function()
{
mftop <- "GO:0003674"
cctop <- "GO:0005575"
bptop <- "GO:0008150"
list(MF = c(mftop, get(mftop, env = GOMFCHILDREN)),
CC = c(cctop, get(cctop, env = GOCCCHILDREN)),
BP = c(bptop, get(bptop, env = GOBPCHILDREN)))
}