Hi all!
I'm new to R and am currently working on data from ZebGene-1_0-st arrays. However, I am having problems with the annotation: firstly, there is no annotation package for this array in Bioconductor, and secondly, the sample workflow that I found for the array does not pass the sanity check (the `identical()` comparison between the annotation probe IDs and my array's probe IDs does not return TRUE). I realised that the workflow below does not correctly extract and reorder the annotations to match my probes. Any advice on how to overcome this problem would help! Thank you in advance :)
Workflow:
# Build a per-feature annotation table (`gene.annots`) for the array from the
# Affymetrix transcript annotation CSV.
#
# Requires in scope: `metaDir` (directory holding the CSV), `affyNorm.batch`
# (normalized data whose fData() row names are the array feature IDs), and the
# helpers `col2rownames()` / `rownames2col()`.
# NOTE(review): the two helpers are not defined here; col2rownames() is assumed
# to copy the named column into the row names while keeping the column, and
# rownames2col() to add the row names as a column of the given name -- confirm.

# Import the annotations --------------------------------------------------
# Lines starting with "#" in the CSV are skipped; "---" is read as NA.
dat <- read.csv(
  file.path(metaDir, "ZebGene-1_0-st-v1.na33.3.zv9.transcript.csv"),
  comment.char = "#",
  stringsAsFactors = FALSE,
  na.strings = "---"
)
dat <- col2rownames(dat, "probeset_id")

# Extract and reorder to match the array features --------------------------
# match() is used instead of dat[feature_ids, ] because character row indexing
# of a data.frame partially matches row names and labels unmatched rows
# "NA", "NA.1", ..., so the table can silently fall out of step with the array.
feature_ids <- as.character(row.names(fData(affyNorm.batch)))
row_idx <- match(feature_ids, row.names(dat))

missing_ids <- feature_ids[is.na(row_idx)]
if (length(missing_ids) > 0) {
  # NOTE(review): many missing IDs may mean the annotation file does not
  # correspond to the level at which the array was summarized -- confirm.
  warning(
    length(missing_ids), " of ", length(feature_ids),
    " array features have no row in the annotation file (first few: ",
    paste(head(missing_ids), collapse = ", "), "); ",
    "their annotations will be NA.",
    call. = FALSE
  )
}

annot_cols <- c(
  "probeset_id", "seqname", "strand", "start", "stop",
  "gene_assignment", "mrna_assignment"
)
dat <- dat[row_idx, annot_cols, drop = FALSE]
# Label every row (annotated or not) with the array feature ID it belongs to,
# so the row order and row names are exactly those of the array.
rownames(dat) <- feature_ids
dat <- as.matrix(dat, rownames.force = TRUE)

# Parse mrna_assignments ----------------------------------------------------
# Records are separated by " /// " and fields within a record by " // ".
# The first field of the first record is taken as the best mRNA; the first
# fields of all records are collapsed into a comma-separated list.
headercol <- "mrna_assignment"
mrnas <- t(vapply(
  strsplit(dat[, headercol], " /// ", fixed = TRUE),
  function(x) {
    dat.probe.df <- do.call(rbind, strsplit(x, " // ", fixed = TRUE))
    bestrna <- dat.probe.df[1, 1]
    rnas <- paste(dat.probe.df[, 1], collapse = ",")
    c(bestrna, rnas)
  },
  character(2)
))
mrnas <- as.data.frame(mrnas, stringsAsFactors = FALSE)
names(mrnas) <- c("best.mrna", "mrnas")

# Parse gene assignments ----------------------------------------------------
# Each record is expected to have five " // "-separated fields; the output per
# feature is the first field of the first record followed by the unique values
# of each field collapsed with ",". vapply() enforces the length of six so an
# unexpected record layout fails here rather than corrupting the table.
headercol <- "gene_assignment"
genes <- t(vapply(
  strsplit(dat[, headercol], " /// ", fixed = TRUE),
  function(x) {
    if (is.na(x[1])) {
      # Features without a gene assignment are filled with the literal string
      # "NA", as in the original workflow.
      out <- rep("NA", 6)
    } else {
      dat.probe.mat <- as.matrix(
        do.call(rbind, strsplit(x, " // ", fixed = TRUE))
      )
      bestgene <- as.character(dat.probe.mat[1, 1])
      dat.probe.vec <- apply(dat.probe.mat, 2, function(y) {
        paste(unique(y), collapse = ",")
      })
      out <- as.character(c(bestgene, dat.probe.vec))
    }
    out
  },
  character(6)
))
# Column 5 (the fourth field of the gene records) is not kept.
# drop = FALSE keeps a one-feature result as a matrix.
genes <- as.data.frame(
  genes[, c(1, 2, 3, 4, 6), drop = FALSE],
  stringsAsFactors = FALSE
)
names(genes) <- c(
  "bestgene", "accessions", "symbols", "descriptions", "entrezIDs"
)

# Sanity check: annotation rows are in exactly the same order as the array
# features.
stopifnot(
  identical(rownames(genes), feature_ids),
  identical(rownames(mrnas), feature_ids)
)
genes <- rownames2col(genes, "probeids")

# Combine mrna and gene assignments -----------------------------------------
gene.annots <- cbind(genes, mrnas)