## GEOquery supplies getGEO(); biomaRt supplies useEnsembl() and getBM()
> library(GEOquery)
> library(biomaRt)
## connect to the Ensembl Plants mart, using the rice (osativa) gene dataset
> mart <- useEnsembl("plants_mart", "osativa_eg_gene",host = "plants.ensembl.org")
## fetch the GEO series and keep the first element (the ExpressionSet printed below)
> z <- getGEO("GSE3053")[[1]]
<stuff happens>
> z
ExpressionSet (storageMode: lockedEnvironment)
assayData: 57381 features, 11 samples
element names: exprs
protocolData: none
phenoData
sampleNames: GSM67052 GSM67053 ... GSM67062 (11 total)
varLabels: title geo_accession ... data_row_count (31 total)
varMetadata: labelDescription
featureData
featureNames: AFFX-BioB-3_at AFFX-BioB-5_at ... RPTR-Os-XXU09476-1_at
(57381 total)
fvarLabels: ID GB_ACC ... Gene Ontology Molecular Function (16 total)
fvarMetadata: Column Description labelDescription
experimentData: use 'experimentData(object)'
pubMedIds: 16183841
Annotation: GPL2025
## pull annotation from biomaRt, filtering on the Affy rice probeset IDs
> annot <- getBM(attributes = c("affy_rice", "ensembl_gene_id", "entrezgene", "external_gene_name"),
+                filters = "affy_rice", values = featureNames(z), mart = mart)
## line the biomaRt rows up with the ExpressionSet's feature order;
## match() keeps the first hit per probeset, and probesets with no hit become NA rows
> annot2 <- data.frame(PROBEID = featureNames(z),
+                      annot[match(featureNames(z), annot[["affy_rice"]]), ])
> annot2[290:300, ]
PROBEID affy_rice ensembl_gene_id entrezgene
365 Os.10159.1.S1_at Os.10159.1.S1_at Os03g0844100 4334756
109 Os.10160.1.S1_at Os.10160.1.S1_at Os05g0514300 4339307
62 Os.10162.1.S1_at Os.10162.1.S1_at Os01g0384800 4325431
26 Os.10164.1.S1_at Os.10164.1.S1_at Os09g0527900 4347644
283 Os.10166.1.S1_at Os.10166.1.S1_at Os04g0494100 4336265
138 Os.10167.1.S1_at Os.10167.1.S1_at Os02g0779500 NA
140 Os.10167.1.S1_s_at Os.10167.1.S1_s_at Os02g0779500 NA
139 Os.10167.1.S1_x_at Os.10167.1.S1_x_at Os02g0779500 NA
28 Os.10168.1.S1_at Os.10168.1.S1_at Os12g0438000 4352128
208 Os.10169.1.S1_at Os.10169.1.S1_at Os06g0647400 4341667
213 Os.10171.1.S1_at Os.10171.1.S1_at Os03g0103100 4331301
external_gene_name
365 RECEPTOR-LIKE CYTOPLASMIC KINASE 123
109 tubby-like protein 10, tubby-like protein 9, F-box protein 267
62
26 B-box-containing protein 29, DOUBLE B-BOX zinc finger gene 1, DOUBLE B-BOX 1
283 CHITINASE 5
138
140
139
28
208 Lysosomal Pro-x Carboxypeptidase 2, LYSOSOMAL PRO-X CARBOXYPEPTIDASE 2
213 hybrid proline- or glycine-rich protein 3
## keep a copy of the existing feature data (as returned with the GEO object) in 'fd',
## then overwrite the ExpressionSet's fData with the re-ordered biomaRt annotation
> fd <- fData(z)
> fData(z) <- annot2
Note, however, that the original fData slot (saved in fd above) already
contains useful data:
> fd[290:300,8]
[1] "AK067164.1" "AK061747.1" "AK119688.1" "AK122172.1" "AB096140.1"
[6] "AK063877.1" "AK063877.1" "AK063877.1" "AK071511.1" "AK068457.1"
[11] "AY466108.1"
These accession numbers may be useful as keys for looking up further annotation:
## search AnnotationHub for OrgDb resources whose metadata mention "sativa"
> library(AnnotationHub)
> hub <- AnnotationHub()
> query(hub, pattern = c("sativa", "OrgDb"))
title
AH66184 | org.Camelina_sativa.eg.sqlite
AH66238 | org.Lactuca_sativa.eg.sqlite
AH66306 | org.Oryza_sativa_(japonica_cultivar-group).eg.sqlite
AH66307 | org.Oryza_sativa_Japonica_Group.eg.sqlite
AH66308 | org.Oryza_sativa_subsp._japonica.eg.sqlite
## retrieve the chosen OrgDb resource from the hub
> zz <- hub[["AH66307"]]
## column 8 of the original feature data holds the accession IDs shown above
> ids <- fd[[8]]
## map those accessions (keytype ACCNUM) to Entrez ID, symbol and gene name
> annot3 <- select(zz, keys = ids, columns = c("ENTREZID", "SYMBOL", "GENENAME"),
+                  keytype = "ACCNUM")
'select()' returned many:1 mapping between keys and columns
> annot3[290:300,]
ACCNUM ENTREZID SYMBOL GENENAME
290 AK067164.1 4334756 LOC4334756 PTI1-like tyrosine-protein kinase 1
291 AK061747.1 4339307 LOC4339307 tubby-like F-box protein 9
292 AK119688.1 4325431 LOC4325431 uncharacterized LOC4325431
293 AK122172.1 4347644 LOC4347644 B-box zinc finger protein 18
294 AB096140.1 4336265 LOC4336265 chitinase 5-like
295 AK063877.1 <NA> <NA> <NA>
296 AK063877.1 <NA> <NA> <NA>
297 AK063877.1 <NA> <NA> <NA>
298 AK071511.1 4352128 LOC4352128 probable histone H2A.7
299 AK068457.1 4341667 LOC4341667 lysosomal Pro-X carboxypeptidase
300 AY466108.1 4331301 LOC4331301 cortical cell-delineating protein
Also note that putting the annotation into the fData slot ensures that
limma will include those data when generating topTable output.