A priori I don't expect there to be any major differences. At one
point
I made a minor modification to how it dealt with ties. I also changed
the C code that is used when you access quantile normalization code
using normalize.quantiles() which is what
normalize.AffyBatch.quantiles() is calling. This second change was
made
so that the function would handle NA values.
rma() etc still call the older standard code, though this has also
been
adjusted slightly for the ties behavior meaning that there may be
small
differences in RMA expression values etc.
The ties behavior affects situations where the number of values tied
is
even (odd numbers of ties were and remain handled in the same manner
that they always have been). In the raw CEL file values you will often
find large numbers of ties, thus this is what I suspect is driving
your
result.
Best,
Ben
Short examples
####(using R-2.7 devel etc)
> Toy.matrix <-cbind(c(1,2,3,3,3,3,7,8),1:8)
> normalize.quantiles(Toy.matrix)
[,1] [,2]
[1,] 1.00 1.0
[2,] 2.00 2.0
[3,] 3.75 3.0
[4,] 3.75 3.5
[5,] 3.75 4.0
[6,] 3.75 4.5
[7,] 7.00 7.0
[8,] 8.00 8.0
>
> Toy.matrix2 <-cbind(c(1,2,3,3,3,6,7,8),1:8)
> normalize.quantiles(Toy.matrix2)
[,1] [,2]
[1,] 1.0 1.0
[2,] 2.0 2.0
[3,] 3.5 3.0
[4,] 3.5 3.5
[5,] 3.5 4.0
[6,] 6.0 6.0
[7,] 7.0 7.0
[8,] 8.0 8.0
>
####(using R-2.5.0 etc)
> Toy.matrix <-cbind(c(1,2,3,3,3,3,7,8),1:8)
> normalize.quantiles(Toy.matrix)
[,1] [,2]
[1,] 1.0 1.0
[2,] 2.0 2.0
[3,] 3.5 3.0
[4,] 3.5 3.5
[5,] 3.5 4.0
[6,] 3.5 4.5
[7,] 7.0 7.0
[8,] 8.0 8.0
>
> Toy.matrix2 <-cbind(c(1,2,3,3,3,6,7,8),1:8)
> normalize.quantiles(Toy.matrix2)
[,1] [,2]
[1,] 1.0 1.0
[2,] 2.0 2.0
[3,] 3.5 3.0
[4,] 3.5 3.5
[5,] 3.5 4.0
[6,] 6.0 6.0
[7,] 7.0 7.0
[8,] 8.0 8.0
Everything below can safely be ignored.
Code I used for testing R-2.5.X series set of code:
library(affy)
set.seed(123456789)
Data <- matrix(rnorm(10^7),ncol=10)
Data.norm <- normalize.quantiles(Data)
save(Data,Data.norm,file="old.Rda")
sessionInfo()
library(affydata)
data(Dilution)
Dilution.norm <- normalize.AffyBatch.quantiles(Dilution,
type="pmonly")
Dilution.old <- Dilution
Dilution.pm.old <- pm(Dilution.old)
Dilution.pm.norm.old <- normalize.quantiles(Dilution.pm.old)
save(Dilution.norm, Dilution.old, Dilution.pm.old,
Dilution.pm.norm.old,
file="Dil-old.Rda")
Toy.matrix <-cbind(c(1,2,3,3,3,3,7,8),1:8)
normalize.quantiles(Toy.matrix)
Toy.matrix2 <-cbind(c(1,2,3,3,3,6,7,8),1:8)
normalize.quantiles(Toy.matrix2)
eset.old <- rma(Dilution)
exprvals.old <- exprs(eset.old)
save(exprvals.old,file="ev_old.Rda")
Output from this code:
> library(affy)
Loading required package: Biobase
Loading required package: tools
Welcome to Bioconductor
Vignettes contain introductory material. To view, type
'openVignette()'. To cite Bioconductor, see
'citation("Biobase")' and for packages 'citation(pkgname)'.
Loading required package: affyio
> set.seed(123456789)
> Data <- matrix(rnorm(10^7),ncol=10)
> Data.norm <- normalize.quantiles(Data)
> save(Data,Data.norm,file="old.Rda")
> sessionInfo()
R version 2.5.0 (2007-04-23)
x86_64-unknown-linux-gnu
locale:
LC_CTYPE=en_US.UTF-8;LC_NUMERIC=C;LC_TIME=en_US.UTF-8;LC_COLLATE=en_US
.UTF-8;LC_MONETARY=en_US.UTF-8;LC_MESSAGES=en_US.UTF-8;LC_PAPER=en_US.
UTF-8;LC_NAME=C;LC_ADDRESS=C;LC_TELEPHONE=C;LC_MEASUREMENT=en_US.UTF-8
;LC_IDENTIFICATION=C
attached base packages:
[1] "tools" "stats" "graphics" "grDevices" "utils"
"datasets"
[7] "methods" "base"
other attached packages:
affy affyio Biobase
"1.14.1" "1.4.1" "1.14.0"
> library(affydata)
> data(Dilution)
> Dilution.norm <- normalize.AffyBatch.quantiles(Dilution,
type="pmonly")
> Dilution.old <- Dilution
> Dilution.pm.old <- pm(Dilution.old)
> Dilution.pm.norm.old <- normalize.quantiles(Dilution.pm.old)
> save(Dilution.norm, Dilution.old, Dilution.pm.old,
Dilution.pm.norm.old, file="Dil-old.Rda")
>
> Toy.matrix <-cbind(c(1,2,3,3,3,3,7,8),1:8)
> normalize.quantiles(Toy.matrix)
[,1] [,2]
[1,] 1.0 1.0
[2,] 2.0 2.0
[3,] 3.5 3.0
[4,] 3.5 3.5
[5,] 3.5 4.0
[6,] 3.5 4.5
[7,] 7.0 7.0
[8,] 8.0 8.0
>
> Toy.matrix2 <-cbind(c(1,2,3,3,3,6,7,8),1:8)
> normalize.quantiles(Toy.matrix2)
[,1] [,2]
[1,] 1.0 1.0
[2,] 2.0 2.0
[3,] 3.5 3.0
[4,] 3.5 3.5
[5,] 3.5 4.0
[6,] 6.0 6.0
[7,] 7.0 7.0
[8,] 8.0 8.0
>
> eset.old <- rma(Dilution)
Background correcting
Normalizing
Calculating Expression
> exprvals.old <- exprs(eset.old)
> save(exprvals.old,file="ev_old.Rda")
Code I used for testing R-2.7 series set of code:
library(affy)
set.seed(123456789)
Data.new <- matrix(rnorm(10^7),ncol=10)
Data.norm.new <- normalize.quantiles(Data.new)
load("old.Rda")
sessionInfo()
all.equal(Data.new,Data)
all.equal(Data.norm.new,Data.norm)
library(affydata)
data(Dilution)
Dilution.norm.new <- normalize.AffyBatch.quantiles(Dilution,
type="pmonly")
Dilution.new <- Dilution
Dilution.pm.new <- pm(Dilution.new)
Dilution.pm.norm.new <- normalize.quantiles(Dilution.pm.new)
load("Dil-old.Rda")
all.equal(exprs(Dilution.norm),exprs(Dilution.norm.new))
sum(exprs(Dilution.norm) != exprs(Dilution.norm.new))
all.equal(Dilution.pm.old,Dilution.pm.new)
all.equal(Dilution.pm.norm.old,Dilution.pm.norm.new)
length(Dilution.pm.new[,1])
length(unique(Dilution.pm.new[,1]))
### looking at the relative differences
plot(1/2*log2(Dilution.pm.norm.new*Dilution.pm.norm.old),abs(log2(Dilu
tion.pm.norm.new/Dilution.pm.norm.old)))
### visually seeing just how many ties there really are
par(mfrow=c(2,2))
barplot(table(Dilution.pm.new[,1]))
barplot(table(Dilution.pm.new[,2]))
barplot(table(Dilution.pm.new[,3]))
barplot(table(Dilution.pm.new[,4]))
Toy.matrix <- cbind(c(1,2,3,3,3,3,7,8),1:8)
normalize.quantiles(Toy.matrix)
Toy.matrix2 <-cbind(c(1,2,3,3,3,6,7,8),1:8)
normalize.quantiles(Toy.matrix2)
eset.new <- rma(Dilution)
exprvals.new <- exprs(eset.new)
load("ev_old.Rda")
max(exprvals.old - exprvals.new)
### output
> library(affy)
Loading required package: Biobase
Loading required package: tools
Welcome to Bioconductor
Vignettes contain introductory material. To view, type
'openVignette()'. To cite Bioconductor, see
'citation("Biobase")' and for packages 'citation(pkgname)'.
Loading required package: affyio
Loading required package: preprocessCore
> set.seed(123456789)
> Data.new <- matrix(rnorm(10^7),ncol=10)
> Data.norm.new <- normalize.quantiles(Data.new)
> load("old.Rda")
> sessionInfo()
R version 2.7.0 Under development (unstable) (2008-02-02 r44303)
x86_64-unknown-linux-gnu
locale:
LC_CTYPE=en_US.UTF-8;LC_NUMERIC=C;LC_TIME=en_US.UTF-8;LC_COLLATE=en_US
.UTF-8;LC_MONETARY=en_US.UTF-8;LC_MESSAGES=en_US.UTF-8;LC_PAPER=en_US.
UTF-8;LC_NAME=C;LC_ADDRESS=C;LC_TELEPHONE=C;LC_MEASUREMENT=en_US.UTF-8
;LC_IDENTIFICATION=C
attached base packages:
[1] tools stats graphics grDevices utils datasets
methods
[8] base
other attached packages:
[1] affy_1.17.3 preprocessCore_1.1.5 affyio_1.7.11
[4] Biobase_1.17.9
> all.equal(Data.new,Data)
[1] TRUE
> all.equal(Data.norm.new,Data.norm)
[1] TRUE
> library(affydata)
> data(Dilution)
>
> Dilution.norm.new <- normalize.AffyBatch.quantiles(Dilution,
type="pmonly")
> Dilution.new <- Dilution
> Dilution.pm.new <- pm(Dilution.new)
> Dilution.pm.norm.new <- normalize.quantiles(Dilution.pm.new)
>
>
>
> load("Dil-old.Rda")
> all.equal(exprs(Dilution.norm),exprs(Dilution.norm.new))
[1] "Mean relative difference: 7.334068e-05"
> sum(exprs(Dilution.norm) != exprs(Dilution.norm.new))
[1] 36770
> all.equal(Dilution.pm.old,Dilution.pm.new)
[1] TRUE
> all.equal(Dilution.pm.norm.old,Dilution.pm.norm.new)
[1] "Mean relative difference: 7.334068e-05"
>
> length(Dilution.pm.new[,1])
[1] 201800
> length(unique(Dilution.pm.new[,1]))
[1] 12578
>
> ### looking at the relative differences
>
plot(1/2*log2(Dilution.pm.norm.new*Dilution.pm.norm.old),abs(log2(Dilu
tion.pm.norm.new/Dilution.pm.norm.old)))
>
> ### visually seeing just how many ties there really are
> par(mfrow=c(2,2))
> barplot(table(Dilution.pm.new[,1]))
> barplot(table(Dilution.pm.new[,2]))
> barplot(table(Dilution.pm.new[,3]))
> barplot(table(Dilution.pm.new[,4]))
>
>
> Toy.matrix <-cbind(c(1,2,3,3,3,3,7,8),1:8)
> normalize.quantiles(Toy.matrix)
[,1] [,2]
[1,] 1.00 1.0
[2,] 2.00 2.0
[3,] 3.75 3.0
[4,] 3.75 3.5
[5,] 3.75 4.0
[6,] 3.75 4.5
[7,] 7.00 7.0
[8,] 8.00 8.0
>
> Toy.matrix2 <-cbind(c(1,2,3,3,3,6,7,8),1:8)
> normalize.quantiles(Toy.matrix2)
[,1] [,2]
[1,] 1.0 1.0
[2,] 2.0 2.0
[3,] 3.5 3.0
[4,] 3.5 3.5
[5,] 3.5 4.0
[6,] 6.0 6.0
[7,] 7.0 7.0
[8,] 8.0 8.0
>
> eset.new <- rma(Dilution)
Background correcting
Normalizing
Calculating Expression
> exprvals.new <- exprs(eset.new)
> load("ev_old.Rda")
> max(exprvals.old - exprvals.new)
[1] 0.01004166
On Fri, 2008-02-15 at 12:11 +0100, Markus Schmidberger wrote:
> Hello,
>
> using "normalize.AffyBatch.quantiles" on R-2.5.0 with affy_1.14.1
and on
> R-2.6.0 with affy_1.16.0 generates different results! Is this
correct?
> I think there were only some changes in the package structure
> (normalize.quantile moved to preprocessCore). So there should be no
> differences in the results.
>
> For testing I used the code below.
>
> Best regards
> Markus Schmidberger
>
>
> #####
> library(affy)
> sessionInfo()
>
> affy affyio Biobase
> "1.14.1" "1.4.0" "1.14.0"
>
> pathCELFiles <- "/home/schmidb/tmp/E-TABM-185"
> celFile <- list.celfiles(path=pathCELFiles,full.names=TRUE)[1:5];
> affyBatch <- ReadAffy(filenames=celFile);
> affyBatchALT <- normalize.AffyBatch.quantiles(affyBatch,
type="pmonly")
>
> save(affyBatchALT, file="alt")
>
> #####
> library(affy)
> sessionInfo()
>
> [1] affy_1.16.0 preprocessCore_1.0.0 affyio_1.6.1
> [4] Biobase_1.16.1
>
> pathCELFiles <- "/home/schmidb/tmp/E-TABM-185"
> celFile <- list.celfiles(path=pathCELFiles,full.names=TRUE)[1:5];
> affyBatch <- ReadAffy(filenames=celFile);
> affyBatchNEU <- normalize.AffyBatch.quantiles(affyBatch,
type="pmonly")
>
> save(affyBatchNEU, file="neu")
>
> #####
> load("alt")
> load("neu")
>
> all.equal(exprs(affyBatchALT),exprs(affyBatchNEU))
> "Mean relative difference: 0.0001107451"
>
> _______________________________________________
> Bioconductor mailing list
> Bioconductor at stat.math.ethz.ch
>
https://stat.ethz.ch/mailman/listinfo/bioconductor
> Search the archives:
http://news.gmane.org/gmane.science.biology.informatics.conductor