Entering edit mode
Mayte Suarez-Farinas
▴
300
@mayte-suarez-farinas-694
Last seen 10.2 years ago
Hi Ian,
I have been working with SDM database and I have to say that I found a
lot
of problems.
marray package have a function called read.SMD that you can use and it
skips the metainformation in the header. There is also a function
read.SMD2 that somebody (sorry, I do not remember who) posted here and
it is more flexible (it is attach to this message). One way is to
download the gal file form SMD and the use this functions.
But you should be careful when you use SDM data. It happened
to me that the information contained in the xls data do not match the
gal
file because some spot were not printed, and both read.smd and
read.smd2
assumes a continuity in the layout (they generates the layout using
only
the dimensions of the chips, so you can not have spot (1, 26) and then
(2,1) ). So I modified the read.SMD2 by myself to math the layout
information and the xls data by ID, including missing
rows in the matrix and update the weights matrix. But if you not have
this
problem download the gal file and using read.smd should be enough.
best
Mayte
On Thu, 3 Jun 2004 bioconductor-request@stat.math.ethz.ch wrote:
> Hi All
> I am interested in downloading the Breast cancer data from Zhao et
al from
> SMD. I have used the following two commands before to download SMD
data
> before
>
> Library(limma)
>
> G = read.maimages(dir(pattern='*.xls'), source='smd',
fill=TRUE,sep='\t')
>
>
> RG = read.maimages((dir(pattern='*.xls')),columns=(list(Rf=Ch1
Intensity
> (Mean),Gf=Ch2 Intensity (Mean),Rb=Ch1 Intensity (Median),Gb=Ch2
Intensity
> (Median) )),fill=TRUE)
>
> but when applied to this data I Get the error message
>
> Cannot find column heading in image output file
>
> I have tried specifying the column headings and using the skip
command to
> rectify the problem but I only get the same error message
>
> Has anyone who has had a similar problem downloading data from SMD
share
> their experience and suggest a suitable command.
>
>
> Thanks,
> Ian
>
> Ps Have attached a reduced version of the file type I am having
difficulty
> with.
> Abstract and data for the paper can be found at
> http://genome-www5.stanford.edu/cgi-
bin/publication/viewPublication.pl?pub_n
> o=322
> -------------- next part --------------
> A non-text attachment was scrubbed...
> Name: 18195.xls
> Type: application/vnd.ms-excel
> Size: 20992 bytes
> Desc: not available
> Url : https://www.stat.math.ethz.ch/pipermail/bioconductor/attachmen
ts/20040602/9200fc76/18195-0001.xls
>
> ------------------------------
-------------- next part --------------
read.SMD2 <- function(fnames = NULL, path = ".", name.Gf =
"CH1I_MEAN", name.Gb = "CH1B_MEDIAN", name.Rf = "CH2I_MEAN", name.Rb =
"CH2B_MEDIAN", name.W = NULL, layout = NULL, gnames = NULL,targets =
NULL, notes = NULL, skip = 0, sep = "\t", quote = "", ...) {
if (is.null(fnames))
fnames <- dir(path = path, pattern = paste("*", "xls", sep =
"."))
if (is.null(path))
fullfnames <- fnames
else
fullfnames <- file.path(path, fnames)
print(fullfnames)
y <- readLines(fullfnames[1], n = 100)
skip <- grepname.Gf, y,extended=FALSE)[1] - 1
print(skip)
smdTable <- NULL
if (is.null(layout)) {
cat("Generating layout from ", fnames[1], "\n", sep="")
smdTable <- read.table(fullfnames[1], header=TRUE, sep="\t",
quote = "", skip = skip, comment.char = "")
print(names(smdTable))
#names(smdTable)<-NewNamesTable
numSectors <- max(smdTable$SECTOR)
print(numSectors)
xsize <- max(smdTable$X.COORD) - min(smdTable$X.COORD)
ysize <- max(smdTable$Y.COORD) - min(smdTable$Y.COORD)
maNgr <- round(sqrt(numSectors*ysize/xsize))
maNgc <- round(sqrt(numSectors*xsize/ysize))
if is.na(maNgr)) {
row <- grep("Exptid", y)[1]
exptid <- strsplit(y[row], "=")[[1]][2]
cat("Image:http://genome-www5.stanford.edu/cgi-
bin/SMD/clickable.pl?exptid=", exptid, "\n", sep = "")
options(warn = getOption("warn")-1)
repeat {
cat("Enter number of vertical sectors (",
numSectors," total sectors): ", sep = "")
maNgr <- as.integer(readLines(n = 1))
if (!is.na(maNgr) && maNgr > 0 && maNgr < numSectors
&&
numSectors/maNgr == as.integer(numSectors/maNgr))
break
}
options(warn = getOption("warn")+1)
maNgc <- numSectors / maNgr
}
row <- grep("Rows per Sector", y)[1]
maNsr <- as.integer(strsplit(y[row], "=")[[1]][2])
row <- grep("Columns per Sector", y)[1]
maNsc <- as.integer(strsplit(y[row], "=")[[1]][2])
maNspots <- maNgr * maNgc * maNsr * maNsc
maSub <- rep(FALSE, maNspots)
maSub[smdTable$SPOT] <- TRUE
row <- grep("Printname", y)[1]
printname <- strsplit(y[row], "=")[[1]][2]
row <- grep("Tip Configuration", y)[1]
tipconfig <- strsplit(y[row], "=")[[1]][2]
maNotes <- paste("Print Name: ", printname, "\nTip
Configuration: ", tipconfig, sep = "")
layout <- new("marrayLayout", maNgr = maNgr,
maNgc = maNgc,
maNsr = maNsr, maNsc = maNsc, maNspots =
maNspots,
maSub = maSub, maNotes = maNotes)
}
if (is.null(gnames)) {
cat("Generating probe sequence info from ", fnames[1], "\n",
sep="")
if (is.null(smdTable))
smdTable <- read.table(fullfnames[1], header=TRUE,
sep="\t",quote = "", skip = skip, comment.char = "")
maLabels <- as.character(smdTable$SUID)
cols <- 2:(match("SUID", colnames(smdTable))-1)
maInfo <- smdTable[,cols]
gnames <- new("marrayInfo", maLabels = maLabels, maInfo =
maInfo)
}
if (is.null(targets)) {
cat("Generating target sample info from all files\n")
maLabels <- character(0)
maInfo <- data.frame()
for (i in 1:length(fnames)) {
z <- readLines(fullfnames[i], n = skip)
row <- grep("Exptid", z)[1]
maLabels <- c(maLabels, strsplit(z[row], "=")[[1]][2])
row <- grep("Experiment Name", z)[1]
Experiment <- strsplit(z[row], "=")[[1]][2]
row <- grep("Channel 1 Description", z)[1]
Cy3 <- strsplit(z[row], "=")[[1]][2]
row <- grep("Channel 2 Description", z)[1]
Cy5 <- strsplit(z[row], "=")[[1]][2]
row <- grep("SlideName", z)[1]
SlideName <- strsplit(z[row], "=")[[1]][2]
maInfo <- rbind(maInfo, data.frame(Experiment =
Experiment, Cy3 = Cy3, Cy5 = Cy5, SlideName = SlideName))
}
rownames(maInfo) <- 1:dim(maInfo)[1]
targets <- new("marrayInfo", maLabels = maLabels, maInfo =
maInfo)
}
if (is.null(notes)) {
cat("Generating notes from ", fnames[1], "\n", sep="")
row <- grep("Organism", y)[1]
organism <- strsplit(y[row], "=")[[1]][2]
row <- grep("Category", y)[1]
category <- strsplit(y[row], "=")[[1]][2]
row <- grep("Subcategory", y)[1]
subcategory <- strsplit(y[row], "=")[[1]][2]
row <- grep("Description", y)[1]
description <- strsplit(y[row], "=")[[1]][2]
row <- grep("Experimenter", y)[1]
experimenter <- strsplit(y[row], "=")[[1]][2]
row <- grep("Contact email", y)[1]
email <- strsplit(y[row], "=")[[1]][2]
row <- grep("Scanning Software", y)[1]
software <- strsplit(y[row], "=")[[1]][2]
row <- grep("Software version", y)[1]
version <- strsplit(y[row], "=")[[1]][2]
row <- grep("Scanning parameters", y)[1]
parameters <- strsplit(y[row], "=")[[1]]
if (length(parameters) > 1)
parameters <- paste(parameters[2:length(parameters)],
collapse = ",")
else
parameters <- NA
notes <- paste("Organism: ", organism,
"\nCategory: ", category,
"\nSubcategory: ", subcategory,
"\nDescription: ", description,
"\nExperimenter: ", experimenter,
"\nE-Mail: ", email,
"\nScanning Software: ", software, " ",
version,
"\nScanning Parameters: ", parameters, sep =
"")
}
mraw <- read.marrayRaw(fnames = fnames, path = path, name.Gf =
name.Gf,
name.Gb = name.Gb, name.Rf = name.Rf, name.Rb = name.Rb,
name.W = name.W, layout = layout, gnames = gnames, targets =
targets,
notes = notes, skip = skip, sep = sep, quote = quote,
...)
return(mraw)
}