I know the reason eventually, please see the following script which was used by ChAMP or some other package to read and creat SampleSheet.csv.
read.metharray.sheet()
function (base, pattern = "csv$", ignore.case = TRUE, recursive = TRUE,
verbose = TRUE)
{
readSheet <- function(file) {
dataheader <- grep("^\\[DATA\\]", readLines(file), ignore.case = TRUE)
if (length(dataheader) == 0)
dataheader <- 0
df <- read.csv(file, stringsAsFactor = FALSE, skip = dataheader)
if (length(nam <- grep("Sentrix_Position", names(df),
ignore.case = TRUE, value = TRUE)) == 1) {
df$Array <- as.character(df[, nam])
df[, nam] <- NULL
}
if (length(nam <- grep("Array[\\._]ID", names(df), ignore.case = TRUE,
value = TRUE)) == 1) {
df$Array <- as.character(df[, nam])
df[, nam] <- NULL
}
if (!"Array" %in% names(df))
warning(sprintf("Could not infer array name for file: %s",
file))
if (length(nam <- grep("Sentrix_ID", names(df), ignore.case = TRUE,
value = TRUE)) == 1) {
df$Slide <- as.character(df[, nam])
df[, nam] <- NULL
}
if (length(nam <- grep("Slide[\\._]ID", names(df), ignore.case = TRUE,
value = TRUE)) == 1) {
df$Slide <- as.character(df[, nam])
df[, nam] <- NULL
}
if (!"Slide" %in% names(df))
warning(sprintf("Could not infer slide name for file: %s",
file))
else df[, "Slide"] <- as.character(df[, "Slide"])
if (length(nam <- grep("Plate[\\._]ID", names(df), ignore.case = TRUE,
value = TRUE)) == 1) {
df$Plate <- as.character(df[, nam])
df[, nam] <- NULL
}
for (nam in c("Pool_ID", "Sample_Plate", "Sample_Well")) {
if (nam %in% names(df)) {
df[[nam]] <- as.character(df[[nam]])
}
}
if (!is.null(df$Array)) {
patterns <- sprintf("%s_%s_Grn.idat", df$Slide, df$Array)
allfiles <- list.files(dirname(file), recursive = recursive,
full.names = TRUE)
basenames <- sapply(patterns, function(xx) grep(xx,
allfiles, value = TRUE))
names(basenames) <- NULL
basenames <- sub("_Grn\\.idat", "", basenames, ignore.case = TRUE)
df$Basename <- basenames
}
df
}
if (!all(file.exists(base)))
stop("'base' does not exists")
info <- file.info(base)
if (!all(info$isdir) && !all(!info$isdir))
stop("'base needs to be either directories or files")
if (all(info$isdir)) {
csvfiles <- list.files(base, recursive = recursive, pattern = pattern,
ignore.case = ignore.case, full.names = TRUE)
if (verbose) {
message("[read.metharray.sheet] Found the following CSV files:\n")
print(csvfiles)
}
}
else csvfiles <- list.files(base, full.names = TRUE)
dfs <- lapply(csvfiles, readSheet)
namesUnion <- Reduce(union, lapply(dfs, names))
df <- do.call(rbind, lapply(dfs, function(df) {
newnames <- setdiff(namesUnion, names(df))
newdf <- matrix(NA, ncol = length(newnames), nrow = nrow(df),
dimnames = list(NULL, newnames))
cbind(df, as.data.frame(newdf))
}))
df
}
Just to add my 2-bit of info in case someone come across this error. I kept getting similar error and when I looked I realized that the filenames were incorrectly rendered because originally I used excel to make my sample sheet. Excel treats the barcode like numbers and thus automatically sets it to scientific however this is a barcode and not number, so make make sure to change it to number with no decimal! works perfectly now after I saved it to csv.
Thanks for the prompt response James!
Sure, I will chose right category while posting next time, thanks for correcting.
Thanks for the pointing the error, yes I checked the data and seems I received incomplete dataset, so wrong Basename column was generated. It removed few samples and it runs fine now.
Hi James,
I am also encountering the same problem but all the IDAT files are available and for some reason it is not recognizing the pair IDAT file. Any thoughts?
thanks!
Cristina