I have not use R in a while and am trying to get a heatmap for a study we are doing. But I a encountering a problem that have not been able to overcome. I know it is a silly question but can figure out. I guess am a little rusty.
I am using the following data set from GEO GSE18817
And this is my complete code:
library("affy")
library("limma")
library("made4")
library("marray")
library("Heatplus")
logData<- read.table(dataFileName0, sep="\t", header = TRUE, skip = 0, na.strings = "NA", blank.lines.skip = FALSE, stringsAsFactors = default.stringsAsFactors())
dim(logData)
GeneID <- logData[,1]
GeneName <- logData[,3]
GeneData <- log2(logData[,4:dim(logData)[2]])
dim(GeneData)
boxplot(GeneData)
apropos("quantile")
Normalized<-normalizeQuantiles(GeneData)
boxplot(Normalized)
type1 <- c("NFE2L1","TRAM1","JUNB","RASSF4","SOCS1","PTPRF","SPSB3","ADAM8","C2","NPAS1","PLXND1","ZNF646","DENND3","PPARD")
type2<- c("HLA-F","HLA-E","ICAM1","ZFAND3","TNS4","EXD3","LMNA","BIRC7","FAM65A","SYNPO")
type3 <- c("USP1","HDJC9","PITX2","CCDC99","FAM29A","MCM10","C12orf48","PBK","ECT2","MSH2","DHFR","CCNA2","MAD2L1","KIF11")
type4<- c("AGPAT5","BRIX1","EIF4E","HSPD1","SFRS3","COX11","SEH1L","CPSF6","SETMAR","NIP7","PDSS1") #"RBM15",
#targetGenes <- c(type1,type2,type3,type4)
Agenes <-c(type1,type2)
Bgenes <- c(type3, type4)
targetGenes <- c(Bgenes) #this is for calculating how many probes available
#chosen normalized data for target gene symbs
targetLines <- "NA" #initial
for (i in 1:length(targetGenes)) {
if ((targetGenes[i])%in%(GeneName)){
if (length(which(GeneName == targetGenes[i])) > 0) {
targetLines <- c(targetLines, which(GeneName == targetGenes[i]))
cat(i, " : ", which(GeneName == targetGenes[i]), " --- ", targetGenes[i], "\n")
}
} #else {targetGenes[i] <- "NA"}
}
targetLines <- targetLines[targetLines != "NA"] #remove the "NA" values
numPart1 <- length( targetLines)
targetGenes <- c(Bgenes, Agenes)
#chosen normalized data for target gene symbs
targetLines <- "NA" #initial
for (i in 1:length(targetGenes)) {
if ((targetGenes[i])%in%(GeneName)){
if (length(which(GeneName == targetGenes[i])) > 0) {
targetLines <- c(targetLines, which(GeneName == targetGenes[i]))
cat(i, " : ", which(GeneName == targetGenes[i]), " --- ", targetGenes[i], "\n")
}
} #else {targetGenes[i] <- "NA"}
}
targetLines <- targetLines[targetLines != "NA"]
#targetLines <- #remove the "NA" values
numPart2 <- length( targetLines) - numPart1
#To select those genes for heatmap I do
gene.symbs <- GeneName[as.numeric(targetLines)]
gene.data <- Normalized[targetLines, ]
genevec<- c(rep(numPart1),rep(numPart2) )
gene.class <- colnames(gene.data)
gene.data[ is.na(gene.data) ] <-0 #######Which I think is not working because when I put the function I get:
heatplot(gene.data, labRow = gene.symbs, classvec=genevec,cexRow = 1, cexCol = 1)
Data (original) range: -9.54 0"
[1] "Data (scale) range: NaN NaN"
[1] "Data scaled to range: NaN NaN"
Error in hclust(distf(t(data)), method = method) :
NA/NaN/Inf in foreign function call (arg 11)
I have tried
using
gene.data [!is.Infinite(gene.data)] <- 0 ### and has not worked.
Any suggestions?
thanks
Leon