I have been working on getting the differential expression of proteins for 2 groups - control and treatment. Control has 4 animals while treatment has 5 animals. The samples are collected at the baseline time and after 2 hours. The serum was used to do a somascan assay. I have been using the limma model to get the differential expression of proteins but I am constantly getting an error after I apply the limma function. It says this - Error in lmFit(protein_data_log2, design) : row dimension of design doesn't match column dimension of data object after I use this prompt - fit <- lmFit(protein_data_log2, design)
Code should be placed in three backticks as shown below
library(limma)
library(dplyr)
soma_data <- read.csv("20241220.csv")
print(paste("Number of rows in soma_data:", nrow(soma_data)))
print(paste("Number of columns in soma_data:", ncol(soma_data)))
print(head(soma_data))
unique_ids <- make.unique(as.character(soma_data$SampleId))
print(paste("Number of unique_ids:", length(unique_ids)))
protein_cols <- grep("seq\\.", colnames(soma_data))
protein_data <- soma_data[, protein_cols]
print(paste("Dimensions of protein_data (before transpose):", nrow(protein_data), "x", ncol(protein_data)))
rownames(protein_data) <- unique_ids # Use unique_ids here!
protein_data_transposed <- t(protein_data)
print(paste("Dimensions of protein_data_transposed:", nrow(protein_data_transposed), "x", ncol(protein_data_transposed)))
protein_data_log2 <- log2(protein_data_transposed)
print(paste("Dimensions of protein_data_log2:", nrow(protein_data_log2), "x", ncol(protein_data_log2)))
sample_data <- data.frame(unique_id = unique_ids, SampleGroup = soma_data$SampleGroup)
print(paste("Dimensions of sample_data:", nrow(sample_data), "x", ncol(sample_data)))
print(head(sample_data))
ids_after_transpose = colnames(protein_data_log2)
sample_data = sample_data[match(ids_after_transpose, sample_data$unique_id),]
print(paste("Dimensions of sample_data:", nrow(sample_data), "x", ncol(sample_data)))
print(paste("Unique length of data is:", length(ids_after_transpose)))
design <- model.matrix(~0 + factor(sample_data$SampleGroup))
colnames(design) <- levels(factor(sample_data$SampleGroup))
rownames(design) <- ids_after_transpose # Align matrix!
print(paste("Dimensions of protein_data_log2 (rows x cols):", nrow(protein_data_log2), ncol(protein_data_log2)))
print(paste("Dimensions of design (rows x cols):", nrow(design), ncol(design)))
print(paste("Are row names of design and column names of protein_data_log2 equal?", all(rownames(design) == colnames(protein_data_log2))))
print(paste("Dimensions of protein_data_log2:", nrow(protein_data_log2), "rows x", ncol(protein_data_log2), "columns (samples)"))
print(paste("Dimensions of design:", nrow(design), "rows x", ncol(design), "columns (groups)"))
print(paste("Number of unique_ids:", length(unique_ids)))
print(paste("Unique length of data is:", length(ids_after_transpose)))
fit <- lmFit(protein_data_log2, design)
contrast_matrix <- makeContrasts(
GroupAvsGroupB = "Control" - "Disease",
levels = design
)
fit_contrast <- contrasts.fit(fit, contrast_matrix)
fit_bayes <- eBayes(fit_contrast)
results <- topTable(fit_bayes, coef = "GroupAvsGroupB", n = Inf)
head(results)