Hello,
I need to remove duplicates in the `animal_id`
column. I tried using tidySummarizedExperiment
to make the code more reproducible. However, `distinct()`
returns only a tibble with a single column, `animal_id`.
I then cannot assign it back to `colData`
because the number of rows is obviously different.
library(TreeSummarizedExperiment)
library(tidySummarizedExperiment)
library(dplyr)
# Simulated inputs for the reprex ----

# Assay: Poisson counts (lambda = 10) laid out as 20 rows x 10 columns.
# The seed is fixed so the matrix is identical on every run.
n_features <- 20
n_samples <- 10
set.seed(42)
assay_data <- matrix(
  rpois(n_features * n_samples, lambda = 10),
  nrow = n_features,
  ncol = n_samples
)

# Sample metadata: one row per assay column. Each of the 5 animal ids appears
# twice, once with each treated/disease value, which is what produces the
# duplicated `animal_id` entries.
animal_id <- factor(rep(seq_len(5), each = 2))
treated <- factor(rep_len(c("yes", "no"), n_samples))
disease <- factor(rep_len(c("disease1", "disease2"), n_samples))
sample_data <- data.frame(
  animal_id = animal_id,
  treated = treated,
  disease = disease
)
# Build the TreeSummarizedExperiment: the count matrix goes in as the single
# assay named "counts", and the sample table becomes the column metadata.
tse <- TreeSummarizedExperiment(
  assays = list(counts = assay_data),
  colData = sample_data
)

# Show the column metadata so the duplicated animal ids are visible.
tse |>
  colData() |>
  print()
# Need to remove animal_id duplicates
#> DataFrame with 10 rows and 3 columns
#> animal_id treated disease
#> <factor> <factor> <factor>
#> 1 1 yes disease1
#> 2 1 no disease2
#> 3 2 yes disease1
#> 4 2 no disease2
#> 5 3 yes disease1
#> 6 3 no disease2
#> 7 4 yes disease1
#> 8 4 no disease2
#> 9 5 yes disease1
#> 10 5 no disease2
# Try to keep one sample per animal_id. The pipeline must start from `tse`,
# the object built earlier in this script; `distinct_tse` is never defined
# here, so the call could not run as written.
tse |>
  distinct(animal_id, .keep_all = TRUE)
#> tidySummarizedExperiment says: Key columns are missing. A data frame is returned for independent data analysis.
#> # A tibble: 5 × 1
#> animal_id
#> <fct>
#> 1 1
#> 2 2
#> 3 3
#> 4 4
#> 5 5
# Record the R version, platform and loaded packages for reproducibility.
sessionInfo()
#> R version 4.3.1 (2023-06-16)
#> Platform: x86_64-pc-linux-gnu (64-bit)
#> Running under: Linux Mint 21.2
#>
#> Matrix products: default
#> BLAS/LAPACK: /usr/lib/x86_64-linux-gnu/openblas-pthread/libopenblasp-r0.3.20.so; LAPACK version 3.10.0
#>
#> locale:
#> [1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
#> [3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
#> [5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
#> [7] LC_PAPER=en_US.UTF-8 LC_NAME=C
#> [9] LC_ADDRESS=C LC_TELEPHONE=C
#> [11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
#>
#> time zone: Europe/Helsinki
#> tzcode source: system (glibc)
#>
#> attached base packages:
#> [1] stats graphics grDevices utils datasets methods base
#>
#> loaded via a namespace (and not attached):
#> [1] digest_0.6.33 fastmap_1.1.1 xfun_0.40 glue_1.6.2
#> [5] knitr_1.44 htmltools_0.5.6 rmarkdown_2.25 lifecycle_1.0.3
#> [9] cli_3.6.1 reprex_2.0.2 withr_2.5.0 compiler_4.3.1
#> [13] rstudioapi_0.15.0 tools_4.3.1 evaluate_0.21 yaml_2.3.7
#> [17] rlang_1.1.1 fs_1.6.3