Entering edit mode
Hello, I am trying to compare the peaks between 2 conditions to see what peaks belongs to which conditions. I have 15791 peaks where around 521 or 521 peaks are common between 2 conditions(S1 and S3) and 15450 are only for condition1. So I want to see which peaks are for condition S1 only and which are common, but the approach im using is not giving 100% correct results, for some peaks its correct for others it's not. I am working in R studio. Anyone have any idea of other approach or how can I adjust my script
# Create GRanges objects for S1 and S3 peaks
gr_S1 <- GRanges(seqnames = S1$seqnames, ranges = IRanges(start = S1$start, end = S1$end))
gr_S3 <- GRanges(seqnames = S3$seqnames, ranges = IRanges(start = S3$start, end = S3$end))
# Find overlaps between S1 and S3
overlap_hits <- findOverlaps(gr_S1, gr_S3)
# Add a status column to Chip_seq_data (assuming it has the S1 peaks)
Chip_seq_finale <- Chip_seq_finale %>%
mutate(status = ifelse(row_number() %in% queryHits(overlap_hits), "common", "S1 only"))
I tried another approach that didn't workout too
# Combine the peak information into a single string for easy comparison
S3$peak_id <- paste(S3$seqnames, S3$start, S3$end, sep = ":")
S1$peak_id <- paste(S1$seqnames, S1$start, S1$end, sep = ":")
# Extract the unique peak_ids from both data frames
all_peaks <- unique(c(S3$peak_id, S1$peak_id))
# Identify common peaks
common_peaks <- intersect(S3$peak_id, S1$peak_id)
# Create a new data frame
peak_status <- data.frame(
peak_id = all_peaks,
status = ifelse(all_peaks %in% common_peaks, "common", "S1 only")
)
# Split the peak_id back into seqnames, start, and end
peak_status <- cbind(
peak_status,
do.call(rbind, strsplit(as.character(peak_status$peak_id), ":"))
)
# Rename the new columns appropriately
colnames(peak_status)[3:5] <- c("seqnames", "start", "end")
# Convert start and end to numeric
peak_status$start <- as.numeric(peak_status$start)
peak_status$end <- as.numeric(peak_status$end)
# Merge additional information from S3 and S1
peak_status <- merge(peak_status, S3_chromhmm, by.x = "peak_id", by.y = "peak_id", all.x = TRUE, suffixes = c("", "_S3"))
peak_status <- merge(peak_status, S1_chromhmm, by.x = "peak_id", by.y = "peak_id", all.x = TRUE, suffixes = c("", "_S1"))
sessionInfo( )
R version 4.1.1 (2021-08-10)
Platform: x86_64-conda-linux-gnu (64-bit)
Running under: Ubuntu 20.04.6 LTS
Matrix products: default
BLAS/LAPACK: /shared/ifbstor1/software/miniconda/envs/r-4.1.1/lib/libopenblasp-r0.3.18.so
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8 LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
attached base packages:
[1] grid stats4 stats graphics grDevices utils datasets methods base
other attached packages:
[1] data.table_1.14.2 reshape2_1.4.4 TxDb.Hsapiens.UCSC.hg19.knownGene_3.2.2
[4] ChIPseeker_1.30.3 ChIPpeakAnno_3.28.0 msigdbr_7.4.1
[7] ggupset_0.3.0 DOSE_3.20.1 RColorBrewer_1.1-3
[10] fgsea_1.20.0 org.Hs.eg.db_3.14.0 enrichplot_1.14.2
[13] clusterProfiler_4.2.2 pheatmap_1.0.12 VennDiagram_1.7.3
[16] futile.logger_1.4.3 enrichR_3.2 rtracklayer_1.54.0
[19] TxDb.Hsapiens.UCSC.hg38.knownGene_3.14.0 GenomicFeatures_1.46.1 AnnotationDbi_1.56.1
[22] DiffBind_3.4.11 BiocParallel_1.28.3 BiocManager_1.30.25
[25] openxlsx_4.2.7.1 readxl_1.4.3 EnhancedVolcano_1.13.2
[28] ggrepel_0.9.6 magrittr_2.0.3 lubridate_1.9.3
[31] forcats_1.0.0 stringr_1.5.1 dplyr_1.1.4
[34] purrr_1.0.2 readr_2.1.5 tidyr_1.3.1
[37] tibble_3.2.1 tidyverse_2.0.0 DESeq2_1.34.0
[40] SummarizedExperiment_1.24.0 MatrixGenerics_1.6.0 matrixStats_0.62.0
[43] GenomicRanges_1.46.1 GenomeInfoDb_1.30.1 IRanges_2.28.0
[46] S4Vectors_0.32.4 Biobase_2.54.0 BiocGenerics_0.40.0
[49] limma_3.50.3 ggplot2_3.5.1
loaded via a namespace (and not attached):
[1] rappdirs_0.3.3 coda_0.19-4 bit64_4.0.5 knitr_1.48 irlba_2.3.5.1
[6] DelayedArray_0.20.0 hwriter_1.3.2 KEGGREST_1.34.0 RCurl_1.98-1.9 AnnotationFilter_1.18.0
[11] generics_0.1.3 lambda.r_1.2.4 RSQLite_2.2.8 shadowtext_0.0.9 bit_4.0.4
[16] tzdb_0.2.0 xml2_1.3.3 viridis_0.6.2 amap_0.8-20 apeglm_1.16.0
[21] xfun_0.48 hms_1.1.3 babelgene_21.4 evaluate_0.17 fansi_1.0.3
[26] restfulr_0.0.13 progress_1.2.2 caTools_1.18.2 dbplyr_2.5.0 igraph_2.0.3
[31] DBI_1.2.3 geneplotter_1.72.0 htmlwidgets_1.5.4 annotate_1.72.0 biomaRt_2.50.0
[36] vctrs_0.6.5 ensembldb_2.18.2 cachem_1.0.6 withr_2.5.0 ggforce_0.3.3
[41] BSgenome_1.62.0 bdsmatrix_1.3-7 GenomicAlignments_1.30.0 treeio_1.18.1 prettyunits_1.1.1
[46] ape_5.5 lazyeval_0.2.2 crayon_1.5.2 genefilter_1.76.0 pkgconfig_2.0.3
[51] tweenr_1.0.2 nlme_3.1-153 ProtGenerics_1.26.0 rlang_1.1.4 lifecycle_1.0.3
[56] downloader_0.4 filelock_1.0.2 BiocFileCache_2.2.0 invgamma_1.1 cellranger_1.1.0
[61] polyclip_1.10-0 graph_1.72.0 Matrix_1.3-4 aplot_0.1.1 ashr_2.2-47
[66] boot_1.3-28 png_0.1-7 viridisLite_0.4.1 rjson_0.2.20 bitops_1.0-7
[71] KernSmooth_2.23-20 Biostrings_2.62.0 blob_1.2.2 mixsqp_0.3-43 SQUAREM_2021.1
[76] qvalue_2.26.0 regioneR_1.26.0 ShortRead_1.52.0 jpeg_0.1-9 gridGraphics_0.5-1
[81] scales_1.3.0 memoise_2.0.1 plyr_1.8.7 gplots_3.2.0 zlibbioc_1.40.0
[86] compiler_4.1.1 scatterpie_0.1.7 BiocIO_1.4.0 bbmle_1.0.24 plotrix_3.8-2
[91] Rsamtools_2.10.0 cli_3.6.3 systemPipeR_2.0.4 XVector_0.34.0 patchwork_1.1.1
[96] formatR_1.11 MASS_7.3-54 tidyselect_1.2.1 stringi_1.7.8 emdbook_1.3.12
[101] yaml_2.3.6 GOSemSim_2.20.0 locfit_1.5-9.4 latticeExtra_0.6-29 fastmatch_1.1-3
[106] tools_4.1.1 timechange_0.3.0 parallel_4.1.1 rstudioapi_0.17.0 gridExtra_2.3
[111] farver_2.1.1 ggraph_2.0.5 digest_0.6.30 Rcpp_1.0.9 WriteXLS_6.7.0
[116] httr_1.4.7 colorspace_2.0-3 XML_3.99-0.9 truncnorm_1.0-8 splines_4.1.1
[121] yulab.utils_0.0.4 RBGL_1.70.0 tidytree_0.3.6 graphlayouts_0.8.0 multtest_2.50.0
[126] ggplotify_0.1.0 xtable_1.8-4 jsonlite_1.8.9 ggtree_3.2.1 futile.options_1.0.1
[131] tidygraph_1.2.0 ggfun_0.0.4 R6_2.5.1 pillar_1.9.0 htmltools_0.5.2
[136] glue_1.6.2 fastmap_1.1.0 GreyListChIP_1.26.0 mvtnorm_1.1-3 utf8_1.2.2
[141] lattice_0.20-45 numDeriv_2016.8-1.1 curl_4.3.3 gtools_3.9.3 zip_2.2.0
[146] GO.db_3.14.0 survival_3.2-13 rmarkdown_2.27 InteractionSet_1.22.0 munsell_0.5.0
[151] DO.db_2.9 GenomeInfoDbData_1.2.7 gtable_0.3.1
`