There you go, I think that it solved the major error, there are a few warnings that need to be checked (I have not the skills to interpret them.
Please share your opinion, I will work my way to the rest of the walkthrough and let you know if it worked.
> bamFilePath<-"/media/trotos/4TB/WORK/archive/fastq_files/bwa_mem_Q30"
> primerFile<-"/media/trotos/4TB/WORK/archive/fastq_files/bwa_mem_Q30/primers"
> metaData19 <- list(projectPath = "/media/trotos/4TB/WORK/archive/fastq_files/bwa_mem_Q30", fragmentDir = "/media/trotos/4TB/WORK/archive/fastq_files/bwa_mem_Q30", referenceGenomeFile=referenceGenomeFile19, reSequence1 = "RAATTY",reSequence2 = "GATC",primerFile = primerFile,bamFilePath = bamFilePath)
> colData <- DataFrame(viewpoint = "Smad4a",condition = factor(rep(c("Q30","Q30_c1","Q30_uniq_srt"),each=2),levels = c("Q30","Q30_c1","Q30_uniq_srt")), replicate=rep(c(1,2),3), bamFile = c("SAMD4a_0min_rep2_mem_sorted_Q30.bam","SAMD4a_0min_rep3_mem_sorted_Q30.bam","SAMD4a_0min_rep2_mem_sorted_Q30_c1.bam","SAMD4a_0min_rep3_mem_sorted_Q30_c1.bam","SAMD4a_0min_rep2_mem_sorted_Q30_uniq_srt.bam","SAMD4a_0min_rep3_mem_sorted_Q30_uniq_srt.bam"),sequencingPrimer="first")
> fc<-FourC(colData,metaData19)
>
> fc<-addFragments(fc)
> rowRanges(fc)
GRanges object with 2935389 ranges and 4 metadata columns:
seqnames ranges strand | leftSize rightSize
<Rle> <IRanges> <Rle> | <numeric> <numeric>
[1] chrM [ 1, 288] * | 0 284
[2] chrM [ 403, 2051] * | 339 819
[3] chrM [2162, 2804] * | 189 450
[4] chrM [2811, 3111] * | 87 43
[5] chrM [3390, 4121] * | 270 424
... ... ... ... ... ... ...
[2935385] chrY [59351339, 59352173] * | 440 253
[2935386] chrY [59352180, 59353908] * | 110 1403
[2935387] chrY [59353915, 59356216] * | 301 1439
[2935388] chrY [59356223, 59360938] * | 763 211
[2935389] chrY [59360996, 59373566] * | 570 11997
leftValid rightValid
<logical> <logical>
[1] 0 1
[2] 1 1
[3] 1 1
[4] 1 1
[5] 1 1
... ... ...
[2935385] 1 1
[2935386] 1 1
[2935387] 1 1
[2935388] 1 1
[2935389] 1 1
-------
seqinfo: 25 sequences from an unspecified genome
> colData(fc18_sam)$chr = "chr14"
Error in colData(fc18_sam)$chr = "chr14" :
object 'fc18_sam' not found
> colData(fc)$chr = "chr14"
> colData(fc)$start =54565826
> colData(fc)$end =54566537
> traceback()
No traceback available
> debug(countFragmentOverlaps)
> countFragmentOverlaps(fc)
debugging in: countFragmentOverlaps(fc)
debug: {
stopifnot(class(object) == "FourC")
if (length(rowRanges(object)) == 0)
stop("Add fragments before calling 'findViewpointFragments'")
cat("reading bam files\n")
bamFiles = file.path(metadata(object)$bamFilePath, colData(object)$bamFile)
colData(object)$originalReads = sapply(bamFiles, function(bamFile) countBam(bamFile)$records)
reads = lapply(bamFiles, function(bamfile) {
what <- c("mapq")
flag <- scanBamFlag(isUnmappedQuery = FALSE)
param <- ScanBamParam(what = what, flag = flag)
readGAlignments(bamfile, param = param)
})
colData(object)$rawReads = sapply(reads, length)
if (minMapq >= 0) {
reads = lapply(reads, function(rga, minMapq) {
if (!any(is.na(mcols(rga)$mapq))) {
return(rga[mcols(rga)$mapq > minMapq])
}
warning("mapq quality filter could not be applied due to NA values in the mapq quality scores. All reads are used for counting overlaps.")
rga
}, minMapq = minMapq)
colData(object)$lowQualityReads = colData(object)$rawReads -
sapply(reads, length)
}
reads = lapply(reads, granges)
if (trim > 0) {
reads = lapply(reads, function(gr, trim) {
start(gr[strand(gr) == "+"]) = start(gr[strand(gr) ==
"+"]) + trim
end(gr[strand(gr) == "-"]) = end(gr[strand(gr) ==
"-"]) - trim
gr
}, trim = trim)
}
cat("calculating overlaps\n")
frag <- rowRanges(object)
strand(frag) <- "+"
countsLeftFragmentEnd <- sapply(reads, countOverlaps, query = frag,
type = c("start"), maxgap = shift)
strand(frag) <- "-"
countsRightFragmentEnd <- sapply(reads, countOverlaps, query = frag,
type = c("end"), maxgap = shift)
colData(object)$mappedReads = apply(countsLeftFragmentEnd,
2, sum) + apply(countsRightFragmentEnd, 2, sum)
colData(object)$mappingRatio = colData(object)$mappedReads/(colData(object)$rawReads -
colData(object)$lowQualityReads)
metaDataFrame <- DataFrame(type = rep("countInfo", 5), description = rep("",
5))
idx <- colnames(colData(object)) %in% c("originalReads",
"rawReads", "lowQualityReads", "mappedReads", "mappingRatio")
mcols(colData(object))[idx, ] <- metaDataFrame
assays(object) <- SimpleList(countsLeftFragmentEnd = countsLeftFragmentEnd,
countsRightFragmentEnd = countsRightFragmentEnd)
assays(object) <- SimpleList(countsLeftFragmentEnd = countsLeftFragmentEnd,
countsRightFragmentEnd = countsRightFragmentEnd)
object
}
Browse[2]>
debug: stopifnot(class(object) == "FourC")
Browse[2]>
debug: if (length(rowRanges(object)) == 0) stop("Add fragments before calling 'findViewpointFragments'")
Browse[2]>
debug: cat("reading bam files\n")
Browse[2]>
reading bam files
debug: bamFiles = file.path(metadata(object)$bamFilePath, colData(object)$bamFile)
Browse[2]>
debug: colData(object)$originalReads = sapply(bamFiles, function(bamFile) countBam(bamFile)$records)
Browse[2]>
debug: reads = lapply(bamFiles, function(bamfile) {
what <- c("mapq")
flag <- scanBamFlag(isUnmappedQuery = FALSE)
param <- ScanBamParam(what = what, flag = flag)
readGAlignments(bamfile, param = param)
})
Browse[2]>
debug: colData(object)$rawReads = sapply(reads, length)
Browse[2]>
debug: if (minMapq >= 0) {
reads = lapply(reads, function(rga, minMapq) {
if (!any(is.na(mcols(rga)$mapq))) {
return(rga[mcols(rga)$mapq > minMapq])
}
warning("mapq quality filter could not be applied due to NA values in the mapq quality scores. All reads are used for counting overlaps.")
rga
}, minMapq = minMapq)
colData(object)$lowQualityReads = colData(object)$rawReads -
sapply(reads, length)
}
Browse[2]>
debug: reads = lapply(reads, function(rga, minMapq) {
if (!any(is.na(mcols(rga)$mapq))) {
return(rga[mcols(rga)$mapq > minMapq])
}
warning("mapq quality filter could not be applied due to NA values in the mapq quality scores. All reads are used for counting overlaps.")
rga
}, minMapq = minMapq)
Browse[2]>
debug: colData(object)$lowQualityReads = colData(object)$rawReads -
sapply(reads, length)
Browse[2]>
debug: reads = lapply(reads, granges)
Browse[2]>
debug: if (trim > 0) {
reads = lapply(reads, function(gr, trim) {
start(gr[strand(gr) == "+"]) = start(gr[strand(gr) ==
"+"]) + trim
end(gr[strand(gr) == "-"]) = end(gr[strand(gr) == "-"]) -
trim
gr
}, trim = trim)
}
Browse[2]>
debug: cat("calculating overlaps\n")
Browse[2]>
calculating overlaps
debug: frag <- rowRanges(object)
Browse[2]>
debug: strand(frag) <- "+"
Browse[2]>
debug: countsLeftFragmentEnd <- sapply(reads, countOverlaps, query = frag,
type = c("start"), maxgap = shift)
Browse[2]>
debug: strand(frag) <- "-"
Browse[2]>
debug: countsRightFragmentEnd <- sapply(reads, countOverlaps, query = frag,
type = c("end"), maxgap = shift)
Browse[2]>
debug: colData(object)$mappedReads = apply(countsLeftFragmentEnd, 2,
sum) + apply(countsRightFragmentEnd, 2, sum)
Browse[2]>
debug: colData(object)$mappingRatio = colData(object)$mappedReads/(colData(object)$rawReads -
colData(object)$lowQualityReads)
Browse[2]>
debug: metaDataFrame <- DataFrame(type = rep("countInfo", 5), description = rep("",
5))
Browse[2]>
debug: idx <- colnames(colData(object)) %in% c("originalReads", "rawReads",
"lowQualityReads", "mappedReads", "mappingRatio")
Browse[2]>
debug: mcols(colData(object))[idx, ] <- metaDataFrame
Browse[2]>
debug: assays(object) <- SimpleList(countsLeftFragmentEnd = countsLeftFragmentEnd,
countsRightFragmentEnd = countsRightFragmentEnd)
Browse[2]>
debug: assays(object) <- SimpleList(countsLeftFragmentEnd = countsLeftFragmentEnd,
countsRightFragmentEnd = countsRightFragmentEnd)
Browse[2]>
debug: object
Browse[2]>
exiting from: countFragmentOverlaps(fc)
class: FourC
dim: 2935389 6
metadata(7): projectPath fragmentDir ... primerFile bamFilePath
assays(2): countsLeftFragmentEnd countsRightFragmentEnd
rownames: NULL
rowRanges metadata column names(4): leftSize rightSize leftValid
rightValid
colnames(6): Smad4a_Q30_1 Smad4a_Q30_2 ... Smad4a_Q30_uniq_srt_1
Smad4a_Q30_uniq_srt_2
colData names(13): viewpoint condition ... mappedReads mappingRatio
There were 12 warnings (use warnings() to see them)
> warnings()
Warning messages:
1: In countOverlaps_GenomicRanges(query, subject, maxgap = maxgap, ... :
With the new findOverlaps implementation based on Nested Containment
Lists, 'maxgap' has no special meaning when 'type' is "start", "end",
"within", or "equal". Note that this is a change with respect to the
old findOverlaps implementation based on Interval Trees. See ?NCList
for more information about this change and other differences between
the new and old algorithms.
2: In countOverlaps_GenomicRanges(query, subject, maxgap = maxgap, ... :
With the new findOverlaps implementation based on Nested Containment
Lists, 'maxgap' has no special meaning when 'type' is "start", "end",
"within", or "equal". Note that this is a change with respect to the
old findOverlaps implementation based on Interval Trees. See ?NCList
for more information about this change and other differences between
the new and old algorithms.
3: In countOverlaps_GenomicRanges(query, subject, maxgap = maxgap, ... :
With the new findOverlaps implementation based on Nested Containment
Lists, 'maxgap' has no special meaning when 'type' is "start", "end",
"within", or "equal". Note that this is a change with respect to the
old findOverlaps implementation based on Interval Trees. See ?NCList
for more information about this change and other differences between
the new and old algorithms.
4: In countOverlaps_GenomicRanges(query, subject, maxgap = maxgap, ... :
With the new findOverlaps implementation based on Nested Containment
Lists, 'maxgap' has no special meaning when 'type' is "start", "end",
"within", or "equal". Note that this is a change with respect to the
old findOverlaps implementation based on Interval Trees. See ?NCList
for more information about this change and other differences between
the new and old algorithms.
5: In countOverlaps_GenomicRanges(query, subject, maxgap = maxgap, ... :
With the new findOverlaps implementation based on Nested Containment
Lists, 'maxgap' has no special meaning when 'type' is "start", "end",
"within", or "equal". Note that this is a change with respect to the
old findOverlaps implementation based on Interval Trees. See ?NCList
for more information about this change and other differences between
the new and old algorithms.
6: In countOverlaps_GenomicRanges(query, subject, maxgap = maxgap, ... :
With the new findOverlaps implementation based on Nested Containment
Lists, 'maxgap' has no special meaning when 'type' is "start", "end",
"within", or "equal". Note that this is a change with respect to the
old findOverlaps implementation based on Interval Trees. See ?NCList
for more information about this change and other differences between
the new and old algorithms.
7: In countOverlaps_GenomicRanges(query, subject, maxgap = maxgap, ... :
With the new findOverlaps implementation based on Nested Containment
Lists, 'maxgap' has no special meaning when 'type' is "start", "end",
"within", or "equal". Note that this is a change with respect to the
old findOverlaps implementation based on Interval Trees. See ?NCList
for more information about this change and other differences between
the new and old algorithms.
8: In countOverlaps_GenomicRanges(query, subject, maxgap = maxgap, ... :
With the new findOverlaps implementation based on Nested Containment
Lists, 'maxgap' has no special meaning when 'type' is "start", "end",
"within", or "equal". Note that this is a change with respect to the
old findOverlaps implementation based on Interval Trees. See ?NCList
for more information about this change and other differences between
the new and old algorithms.
9: In countOverlaps_GenomicRanges(query, subject, maxgap = maxgap, ... :
With the new findOverlaps implementation based on Nested Containment
Lists, 'maxgap' has no special meaning when 'type' is "start", "end",
"within", or "equal". Note that this is a change with respect to the
old findOverlaps implementation based on Interval Trees. See ?NCList
for more information about this change and other differences between
the new and old algorithms.
10: In countOverlaps_GenomicRanges(query, subject, maxgap = maxgap, ... :
With the new findOverlaps implementation based on Nested Containment
Lists, 'maxgap' has no special meaning when 'type' is "start", "end",
"within", or "equal". Note that this is a change with respect to the
old findOverlaps implementation based on Interval Trees. See ?NCList
for more information about this change and other differences between
the new and old algorithms.
11: In countOverlaps_GenomicRanges(query, subject, maxgap = maxgap, ... :
With the new findOverlaps implementation based on Nested Containment
Lists, 'maxgap' has no special meaning when 'type' is "start", "end",
"within", or "equal". Note that this is a change with respect to the
old findOverlaps implementation based on Interval Trees. See ?NCList
for more information about this change and other differences between
the new and old algorithms.
12: In countOverlaps_GenomicRanges(query, subject, maxgap = maxgap, ... :
With the new findOverlaps implementation based on Nested Containment
Lists, 'maxgap' has no special meaning when 'type' is "start", "end",
"within", or "equal". Note that this is a change with respect to the
old findOverlaps implementation based on Interval Trees. See ?NCList
for more information about this change and other differences between
the new and old algorithms.
>
More details on the error
and final my bam files
I also pasting the first 30 lines of the sam files