Skip to content

Commit c3aaf0a

Browse files
author
Charles Plessy
committed
Merge branch 'clustering_refactoring2' into devel
2 parents 69a4cc0 + a1efc7a commit c3aaf0a

26 files changed

+639
-983
lines changed

DESCRIPTION

+1-3
Original file line numberDiff line numberDiff line change
@@ -15,8 +15,6 @@ Imports:
1515
BSgenome,
1616
CAGEfightR,
1717
data.table,
18-
DelayedArray,
19-
DelayedMatrixStats,
2018
formula.tools,
2119
GenomeInfoDb,
2220
GenomicAlignments,
@@ -81,8 +79,8 @@ Collate:
8179
'AggregationMethods.R'
8280
'CAGEfightR.R'
8381
'CAGEr-package.R'
82+
'Paraclu.R'
8483
'CorrelationMethods.R'
85-
'CumulativeDistributionFunctions.R'
8684
'GetMethods.R'
8785
'CumulativeDistributionMethods.R'
8886
'ExportMethods.R'

NAMESPACE

+5-7
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@ export(CTSScumulativesCC)
1919
export(CTSScumulativesTagClusters)
2020
export(CTSSnormalizedTpmDF)
2121
export(CTSSnormalizedTpmGR)
22-
export(CTSStagCountDA)
2322
export(CTSStagCountDF)
2423
export(CTSStagCountGR)
2524
export(CTSStagCountSE)
@@ -37,6 +36,7 @@ export(consensusClustersGR)
3736
export(consensusClustersSE)
3837
export(consensusClustersTpm)
3938
export(cumulativeCTSSdistribution)
39+
export(distclu)
4040
export(exportToTrack)
4141
export(expressionClasses)
4242
export(findStrandInvaders)
@@ -54,6 +54,7 @@ export(librarySizes)
5454
export(mergeCAGEsets)
5555
export(mergeSamples)
5656
export(normalizeTagCount)
57+
export(paraclu)
5758
export(plotAnnot)
5859
export(plotCorrelation)
5960
export(plotCorrelation2)
@@ -79,8 +80,6 @@ exportMethods(flagByUpstreamSequences)
7980
exportMethods(quickEnhancers)
8081
exportMethods(removeStrandInvaders)
8182
import(BiocGenerics)
82-
import(DelayedArray)
83-
import(DelayedMatrixStats)
8483
import(MultiAssayExperiment)
8584
import(SummarizedExperiment)
8685
import(methods)
@@ -112,35 +111,34 @@ importFrom(GenomicRanges,findOverlaps)
112111
importFrom(GenomicRanges,from_GPos_to_GRanges)
113112
importFrom(GenomicRanges,granges)
114113
importFrom(GenomicRanges,promoters)
115-
importFrom(IRanges,IPos)
116114
importFrom(IRanges,IRanges)
117115
importFrom(IRanges,IRangesList)
118116
importFrom(IRanges,RleList)
119-
importFrom(IRanges,Views)
120117
importFrom(IRanges,extractList)
121118
importFrom(IRanges,reduce)
122-
importFrom(IRanges,viewApply)
123119
importFrom(KernSmooth,bkde2D)
124120
importFrom(Rsamtools,"bamFlag<-")
125121
importFrom(Rsamtools,ScanBamParam)
126122
importFrom(Rsamtools,scanBamFlag)
127123
importFrom(S4Vectors,"%in%")
128124
importFrom(S4Vectors,DataFrame)
129125
importFrom(S4Vectors,List)
126+
importFrom(S4Vectors,Pairs)
130127
importFrom(S4Vectors,Rle)
131128
importFrom(S4Vectors,SimpleList)
132129
importFrom(S4Vectors,decode)
133130
importFrom(S4Vectors,endoapply)
131+
importFrom(S4Vectors,first)
134132
importFrom(S4Vectors,mcols)
135133
importFrom(S4Vectors,queryHits)
136134
importFrom(S4Vectors,runLength)
137135
importFrom(S4Vectors,runValue)
136+
importFrom(S4Vectors,second)
138137
importFrom(S4Vectors,subjectHits)
139138
importFrom(S4Vectors,unstrsplit)
140139
importFrom(SummarizedExperiment,SummarizedExperiment)
141140
importFrom(SummarizedExperiment,assay)
142141
importFrom(SummarizedExperiment,assays)
143-
importFrom(SummarizedExperiment,rowRanges)
144142
importFrom(VGAM,zeta)
145143
importFrom(data.table,data.table)
146144
importFrom(data.table,fread)

NEWS.md

+14-1
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,9 @@ NEW FEATURES
1010
`legend`, `xlab`, `ylab`, `xlim` and `ylim` arguments were removed as this
1111
can be controlled via `ggplot2` functions.
1212
- New `TSSlogo` function wrapping the `ggseqlogo` package.
13+
- New `distclu` and `paraclu` functions that can run directly on `CTSS`
14+
objects. You can use them to test parameters before running the whole
15+
`CAGEexp` object through `clusterCTSS`.
1316

1417
# Changes in version 2.11.1
1518

@@ -22,6 +25,16 @@ BUG FIXES
2225

2326
# Changes in version 2.8.0 (25/10/2023)
2427

28+
BACKWARDS-INCOMPATIBLE CHANGES
29+
30+
- The `CTSStagCountDA` function is removed.
31+
- The _dominant peak_ in `TagClusters` objects is now a `GRanges` object like
32+
in `ConsensusClusters`.
33+
- The `custom` method for tag clustering is removed. It was obsoleted by
34+
the newer `CustomConsensusClusters` function.
35+
- The `exportToTrack` function now exports scores of _tag clusters_ and
36+
_consensus clusters_ instead of setting them to zero.
37+
2538
BUG FIXES
2639

2740
- Correct quantile positions, which were shifted by one base. This bug may
@@ -41,7 +54,7 @@ BUG FIXES
4154
NEW FEATURES
4255

4356
- Allow URLs to files in `getCTSS()` (Fixes #50).
44-
- Accelerated the computation of quantile position by ~20 times.
57+
- Accelerated the computation of cumulative sums and quantile position ~10×.
4558
- New `resetCAGEexp()` function.
4659
- New `flagByUpstreamSequences()` function.
4760
- The `annotateCTSS` and `annotateConsensusClusters` function gain a

R/AggregationMethods.R

+3-24
Original file line numberDiff line numberDiff line change
@@ -144,35 +144,14 @@ setMethod( "aggregateTagClusters", "CAGEr"
144144

145145
# CTSS with score that is sum of all samples
146146
ctss <- CTSScoordinatesGR(CAGEexp_obj)
147-
score(ctss) <- rowSums(CTSSnormalizedTpmDF(CAGEexp_obj) |> DelayedArray::DelayedArray() )
147+
score(ctss) <- rowSums.RleDataFrame(CTSSnormalizedTpmDF(CAGEexp_obj))
148148

149149
# Stop if some TCs do not overlap with any CTSS, because the rest of the code
150150
# is not robust against that.
151151
if (any(countOverlaps(clusters.gr, ctss) == 0))
152152
stop("Some TCs do not overlap any CTSS!")
153153

154-
# See `benchmarks/dominant_ctss.md`.
155-
o <- findOverlaps(clusters.gr, ctss)
156-
157-
rl <- rle(queryHits(o))$length
158-
cluster_start_idx <- cumsum(c(1, head(rl, -1))) # Where each run starts
159-
grouped_scores <- extractList(score(ctss), o)
160-
# grouped_pos <- extractList(pos(ctss), o)
161-
162-
find.dominant.idx <- function (x) {
163-
# which.max is breaking ties by taking the last, but this will give slightly
164-
# different biases on plus an minus strands.
165-
w <- which(x == max(x))
166-
w[ceiling(length(w)/2)]
167-
}
168-
local_max_idx <- sapply(grouped_scores, find.dominant.idx) -1 # Start at zero
169-
global_max_ids <- cluster_start_idx + local_max_idx
170-
# start(clusters.gr) <- min(grouped_pos)
171-
# end (clusters.gr) <- max(grouped_pos)
172-
score(clusters.gr) <- sum(grouped_scores)
173-
clusters.gr$dominant_ctss <- granges(ctss)[subjectHits(o)][global_max_ids]
174-
clusters.gr$tpm.dominant_ctss <- score(ctss) [subjectHits(o)][global_max_ids]
175-
clusters.gr
154+
clusters.gr <- .ctss_summary_for_clusters(ctss, clusters.gr, removeSingletons = FALSE)
176155

177156
names(clusters.gr) <- as.character(clusters.gr)
178157
.ConsensusClusters(clusters.gr)
@@ -190,7 +169,7 @@ setMethod( ".CCtoSE"
190169
rowRanges(se)$cluster <- ranges2names(rowRanges(se), consensus.clusters)
191170

192171
if (tpmThreshold > 0)
193-
se <- se[rowSums(DelayedArray(assays(se)[["normalizedTpmMatrix"]])) > tpmThreshold,]
172+
se <- se[rowSums.RleDataFrame(assays(se)[["normalizedTpmMatrix"]]) > tpmThreshold,]
194173

195174
.rowsumAsMatrix <- function(DF, names) {
196175
# First, remove CTSS that do not match clusters

R/CAGEr.R

+9-4
Original file line numberDiff line numberDiff line change
@@ -186,10 +186,15 @@ setMethod(".filterCtss", "RangedSummarizedExperiment", function (object, thresho
186186
assay <- ifelse(thresholdIsTpm, "normalizedTpmMatrix", "counts")
187187
if(assay == "normalizedTpmMatrix" & is.null(assays(object)[[assay]]))
188188
stop("Normalise the CAGEr object first with ", sQuote("normalizeTagCount()"), ".")
189-
.filterCtss(DelayedArray(assays(object)[[assay]]), threshold, nrPassThreshold, thresholdIsTpm)
189+
.filterCtss(assays(object)[[assay]], threshold, nrPassThreshold, thresholdIsTpm)
190190
})
191191

192-
setMethod(".filterCtss", "DelayedArray", function (object, threshold, nrPassThreshold, thresholdIsTpm) {
193-
nr.pass.threshold <- rowSums(object >= threshold)
194-
Rle(nr.pass.threshold >= min(nrPassThreshold, ncol(object)))
192+
setMethod(".filterCtss", "DataFrame", function (object, threshold, nrPassThreshold, thresholdIsTpm) {
193+
nr.pass.threshold <- rowSums.RleDataFrame(lapply(object, \(x) x > threshold) |> DataFrame())
194+
nr.pass.threshold >= min(nrPassThreshold, ncol(object))
195195
})
196+
197+
setMethod(".filterCtss", "matrix", function (object, threshold, nrPassThreshold, thresholdIsTpm) {
198+
nr.pass.threshold <- rowSums(object > threshold)
199+
nr.pass.threshold >= min(nrPassThreshold, ncol(object))
200+
})

R/CTSS.R

-24
Original file line numberDiff line numberDiff line change
@@ -105,30 +105,6 @@ setMethod("coerce", c("GRanges", "CTSS"),
105105
as(gp, "CTSS")
106106
})
107107

108-
109-
#' @description The `CTSS.chr` class represents a `CTSS` object that is
110-
#' guaranteed to be only on a single chromosome. It is used internally by
111-
#' _CAGEr_ for type-safe polymorphic dispatch.
112-
#'
113-
#' @rdname CTSS-class
114-
#'
115-
#' @import methods
116-
#' @importFrom GenomicRanges GPos
117-
#'
118-
#' @examples
119-
#'
120-
#' # (internal use) Transform CTSS to CTSS.chr object
121-
#' ctss.chr <- as(CTSScoordinatesGR(exampleCAGEexp), "CTSS.chr")
122-
123-
setClass( "CTSS.chr"
124-
, contains = "CTSS"
125-
, validity =
126-
function(object)
127-
if (length(seqlevelsInUse(object)) > 1)
128-
return("Mutiple sequnames found: CTSS.chr objects should be only on a single chromosome.")
129-
)
130-
131-
132108
#' ConsensusClusters
133109
#'
134110
#' The \code{ConsensusClusters} class represents consensus clusters.

0 commit comments

Comments
 (0)