Skip to content

Commit 8ed1da1

Browse files
author
Charles Plessy
committed
Remove the removeSingletons option.
Changing the default value of `keepSingletonsAbove` from `Inf` to `0` keeps the standard behaviour. The `clusterCTSS()` function had five expression filtering options: `removeSingletons`, `keepSingletonsAbove`, `threshold`, `nrPassThreshold` and `thresholdIsTpm`. I always found it difficult to review code that uses all these options.
1 parent c107007 commit 8ed1da1

11 files changed

+59
-126
lines changed

NEWS.md

+3
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,9 @@ BACKWARDS-INCOMPATIBLE CHANGES
1313
the newer `CustomConsensusClusters` function.
1414
- The `exportToTrack` function now exports scores of _tag clusters_ and
1515
_consensus clusters_ instead of setting them to zero.
16+
- The `removeSingletons` option of clustering methods is removed and the
17+
default value of `keepSingletonsAbove` is set to `0`, which keeps the
18+
standard behavior.
1619

1720
BUG FIXES
1821

R/AggregationMethods.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,7 @@ setMethod( "aggregateTagClusters", "CAGEr"
151151
if (any(countOverlaps(clusters.gr, ctss) == 0))
152152
stop("Some TCs do not overlap any CTSS!")
153153

154-
clusters.gr <- .ctss_summary_for_clusters(ctss, clusters.gr, removeSingletons = FALSE)
154+
clusters.gr <- .ctss_summary_for_clusters(ctss, clusters.gr)
155155

156156
names(clusters.gr) <- as.character(clusters.gr)
157157
.ConsensusClusters(clusters.gr)

R/ClusteringFunctions.R

+4-5
Original file line numberDiff line numberDiff line change
@@ -11,9 +11,9 @@ NULL
1111
#' (clusters <- GRanges('chr1', IRanges(start = c(1,9), end = c(8,10)), '+'))
1212
#'
1313
#' # The function assumes that all CTSSes have a score above zero
14-
#' .ctss_summary_for_clusters(ctss[score(ctss)>0], clusters, removeSingletons = TRUE)
14+
#' .ctss_summary_for_clusters(ctss[score(ctss)>0], clusters, keepSingletonsAbove = Inf)
1515
#' # If not the case, it will give incorrect nr_ctss and fail to remove singletons
16-
#' .ctss_summary_for_clusters(ctss, clusters, removeSingletons = TRUE)
16+
#' .ctss_summary_for_clusters(ctss, clusters, keepSingletonsAbove = Inf)
1717
#'
1818
#' # The function needs its output to be sorted and is not going to check it.
1919
#' .ctss_summary_for_clusters(rev(ctss), clusters)
@@ -23,7 +23,7 @@ NULL
2323
#' # This may create a small bias.
2424
#' .ctss_summary_for_clusters(ctss |> plyranges::mutate(strand = '-'), clusters |> plyranges::mutate(strand = '-'))
2525

26-
.ctss_summary_for_clusters <- function(ctss, clusters, removeSingletons = FALSE, keepSingletonsAbove = Inf) {
26+
.ctss_summary_for_clusters <- function(ctss, clusters, keepSingletonsAbove = 0) {
2727
# Match the clusters and the CTSS
2828
o <- findOverlaps(clusters, ctss)
2929

@@ -60,8 +60,7 @@ NULL
6060
clusters$nr_ctss <- rl
6161

6262
# Remove clusters that match only one CTSS unless their expression is high enough
63-
if(removeSingletons)
64-
clusters <- subset(clusters, clusters$nr_ctss > 1 | score(clusters) >= keepSingletonsAbove)
63+
clusters <- subset(clusters, clusters$nr_ctss > 1 | score(clusters) >= keepSingletonsAbove)
6564

6665
# Give numerical names to the clusters
6766
names(clusters) <- seq_along(clusters)

R/ClusteringMethods.R

+9-16
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,9 @@
2222
#' part of the same cluster. Used only when `method = "distclu"`,
2323
#' otherwise ignored.
2424
#'
25-
#' @param removeSingletons Logical indicating if tag clusters containing only
26-
#' one CTSS be removed.
27-
#'
28-
#' @param keepSingletonsAbove Controls which singleton tag clusters will be
29-
#' removed. When `removeSingletons = TRUE`, only singletons with signal
30-
#' `< keepSingletonsAbove` will be removed. Useful to prevent removing
31-
#' highly supported singleton tag clusters. Default value `Inf` results
32-
#' in removing all singleton TCs when `removeSingletons = TRUE`.
25+
#' @param keepSingletonsAbove Remove "singleton" tag clusters of width 1 with
26+
#' signal `< keepSingletonsAbove`. Default value `0` results in keeping
27+
#' all TCs. Setting it to `Inf` removes all singletons.
3328
#'
3429
#' @param minStability Minimal stability of the cluster, where stability is
3530
#' defined as ratio between maximal and minimal density value for which
@@ -58,7 +53,7 @@
5853
#' `"paraclu"` is an implementation of Paraclu algorithm for parametric
5954
#' clustering of data attached to sequences (Frith _et al._, Genome Research,
6055
#' 2007). Since Paraclu finds clusters within clusters (unlike distclu),
61-
#' additional parameters (`removeSingletons`, `keepSingletonsAbove`,
56+
#' additional parameters (`keepSingletonsAbove`,
6257
#' `minStability`, `maxLength` and `reduceToNonoverlapping`) can be specified to
6358
#' simplify the output by discarding too small (singletons) or too big clusters,
6459
#' and to reduce the clusters to a final set of non-overlapping clusters.
@@ -89,13 +84,13 @@
8984
#' # Using 'distclu', notice argument 'maxDist'
9085
#' ce <- clusterCTSS( exampleCAGEexp, threshold = 50, thresholdIsTpm = TRUE
9186
#' , nrPassThreshold = 1, method = "distclu", maxDist = 20
92-
#' , removeSingletons = TRUE, keepSingletonsAbove = 100)
87+
#' , keepSingletonsAbove = 100)
9388
#' tagClustersGR(ce, "Zf.30p.dome")
9489
#'
9590
#' # Using 'paraclu', notice arguments 'maxLength' and 'minStability'
9691
#' ce <- clusterCTSS( exampleCAGEexp, threshold = 50, thresholdIsTpm = TRUE
9792
#' , nrPassThreshold = 1, method = "paraclu"
98-
#' , removeSingletons = TRUE, keepSingletonsAbove = 100
93+
#' , keepSingletonsAbove = 100
9994
#' , maxLength = 500, minStability = 1
10095
#' , reduceToNonoverlapping = TRUE)
10196
#' tagClustersGR(ce, "Zf.30p.dome")
@@ -106,7 +101,7 @@ setGeneric( "clusterCTSS"
106101
, function( object
107102
, threshold = 1, nrPassThreshold = 1, thresholdIsTpm = TRUE
108103
, method = c("distclu", "paraclu"), maxDist = 20
109-
, removeSingletons = FALSE, keepSingletonsAbove = Inf
104+
, keepSingletonsAbove = 0
110105
, minStability = 1, maxLength = 500
111106
, reduceToNonoverlapping = TRUE
112107
, useMulticore = FALSE, nrCores = NULL)
@@ -116,7 +111,7 @@ setGeneric( "clusterCTSS"
116111

117112
setMethod( "clusterCTSS", "CAGEexp"
118113
, function( object, threshold, nrPassThreshold, thresholdIsTpm, method, maxDist
119-
, removeSingletons, keepSingletonsAbove, minStability, maxLength
114+
, keepSingletonsAbove, minStability, maxLength
120115
, reduceToNonoverlapping, useMulticore, nrCores) {
121116

122117
assay <- ifelse(isTRUE(thresholdIsTpm), "normalizedTpmMatrix", "counts")
@@ -137,12 +132,10 @@ setMethod( "clusterCTSS", "CAGEexp"
137132

138133
if (method == "distclu") {
139134
ctss.cluster.list <- distclu( object = data[decode(filteredCTSSidx(object)),]
140-
, max.dist = maxDist, removeSingletons = removeSingletons
141-
, keepSingletonsAbove = keepSingletonsAbove)
135+
, max.dist = maxDist, keepSingletonsAbove = keepSingletonsAbove)
142136
} else if (method == "paraclu") {
143137
ctss.cluster.list <- paraclu( object = data[decode(filteredCTSSidx(object)),]
144138
, minStability = minStability, maxLength = maxLength
145-
, removeSingletons = removeSingletons
146139
, keepSingletonsAbove = keepSingletonsAbove
147140
, reduceToNonoverlapping = reduceToNonoverlapping
148141
, useMulticore = useMulticore, nrCores = nrCores)

R/Distclu.R

+6-23
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,9 @@ NULL
99
#' @param max.dist Maximal distance between two neighbouring CTSSs for them to
1010
#' be part of the same cluster.
1111
#'
12-
#' @param removeSingletons Logical indicating if tag clusters containing only
13-
#' one CTSS be removed.
14-
#'
15-
#' @param keepSingletonsAbove Controls which singleton tag clusters will be
16-
#' removed. When `removeSingletons = TRUE`, only singletons with signal
17-
#' `< keepSingletonsAbove` will be removed. Useful to prevent removing
18-
#' highly supported singleton tag clusters. Default value `Inf` results
19-
#' in removing all singleton TCs when `removeSingletons = TRUE`.
20-
#'
21-
#' @param removeSingletons Logical indicating if tag clusters containing only
22-
#' one CTSS be removed.
23-
#'
24-
#' @param keepSingletonsAbove Controls which singleton tag clusters will be
25-
#' removed. When `removeSingletons = TRUE`, only singletons with signal
26-
#' `< keepSingletonsAbove` will be removed. Useful to prevent removing
27-
#' highly supported singleton tag clusters. Default value `Inf` results
28-
#' in removing all singleton TCs when `removeSingletons = TRUE`.
12+
#' @param keepSingletonsAbove Remove "singleton" tag clusters of width 1 with
13+
#' signal `< keepSingletonsAbove`. Default value `0` results in keeping
14+
#' all TCs. Setting it to `Inf` removes all singletons.
2915
#'
3016
#' @family CAGEr clustering methods
3117
#'
@@ -42,14 +28,13 @@ setGeneric("distclu",
4228
function(
4329
object,
4430
max.dist = 20,
45-
removeSingletons = FALSE,
46-
keepSingletonsAbove = Inf
31+
keepSingletonsAbove = 0
4732
) standardGeneric("distclu"))
4833

4934
#' @rdname distclu
5035

5136
setMethod("distclu", "SummarizedExperiment",
52-
function(object, max.dist, removeSingletons, keepSingletonsAbove) {
37+
function(object, max.dist, keepSingletonsAbove) {
5338
ctss.cluster.list <- GRangesList()
5439
for(s in colnames(object)) {
5540
message("\t-> ", s)
@@ -58,16 +43,14 @@ setMethod("distclu", "SummarizedExperiment",
5843
d <- subset(d, score(d) > 0)
5944
ctss.cluster.list[[s]] <-
6045
distclu(d, max.dist = max.dist,
61-
removeSingletons = removeSingletons,
6246
keepSingletonsAbove = keepSingletonsAbove)
6347
}
6448
endoapply(ctss.cluster.list, as, "TagClusters")
6549
})
6650

67-
.distclu_CTSS <- function(object, max.dist, removeSingletons, keepSingletonsAbove) {
51+
.distclu_CTSS <- function(object, max.dist, keepSingletonsAbove) {
6852
clusters <- reduce(GRanges(object), min = max.dist)
6953
clusters <- .ctss_summary_for_clusters(object, clusters,
70-
removeSingletons = removeSingletons,
7154
keepSingletonsAbove = keepSingletonsAbove)
7255
names(clusters) <- seq_along(clusters)
7356
as(clusters, "TagClusters")

R/GetMethods.R

+1-1
Original file line numberDiff line numberDiff line change
@@ -544,7 +544,7 @@ setMethod( "consensusClustersGR", "CAGEexp"
544544
ctss <- CTSScoordinatesGR(object)
545545
score(ctss) <- CTSSnormalizedTpmDF(object) |> rowSums.RleDataFrame()
546546
ctss <- ctss[ctss$filteredCTSSidx]
547-
cc <- .ctss_summary_for_clusters(ctss, cc, removeSingletons = FALSE)
547+
cc <- .ctss_summary_for_clusters(ctss, cc)
548548
if (! is.null(qLow) & ! is.null(qUp)) {
549549
qLowName <- paste0("q_", qLow)
550550
qUpName <- paste0("q_", qUp)

R/Paraclu.R

+11-18
Original file line numberDiff line numberDiff line change
@@ -15,14 +15,9 @@
1515
#' @param maxLength Maximal length of cluster in base-pairs. Clusters with length
1616
#' `> maxLength` will be discarded.
1717
#'
18-
#' @param removeSingletons Logical indicating if tag clusters containing only
19-
#' one CTSS be removed.
20-
#'
21-
#' @param keepSingletonsAbove Controls which singleton tag clusters will be
22-
#' removed. When `removeSingletons = TRUE`, only singletons with signal
23-
#' `< keepSingletonsAbove` will be removed. Useful to prevent removing
24-
#' highly supported singleton tag clusters. Default value `Inf` results
25-
#' in removing all singleton TCs when `removeSingletons = TRUE`.
18+
#' @param keepSingletonsAbove Remove "singleton" tag clusters of width 1 with
19+
#' signal `< keepSingletonsAbove`. Default value `0` results in keeping
20+
#' all TCs. Setting it to `Inf` removes all singletons.
2621
#'
2722
#' @param reduceToNonoverlapping Logical, should smaller clusters contained
2823
#' within bigger cluster be removed to make a final set of tag clusters
@@ -47,8 +42,8 @@
4742
#' Running Paraclu on a `CTSS` object dispatches the computation on each strand
4843
#' of each sequence level of the object, collects the `IRanges` and assembles
4944
#' them back in a [`TagClusters`] object after filtering them by size and by
50-
#' expression following the `minStability`, `maxLength`, `removeSingletons`,
51-
#' `keepSingletonsAbove` and `reduceToNonoverlapping` parameters.
45+
#' expression following the `minStability`, `maxLength`, `keepSingletonsAbove`
46+
#' and `reduceToNonoverlapping` parameters.
5247
#'
5348
#' Running Paraclu on a [`RangedSummarizedExperiment`] object will loop on each
5449
#' sample, and return the results as a [`GRangesList`] of `TagClusters`.
@@ -72,7 +67,7 @@
7267
setGeneric("paraclu",
7368
function( object
7469
, minStability = 1, maxLength = 500
75-
, removeSingletons = FALSE, keepSingletonsAbove = Inf
70+
, keepSingletonsAbove = 0
7671
, reduceToNonoverlapping = TRUE
7772
, useMulticore = FALSE, nrCores = NULL)
7873
standardGeneric("paraclu"))
@@ -116,7 +111,7 @@ setGeneric("paraclu",
116111
setMethod("paraclu", "Pairs",
117112
function( object
118113
, minStability = 1, maxLength = 500
119-
, removeSingletons = FALSE, keepSingletonsAbove = Inf
114+
, keepSingletonsAbove = 0
120115
, reduceToNonoverlapping = TRUE
121116
, useMulticore = FALSE, nrCores = NULL) {
122117
if (length(object) == 0) return(IRanges())
@@ -129,7 +124,7 @@ setMethod("paraclu", "Pairs",
129124
setMethod("paraclu", "CTSS",
130125
function( object
131126
, minStability = 1, maxLength = 500
132-
, removeSingletons = FALSE, keepSingletonsAbove = Inf
127+
, keepSingletonsAbove = 0
133128
, reduceToNonoverlapping = TRUE
134129
, useMulticore = FALSE, nrCores = NULL) {
135130
# Sort and remove null ranges
@@ -157,7 +152,6 @@ setMethod("paraclu", "CTSS",
157152
# Compute score and dominant CTSs, and remove singletons as wanted.
158153
clusters <-
159154
.ctss_summary_for_clusters( object, clusters
160-
, removeSingletons = removeSingletons
161155
, keepSingletonsAbove = keepSingletonsAbove)
162156
# Reduce to non-overlapping as wanted
163157
if(reduceToNonoverlapping == TRUE){
@@ -178,12 +172,12 @@ setMethod("paraclu", "CTSS",
178172
setMethod("paraclu", "GRanges",
179173
function( object
180174
, minStability = 1, maxLength = 500
181-
, removeSingletons = FALSE, keepSingletonsAbove = Inf
175+
, keepSingletonsAbove = 0
182176
, reduceToNonoverlapping = TRUE
183177
, useMulticore = FALSE, nrCores = NULL) {
184178
stopifnot(all(width(object) == 1))
185179
paraclu( as(object, "CTSS"), minStability = minStability, maxLength = maxLength
186-
, removeSingletons = removeSingletons, keepSingletonsAbove = keepSingletonsAbove
180+
, keepSingletonsAbove = keepSingletonsAbove
187181
, reduceToNonoverlapping = reduceToNonoverlapping
188182
, useMulticore = useMulticore, nrCores = nrCores)
189183
})
@@ -193,7 +187,7 @@ setMethod("paraclu", "GRanges",
193187
setMethod("paraclu", "SummarizedExperiment",
194188
function( object
195189
, minStability = 1, maxLength = 500
196-
, removeSingletons = FALSE, keepSingletonsAbove = Inf
190+
, keepSingletonsAbove = 0
197191
, reduceToNonoverlapping = TRUE
198192
, useMulticore = FALSE, nrCores = NULL) {
199193

@@ -205,7 +199,6 @@ setMethod("paraclu", "SummarizedExperiment",
205199
tag.cluster.list[[s]] <-
206200
paraclu( gr
207201
, minStability = minStability, maxLength = maxLength
208-
, removeSingletons = removeSingletons
209202
, keepSingletonsAbove = keepSingletonsAbove
210203
, reduceToNonoverlapping = reduceToNonoverlapping
211204
, useMulticore = useMulticore, nrCores = nrCores)

man/clusterCTSS.Rd

+8-15
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)