|
1 | 1 | #' @include CTSS.R Multicore.R
|
2 | 2 |
|
3 |
| -#' @name clusterCTSS |
4 |
| -#' |
5 |
| -#' @title Cluster CTSSs into tag clusters |
6 |
| -#' |
7 |
| -#' @description Clusters individual CAGE transcription start sites (CTSSs) along |
8 |
| -#' the genome into tag clusters (TCs) using specified _ab initio_ method, or |
9 |
| -#' assigns them to predefined genomic regions. |
10 |
| -#' |
11 |
| -#' @param object A [`CAGEr`] object. |
12 |
| -#' |
13 |
| -#' @param threshold,nrPassThreshold Ignore CTSSs that do not have signal |
14 |
| -#' `>= threshold` in at least `nrPassThreshold` experiments. |
15 |
| -#' |
16 |
| -#' @param thresholdIsTpm Logical indicating if `threshold` is expressed in |
17 |
| -#' raw tag counts (`FALSE`) or normalized signal (`TRUE`). |
18 |
| -#' |
19 |
| -#' @param method Clustering method: `"distclu"` or `"paraclu"`. |
20 |
| -#' |
21 |
| -#' @param maxDist Maximal distance between two neighbouring CTSSs for them to be |
22 |
| -#' part of the same cluster. Used only when `method = "distclu"`, |
23 |
| -#' otherwise ignored. |
24 |
| -#' |
25 |
| -#' @param keepSingletonsAbove Remove "singleton" tag clusters of width 1 with |
26 |
| -#' signal `< keepSingletonsAbove`. Default value `0` results in keeping |
27 |
| -#' all TCs. Setting it to `Inf` removes all singletons. |
28 |
| -#' |
29 |
| -#' @param minStability Minimal stability of the cluster, where stability is |
30 |
| -#' defined as ratio between maximal and minimal density value for which |
31 |
| -#' this cluster is maximal scoring. For definition of stability refer to |
32 |
| -#' Frith _et al._, Genome Research, 2007. Clusters with stability |
33 |
| -#' `< minStability` will be discarded. Used only when `method = "paraclu"`. |
34 |
| -#' |
35 |
| -#' @param maxLength Maximal length of cluster in base-pairs. Clusters with length |
36 |
| -#' `> maxLength` will be discarded. |
37 |
| -#' |
38 |
| -#' @param reduceToNonoverlapping Logical, should smaller clusters contained |
39 |
| -#' within bigger cluster be removed to make a final set of tag clusters |
40 |
| -#' non-overlapping. Used only when `method = "paraclu"`. |
41 |
| -#' |
42 |
| -#' @param useMulticore Logical, should multicore be used. `useMulticore = TRUE` |
43 |
| -#' has no effect on non-Unix-like platforms. |
44 |
| -#' |
45 |
| -#' @param nrCores Number of cores to use when `useMulticore = TRUE`. Default |
46 |
| -#' value `NULL` uses all detected cores. |
47 |
| -#' |
48 |
| -#' @details The `"distclu"` method is an implementation of simple distance-based |
49 |
| -#' clustering of data attached to sequences, where two neighbouring TSSs are |
50 |
| -#' joined together if they are closer than some specified distance (see |
51 |
| -#' [`GenomicRanges::reduce`] for implementation details). |
52 |
| -#' |
53 |
| -#' `"paraclu"` is an implementation of Paraclu algorithm for parametric |
54 |
| -#' clustering of data attached to sequences (Frith _et al._, Genome Research, |
55 |
| -#' 2007). Since Paraclu finds clusters within clusters (unlike distclu), |
56 |
| -#' additional parameters (`keepSingletonsAbove`, |
57 |
| -#' `minStability`, `maxLength` and `reduceToNonoverlapping`) can be specified to |
58 |
| -#' simplify the output by discarding too small (singletons) or too big clusters, |
59 |
| -#' and to reduce the clusters to a final set of non-overlapping clusters. |
60 |
| -#' |
61 |
| -#' Clustering is done for every CAGE dataset within the CAGEr object separately, |
62 |
| -#' resulting in a different set of tag clusters for every CAGE dataset. TCs from |
63 |
| -#' different datasets can further be aggregated into a single reference set of |
64 |
| -#' consensus clusters by calling the [`aggregateTagClusters`] function. |
65 |
| -#' |
66 |
| -#' @return Returns the [`CAGEexp`] object, in which the results are stored as a `GRangesList` of |
67 |
| -#' [`TagClusters`] objects in the metadata slot `tagClusters`. The |
68 |
| -#' `TagClusters` objects will contain a `filteredCTSSidx` column if appropriate. |
69 |
| -#' The clustering method name is saved in the metadata slot of the `GRangesList`. |
70 |
| -#' |
71 |
| -#' @references Frith _et al._ (2007) A code for transcription initiation in |
72 |
| -#' mammalian genomes, _Genome Research_ **18**(1):1-12, |
73 |
| -#' (\href{http://www.cbrc.jp/paraclu/}{http://www.cbrc.jp/paraclu/}). |
74 |
| -#' |
75 |
| -#' @author Vanja Haberle |
76 |
| -#' |
77 |
| -#' @seealso [`aggregateTagClusters`] |
78 |
| -#' |
79 |
| -#' @family CAGEr object modifiers |
80 |
| -#' @family CAGEr clusters functions |
81 |
| -#' |
82 |
| -#' @examples |
83 |
| -#' |
84 |
| -#' # Using 'distclu', notice argument 'maxDist' |
85 |
| -#' ce <- clusterCTSS( exampleCAGEexp, threshold = 50, thresholdIsTpm = TRUE |
86 |
| -#' , nrPassThreshold = 1, method = "distclu", maxDist = 20 |
87 |
| -#' , keepSingletonsAbove = 100) |
88 |
| -#' tagClustersGR(ce, "Zf.30p.dome") |
89 |
| -#' |
90 |
| -#' # Using 'paraclu', notice arguments 'maxLength' and 'minStability' |
91 |
| -#' ce <- clusterCTSS( exampleCAGEexp, threshold = 50, thresholdIsTpm = TRUE |
92 |
| -#' , nrPassThreshold = 1, method = "paraclu" |
93 |
| -#' , keepSingletonsAbove = 100 |
94 |
| -#' , maxLength = 500, minStability = 1 |
95 |
| -#' , reduceToNonoverlapping = TRUE) |
96 |
| -#' tagClustersGR(ce, "Zf.30p.dome") |
97 |
| -#' |
98 |
| -#' @export |
99 |
| - |
100 |
setGeneric(
  "clusterCTSS",
  function(object,
           threshold = 1,
           nrPassThreshold = 1,
           thresholdIsTpm = TRUE,
           method = c("distclu", "paraclu"),
           maxDist = 20,
           keepSingletonsAbove = 0,
           minStability = 1,
           maxLength = 500,
           reduceToNonoverlapping = TRUE,
           useMulticore = FALSE,
           nrCores = NULL) {
    # The generic only declares the argument list and its defaults; the actual
    # work is done by the method selected for the class of `object`.
    standardGeneric("clusterCTSS")
  }
)
109 |
| - |
110 |
| -#' @rdname clusterCTSS |
111 |
| - |
112 |
setMethod( "clusterCTSS", "CAGEexp"
         , function( object, threshold, nrPassThreshold, thresholdIsTpm, method, maxDist
                   , keepSingletonsAbove, minStability, maxLength
                   , reduceToNonoverlapping, useMulticore, nrCores) {

  # Cluster the CTSSs of a CAGEexp object into tag clusters.
  #
  # Steps:
  #   1. validate `method` and check that normalized values are available;
  #   2. flag the CTSSs that pass the expression filter (stored in the object
  #      through `filteredCTSSidx<-`);
  #   3. run distclu() or paraclu() on the CTSSs that passed the filter;
  #   4. give the result the sequence information of the CTSS table, sort it and
  #      store it in `metadata(object)$tagClusters`.
  #
  # Returns the modified `object`.

  # The retired "custom" method has to be caught before match.arg(), which
  # would otherwise reject it with a generic error and hide the pointer to its
  # replacement.
  if (identical(method, "custom"))
    stop("Deprecated method. See ", dQuote("CustomConsensusClusters()"), " instead.")

  # Validate the method before doing any expensive work.
  method <- match.arg(method)

  data <- CTSStagCountSE(object)

  if (! "normalizedTpmMatrix" %in% assayNames(data))
    stop( "Could not find normalized CAGE signal values, see ?normalizeTagCount.\n"
        , "clusterCTSS() needs normalized values to create its output tables, that "
        , "include TPM expression columns.")

  message("\nFiltering out CTSSs below threshold...")
  filteredCTSSidx(object) <-
    .filterCtss( data, threshold = threshold
               , nrPassThreshold = nrPassThreshold, thresholdIsTpm = thresholdIsTpm)

  message("Clustering...")
  # Only the CTSSs that passed the filter are clustered.
  filteredData <- data[decode(filteredCTSSidx(object)),]

  if (method == "distclu") {
    ctss.cluster.list <- distclu( object = filteredData
                                , max.dist = maxDist, keepSingletonsAbove = keepSingletonsAbove)
  } else {
    # match.arg() guarantees that the only remaining choice is "paraclu".
    ctss.cluster.list <- paraclu( object = filteredData
                                , minStability = minStability, maxLength = maxLength
                                , keepSingletonsAbove = keepSingletonsAbove
                                , reduceToNonoverlapping = reduceToNonoverlapping
                                , useMulticore = useMulticore, nrCores = nrCores)
  }

  # Fetch the CTSS table once and reuse it for both sequence-level updates.
  ctssSE <- CTSStagCountSE(object)
  seqlevels(ctss.cluster.list) <- seqlevels(ctssSE)
  seqinfo(ctss.cluster.list)   <- seqinfo(ctssSE)
  # Changing the sequence levels may change the sort order.  Re-sort
  ctss.cluster.list <- sort(ctss.cluster.list)
  metadata(object)$tagClusters <- ctss.cluster.list
  object
})
153 |
| - |
154 | 3 | #' @rdname byCtss
|
155 | 4 | #'
|
156 | 5 | #' @title Apply functions to identical CTSSes.
|
|
0 commit comments