|
1 | 1 | #' @include CAGEexp.R CTSS.R
|
2 | 2 | NULL
|
3 | 3 |
|
4 |
| -#' @noRd |
| 4 | +#' Summarise CTSSs included in clusters |
| 5 | +#' |
| 6 | +#' @param ctss A [`CTSS`] object. |
| 7 | +#' |
| 8 | +#' @param clusters A [`TagClusters`], [`ConsensusClusters`] or any other |
| 9 | +#' object implementing the [`GRanges`] class. |
| 10 | +#' |
| 11 | +#' @return The `clusters` object with a new `dominant_CTSS` metadata in `CTSS` |
| 12 | +#' format reporting the genomic coordinate and expression score of most |
| 13 | +#' highly expressed position in each cluster, plus a `nr_ctss` metadata reporting |
| 14 | +#' the number of expressed CTSSs in each cluster. |
5 | 15 | #'
|
6 | 16 | #' @importFrom S4Vectors queryHits subjectHits runLength runValue
|
| 17 | +#' @export |
7 | 18 | #'
|
8 | 19 | #' @examples
|
9 | 20 | #' # See also benchmarks/dominant_ctss.md
|
10 |
| -#' (ctss <- GRanges('chr1', IRanges(start = 1:10, end = 1:10), '+', score = c(1, 0, 0, 1, 2, 0, 2, 1, 0, 1))) |
11 |
| -#' (clusters <- GRanges('chr1', IRanges(start = c(1,9), end = c(8,10)), '+')) |
| 21 | +#' (ctss <- CTSS( 'chr1', IRanges(start = 1:10, end = 1:10) |
| 22 | +#' , '+', score = c(1, 0, 0, 1, 2, 0, 2, 1, 0, 1))) |
| 23 | +#' (clusters <- GRanges( 'chr1', IRanges(start = c(1,9) |
| 24 | +#' , end = c(8,10)), '+')) |> as("TagClusters") |
12 | 25 | #'
|
13 | 26 | #' # The function assumes that all CTSSes have a score above zero
|
14 |
| -#' .ctss_summary_for_clusters(ctss[score(ctss)>0], clusters, keepSingletonsAbove = Inf) |
| 27 | +#' .ctss_summary_for_clusters(ctss[score(ctss)>0], clusters) |
15 | 28 | #' # If not the case, it will give incorrect nr_ctss and fail to remove singletons
|
16 |
| -#' .ctss_summary_for_clusters(ctss, clusters, keepSingletonsAbove = Inf) |
| 29 | +#' .ctss_summary_for_clusters(ctss, clusters) |
17 | 30 | #'
|
18 | 31 | #' # The function needs its output to be sorted and is not going to check it.
|
19 | 32 | #' .ctss_summary_for_clusters(rev(ctss), clusters)
|
20 | 33 | #' .ctss_summary_for_clusters(ctss, rev(clusters))
|
21 | 34 | #'
|
22 | 35 | #' # Ties are resolved with 5' preference for both plus and minus strands.
|
23 | 36 | #' # This may create a small bias.
|
24 |
| -#' .ctss_summary_for_clusters(ctss |> plyranges::mutate(strand = '-'), clusters |> plyranges::mutate(strand = '-')) |
| 37 | +#' ctss_minus <- ctss |
| 38 | +#' strand(ctss_minus) <- '-' |
| 39 | +#' clusters_minus <- clusters |
| 40 | +#' strand(clusters_minus) <- '-' |
| 41 | +#' .ctss_summary_for_clusters(ctss_minus, clusters_minus) |
25 | 42 |
|
26 |
| -.ctss_summary_for_clusters <- function(ctss, clusters, keepSingletonsAbove = 0) { |
| 43 | +.ctss_summary_for_clusters <- function(ctss, clusters) { |
27 | 44 | # Match the clusters and the CTSS
|
28 | 45 | o <- findOverlaps(clusters, ctss)
|
29 | 46 |
|
|
47 | 64 | # Find absolute position of dominant CTSS in each run.
|
48 | 65 | global_max_ids <- cluster_start_idx + local_max_idx - 1
|
49 | 66 |
|
50 |
| - # Record dominant CTSS as GRanges object. |
51 |
| - clusters$dominant_ctss <- granges(ctss)[subjectHits(o)][global_max_ids] |
52 |
| - |
53 |
| - # Record dominant CTSS score. Mabye we should use its GRanges's score instead. |
54 |
| - clusters$tpm.dominant_ctss <- score(ctss)[subjectHits(o)][global_max_ids] |
55 |
| - |
56 | 67 | # Record total expression of the cluster
|
57 | 68 | score(clusters) <- Rle(sum(grouped_scores))
|
| 69 | + |
| 70 | + # Record dominant CTSS as CTSS object. |
| 71 | + clusters$dominant_ctss <- CTSS(granges(ctss)[subjectHits(o)][global_max_ids]) |
| 72 | + |
| 73 | + # Record dominant CTSS score. |
| 74 | + score(clusters$dominant_ctss) <- score(ctss)[subjectHits(o)][global_max_ids] |
58 | 75 |
|
59 | 76 | # Count the number of clusters
|
60 | 77 | clusters$nr_ctss <- rl
|
61 | 78 |
|
62 |
| - # Remove clusters that match only one CTSS unless their expression is high enough |
63 |
| - clusters <- subset(clusters, clusters$nr_ctss > 1 | score(clusters) >= keepSingletonsAbove) |
64 |
| - |
65 | 79 | # Give numerical names to the clusters
|
66 | 80 | names(clusters) <- seq_along(clusters)
|
67 | 81 |
|
|
0 commit comments