Skip to content

Commit 7f85aba

Browse files
Refactor annotation code to expand use of TxDB objects
1 parent c91b465 commit 7f85aba

File tree

4 files changed

+82
-71
lines changed

4 files changed

+82
-71
lines changed

NEWS.md

+4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
11
# Upcoming changes in version 2.11.4
22

3+
BACKWARDS-INCOMPATIBLE CHANGES
4+
5+
- The `range` argument of the `annotateCTSS` function is renamed `annot`.
6+
37
NEW FEATURES
48

59
- Support the use of `TxDB` objects for annotating clusters.

R/Annotations.R

+57-59
Original file line numberDiff line numberDiff line change
@@ -408,15 +408,21 @@ msScope_annotation <- function(libs) {
408408
#'
409409
#' @param object `CAGEexp` object.
410410
#'
411-
#' @param ranges A [`GRanges`] object, optionally containing `gene_name`,
412-
#' `type` and `transcript_type` metadata.
411+
#' @param annot A [`GRanges`] or a [`TxDb`] object representing the genome
412+
#' annotation. See details for the `GRanges` object.
413413
#'
414414
#' @param upstream Number of bases _upstream_ the start of the transcript models
415415
#' to be considered as part of the _promoter region_.
416416
#'
417417
#' @param downstream Number of bases _downstream_ the start of the transcript
418418
#' models to be considered as part of the _promoter region_.
419-
#'
419+
#'
420+
#' @details
421+
#' If the annotation is a [`GRanges`], gene names will be extracted from the
422+
#' `gene_name` metadata, the `transcript_type` metadata will be used to filter
423+
#' out entries that do not have promoters (such as immunogloblulin VDJ segments),
424+
#' and the `type` metadata is used to extract positions of introns and exons.
425+
#'
420426
#' @return `annotateCTSS` returns the input object with the following
421427
#' modifications:
422428
#'
@@ -444,14 +450,14 @@ msScope_annotation <- function(libs) {
444450
#'
445451
#' @export
446452

447-
setGeneric("annotateCTSS", function(object, ranges, upstream=500, downstream=500)
453+
setGeneric("annotateCTSS", function(object, annot, upstream=500, downstream=500)
448454
standardGeneric("annotateCTSS"))
449455

450456
#' @rdname annotateCTSS
451457

452-
setMethod("annotateCTSS", c("CAGEexp", "GRanges"), function (object, ranges, upstream=500, downstream=500){
453-
CTSScoordinatesGR(object)$genes <- ranges2genes(CTSScoordinatesGR(object), ranges)
454-
CTSScoordinatesGR(object)$annotation <- ranges2annot(CTSScoordinatesGR(object), ranges, upstream, downstream)
458+
setMethod("annotateCTSS", c("CAGEexp", "GRanges"), function (object, annot, upstream=500, downstream=500){
459+
CTSScoordinatesGR(object)$genes <- ranges2genes(CTSScoordinatesGR(object), annot)
460+
CTSScoordinatesGR(object)$annotation <- ranges2annot(CTSScoordinatesGR(object), annot, upstream, downstream)
455461

456462
annot <- sapply( CTSStagCountDF(object)
457463
, function(X) tapply(X, CTSScoordinatesGR(object)$annotation, sum))
@@ -463,20 +469,32 @@ setMethod("annotateCTSS", c("CAGEexp", "GRanges"), function (object, ranges, ups
463469

464470
#' @rdname annotateCTSS
465471

466-
setMethod("annotateCTSS", c("CAGEexp", "TxDb"), function (object, ranges){
467-
g <- genes(ranges)
468-
g$gene_name <- g$gene_id
469-
annotateCTSS(object, g)
470-
CTSScoordinatesGR(object)$genes <- ranges2genes(CTSScoordinatesGR(object), g)
471-
CTSScoordinatesGR(object)$annotation <- txdb2annot(CTSScoordinatesGR(object), ranges)
472-
473-
annot <- sapply( CTSStagCountDF(object)
474-
, function(X) tapply(X, CTSScoordinatesGR(object)$annotation, sum))
475-
colData(object)[levels(CTSScoordinatesGR(object)$annotation)] <- DataFrame(t(annot))
476-
validObject(object)
477-
object
472+
setMethod("annotateCTSS", c("CAGEexp", "TxDb"), function (object, annot){
473+
annotateCTSS(object, .txdb2annotranges(annot))
478474
})
479475

476+
#' @name .txdb2annotranges
477+
#'
478+
#' @noRd
479+
#'
480+
#' @importFrom GenomicFeatures promoters exons transcripts
481+
#'
482+
#' @examples
483+
#' txdb_file <- system.file("extdata", "hg19_knownGene_sample.sqlite",
484+
#' package="GenomicFeatures")
485+
#' txdb <- loadDb(txdb_file)
486+
#' .txdb2annotranges(txdb)
487+
488+
.txdb2annotranges <- function(txdb) {
489+
t <- transcripts(txdb, columns=c("gene_id"))
490+
t$type <- "transcript"
491+
e <- exons(txdb, columns=c("gene_id"))
492+
e$type <- "exon"
493+
annot <- c(t,e)
494+
annot$gene_name <- annot$gene_id
495+
annot
496+
}
497+
480498
#' `annotateTagClusters` annotates the _tag clusters_ of a _CAGEr_ object.
481499
#'
482500
#' @return `annotateTagClusters` returns the input object with the same
@@ -489,24 +507,29 @@ setMethod("annotateCTSS", c("CAGEexp", "TxDb"), function (object, ranges){
489507
#' @export
490508
#' @rdname annotateCTSS
491509

492-
setGeneric("annotateTagClusters", function(object, ranges, upstream=500, downstream=500)
510+
setGeneric("annotateTagClusters", function(object, annot, upstream=500, downstream=500)
493511
standardGeneric("annotateTagClusters"))
494512

495513
#' @rdname annotateCTSS
496514

497-
setMethod("annotateTagClusters", c("CAGEexp", "GRanges"), function (object, ranges, upstream=500, downstream=500){
515+
setMethod("annotateTagClusters", c("CAGEexp", "GRanges"), function (object, annot, upstream=500, downstream=500){
498516
if(is.null(experiments(object)$tagCountMatrix))
499517
stop("Input does not contain CTSS expressiond data, see ", dQuote("getCTSS()"), ".")
500518
tagClustersGR(object) <- endoapply(tagClustersGR(object), function (gr) {
501-
gr$annotation <- ranges2annot(gr, ranges, upstream, downstream)
502-
if(!is.null(ranges$gene_name))
503-
gr$genes <- ranges2genes(gr, ranges)
519+
gr$annotation <- ranges2annot(gr, annot, upstream, downstream)
520+
if(!is.null(annot$gene_name))
521+
gr$genes <- ranges2genes(gr, annot)
504522
gr
505523
})
506524
validObject(object)
507525
object
508526
})
509527

528+
#' @rdname annotateCTSS
529+
530+
setMethod("annotateTagClusters", c("CAGEexp", "TxDb"), function (object, annot){
531+
annotateTagClusters(object, .txdb2annotranges(annot))
532+
})
510533

511534
#' @name annotateConsensusClusters
512535
#'
@@ -524,21 +547,24 @@ setMethod("annotateTagClusters", c("CAGEexp", "GRanges"), function (object, rang
524547
#'
525548
#' @export
526549

527-
setGeneric("annotateConsensusClusters", function(object, ranges, upstream=500, downstream=500)
550+
setGeneric("annotateConsensusClusters", function(object, annot, upstream=500, downstream=500)
528551
standardGeneric("annotateConsensusClusters"))
529552

530553
#' @rdname annotateCTSS
531554

532-
setMethod("annotateConsensusClusters", c("CAGEexp", "GRanges"), function (object, ranges, upstream=500, downstream=500){
533-
if(is.null(experiments(object)$tagCountMatrix))
534-
stop("Input does not contain CTSS expressiond data, see ", dQuote("getCTSS()"), ".")
535-
consensusClustersGR(object)$annotation <- ranges2annot(consensusClustersGR(object), ranges, upstream, downstream)
536-
if(!is.null(ranges$gene_name))
537-
consensusClustersGR(object)$genes <- ranges2genes(consensusClustersGR(object), ranges)
555+
setMethod("annotateConsensusClusters", c("CAGEexp", "GRanges"), function (object, annot, upstream=500, downstream=500){
556+
consensusClustersGR(object)$annotation <- ranges2annot(consensusClustersGR(object), annot, upstream, downstream)
557+
if(!is.null(annot$gene_name))
558+
consensusClustersGR(object)$genes <- ranges2genes(consensusClustersGR(object), annot)
538559
validObject(object)
539560
object
540561
})
541562

563+
#' @rdname annotateCTSS
564+
565+
setMethod("annotateConsensusClusters", c("CAGEexp", "TxDb"), function (object, annot){
566+
annotateConsensusClusters(object, .txdb2annotranges(annot))
567+
})
542568

543569
#' Hierarchical annotation of genomic regions.
544570
#'
@@ -637,34 +663,6 @@ ranges2annot <- function(ranges, annot, upstream=500, downstream=500) {
637663
Rle(annot)
638664
}
639665

640-
#' @name txdb2annot
641-
#'
642-
#' @rdname ranges2annot
643-
#'
644-
#' @importFrom GenomicFeatures promoters exons transcripts
645-
646-
txdb2annot <- function(ranges, annot) {
647-
findOverlapsBool <- function(A, B) {
648-
overlap <- findOverlaps(A, B)
649-
overlap <- as(overlap, "List")
650-
any(overlap)
651-
}
652-
653-
classes <- c("promoter", "exon", "intron", "unknown")
654-
p <- findOverlapsBool(ranges, trim(suppressWarnings(promoters(annot, 500, 500))))
655-
e <- findOverlapsBool(ranges, exons(annot))
656-
t <- findOverlapsBool(ranges, transcripts(annot))
657-
annot <- sapply( 1:length(ranges), function(i) {
658-
if (p[i]) {classes[1]}
659-
else if (e[i]) {classes[2]}
660-
else if (t[i]) {classes[3]}
661-
else {classes[4]}
662-
})
663-
664-
annot <- factor(annot, levels = classes)
665-
Rle(annot)
666-
}
667-
668666
#' ranges2genes
669667
#'
670668
#' Assign gene symbol(s) to Genomic Ranges.

man/annotateCTSS.Rd

+21-9
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/ranges2annot.Rd

-3
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)