Skip to content

Commit

Permalink
Make bedset cache
Browse files Browse the repository at this point in the history
  • Loading branch information
jwokaty committed Jan 15, 2025
1 parent 49acf32 commit 31d2932
Show file tree
Hide file tree
Showing 10 changed files with 136 additions and 75 deletions.
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
Package: bedbaser
Title: A BEDbase client
Version: 0.99.17
Version: 0.99.18
Authors@R: c(
person(
given = "Jen",
Expand Down
25 changes: 13 additions & 12 deletions R/bedbaser.R
Original file line number Diff line number Diff line change
Expand Up @@ -56,18 +56,13 @@
BEDbase <- function(cache_path, quietly = FALSE) {
if (missing(cache_path)) {
cache_path <- tools::R_user_dir("bedbaser", which = "cache")
} else if (!dir.exists(cache_path)) {
dir.create(file.path(cache_path, "bedfiles"), recursive = TRUE)
dir.create(file.path(cache_path, "bedsets"), recursive = TRUE)
}
bedfiles_path <- file.path(cache_path, "bedfiles")
bedsets_path <- file.path(cache_path, "bedsets")
bedbase <- suppressWarnings(
.BEDbase(
bedfiles = BiocFileCache::BiocFileCache(
file.path(cache_path, "bedfiles")
),
bedsets = BiocFileCache::BiocFileCache(
file.path(cache_path, "bedsets")
),
bedfiles = BiocFileCache::BiocFileCache(bedfiles_path, ask = FALSE),
bedsets = BiocFileCache::BiocFileCache(bedsets_path, ask = FALSE),
AnVIL::Service(
service = "bedbase",
host = "api.bedbase.org",
Expand All @@ -91,12 +86,13 @@ BEDbase <- function(cache_path, quietly = FALSE) {
#' @rdname BEDbase
#'
#' @param x BEDbase(1) object
#' @param type character(1) bedfiles or bedsets
#' @param cache_type character(1) bedfiles or bedsets
#'
#' @export
setGeneric(
"getCache",
function(x, cache_type = c("bedfiles", "bedsets")) standardGeneric("getCache")
function(x, cache_type = c("bedfiles", "bedsets"))
standardGeneric("getCache")
)

#' Return cache path
Expand Down Expand Up @@ -501,14 +497,15 @@ bb_to_granges <- function(
bb_to_grangeslist <- function(bedbase, bedset_id, quietly = TRUE) {
beds <- bb_beds_in_bedset(bedbase, bedset_id)
gros <- list()
.cache_bedset_txt(bedset_id, beds$id, getCache(bedbase, "bedsets"))
for (bed_id in beds$id) {
gro <- bb_to_granges(bedbase, bed_id, quietly = quietly)
gros[[length(gros) + 1]] <- gro
}
GenomicRanges::GRangesList(gros)
}

#' Save a BED or BEDset files to a path given an id
#' Save a BED file or BEDset to a path given an id
#'
#' @rdname bb_save
#'
Expand Down Expand Up @@ -538,6 +535,10 @@ bb_save <- function(
ids <- list(metadata$id)
} else {
ids <- metadata$bed_ids
.cache_bedset_txt(
bed_or_bedset_id, unlist(metadata$bed_ids),
getCache(bedbase, "bedsets")
)
}
for (id in ids) {
metadata <- bb_metadata(bedbase, id, TRUE)
Expand Down
81 changes: 58 additions & 23 deletions R/cache.R
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
#' Create nested path
#'
#' @description Create directory structure following the BEDbase url structure
#' by creating nested directories in the cache.
#' @description Create directory structure following the BEDbase BED
#' file url or bedset id by creating nested directories in the cache.
#' If the path is for a bedset, the path is given ".txt" as an extension.
#'
#' @param file_name character(1) BEDbase file name
#' @param bfc BiocFileCache(1) object
#' @param bedbase_path character(1) BED file url or bedset id
#' @param cache BiocFileCache(1) object
#'
#' @return character(1) file path
#'
Expand All @@ -13,14 +14,20 @@
#' "https://data2.bedbase.org/files/2/6/",
#' "26a57da7c732a8e63a1dda7ea18af021.bed.gz"
#' )
#' .create_nested_path(bedbase_url, bfc)
#' # [1] "2/6/26a57da7c732a8e63a1dda7ea18af021.bed.gz"
#' bedbase <- BEDbase(quietly = TRUE)
#' cache <- getCache(bedbase, "bedfiles")
#' .create_nested_path(bedbase_url, cache)
#' # [1] "/path/to/cache/bedfiles/2/6/26a57da7c732a8e63a1dda7ea18af021.bed.gz"
#'
#' @noRd
.create_nested_path <- function(bedbase_url, bfc) {
file_name <- .get_file_name(bedbase_url)
bfc_path <- BiocFileCache::bfccache(bfc)
nested_path <- file.path(bfc_path, substr(file_name, 1, 1))
.create_nested_path <- function(bedbase_path, cache) {
if (grepl("/", bedbase_path)) {
file_name <- .get_file_name(bedbase_path)
} else {
file_name <- paste0(bedbase_path, ".txt")
}
cache_path <- BiocFileCache::bfccache(cache)
nested_path <- file.path(cache_path, substr(file_name, 1, 1))
if (!dir.exists(nested_path)) {
dir.create(nested_path)
}
Expand All @@ -31,13 +38,41 @@
file.path(nested_path, file_name)
}

#' Retrieve path from cache or download file and cache
#' Cache bedset
#'
#' @description Create an entry in the bedsets cache to a nested path for a
#' file named after the BEDset's id that contains the BED ids.
#'
#' @param id character(1) BEDset id
#' @param bedfiles list(1) BED ids
#' @param cache BiocFileCache(1) bedbaser cache
#'
#' This function is described in the BiocFileCache vignette.
#' @return character(1) local file path
#'
#' @examples
#' bedbase <- BEDbase(quietly = TRUE)
#' id <- "test_bedset"
#' cache <- getCache(bedbase, "bedfiles")
#' beds <- bb_beds_in_bedset(bedbase, id)
#' .cache_bedset_txt(id, beds$id, cache)
#'
#' @noRd
.cache_bedset_txt <- function(id, bedfiles, cache) {
rpath <- .create_nested_path(id, cache)
writeLines(bedfiles, rpath)
rid <- names(BiocFileCache::bfcadd(cache,
rname = id, fpath = rpath, rtype = "local", rpath = rpath,
download = FALSE, action = "asis", verbose = !quietly
))
rpath
}

#' Cache a bed file, downloading if needed, and return a path to the file in
#' the cache.
#'
#' @param id character(1) BEDbase id
#' @param bedbase_url character(1) remote resource
#' @param bfc BiocFileCache(1) object
#' @param cache BiocFileCache(1) object
#' @param quietly logical(1) (default \code{TRUE}) display message
#'
#' @return character(1) filepath
Expand All @@ -48,22 +83,22 @@
#' "https://data2.bedbase.org/files/2/6/",
#' "26a57da7c732a8e63a1dda7ea18af021.bed.gz"
#' )
#' .download_to_cache(id, bedbase_url, BiocFileCache::BiocFileCache(tempdir()))
#' .cache_bedfile(id, bedbase_url, BiocFileCache::BiocFileCache(tempdir()))
#'
#' @noRd
.download_to_cache <- function(id, bedbase_url, bfc, quietly = TRUE) {
rid <- BiocFileCache::bfcquery(bfc, id, "rname")$rid
.cache_bedfile <- function(id, bedbase_url, cache, quietly = TRUE) {
rid <- BiocFileCache::bfcquery(cache, id, "rname")$rid
if (!length(rid)) {
rpath <- .create_nested_path(bedbase_url, cache)
if (!quietly) {
rlang::inform(paste("Downloading", bedbase_url, "..."))
}
rpath <- .create_nested_path(bedbase_url, bfc)
download.file(bedbase_url, rpath, quiet = quietly)
rid <- names(BiocFileCache::bfcadd(bfc,
rname = id,
fpath = rpath, rtype = "local", rpath = rpath,
download = FALSE, action = "asis", verbose = !quietly
utils::download.file(bedbase_url, rpath, quiet = quietly)
rid <- names(BiocFileCache::bfcadd(cache,
rname = id, fpath = rpath,
rtype = "local", rpath = rpath, download = FALSE, action = "asis",
verbose = !quietly
))
}
BiocFileCache::bfcrpath(bfc, rids = rid)
BiocFileCache::bfcrpath(cache, rids = rid)
}
9 changes: 4 additions & 5 deletions R/utils.R
Original file line number Diff line number Diff line change
Expand Up @@ -64,12 +64,11 @@
#' .get_file(md, tempdir(), "bed", "http")
#'
#' @noRd
.get_file <- function(
metadata, cache_or_path, file_type, access_type,
quietly = TRUE) {
.get_file <- function(metadata, cache_or_path, file_type, access_type,
quietly = TRUE) {
file_url <- .get_url(metadata, file_type, access_type)
if (methods::is(cache_or_path, "BiocFileCache")) {
bed_file <- .download_to_cache(
bed_file <- .cache_bedfile(
metadata$id, file_url,
cache_or_path, quietly
)
Expand All @@ -80,7 +79,7 @@
bed_file
}

#' Get extra_cols
#' Attempt to guess extraCols for rtracklayer with dummy column names
#'
#' @param file_path character(1) path to BED
#' @param x double(1) the x in BEDX+Y
Expand Down
2 changes: 1 addition & 1 deletion man/BEDbase.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions man/bb_save.Rd

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

14 changes: 7 additions & 7 deletions tests/testthat/test-bedbaser.R
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ test_that("bb_bed_text_search returns results scored against the query", {
})

test_that("bb_to_granges returns a GRanges object given a 3+0 bed file", {
bedbase <- BEDbase(quietly = TRUE)
bedbase <- BEDbase(tempdir(), quietly = TRUE)
id <- "95a593b8337074a334b425aba5e77d4c"
md <- bb_metadata(bedbase, id, TRUE)
expect_equal("bed3+0", md$bed_type)
Expand All @@ -102,7 +102,7 @@ test_that("bb_to_granges returns a GRanges object given a 3+0 bed file", {
})

test_that("bb_to_granges returns a GRanges object given a bigbed file", {
bedbase <- BEDbase(quietly = TRUE)
bedbase <- BEDbase(tempdir(), quietly = TRUE)
id <- "ffc1e5ac45d923135500bdd825177356"
if (.Platform$OS.type != "windows") {
gro <- bb_to_granges(bedbase, id, "bigbed")
Expand All @@ -116,7 +116,7 @@ test_that("bb_to_granges returns a GRanges object given a bigbed file", {
})

test_that("bb_to_granges returns a GRanges object given narrowpeak (6+4) file", {
bedbase <- BEDbase(quietly = TRUE)
bedbase <- BEDbase(tempdir(), quietly = TRUE)
id <- "bbad85f21962bb8d972444f7f9a3a932"
md <- bb_metadata(bedbase, id, TRUE)
expect_equal("bed6+4", md$bed_type)
Expand All @@ -133,7 +133,7 @@ test_that("bb_to_granges returns a GRanges object given narrowpeak (6+4) file",
})

test_that("bb_to_granges returns GRanges object given bed3+9 with genome", {
bedbase <- BEDbase(quietly = TRUE)
bedbase <- BEDbase(tempdir(), quietly = TRUE)
id <- "608827efc82fcaa4b0bfc65f590ffef8"
md <- bb_metadata(bedbase, id, TRUE)
expect_equal("bed3+9", md$bed_type)
Expand All @@ -149,7 +149,7 @@ test_that("bb_to_granges returns GRanges object given bed3+9 with genome", {
})

test_that("bb_to_granges allows passing extra_cols", {
bedbase <- BEDbase(quietly = TRUE)
bedbase <- BEDbase(tempdir(), quietly = TRUE)
id <- "608827efc82fcaa4b0bfc65f590ffef8"
md <- bb_metadata(bedbase, id, TRUE)
expect_equal("bed3+9", md$bed_type)
Expand All @@ -168,14 +168,14 @@ test_that("bb_to_granges allows passing extra_cols", {
})

test_that("bb_to_grangeslist creates a GRangesList", {
bedbase <- BEDbase(quietly = TRUE)
bedbase <- BEDbase(tempdir(), quietly = TRUE)
grl <- bb_to_grangeslist(bedbase, "lola_hg38_ucsc_features")
expect_true(methods::is((grl)[1], "CompressedGRangesList"))
expect_equal(11, length(grl))
})

test_that("bb_save saves bed files to a path", {
bedbase <- BEDbase(quietly = TRUE)
bedbase <- BEDbase(tempdir(), quietly = TRUE)
path <- tempdir()
if (!dir.exists(path)) {
dir.create(path)
Expand Down
47 changes: 33 additions & 14 deletions tests/testthat/test-cache.R
Original file line number Diff line number Diff line change
@@ -1,31 +1,50 @@
test_that("nested path is created", {
bedbase <- BEDbase(quietly = TRUE)
bfc <- getCache(bedbase, "bedfiles")
bfc_path <- BiocFileCache::bfccache(bfc)
bedbase <- BEDbase(tempdir(), quietly = TRUE)
cache <- getCache(bedbase, "bedfiles")
cache_path <- BiocFileCache::bfccache(cache)
bedbase_url <- "http://an_example_url/2468.bed.gz"
expect_equal(
.create_nested_path(bedbase_url, bfc),
file.path(bfc_path, "2/4/2468.bed.gz")
.create_nested_path(bedbase_url, cache),
file.path(cache_path, "2/4/2468.bed.gz")
)
})

test_that("default cache location is used", {
path <- tools::R_user_dir("bedbaser", which = "cache")
bedbase <- BEDbase(quietly = TRUE)
id <- "bbad85f21962bb8d972444f7f9a3a932"
gro <- bb_to_granges(bedbase, id, "bed")
bfc <- BiocFileCache::BiocFileCache(file.path(path, "bedfiles"))
expect_equal(BiocFileCache::bfccache(bfc), file.path(path, "bedfiles"))
md <- bb_metadata(bedbase, id, TRUE)
file_path <- .get_file(md, getCache(bedbase, "bedfiles"), "bed", "http")
expect_true(file_path %in% BiocFileCache::bfcinfo(bfc)$rpath)
expect_true(dir.exists(file.path(path, "bedfiles")))
expect_true(dir.exists(file.path(path, "bedsets")))
})

test_that("path is used if set when calling constructor", {
path <- tempdir()
bedbase <- BEDbase(path, TRUE)
bfc <- getCache(bedbase, "bedfiles")
cache <- getCache(bedbase, "bedfiles")
id <- "bbad85f21962bb8d972444f7f9a3a932"
gro <- bb_to_granges(bedbase, id, "bed")
expect_equal(BiocFileCache::bfccache(bfc), file.path(path, "bedfiles"))
expect_equal(BiocFileCache::bfccache(cache), file.path(path, "bedfiles"))
})

test_that("bedset txt is cached", {
bedbase <- BEDbase(tempdir(), quietly = TRUE)
id <- "test_bedset"
beds <- bb_beds_in_bedset(bedbase, id)
cache <- getCache(bedbase, "bedsets")
.cache_bedset_txt(id, beds$id, cache)
rpath <- .create_nested_path(id, cache)
expect_equal(readLines(rpath), beds$id)
expect_equal(BiocFileCache::bfcquery(cache, id, "rname")$rpath, rpath)
})

test_that("bed files are cached", {
bedbase <- BEDbase(tempdir(), quietly = TRUE)
id <- bb_example(bedbase, "bed")$id
cache <- getCache(bedbase, "bedfiles")
rid <- BiocFileCache::bfcquery(cache, id, "rname")$rid
expect_length(rid, 0)
bedbase_url <- .get_url(bb_metadata(bedbase, id, TRUE), "bed", "http")
rpath <- .cache_bedfile(id, bedbase_url, cache)
expect_true(file.exists(rpath))
rid <- BiocFileCache::bfcquery(cache, id, "rname")$rid
expect_length(length(rid), 1)
})
Loading

0 comments on commit 31d2932

Please sign in to comment.