Skip to content

Commit 639724e

Browse files
committed
docs+feat: cache checks if dir doesn't exist
1 parent 3a6d0de commit 639724e

File tree

3 files changed

+124
-28
lines changed

3 files changed

+124
-28
lines changed

NAMESPACE

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ S3method(print,covidcast_data_signal)
66
S3method(print,covidcast_data_source)
77
S3method(print,epidata_call)
88
export("%>%")
9+
export(clear_cache)
910
export(covid_hosp_facility)
1011
export(covid_hosp_facility_lookup)
1112
export(covid_hosp_state_timeseries)
@@ -14,6 +15,7 @@ export(covidcast_epidata)
1415
export(covidcast_meta)
1516
export(delphi)
1617
export(dengue_nowcast)
18+
export(disable_cache)
1719
export(ecdc_ili)
1820
export(epirange)
1921
export(fetch)
@@ -37,8 +39,11 @@ export(pvt_norostat)
3739
export(pvt_quidel)
3840
export(pvt_sensors)
3941
export(pvt_twitter)
42+
export(set_cache)
4043
export(wiki)
4144
export(with_base_url)
45+
import(cachem)
46+
import(openssl)
4247
importFrom(MMWRweek,MMWRweek2Date)
4348
importFrom(checkmate,assert)
4449
importFrom(checkmate,assert_character)

R/cache.R

Lines changed: 115 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,62 +5,149 @@ cache_environ <- new.env(parent = emptyenv())
55
cache_environ$use_cache <- NULL
66
cache_environ$epidatr_cache <- NULL
77
#' create a new cache for this session
8-
#' @rdname set_cache
9-
#' @aliases foob
108
#'
119
#' @description
12-
#' to reuse this cache, the environmental variables need to be set
13-
#' Due to the 80 character limit on filenames in cachem, the cache for a given api call will be stored in the `md5` hash of the filename. For example,
10+
#' `set_cache` (re)defines the cache to use. This does not clear existing data at any previous location, but defines a new access for this R session.
11+
#' Say your cache is normally stored in the default directory, but for the current session you want to save your results in `~/my/temporary/savedirectory`, then you would call `set_cache(dir = "~/my/temporary/savedirectory")`.
12+
#' Or if you know the data from 2 days ago is wrong, you could call `set_cache(days = 1)` to clear older data. In both cases, these changes would only last for a single session.
13+
#' In general, it is better to set your preferences via environmental variables in your `.Renviron` file, with the corresponding variables listed in the arguments section below.
14+
#' In addition to those, there is the `EPIDATR_USE_CACHE` environmental variable, which defaults to `FALSE` unless it is explicitly set to `TRUE`.
1415
#'
16+
#' On the backend, the cache uses cachem, with filenames generated using an md5 encoding of the call url. Each file corresponds to a unique epidata-API call.
1517
#' @examples
16-
#' set_cache()
18+
#' \dontrun{
19+
#' set_cache(
20+
#' dir = "some/subdir",
21+
#' days = 14,
22+
#' max_size = 512,
23+
#' logfile = "some/subdir/logs.txt",
24+
#' prune_rate = 20L
25+
#' )
26+
#' }
1727
#'
18-
#' @param dir
19-
#' @param days
20-
#' @param max_size
21-
#' @param logfile
22-
#' @param prune_rate
23-
set_cache <- function(dir = NULL,
28+
#' @param dir the directory in which the cache is stored. By default, this is `here::here(".epidatr_cache")`. The environmental variable is `EPIDATR_CACHE_DIR`
29+
#' @param days the maximum length of time in days to keep any particular cached call. By default this is `7`
30+
#' @param max_size the size of the entire cache, in MB, at which to start pruning entries.
31+
#' @param logfile where cachem's log of transactions is stored. By default, it is `file.path(dir, "logfile.txt")`, so it's contained in the cache's directory. The environmental variable is `EPIDATR_CACHE_LOGFILE`
32+
#' @param prune_rate how many calls to go between checking if any cache elements are too old or if the cache overall is too large. Defaults to `2000L`. Since cachem fixes the max time between prune checks to 5 seconds, there's little reason to actually change this parameter. Doesn't have a corresponding environmental variable.
33+
#' @export
34+
#' @import cachem
35+
<<<<<<< Updated upstream
36+
set_cache <- function(cache_dir = NULL,
37+
=======
38+
>>>>>>> Stashed changes
2439
days = NULL,
2540
max_size = NULL,
2641
logfile = NULL,
2742
prune_rate = 2000L) {
28-
if (is.null(dir)) {
29-
dir <- Sys.getenv("EPIDATR_CACHE_DIR", unset = here::here(".epidatr_cache"))
43+
if (is.null(cache_dir)) {
44+
cache_dir <- Sys.getenv("EPIDATR_CACHE_DIR", unset = here::here(".epidatr_cache"))
3045
}
31-
stopifnot(is.character(dir))
46+
stopifnot(is.character(cache_dir))
3247
if (is.null(days)) {
33-
days <- Sys.getenv("EPIDATR_CACHE_MAX_AGE_DAYS", unset = 7) %>% as.integer()
48+
days <- Sys.getenv("EPIDATR_CACHE_MAX_AGE_DAYS", unset = 7) %>% as.numeric()
3449
}
3550
if (is.null(max_size)) {
36-
max_size <- Sys.getenv("EPIDATR_CACHE_MAX_SIZE_BYTES", unset = 1024^3) %>% as.integer()
51+
max_size <- Sys.getenv("EPIDATR_CACHE_MAX_SIZE_MB", unset = 1024) %>% as.numeric()
3752
}
3853
if (is.null(logfile)) {
39-
logfile <- Sys.getenv("EPIDATR_CACHE_LOGFILE", unset = file.path(dir, "logfile.txt"))
54+
logfile <- Sys.getenv("EPIDATR_CACHE_LOGFILE", unset = file.path(cache_dir, "logfile.txt"))
4055
}
4156
stopifnot(is.character(logfile))
42-
stopifnot(is.integer(days), is.integer(max_size), is.integer(prune_rate))
43-
dir <- Sys.getenv("EPIDATR_CACHE_DIR", unset = here::here(".epidatr_cache"))
44-
cache_days <- Sys.getenv("")
45-
cache_environ$epidatr_cache <<- cachem::cache_disk(
46-
max_size = max_size,
47-
max_age = days * 24 * 60 * 60,
48-
logfile = logfile,
49-
prune_rate = prune_rate
50-
)
57+
stopifnot(is.numeric(days), is.numeric(max_size), is.integer(prune_rate))
58+
#
59+
# make sure that that directory exists and drag the user into that process
60+
cache_exists <- file.exists(cache_dir)
61+
cache_usable <- file.access(cache_dir, mode = 6) == 0
62+
if (!(cache_exists)) {
63+
user_input <- readline(glue::glue("there is no directory at {cache_dir}; the cache will be turned off until a viable directory has been set. Create one? (yes|no) "))
64+
repeat {
65+
valid_user_input <- ifelse(grepl("yes|no", user_input), sub(".*(yes|no).*", "\\1", user_input), NA)
66+
if (!is.na(valid_user_input)) {
67+
break
68+
}
69+
user_input <- readline(glue::glue(" please answer either yes or no: "))
70+
}
71+
if (valid_user_input == "yes") {
72+
dir.create(cache_dir, showWarnings = TRUE, recursive = TRUE)
73+
cache_exists <- TRUE
74+
cache_usable <- file.access(cache_dir, mode = 6) == 0
75+
}
76+
}
77+
78+
79+
if (!cache_usable) {
80+
print(glue::glue("The directory at {cache_dir} is not accessible; check permissions and/or use a different directory for the cache (see the `set_cache` documentation)."))
81+
} else if (cache_exists) {
82+
cache_environ$epidatr_cache <- cachem::cache_disk(
83+
dir = cache_dir,
84+
max_size = as.integer(max_size * 1024^2),
85+
max_age = days * 24 * 60 * 60,
86+
logfile = logfile,
87+
prune_rate = prune_rate
88+
)
89+
}
5190
}
5291

53-
#' to manually reset the cache, deleting the currently saved data and starting fresh, call `epidatr_cache$destroy()`
92+
93+
#' manually reset the cache, deleting all currently saved data and starting afresh
94+
#'
95+
#' @description
96+
#' deletes the current cache and sets up a new, empty cache. Deletes local data! If you are using a session-unique cache, you will have to pass the arguments you used for `set_cache` earlier, otherwise the system-wide `.Renviron`-based defaults will be used.
97+
#' @examples
98+
#' \dontrun{
99+
#' clear_cache(
100+
#' dir = "some/subdir",
101+
#' days = 14,
102+
#' max_size = 512,
103+
#' logfile = "some/subdir/logs.txt",
104+
#' prune_rate = 20L
105+
#' )
106+
#' }
107+
#'
108+
#' @inheritParams set_cache
109+
#' @export
54110
clear_cache <- function(...) {
55111
cache_environ$epidatr_cache$destroy()
56112
set_cache(...)
57113
}
58114

115+
#' turn off the caching for this session
116+
#' @description
117+
#' Disable caching until you call `set_cache` or restart R. The files defining the cache are untouched. If you are looking to disable the caching more permanently, set `EPIDATR_USE_CACHE=FALSE` as an environmental variable in your `.Renviron`.
118+
#' @export
59119
disable_cache <- function() {
60-
cache_environ$epidatr_cache <<- NULL
120+
cache_environ$epidatr_cache <- NULL
121+
}
122+
123+
#' print out information about the current cache
124+
#' @description
125+
#' Print out the information about the cache (as would be returned by cachem's `info()` method)
126+
#' @export
127+
cache_info <- function() {
128+
cache_environ$epidatr_cache$info()
61129
}
62130

131+
<<<<<<< Updated upstream
132+
=======
63133
cache_epidata_call <- function(call, ...) {
134+
#' print out information about the current cache
135+
#' @description
136+
#' Print out the information about the cache (as would be returned by cachem's `info()` method)
137+
#' @export
138+
cache_info <- function() {
139+
cache_environ$epidatr_cache$info()
140+
}
141+
142+
>>>>>>> Stashed changes
143+
#' route an epidata call through the cache
144+
#'
145+
#' @description
146+
#' the guts of caching; it's interposed between fetch and the specific fetch methods. Internal method only.
147+
#'
148+
#' @param epidata_call the `epidata_call` object
149+
#' @inheritParams fetch
150+
#' @import cachem openssl
64151
cache_epidata_call <- function(epidata_call, ...) {
65152
if (cache_environ$use_cache && !is.null(cache_environ$epidatr_cache)) {
66153
target <- request_url(epidata_call)

tests/testthat/test-cache.R

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,6 @@
11
# need to come up with some tests
22
# after everything else, make sure that epidatr_cache isn't defined in the global state
3+
# use the existing examples to save, then load and compare the values
4+
# make sure the md5 name is correct
5+
# make sure that file/folder creation works as expected
6+
# make sure the saves are in the right location, maybe load them the dumb way

0 commit comments

Comments
 (0)