Skip to content

feat: add ignore_cache to fetch_args_list #301

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Mar 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/test-coverage.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,4 +53,4 @@ jobs:
path: ${{ runner.temp }}/package

- name: Upload coverage reports to Codecov
uses: codecov/codecov-action@v3
uses: codecov/codecov-action@v5
6 changes: 3 additions & 3 deletions DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ URL: https://cmu-delphi.github.io/epidatr/,
https://cmu-delphi.github.io/delphi-epidata/,
https://github.com/cmu-delphi/epidatr
BugReports: https://github.com/cmu-delphi/epidatr/issues
Depends:
Depends:
R (>= 3.5.0)
Imports:
cachem,
Expand Down Expand Up @@ -58,9 +58,9 @@ Suggests:
rmarkdown,
testthat (>= 3.1.5),
withr
VignetteBuilder:
VignetteBuilder:
knitr
Remotes:
Remotes:
cmu-delphi/delphidocs
Config/Needs/website: cmu-delphi/delphidocs
Config/testthat/edition: 3
Expand Down
2 changes: 1 addition & 1 deletion NAMESPACE
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ import(cachem)
import(glue)
importFrom(MMWRweek,MMWRweek)
importFrom(MMWRweek,MMWRweek2Date)
importFrom(cachem,is.key_missing)
importFrom(checkmate,assert)
importFrom(checkmate,assert_character)
importFrom(checkmate,assert_integerish)
Expand Down Expand Up @@ -80,7 +81,6 @@ importFrom(magrittr,"%>%")
importFrom(openssl,md5)
importFrom(purrr,map_chr)
importFrom(purrr,map_lgl)
importFrom(readr,read_csv)
importFrom(stats,na.omit)
importFrom(tibble,as_tibble)
importFrom(tibble,tibble)
Expand Down
1 change: 1 addition & 0 deletions NEWS.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
- Support more date formats in function to convert dates to epiweeks. Use `parse_api_date` since it already supports both common formats. #276
- `EPIDATR_USE_CACHE` only supported exactly "TRUE" before. Now it supports all logical values and includes a warning when any value that can't be converted to logical is provided. #273
- `missing` doesn't count default values as non-missing. If a user doesn't pass `geo_values` or `time_values` (both of which default to `"*"` in `pub_covidcast`), or `dates` (in `pub_covid_hosp_state_timeseries`), the missing check fails. To avoid this, just don't check missingness of those two arguments.
- `fetch_args_list` now has an `refresh_cache` argument, which is `FALSE` by default.

# epidatr 1.1.1

Expand Down
143 changes: 89 additions & 54 deletions R/cache.R
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
cache_environ <- new.env(parent = emptyenv())
cache_environ$use_cache <- NULL
cache_environ$epidatr_cache <- NULL
cache_environ$cache_args <- NULL

#' Create or renew a cache for this session
#' @aliases set_cache
Expand Down Expand Up @@ -169,6 +170,12 @@ set_cache <- function(cache_dir = NULL,
max_age = days * 24 * 60 * 60,
logfile = file.path(cache_dir, logfile)
)
cache_environ$cache_args <- list2(
cache_dir = cache_dir,
days = days,
max_size = max_size,
logfile = logfile
)
}

cli::cli_inform(c(
Expand All @@ -183,9 +190,9 @@ set_cache <- function(cache_dir = NULL,
#' Manually reset the cache, deleting all currently saved data and starting afresh
#' @description
#' Deletes the current cache and resets a new cache. Deletes local data! If you
#' are using a session unique cache, you will have to pass the arguments you
#' used for `set_cache` earlier, otherwise the system-wide `.Renviron`-based
#' defaults will be used.
#' are using a session unique cache, the previous settings will be reused. If
#' you pass in new `set_cache` arguments, they will take precedence over the
#' previous settings.
#' @param disable instead of setting a new cache, disable caching entirely;
#' defaults to `FALSE`
#' @inheritDotParams set_cache
Expand All @@ -198,11 +205,22 @@ set_cache <- function(cache_dir = NULL,
clear_cache <- function(..., disable = FALSE) {
if (any(!is.na(cache_environ$epidatr_cache))) {
cache_environ$epidatr_cache$destroy()
recovered_args <- cache_environ$cache_args
cache_environ$cache_args <- NULL
} else {
recovered_args <- list()
}
args <- rlang::dots_list(
...,
confirm = FALSE,
!!!recovered_args,
.homonyms = "first",
.ignore_empty = "all"
)
if (disable) {
cache_environ$epidatr_cache <- NULL
} else {
set_cache(...)
rlang::inject(set_cache(!!!args))
}
}

Expand Down Expand Up @@ -234,68 +252,85 @@ disable_cache <- function() {
#' disable without deleting
#' @export
cache_info <- function() {
if (is.null(cache_environ$epidatr_cache)) {
return("there is no cache")
} else {
if (is_cache_enabled()) {
return(cache_environ$epidatr_cache$info())
} else {
return("there is no cache")
}
}

#' Dispatch caching
#' Check if the cache is enabled
#' @keywords internal
is_cache_enabled <- function() {
!is.null(cache_environ$epidatr_cache)
}

#' Helper that checks whether a call is actually cachable
#'
#' The cacheable endpoints are those with `as_of` or `issues` parameters:
#' - pub_covidcast
#' - pub_covid_hosp_state_timeseries
#' - pub_ecdc_ili
#' - pub_flusurv
#' - pub_fluview_clinical
#' - pub_fluview
#' - pub_kcdc_ili
#' - pub_nidss_flu
#' - pub_paho_dengue
#'
#' @keywords internal
check_is_cachable <- function(epidata_call, fetch_args) {
as_of_cachable <- !is.null(epidata_call$params$as_of) && !identical(epidata_call$params$as_of, "*")
issues_cachable <- !is.null(epidata_call$params$issues) && !identical(epidata_call$params$issues, "*")
is_cachable <- (
# Cache should be enabled
is_cache_enabled() &&
# Call should be cachable
(as_of_cachable || issues_cachable) &&
# This should not be a dry run
!fetch_args$dry_run &&
# Base url should be null
is.null(fetch_args$base_url) &&
# Don't cache debug calls
!fetch_args$debug &&
# Format type should be json
fetch_args$format_type == "json" &&
# Fields should be null
is.null(fetch_args$fields) &&
# Disable date parsing should be false
!fetch_args$disable_date_parsing &&
# Disable data frame parsing should be false
!fetch_args$disable_data_frame_parsing &&
# Refresh cache should be false
fetch_args$refresh_cache == FALSE
)
return(is_cachable)
}

#' Check for warnings for the cache
#'
#' @description
#' The guts of caching, its interposed between fetch and the specific fetch
#' methods. Internal method only.
#' Adds warnings when arguments are potentially too recent to use with the cache.
#'
#' @param epidata_call the `epidata_call` object
#' @param fetch_args the args list for fetch as generated by [`fetch_args_list()`]
#' @keywords internal
#' @importFrom openssl md5
cache_epidata_call <- function(epidata_call, fetch_args = fetch_args_list()) {
is_cachable <- check_is_cachable(epidata_call, fetch_args)
if (is_cachable) {
target <- request_url(epidata_call)
hashed <- md5(target)
cached <- cache_environ$epidatr_cache$get(hashed)
as_of_recent <- check_is_recent(epidata_call$params$as_of, 7)
issues_recent <- check_is_recent(epidata_call$params$issues, 7)
if (as_of_recent || issues_recent) {
cli::cli_warn(
c(
"Using cached results with `as_of` within the past week (or the future!).
check_for_cache_warnings <- function(epidata_call, fetch_args) {
as_of_recent <- check_is_recent(epidata_call$params$as_of, 7)
issues_recent <- check_is_recent(epidata_call$params$issues, 7)
if (as_of_recent || issues_recent) {
cli::cli_warn(
c(
"Using cached results with `as_of` within the past week (or the future!).
This will likely result in an invalid cache. Consider",
"i" = "disabling the cache for this session with `disable_cache` or
"i" = "disabling the cache for this session with `disable_cache` or
permanently with environmental variable `EPIDATR_USE_CACHE=FALSE`",
"i" = "setting `EPIDATR_CACHE_MAX_AGE_DAYS={Sys.getenv('EPIDATR_CACHE_MAX_AGE_DAYS
"i" = "setting `EPIDATR_CACHE_MAX_AGE_DAYS={Sys.getenv('EPIDATR_CACHE_MAX_AGE_DAYS
', unset = 1)}` to e.g. `3/24` (3 hours)."
),
.frequency = "regularly",
.frequency_id = "cache timing issues",
class = "cache_recent_data"
)
}
if (!is.key_missing(cached)) {
cli::cli_warn(
c(
"Loading from the cache at {cache_environ$epidatr_cache$info()$dir};
see {cache_environ$epidatr_cache$info()$logfile} for more details."
),
.frequency = "regularly",
.frequency_id = "using the cache",
class = "cache_access"
)
return(cached[[1]])
}
}
# need to actually get the data, since its either not in the cache or we're not caching
runtime <- system.time(if (epidata_call$only_supports_classic) {
fetched <- fetch_classic(epidata_call, fetch_args)
} else {
fetched <- fetch_tbl(epidata_call, fetch_args)
})
# add it to the cache if appropriate
if (is_cachable) {
cache_environ$epidatr_cache$set(hashed, list(fetched, Sys.time(), runtime))
),
.frequency = "regularly",
.frequency_id = "cache timing issues",
class = "cache_recent_data"
)
}
return(fetched)
}
75 changes: 43 additions & 32 deletions R/epidatacall.R
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ print.epidata_call <- function(x, ...) {
#' @param debug if `TRUE`, return the raw response from the API
#' @param format_type the format to request from the API, one of classic, json,
#' csv; this is only used by `fetch_debug`, and by default is `"json"`
#' @param refresh_cache if `TRUE`, ignore the cache, fetch the data from the
#' API, and update the cache, if it is enabled
#' @return A `fetch_args` object containing all the specified options
#' @export
#' @aliases fetch_args
Expand All @@ -168,7 +170,8 @@ fetch_args_list <- function(
base_url = NULL,
dry_run = FALSE,
debug = FALSE,
format_type = c("json", "classic", "csv")) {
format_type = c("json", "classic", "csv"),
refresh_cache = FALSE) {
rlang::check_dots_empty()

assert_character(fields, null.ok = TRUE, any.missing = FALSE)
Expand All @@ -180,6 +183,7 @@ fetch_args_list <- function(
assert_logical(dry_run, null.ok = FALSE, len = 1L, any.missing = TRUE)
assert_logical(debug, null.ok = FALSE, len = 1L, any.missing = FALSE)
format_type <- match.arg(format_type)
assert_logical(refresh_cache, null.ok = FALSE, len = 1L, any.missing = FALSE)

structure(
list(
Expand All @@ -191,7 +195,8 @@ fetch_args_list <- function(
base_url = base_url,
dry_run = dry_run,
debug = debug,
format_type = format_type
format_type = format_type,
refresh_cache = refresh_cache
),
class = "fetch_args"
)
Expand Down Expand Up @@ -219,6 +224,9 @@ print.fetch_args <- function(x, ...) {
#' - For `fetch`: a tibble or a JSON-like list
#' @export
#' @include cache.R
#' @importFrom openssl md5
#' @importFrom cachem is.key_missing
#' @importFrom tibble tibble as_tibble
#'
fetch <- function(epidata_call, fetch_args = fetch_args_list()) {
stopifnot(inherits(epidata_call, "epidata_call"))
Expand All @@ -228,48 +236,49 @@ fetch <- function(epidata_call, fetch_args = fetch_args_list()) {
epidata_call <- with_base_url(epidata_call, fetch_args$base_url)
}

# Just display the epidata_call object, don't fetch the data
if (fetch_args$dry_run) {
return(epidata_call)
}

# Just display the raw response from the API, don't parse
if (fetch_args$debug) {
return(fetch_debug(epidata_call, fetch_args))
}

cache_epidata_call(epidata_call, fetch_args = fetch_args)
}

#' Fetches the data and returns a tibble
#' @rdname fetch_tbl
#'
#' @param epidata_call an instance of `epidata_call`
#' @param fetch_args a `fetch_args` object
#' @importFrom readr read_csv
#' @importFrom httr stop_for_status content
#' @importFrom tibble as_tibble tibble
#' @return
#' - For `fetch_tbl`: a [`tibble::tibble`]
#' @keywords internal
fetch_tbl <- function(epidata_call, fetch_args = fetch_args_list()) {
stopifnot(inherits(epidata_call, "epidata_call"))
stopifnot(inherits(fetch_args, "fetch_args"))
# Check if the data is cachable
is_cachable <- check_is_cachable(epidata_call, fetch_args)
if (is_cachable) {
check_for_cache_warnings(epidata_call, fetch_args)

if (epidata_call$only_supports_classic) {
cli::cli_abort(
c(
"This endpoint only supports the classic message format, due to non-standard behavior.
Use fetch_classic instead."
),
epidata_call = epidata_call,
class = "only_supports_classic_format"
)
# Check if the data is in the cache
target <- request_url(epidata_call)
hashed <- md5(target)
cached <- cache_environ$epidatr_cache$get(hashed)
if (!is.key_missing(cached)) {
return(cached[[1]]) # extract `fetched` from `fetch()`, no metadata
}
}

response_content <- fetch_classic(epidata_call, fetch_args = fetch_args)
if (fetch_args$return_empty && length(response_content) == 0) {
return(tibble())
# Need to actually get the data, since its either not in the cache or we're not caching
runtime <- system.time(if (epidata_call$only_supports_classic) {
fetch_args[["disable_data_frame_parsing"]] <- TRUE
fetched <- fetch_classic(epidata_call, fetch_args)
} else {
response_content <- fetch_classic(epidata_call, fetch_args = fetch_args)
if (fetch_args$return_empty && length(response_content) == 0) {
fetched <- tibble()
} else {
fetched <- parse_data_frame(epidata_call, response_content, fetch_args$disable_date_parsing) %>% as_tibble()
}
})

# Add it to the cache if appropriate
if (is_cachable || (fetch_args$refresh_cache && is_cache_enabled())) {
cache_environ$epidatr_cache$set(hashed, list(fetched, Sys.time(), runtime))
}
return(parse_data_frame(epidata_call, response_content, fetch_args$disable_date_parsing) %>% as_tibble())

return(fetched)
}

#' Fetches the data, raises on epidata errors, and returns the results as a
Expand Down Expand Up @@ -303,6 +312,7 @@ fetch_classic <- function(epidata_call, fetch_args = fetch_args_list()) {
)
}
}

if (response_content$message != "success") {
cli::cli_warn(
c(
Expand All @@ -311,6 +321,7 @@ fetch_classic <- function(epidata_call, fetch_args = fetch_args_list()) {
class = "epidata_warning"
)
}

return(response_content$epidata)
}

Expand Down
Loading