From 737e2cb0e7cd204192033cb5e8221a76e44e0154 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 27 Nov 2023 16:48:08 -0500 Subject: [PATCH 01/10] remove duplicate spec for issue field in hosp state timeseries --- R/endpoints.R | 1 - 1 file changed, 1 deletion(-) diff --git a/R/endpoints.R b/R/endpoints.R index a1cba8f6..2acebe76 100644 --- a/R/endpoints.R +++ b/R/endpoints.R @@ -513,7 +513,6 @@ pub_covid_hosp_state_timeseries <- function( create_epidata_field_info("state", "text"), create_epidata_field_info("issue", "date"), create_epidata_field_info("date", "date"), - create_epidata_field_info("issue", "date"), create_epidata_field_info("critical_staffing_shortage_today_yes", "bool"), create_epidata_field_info("critical_staffing_shortage_today_no", "bool"), create_epidata_field_info("critical_staffing_shortage_today_not_reported", "bool"), From a43ea6f47cc8d5032b9056bb10dd0a2040c3e2cf Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 27 Nov 2023 16:49:46 -0500 Subject: [PATCH 02/10] error if epidata meta has duplicates --- R/epidatacall.R | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) diff --git a/R/epidatacall.R b/R/epidatacall.R index c858d25f..47f7eb58 100644 --- a/R/epidatacall.R +++ b/R/epidatacall.R @@ -41,12 +41,31 @@ #' @return #' - For `create_epidata_call`: an `epidata_call` object #' +#' @importFrom purrr map_chr create_epidata_call <- function(endpoint, params, meta = NULL, only_supports_classic = FALSE) { stopifnot(is.character(endpoint), length(endpoint) == 1) stopifnot(is.list(params)) stopifnot(is.null(meta) || is.list(meta)) stopifnot(is.logical(only_supports_classic), length(only_supports_classic) == 1) + + if (length(unique(meta)) != length(meta)) { + cli::cli_abort(c( + "List of expected epidata fields contains duplicate entries", + "i" = "duplicates in meta can cause problems parsing fetched data", + "Please fix in `endpoints.R`" + )) + } + + meta_field_names <- map_chr(meta, ~ .x$name) + if (length(meta_field_names) != length(unique(meta_field_names))) { + cli::cli_abort(c( + "List of expected epidata fields contains duplicate names", + "i" = "duplicates in meta can cause problems parsing fetched data", + "Please fix in `endpoints.R`" + )) + } + if (is.null(meta)) { meta <- list() } From 83d781337afff487ad4149c906a90acc683838d5 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Mon, 27 Nov 2023 16:50:36 -0500 Subject: [PATCH 03/10] don't try to convert date fields to date again; warn if meta and return values have diff number of fields --- R/model.R | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/R/model.R b/R/model.R index 249124e8..85fd5516 100644 --- a/R/model.R +++ b/R/model.R @@ -122,9 +122,9 @@ parse_value <- function(info, value, disable_date_parsing = FALSE) { if (is.null(value)) { return(value) - } else if (info$type == "date" && !disable_date_parsing) { + } else if (info$type == "date" && !disable_date_parsing && !inherits(value, "Date")) { return(parse_api_date(value)) - } else if (info$type == "epiweek" && !disable_date_parsing) { + } else if (info$type == "epiweek" && !disable_date_parsing && !inherits(value, "Date")) { return(parse_api_week(value)) } else if (info$type == "bool") { return(as.logical(value)) @@ -142,6 +142,11 @@ parse_data_frame <- function(epidata_call, df, disable_date_parsing = FALSE) { stopifnot(inherits(epidata_call, "epidata_call")) meta <- epidata_call$meta df <- as.data.frame(df) + + if (length(meta) != 0 && ncol(df) != length(meta)) { + cli::cli_warn("Not all return columns are specified as expected epidata fields") + } + if (length(meta) == 0) { return(df) } From 2d4f1d4b616d0da6481e938f9e2bd2d0c712c24f Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 28 Nov 2023 10:32:33 -0500 Subject: [PATCH 04/10] check diff of expected and actual field names, since user can request subset --- R/model.R | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/R/model.R b/R/model.R index 85fd5516..9438350f 100644 --- a/R/model.R +++ b/R/model.R @@ -138,18 +138,23 @@ parse_value <- function(info, value, disable_date_parsing = FALSE) { value } +#' @importFrom purrr map_chr parse_data_frame <- function(epidata_call, df, disable_date_parsing = FALSE) { stopifnot(inherits(epidata_call, "epidata_call")) meta <- epidata_call$meta df <- as.data.frame(df) - if (length(meta) != 0 && ncol(df) != length(meta)) { - cli::cli_warn("Not all return columns are specified as expected epidata fields") - } - if (length(meta) == 0) { return(df) } + + meta_field_names <- map_chr(meta, ~ .x$name) + if ( + length(setdiff(names(df), meta_field_names)) != 0 + ) { + cli::cli_warn("Not all return columns are specified as expected epidata fields") + } + columns <- colnames(df) for (i in seq_len(length(meta))) { info <- meta[[i]] From 0f6b003aeefaa6523435aed63585e410f1edc332 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 28 Nov 2023 11:04:48 -0500 Subject: [PATCH 05/10] provide error and warning class names --- R/epidatacall.R | 26 ++++++++++++++++---------- R/model.R | 5 ++++- 2 files changed, 20 insertions(+), 11 deletions(-) diff --git a/R/epidatacall.R b/R/epidatacall.R index 47f7eb58..92c431df 100644 --- a/R/epidatacall.R +++ b/R/epidatacall.R @@ -50,20 +50,26 @@ create_epidata_call <- function(endpoint, params, meta = NULL, stopifnot(is.logical(only_supports_classic), length(only_supports_classic) == 1) if (length(unique(meta)) != length(meta)) { - cli::cli_abort(c( - "List of expected epidata fields contains duplicate entries", - "i" = "duplicates in meta can cause problems parsing fetched data", - "Please fix in `endpoints.R`" - )) + cli::cli_abort( + c( + "List of expected epidata fields contains duplicate entries", + "i" = "duplicates in meta can cause problems parsing fetched data", + "Please fix in `endpoints.R`" + ), + class = "epidatr__duplicate_meta_entries" + ) } meta_field_names <- map_chr(meta, ~ .x$name) if (length(meta_field_names) != length(unique(meta_field_names))) { - cli::cli_abort(c( - "List of expected epidata fields contains duplicate names", - "i" = "duplicates in meta can cause problems parsing fetched data", - "Please fix in `endpoints.R`" - )) + cli::cli_abort( + c( + "List of expected epidata fields contains duplicate names", + "i" = "duplicates in meta can cause problems parsing fetched data", + "Please fix in `endpoints.R`" + ), + class = "epidatr__duplicate_meta_names" + ) } if (is.null(meta)) { diff --git a/R/model.R b/R/model.R index 9438350f..051a3d33 100644 --- a/R/model.R +++ b/R/model.R @@ -152,7 +152,10 @@ parse_data_frame <- function(epidata_call, df, disable_date_parsing = FALSE) { if ( length(setdiff(names(df), meta_field_names)) != 0 ) { - cli::cli_warn("Not all return columns are specified as expected epidata fields") + cli::cli_warn( + "Not all return columns are specified as expected epidata fields", + class = "epidatr__missing_meta_fields" + ) } columns <- colnames(df) From c093c11452b08952118183ccb5363bf4e5a7a150 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 28 Nov 2023 11:05:03 -0500 Subject: [PATCH 06/10] test create_epidata_call success and failures --- tests/testthat/test-epidatacall.R | 36 +++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/tests/testthat/test-epidatacall.R b/tests/testthat/test-epidatacall.R index b0d171d8..83a1c71f 100644 --- a/tests/testthat/test-epidatacall.R +++ b/tests/testthat/test-epidatacall.R @@ -160,3 +160,39 @@ test_that("classic only fetch", { # making sure that fetch_tbl and throws the expected error on classic only expect_error(epidata_call %>% fetch_tbl(), class = "only_supports_classic_format") }) + +test_that("create_epidata_call basic behavior", { + endpoint <- "endpoint" + params <- list() + + # Success + meta <- list(list(name = "time_value", class = "date"), list(name = "value", class = "double")) + expected <- list( + endpoint = endpoint, + params = params, + base_url = "https://api.delphi.cmu.edu/epidata/", + meta = meta, + only_supports_classic = FALSE + ) + class(expected) = "epidata_call" + + expect_identical(create_epidata_call(endpoint, params, meta = meta), expected) + + expected$meta <- list() + expect_identical(create_epidata_call(endpoint, params, meta = NULL), expected) + expect_identical(create_epidata_call(endpoint, params, meta = list()), expected) +}) + + +test_that("create_epidata_call fails when meta arg contains duplicates", { + endpoint <- "endpoint" + params <- list() + + # Duplicate names + meta <- list(list(name = "time_value", class = "date"), list(name = "time_value", class = "int")) + expect_error(create_epidata_call(endpoint, params, meta = meta), class = "epidatr__duplicate_meta_names") + + # Duplicate entries + meta <- list(list(name = "time_value", class = "date"), list(name = "time_value", class = "date")) + expect_error(create_epidata_call(endpoint, params, meta = meta), class = "epidatr__duplicate_meta_entries") +}) From a716b27816ef4632d62af00211b24b7c0ae0217b Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 28 Nov 2023 11:30:22 -0500 Subject: [PATCH 07/10] test parse_data_frame --- tests/testthat/test-model.R | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-model.R b/tests/testthat/test-model.R index 7a05aa0e..d5acf51d 100644 --- a/tests/testthat/test-model.R +++ b/tests/testthat/test-model.R @@ -69,7 +69,32 @@ test_that("null parsing", { }) test_that("parse invalid time", { - vale <- list(3) - vale$class <- "my nonexistant class" - expect_error(parse_timeset_input(vale)) + value <- list(3) + value$class <- "my nonexistant class" + expect_error(parse_timeset_input(value)) +}) + +test_that("parse_data_frame warns when df contains fields not listed in meta", { + epidata_call <- pub_flusurv( + locations = "ca", + epiweeks = 202001, + fetch_args = fetch_args_list(dry_run = TRUE) + ) + # see generate_test_data.R + mock_df <- as.data.frame(readr::read_rds(testthat::test_path("data/flusurv-epiweeks.rds"))) + + # Success when meta and df fields match exactly + expect_no_warning(parse_data_frame(epidata_call, mock_df)) + + # Warning when df contains extra fields + mock_df$extra <- 5 + expect_warning( + parse_data_frame(epidata_call, mock_df), + class = "epidatr__missing_meta_fields" + ) + mock_df$extra <- NULL + + # Success when meta contains extra fields + mock_df$rate_age_0 <- NULL + expect_no_warning(parse_data_frame(epidata_call, mock_df)) }) From 073ac5b7e2f2f25aac303a7d0e588d8d48f538f5 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 28 Nov 2023 11:50:59 -0500 Subject: [PATCH 08/10] linting --- tests/testthat/test-epidatacall.R | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/testthat/test-epidatacall.R b/tests/testthat/test-epidatacall.R index 83a1c71f..bc198fbc 100644 --- a/tests/testthat/test-epidatacall.R +++ b/tests/testthat/test-epidatacall.R @@ -174,7 +174,7 @@ test_that("create_epidata_call basic behavior", { meta = meta, only_supports_classic = FALSE ) - class(expected) = "epidata_call" + class(expected) <- "epidata_call" expect_identical(create_epidata_call(endpoint, params, meta = meta), expected) From 4228e79c3e2e60c30b704e0405248de1b95c4a38 Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Tue, 28 Nov 2023 12:12:51 -0500 Subject: [PATCH 09/10] list unspecified fields in warning message --- R/model.R | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/R/model.R b/R/model.R index 051a3d33..93d79807 100644 --- a/R/model.R +++ b/R/model.R @@ -149,11 +149,15 @@ parse_data_frame <- function(epidata_call, df, disable_date_parsing = FALSE) { } meta_field_names <- map_chr(meta, ~ .x$name) + missing_fields <- setdiff(names(df), meta_field_names) if ( - length(setdiff(names(df), meta_field_names)) != 0 + length(missing_fields) != 0 ) { cli::cli_warn( - "Not all return columns are specified as expected epidata fields", + c( + "Not all return columns are specified as expected epidata fields", + "i" = "Unspecified fields {missing_fields} may need to be manually converted to more appropriate classes" + ), class = "epidatr__missing_meta_fields" ) } From 8e4fb6c018956142dc3e7fd518783ad253e87deb Mon Sep 17 00:00:00 2001 From: Nat DeFries <42820733+nmdefries@users.noreply.github.com> Date: Wed, 29 Nov 2023 12:17:49 -0500 Subject: [PATCH 10/10] verify that field specs are all EpidataFieldInfo objs --- R/epidatacall.R | 5 +++-- R/model.R | 2 +- tests/testthat/test-epidatacall.R | 26 ++++++++++++++++++++------ 3 files changed, 24 insertions(+), 9 deletions(-) diff --git a/R/epidatacall.R b/R/epidatacall.R index 92c431df..dbf3fc23 100644 --- a/R/epidatacall.R +++ b/R/epidatacall.R @@ -41,12 +41,13 @@ #' @return #' - For `create_epidata_call`: an `epidata_call` object #' -#' @importFrom purrr map_chr +#' @importFrom purrr map_chr map_lgl create_epidata_call <- function(endpoint, params, meta = NULL, only_supports_classic = FALSE) { stopifnot(is.character(endpoint), length(endpoint) == 1) stopifnot(is.list(params)) stopifnot(is.null(meta) || is.list(meta)) + stopifnot(all(map_lgl(meta, ~ inherits(.x, "EpidataFieldInfo")))) stopifnot(is.logical(only_supports_classic), length(only_supports_classic) == 1) if (length(unique(meta)) != length(meta)) { @@ -60,7 +61,7 @@ create_epidata_call <- function(endpoint, params, meta = NULL, ) } - meta_field_names <- map_chr(meta, ~ .x$name) + meta_field_names <- map_chr(meta, "name") if (length(meta_field_names) != length(unique(meta_field_names))) { cli::cli_abort( c( diff --git a/R/model.R b/R/model.R index 93d79807..0bad9a84 100644 --- a/R/model.R +++ b/R/model.R @@ -148,7 +148,7 @@ parse_data_frame <- function(epidata_call, df, disable_date_parsing = FALSE) { return(df) } - meta_field_names <- map_chr(meta, ~ .x$name) + meta_field_names <- map_chr(meta, "name") missing_fields <- setdiff(names(df), meta_field_names) if ( length(missing_fields) != 0 diff --git a/tests/testthat/test-epidatacall.R b/tests/testthat/test-epidatacall.R index bc198fbc..642c609d 100644 --- a/tests/testthat/test-epidatacall.R +++ b/tests/testthat/test-epidatacall.R @@ -166,7 +166,10 @@ test_that("create_epidata_call basic behavior", { params <- list() # Success - meta <- list(list(name = "time_value", class = "date"), list(name = "value", class = "double")) + meta <- list( + create_epidata_field_info("time_value", "date"), + create_epidata_field_info("value", "float") + ) expected <- list( endpoint = endpoint, params = params, @@ -175,7 +178,6 @@ test_that("create_epidata_call basic behavior", { only_supports_classic = FALSE ) class(expected) <- "epidata_call" - expect_identical(create_epidata_call(endpoint, params, meta = meta), expected) expected$meta <- list() @@ -189,10 +191,22 @@ test_that("create_epidata_call fails when meta arg contains duplicates", { params <- list() # Duplicate names - meta <- list(list(name = "time_value", class = "date"), list(name = "time_value", class = "int")) - expect_error(create_epidata_call(endpoint, params, meta = meta), class = "epidatr__duplicate_meta_names") + meta <- list( + create_epidata_field_info("time_value", "date"), + create_epidata_field_info("time_value", "int") + ) + expect_error( + create_epidata_call(endpoint, params, meta = meta), + class = "epidatr__duplicate_meta_names" + ) # Duplicate entries - meta <- list(list(name = "time_value", class = "date"), list(name = "time_value", class = "date")) - expect_error(create_epidata_call(endpoint, params, meta = meta), class = "epidatr__duplicate_meta_entries") + meta <- list( + create_epidata_field_info("time_value", "date"), + create_epidata_field_info("time_value", "date") + ) + expect_error( + create_epidata_call(endpoint, params, meta = meta), + class = "epidatr__duplicate_meta_entries" + ) })