diff --git a/DESCRIPTION b/DESCRIPTION index 19bc82e0..1b1573a5 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,7 +1,7 @@ Type: Package Package: epiprocess Title: Tools for basic signal processing in epidemiology -Version: 0.11.3 +Version: 0.11.4 Authors@R: c( person("Jacob", "Bien", role = "ctb"), person("Logan", "Brooks", , "lcbrooks+github@andrew.cmu.edu", role = c("aut", "cre")), @@ -104,6 +104,7 @@ Collate: 'methods-epi_archive.R' 'grouped_epi_archive.R' 'growth_rate.R' + 'inline-roxygen.R' 'key_colnames.R' 'methods-epi_df.R' 'outliers.R' diff --git a/NEWS.md b/NEWS.md index 88acabee..2734419b 100644 --- a/NEWS.md +++ b/NEWS.md @@ -58,6 +58,7 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat - Various functions are now faster, using faster variants of core operations and avoiding reconstructing grouped `epi_df`s when unnecessary. - Add `autoplot.epi_archive()` to display revision patterns. +- `sum_groups_epi_df()` now supports tidyselect syntax in it's second argument (#655). ## Bug fixes diff --git a/R/inline-roxygen.R b/R/inline-roxygen.R new file mode 100644 index 00000000..ae2ce66c --- /dev/null +++ b/R/inline-roxygen.R @@ -0,0 +1,16 @@ +# Helpers here are meant to be used inside inline R expressions within roxygen2 +# documentation when @template is inappropriate. + +#' Description of a single arg that tidyselects value variables +#' +#' Not meant for when describing tidyselect `...`. +#' +#' @keywords internal +tidyselect_arg_roxygen <- ' + <[`tidy-select`][dplyr_tidy_select]> An unquoted column + name (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`), + [other tidy-select expression][tidyselect::language], or a vector of + characters (e.g. `c("cases", "deaths")`). Variable names can be used as if + they were positions in the data frame, so expressions like `x:y` can be + used to select a range of variables. +' diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R index 1191521c..7870dede 100644 --- a/R/methods-epi_df.R +++ b/R/methods-epi_df.R @@ -499,34 +499,40 @@ group_epi_df <- function(x, exclude = character()) { #' the resulting `epi_df` will have `geo_value` set to `"total"`. #' #' @param .x an `epi_df` -#' @param sum_cols character vector of the columns to aggregate +#' @param sum_cols `r tidyselect_arg_roxygen` #' @param group_cols character vector of column names to group by. "time_value" is -#' included by default. +#' included by default. #' @return an `epi_df` object #' +#' @examples +#' # This data has other_keys age_group and edu_qual: +#' grad_employ_subset +#' +#' # Aggregate num_graduates within each geo_value (and time_value): +#' grad_employ_subset %>% +#' sum_groups_epi_df(num_graduates, group_cols = "geo_value") +#' #' @export -sum_groups_epi_df <- function(.x, sum_cols = "value", group_cols = character()) { +sum_groups_epi_df <- function(.x, sum_cols, group_cols = "time_value") { assert_class(.x, "epi_df") - assert_character(sum_cols) assert_character(group_cols) - checkmate::assert_subset(sum_cols, setdiff(names(.x), key_colnames(.x))) checkmate::assert_subset(group_cols, key_colnames(.x)) if (!"time_value" %in% group_cols) { group_cols <- c("time_value", group_cols) } - - out <- .x %>% - group_by(across(all_of(group_cols))) %>% - dplyr::summarize(across(all_of(sum_cols), sum), .groups = "drop") + # Attempt tidyselection ourselves to get "Error in `sum_groups_epi_df()`" + # rather than "in `dplyr::summarize()`", before forwarding: + sum_cols <- rlang::enquo(sum_cols) + tidyselect::eval_select(sum_cols, .x) + out <- group_by(.x, across(all_of(group_cols))) %>% + dplyr::summarize(across(!!sum_cols, sum), .groups = "drop") # To preserve epi_df-ness, we need to ensure that the `geo_value` column is # present. - out <- if (!"geo_value" %in% group_cols) { - out %>% + if (!"geo_value" %in% group_cols) { + out <- out %>% mutate(geo_value = "total") %>% - relocate(geo_value, .before = 1) - } else { - out + relocate(.data$geo_value, .before = 1) } # The `geo_type` will be correctly inherited here by the following logic: @@ -535,10 +541,10 @@ sum_groups_epi_df <- function(.x, sum_cols = "value", group_cols = character()) # - if `geo_value` is not in `group_cols`, then the constructor will see # the unrecognizeable "total" value and will correctly infer the "custom" # geo_type. - out %>% - as_epi_df( - as_of = attr(.x, "metadata")$as_of, - other_keys = intersect(attr(.x, "metadata")$other_keys, group_cols) - ) %>% + as_epi_df( + out, + as_of = attr(.x, "metadata")$as_of, + other_keys = intersect(attr(.x, "metadata")$other_keys, group_cols) + ) %>% arrange_canonical() } diff --git a/R/slide.R b/R/slide.R index be000f57..6be54baa 100644 --- a/R/slide.R +++ b/R/slide.R @@ -557,12 +557,7 @@ get_before_after_from_window <- function(window_size, align, time_type) { #' `vignette("epi_df")` for more examples. #' #' @template basic-slide-params -#' @param .col_names <[`tidy-select`][dplyr_tidy_select]> An unquoted column -#' name (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`), -#' [other tidy-select expression][tidyselect::language], or a vector of -#' characters (e.g. `c("cases", "deaths")`). Variable names can be used as if -#' they were positions in the data frame, so expressions like `x:y` can be -#' used to select a range of variables. +#' @param .col_names `r tidyselect_arg_roxygen` #' #' The tidy-selection renaming interface is not supported, and cannot be used #' to provide output column names; if you want to customize the output column diff --git a/man/sum_groups_epi_df.Rd b/man/sum_groups_epi_df.Rd index f1ba8474..34ec9993 100644 --- a/man/sum_groups_epi_df.Rd +++ b/man/sum_groups_epi_df.Rd @@ -4,12 +4,17 @@ \alias{sum_groups_epi_df} \title{Aggregate an \code{epi_df} object} \usage{ -sum_groups_epi_df(.x, sum_cols = "value", group_cols = character()) +sum_groups_epi_df(.x, sum_cols, group_cols = "time_value") } \arguments{ \item{.x}{an \code{epi_df}} -\item{sum_cols}{character vector of the columns to aggregate} +\item{sum_cols}{<\code{\link[=dplyr_tidy_select]{tidy-select}}> An unquoted column +name (e.g., \code{cases}), multiple column names (e.g., \code{c(cases, deaths)}), +\link[tidyselect:language]{other tidy-select expression}, or a vector of +characters (e.g. \code{c("cases", "deaths")}). Variable names can be used as if +they were positions in the data frame, so expressions like \code{x:y} can be +used to select a range of variables.} \item{group_cols}{character vector of column names to group by. "time_value" is included by default.} @@ -22,3 +27,12 @@ Aggregates an \code{epi_df} object by the specified group columns, summing the \code{value} column, and returning an \code{epi_df}. If aggregating over \code{geo_value}, the resulting \code{epi_df} will have \code{geo_value} set to \code{"total"}. } +\examples{ +# This data has other_keys age_group and edu_qual: +grad_employ_subset + +# Aggregate num_graduates within each geo_value (and time_value): +grad_employ_subset \%>\% + sum_groups_epi_df(num_graduates, group_cols = "geo_value") + +} diff --git a/man/tidyselect_arg_roxygen.Rd b/man/tidyselect_arg_roxygen.Rd new file mode 100644 index 00000000..27cb264d --- /dev/null +++ b/man/tidyselect_arg_roxygen.Rd @@ -0,0 +1,16 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/inline-roxygen.R +\docType{data} +\name{tidyselect_arg_roxygen} +\alias{tidyselect_arg_roxygen} +\title{Description of a single arg that tidyselects value variables} +\format{ +An object of class \code{character} of length 1. +} +\usage{ +tidyselect_arg_roxygen +} +\description{ +Not meant for when describing tidyselect \code{...}. +} +\keyword{internal} diff --git a/tests/testthat/test-methods-epi_df.R b/tests/testthat/test-methods-epi_df.R index 3e5c180b..bc9f1e35 100644 --- a/tests/testthat/test-methods-epi_df.R +++ b/tests/testthat/test-methods-epi_df.R @@ -311,20 +311,25 @@ test_that("complete.epi_df works", { }) test_that("sum_groups_epi_df works", { - out <- toy_epi_df %>% sum_groups_epi_df(sum_cols = "x") + out <- toy_epi_df %>% sum_groups_epi_df("x") expected_out <- toy_epi_df %>% group_by(time_value) %>% summarize(x = sum(x)) %>% mutate(geo_value = "total") %>% as_epi_df(as_of = attr(toy_epi_df, "metadata")$as_of) expect_equal(out, expected_out) + out <- toy_epi_df %>% sum_groups_epi_df(x) + expect_equal(out, expected_out) out <- toy_epi_df %>% - sum_groups_epi_df(sum_cols = c("x", "y"), group_cols = c("time_value", "geo_value", "indic_var1")) + sum_groups_epi_df(c(x, y), group_cols = c("time_value", "geo_value", "indic_var1")) expected_out <- toy_epi_df %>% group_by(time_value, geo_value, indic_var1) %>% summarize(x = sum(x), y = sum(y), .groups = "drop") %>% as_epi_df(as_of = attr(toy_epi_df, "metadata")$as_of, other_keys = "indic_var1") %>% arrange_canonical() expect_equal(out, expected_out) + out <- toy_epi_df %>% + sum_groups_epi_df(x:y, group_cols = c("time_value", "geo_value", "indic_var1")) + expect_equal(out, expected_out) })