Skip to content

Commit e08a693

Browse files
feat: supply tidyselection for sum_groups_epi_df (#657)
Co-authored-by: Logan C. Brooks <[email protected]>
1 parent 121cc88 commit e08a693

File tree

8 files changed

+84
-30
lines changed

8 files changed

+84
-30
lines changed

DESCRIPTION

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
Type: Package
22
Package: epiprocess
33
Title: Tools for basic signal processing in epidemiology
4-
Version: 0.11.3
4+
Version: 0.11.4
55
Authors@R: c(
66
person("Jacob", "Bien", role = "ctb"),
77
person("Logan", "Brooks", , "[email protected]", role = c("aut", "cre")),
@@ -105,6 +105,7 @@ Collate:
105105
'methods-epi_archive.R'
106106
'grouped_epi_archive.R'
107107
'growth_rate.R'
108+
'inline-roxygen.R'
108109
'key_colnames.R'
109110
'methods-epi_df.R'
110111
'outliers.R'

NEWS.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -58,6 +58,7 @@ Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicat
5858
- Various functions are now faster, using faster variants of core operations and
5959
avoiding reconstructing grouped `epi_df`s when unnecessary.
6060
- Add `autoplot.epi_archive()` to display revision patterns.
61+
- `sum_groups_epi_df()` now supports tidyselect syntax in its second argument (#655).
6162

6263
## Bug fixes
6364

R/inline-roxygen.R

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,16 @@
1+
# Helpers here are meant to be used inside inline R expressions within roxygen2
2+
# documentation when @template is inappropriate.
3+
4+
#' Description of a single arg that tidyselects value variables
5+
#'
6+
#' Not meant for describing tidyselect `...`.
7+
#'
8+
#' @keywords internal
9+
tidyselect_arg_roxygen <- '
10+
<[`tidy-select`][dplyr_tidy_select]> An unquoted column
11+
name (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`),
12+
[other tidy-select expression][tidyselect::language], or a vector of
13+
characters (e.g. `c("cases", "deaths")`). Variable names can be used as if
14+
they were positions in the data frame, so expressions like `x:y` can be
15+
used to select a range of variables.
16+
'

R/methods-epi_df.R

Lines changed: 25 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -499,34 +499,40 @@ group_epi_df <- function(x, exclude = character()) {
499499
#' the resulting `epi_df` will have `geo_value` set to `"total"`.
500500
#'
501501
#' @param .x an `epi_df`
502-
#' @param sum_cols character vector of the columns to aggregate
502+
#' @param sum_cols `r tidyselect_arg_roxygen`
503503
#' @param group_cols character vector of column names to group by. "time_value" is
504-
#' included by default.
504+
#' included by default.
505505
#' @return an `epi_df` object
506506
#'
507+
#' @examples
508+
#' # This data has other_keys age_group and edu_qual:
509+
#' grad_employ_subset
510+
#'
511+
#' # Aggregate num_graduates within each geo_value (and time_value):
512+
#' grad_employ_subset %>%
513+
#' sum_groups_epi_df(num_graduates, group_cols = "geo_value")
514+
#'
507515
#' @export
508-
sum_groups_epi_df <- function(.x, sum_cols = "value", group_cols = character()) {
516+
sum_groups_epi_df <- function(.x, sum_cols, group_cols = "time_value") {
509517
assert_class(.x, "epi_df")
510-
assert_character(sum_cols)
511518
assert_character(group_cols)
512-
checkmate::assert_subset(sum_cols, setdiff(names(.x), key_colnames(.x)))
513519
checkmate::assert_subset(group_cols, key_colnames(.x))
514520
if (!"time_value" %in% group_cols) {
515521
group_cols <- c("time_value", group_cols)
516522
}
517-
518-
out <- .x %>%
519-
group_by(across(all_of(group_cols))) %>%
520-
dplyr::summarize(across(all_of(sum_cols), sum), .groups = "drop")
523+
# Attempt tidyselection ourselves to get "Error in `sum_groups_epi_df()`"
524+
# rather than "in `dplyr::summarize()`", before forwarding:
525+
sum_cols <- rlang::enquo(sum_cols)
526+
tidyselect::eval_select(sum_cols, .x)
527+
out <- group_by(.x, across(all_of(group_cols))) %>%
528+
dplyr::summarize(across(!!sum_cols, sum), .groups = "drop")
521529

522530
# To preserve epi_df-ness, we need to ensure that the `geo_value` column is
523531
# present.
524-
out <- if (!"geo_value" %in% group_cols) {
525-
out %>%
532+
if (!"geo_value" %in% group_cols) {
533+
out <- out %>%
526534
mutate(geo_value = "total") %>%
527-
relocate(geo_value, .before = 1)
528-
} else {
529-
out
535+
relocate(.data$geo_value, .before = 1)
530536
}
531537

532538
# The `geo_type` will be correctly inherited here by the following logic:
@@ -535,10 +541,10 @@ sum_groups_epi_df <- function(.x, sum_cols = "value", group_cols = character())
535541
# - if `geo_value` is not in `group_cols`, then the constructor will see
536542
# the unrecognizable "total" value and will correctly infer the "custom"
537543
# geo_type.
538-
out %>%
539-
as_epi_df(
540-
as_of = attr(.x, "metadata")$as_of,
541-
other_keys = intersect(attr(.x, "metadata")$other_keys, group_cols)
542-
) %>%
544+
as_epi_df(
545+
out,
546+
as_of = attr(.x, "metadata")$as_of,
547+
other_keys = intersect(attr(.x, "metadata")$other_keys, group_cols)
548+
) %>%
543549
arrange_canonical()
544550
}

R/slide.R

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -557,12 +557,7 @@ get_before_after_from_window <- function(window_size, align, time_type) {
557557
#' `vignette("epi_df")` for more examples.
558558
#'
559559
#' @template basic-slide-params
560-
#' @param .col_names <[`tidy-select`][dplyr_tidy_select]> An unquoted column
561-
#' name (e.g., `cases`), multiple column names (e.g., `c(cases, deaths)`),
562-
#' [other tidy-select expression][tidyselect::language], or a vector of
563-
#' characters (e.g. `c("cases", "deaths")`). Variable names can be used as if
564-
#' they were positions in the data frame, so expressions like `x:y` can be
565-
#' used to select a range of variables.
560+
#' @param .col_names `r tidyselect_arg_roxygen`
566561
#'
567562
#' The tidy-selection renaming interface is not supported, and cannot be used
568563
#' to provide output column names; if you want to customize the output column

man/sum_groups_epi_df.Rd

Lines changed: 16 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

man/tidyselect_arg_roxygen.Rd

Lines changed: 16 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-methods-epi_df.R

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -311,20 +311,25 @@ test_that("complete.epi_df works", {
311311
})
312312

313313
test_that("sum_groups_epi_df works", {
314-
out <- toy_epi_df %>% sum_groups_epi_df(sum_cols = "x")
314+
out <- toy_epi_df %>% sum_groups_epi_df("x")
315315
expected_out <- toy_epi_df %>%
316316
group_by(time_value) %>%
317317
summarize(x = sum(x)) %>%
318318
mutate(geo_value = "total") %>%
319319
as_epi_df(as_of = attr(toy_epi_df, "metadata")$as_of)
320320
expect_equal(out, expected_out)
321+
out <- toy_epi_df %>% sum_groups_epi_df(x)
322+
expect_equal(out, expected_out)
321323

322324
out <- toy_epi_df %>%
323-
sum_groups_epi_df(sum_cols = c("x", "y"), group_cols = c("time_value", "geo_value", "indic_var1"))
325+
sum_groups_epi_df(c(x, y), group_cols = c("time_value", "geo_value", "indic_var1"))
324326
expected_out <- toy_epi_df %>%
325327
group_by(time_value, geo_value, indic_var1) %>%
326328
summarize(x = sum(x), y = sum(y), .groups = "drop") %>%
327329
as_epi_df(as_of = attr(toy_epi_df, "metadata")$as_of, other_keys = "indic_var1") %>%
328330
arrange_canonical()
329331
expect_equal(out, expected_out)
332+
out <- toy_epi_df %>%
333+
sum_groups_epi_df(x:y, group_cols = c("time_value", "geo_value", "indic_var1"))
334+
expect_equal(out, expected_out)
330335
})

0 commit comments

Comments
 (0)