From 8546548612f8c09c61c11abcf694b0c012542695 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 2 Apr 2025 10:38:39 -0700
Subject: [PATCH 01/15] feat: add filter.epi_archive

---
 NAMESPACE                 |   1 +
 NEWS.md                   |   6 ++
 R/methods-epi_archive.R   | 116 ++++++++++++++++++++++++++++++++++++++
 man/filter.epi_archive.Rd |  61 ++++++++++++++++++++
 4 files changed, 184 insertions(+)
 create mode 100644 man/filter.epi_archive.Rd

diff --git a/NAMESPACE b/NAMESPACE
index 2f97e5b3..3ad5e252 100644
--- a/NAMESPACE
+++ b/NAMESPACE
@@ -31,6 +31,7 @@ S3method(epix_slide,epi_archive)
 S3method(epix_slide,grouped_epi_archive)
 S3method(epix_truncate_versions_after,epi_archive)
 S3method(epix_truncate_versions_after,grouped_epi_archive)
+S3method(filter,epi_archive)
 S3method(group_by,epi_archive)
 S3method(group_by,epi_df)
 S3method(group_by,grouped_epi_archive)
diff --git a/NEWS.md b/NEWS.md
index 3ac814aa..50af8535 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -2,6 +2,12 @@
 
 Pre-1.0.0 numbering scheme: 0.x will indicate releases, while 0.x.y will indicate PR's.
 
+# epiprocess 0.12
+
+## New features
+
+- Added `dplyr::filter` implementation for `epi_archive`s.
+
 # epiprocess 0.11
 
 ## Breaking changes
diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index a8421efc..5b93ba2b 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -984,3 +984,119 @@ dplyr_col_modify.col_modify_recorder_df <- function(data, cols) {
   attr(data, "epiprocess::col_modify_recorder_df::cols") <- cols
   data
 }
+
+
+
+#' [`dplyr::filter`] for `epi_archive`s
+#'
+#' @param .data an `epi_archive`
+#' @param ... as in [`dplyr::filter`]; using the `version` column is not allowed
+#'   unless you use `.format_aware = TRUE`; see details.
+#' @param .by as in [`dplyr::filter`]
+#' @param .format_aware optional, `TRUE` or `FALSE`; default `FALSE`. See
+#'   details.
+#'
+#' @details
+#'
+#' By default, using the `version` column is disabled as it's easy to
+#' get unexpected results. See if either [`epix_as_of`] or [`epix_slide`]
+#' works as an alternative. If they don't cover your use case, then you can
+#' set `.format_aware = TRUE` to enable usage of the `version` column, but be
+#' careful to:
+#' * Factor in that `.data$DT` may be using a "compact" format based on diffing
+#'   consecutive versions; see details of [`as_epi_archive`]
+#' * Set `clobberable_versions_start` and `versions_end` of the result
+#'   appropriately after the `filter` call. They will be initialized with the
+#'   same values as in `.data`.
+#'
+#' `dplyr::filter` also has an optional argument `.preserve`, which should not
+#' have an impact on (ungrouped) `epi_archive`s, and `grouped_epi_archive`s do
+#' not currently support `dplyr::filter`.
+#'
+#' @examples
+#'
+#' # Filter to one location and a particular time range:
+#' archive_cases_dv_subset %>%
+#'   filter(geo_value == "fl", time_value >= as.Date("2020-10-01"))
+#'
+#' # Convert to weekly by taking the Saturday data for each week, so that
+#' # `case_rate_7d_av` represents a Sun--Sat average:
+#' archive_cases_dv_subset %>%
+#'   filter(as.POSIXlt(time_value)$wday == 6L)
+#'
+#' # Filtering involving versions requires extra care. See epix_as_of and
+#' # epix_slide instead for some common operations. One semi-common operation
+#' # that ends up being fairly simple is treating observations as finalized
+#' # after some amount of time, and ignoring any revisions that were made after
+#' # that point:
+#' archive_cases_dv_subset %>%
+#'   filter(version <= time_value + as.difftime(60, units = "days"),
+#'     .format_aware = TRUE
+#'   )
+#'
+#' @export
+filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
+  in_tbl <- tibble::as_tibble(as.list(.data$DT), .name_repair = "minimal")
+  if (.format_aware) {
+    out_tbl <- in_tbl %>%
+      filter(..., .by = .by)
+  } else {
+    out_tbl <- in_tbl %>%
+      filter(
+        # Add our own fake filter arg to the user's ..., to update the data mask
+        # to prevent `version` column usage.
+        {
+          # We should be evaluating inside the data mask. To disable both
+          # `version` and `.data$version`, we need to go to the data mask's
+          # ------
+          e <- environment()
+          while (!identical(e, globalenv()) && !identical(e, emptyenv())) {
+            if ("version" %in% names(e)) {
+              # "version" is expected to be an active binding, and directly
+              # assigning over it has issues; explicitly `rm` first.
+              rm(list = "version", envir = e)
+              delayedAssign("version", cli::cli_abort(c(
+                "Using `version` in `filter` may produce unexpected results.",
+                ">" = "See if `epix_as_of` or `epix_slide` would work instead.",
+                ">" = "If not, see `?filter.epi_archive` details for how to proceed."
+              )), assign.env = e)
+              break
+            }
+            e <- parent.env(e)
+          }
+          TRUE
+        },
+        ...,
+        .by = .by
+      )
+  }
+  out_geo_type <-
+    if (.data$geo_type == "custom") {
+      # We might be going from a multi-resolution to single-resolution archive;
+      # e.g. national+state -> state; try to re-infer:
+      guess_geo_type(out_tbl$geo_value)
+    } else {
+      # We risk less-understandable inference failures such as inferring "hhs"
+      # from selecting hrr 10 data; just use the old geo_type:
+      .data$geo_type
+    }
+  # We might be going from daily to weekly; re-infer:
+  out_time_type <- guess_time_type(out_tbl$time_value)
+  # Even if they narrow down to just a single value of an other_keys column,
+  # it's probably still better (& simpler) to treat it as an other_keys column
+  # since it still exists in the result:
+  out_other_keys <- .data$other_keys
+  # `filter` makes no guarantees about not aliasing columns in its result when
+  # the filter condition is all TRUE, so don't setDT.
+  out_dtbl <- as.data.table(out_tbl, key = out_other_keys)
+  result <- new_epi_archive(
+    out_dtbl,
+    out_geo_type, out_time_type, out_other_keys,
+    # Assume version-related metadata unchanged; part of why we want to push
+    # back on filter expressions like `.data$version <= .env$as_of`:
+    .data$clobberable_versions_start, .data$versions_end
+  )
+  # Filtering down rows while keeping all (ukey) columns should preserve ukey
+  # uniqueness.
+  result
+}
diff --git a/man/filter.epi_archive.Rd b/man/filter.epi_archive.Rd
new file mode 100644
index 00000000..b0012bd2
--- /dev/null
+++ b/man/filter.epi_archive.Rd
@@ -0,0 +1,61 @@
+% Generated by roxygen2: do not edit by hand
+% Please edit documentation in R/methods-epi_archive.R
+\name{filter.epi_archive}
+\alias{filter.epi_archive}
+\title{\code{\link[dplyr:filter]{dplyr::filter}} for \code{epi_archive}s}
+\usage{
+\method{filter}{epi_archive}(.data, ..., .by = NULL, .format_aware = FALSE)
+}
+\arguments{
+\item{.data}{an \code{epi_archive}}
+
+\item{...}{as in \code{\link[dplyr:filter]{dplyr::filter}}; using the \code{version} column is not allowed
+unless you use \code{.format_aware = TRUE}; see details.}
+
+\item{.by}{as in \code{\link[dplyr:filter]{dplyr::filter}}}
+
+\item{.format_aware}{optional, \code{TRUE} or \code{FALSE}; default \code{FALSE}. See
+details.}
+}
+\description{
+\code{\link[dplyr:filter]{dplyr::filter}} for \code{epi_archive}s
+}
+\details{
+By default, using the \code{version} column is disabled as it's easy to
+get unexpected results. See if either \code{\link{epix_as_of}} or \code{\link{epix_slide}}
+works as an alternative. If they don't cover your use case, then you can
+set \code{.format_aware = TRUE} to enable usage of the \code{version} column, but be
+careful to:
+\itemize{
+\item Factor in that \code{.data$DT} may be using a "compact" format based on diffing
+consecutive versions; see details of \code{\link{as_epi_archive}}
+\item Set \code{clobberable_versions_start} and \code{versions_end} of the result
+appropriately after the \code{filter} call. They will be initialized with the
+same values as in \code{.data}.
+}
+
+\code{dplyr::filter} also has an optional argument \code{.preserve}, which should not
+have an impact on (ungrouped) \code{epi_archive}s, and \code{grouped_epi_archive}s do
+not currently support \code{dplyr::filter}.
+}
+\examples{
+
+# Filter to one location and a particular time range:
+archive_cases_dv_subset \%>\%
+  filter(geo_value == "fl", time_value >= as.Date("2020-10-01"))
+
+# Convert to weekly by taking the Saturday data for each week, so that
+# `case_rate_7d_av` represents a Sun--Sat average:
+archive_cases_dv_subset \%>\%
+  filter(as.POSIXlt(time_value)$wday == 6L)
+
+# Filtering involving versions requires extra care. See epix_as_of and
+# epix_slide instead for some common operations. One semi-common operation
+# that ends up being fairly simple is treating observations as finalized
+# after some amount of time, and ignoring any revisions that were made after
+# that point:
+archive_cases_dv_subset \%>\%
+  filter(version <= time_value + as.difftime(60, units = "days"),
+         .format_aware = TRUE)
+
+}

From 9a4a6018ba0ddba0a939c47dd34f682e7c63a736 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 2 Apr 2025 17:35:45 -0700
Subject: [PATCH 02/15] In filter.epi_archive, favor avoiding incorrect
 geo_type reinferences

---
 R/methods-epi_archive.R | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index 5b93ba2b..5c2534e3 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -1070,16 +1070,11 @@ filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
         .by = .by
       )
   }
-  out_geo_type <-
-    if (.data$geo_type == "custom") {
-      # We might be going from a multi-resolution to single-resolution archive;
-      # e.g. national+state -> state; try to re-infer:
-      guess_geo_type(out_tbl$geo_value)
-    } else {
-      # We risk less-understandable inference failures such as inferring "hhs"
-      # from selecting hrr 10 data; just use the old geo_type:
-      .data$geo_type
-    }
+  # We could try to re-infer the geo_type, e.g., when filtering from
+  # national+state to just state. However, we risk inference failures such as
+  # "hrr" -> "hhs" from filtering to hrr 10, or "custom" -> USA-related when
+  # working with non-USA data:
+  out_geo_type <- .data$geo_type
   # We might be going from daily to weekly; re-infer:
   out_time_type <- guess_time_type(out_tbl$time_value)
   # Even if they narrow down to just a single value of an other_keys column,

From 1f2bd85386c6fe00a510efedbc32a27e054c3cc4 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 2 Apr 2025 18:08:24 -0700
Subject: [PATCH 03/15] feat(filter.epi_archive): also guard against using
 measurement columns

---
 R/methods-epi_archive.R   | 53 +++++++++++++++++++++++++--------------
 man/filter.epi_archive.Rd | 32 ++++++++++++-----------
 2 files changed, 52 insertions(+), 33 deletions(-)

diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index 5c2534e3..5357212f 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -998,13 +998,15 @@ dplyr_col_modify.col_modify_recorder_df <- function(data, cols) {
 #'
 #' @details
 #'
-#' By default, using the `version` column is disabled as it's easy to
-#' get unexpected results. See if either [`epix_as_of`] or [`epix_slide`]
-#' works as an alternative. If they don't cover your use case, then you can
-#' set `.format_aware = TRUE` to enable usage of the `version` column, but be
-#' careful to:
-#' * Factor in that `.data$DT` may be using a "compact" format based on diffing
-#'   consecutive versions; see details of [`as_epi_archive`]
+#' By default, using the `version` column or measurement columns is disabled as
+#' it's easy to get unexpected results. See if either [`epix_as_of`] or
+#' [`epix_slide`] works as an alternative. If they don't cover your use case,
+#' then you can set `.format_aware = TRUE` to enable usage of these columns, but
+#' be careful to:
+#' * Factor in that `.data$DT` may have been converted into a compact format
+#'   based on diffing consecutive versions, and the last version of each
+#'   observation in `.data$DT` will always be carried forward to future
+#'   `version`s`; see details of [`as_epi_archive`].
 #' * Set `clobberable_versions_start` and `versions_end` of the result
 #'   appropriately after the `filter` call. They will be initialized with the
 #'   same values as in `.data`.
@@ -1024,13 +1026,14 @@ dplyr_col_modify.col_modify_recorder_df <- function(data, cols) {
 #' archive_cases_dv_subset %>%
 #'   filter(as.POSIXlt(time_value)$wday == 6L)
 #'
-#' # Filtering involving versions requires extra care. See epix_as_of and
-#' # epix_slide instead for some common operations. One semi-common operation
-#' # that ends up being fairly simple is treating observations as finalized
-#' # after some amount of time, and ignoring any revisions that were made after
-#' # that point:
+#' # Filtering involving the `version` column or measurement columns requires
+#' # extra care. See epix_as_of and epix_slide instead for some common
+#' # operations. One semi-common operation that ends up being fairly simple is
+#' # treating observations as finalized after some amount of time, and ignoring
+#' # any revisions that were made after that point:
 #' archive_cases_dv_subset %>%
-#'   filter(version <= time_value + as.difftime(60, units = "days"),
+#'   filter(
+#'     version <= time_value + as.difftime(60, units = "days"),
 #'     .format_aware = TRUE
 #'   )
 #'
@@ -1041,25 +1044,37 @@ filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
     out_tbl <- in_tbl %>%
       filter(..., .by = .by)
   } else {
+    measurement_colnames <- setdiff(names(.data$DT), key_colnames(.data))
+    forbidden_colnames <- c("version", measurement_colnames)
     out_tbl <- in_tbl %>%
       filter(
         # Add our own fake filter arg to the user's ..., to update the data mask
         # to prevent `version` column usage.
         {
           # We should be evaluating inside the data mask. To disable both
-          # `version` and `.data$version`, we need to go to the data mask's
-          # ------
+          # `version` and `.data$version` etc., we need to go to the ancestor
+          # environment containing the data mask's column bindings. This is
+          # likely just the parent env, but search to make sure, in a way akin
+          # to `<<-`:
           e <- environment()
           while (!identical(e, globalenv()) && !identical(e, emptyenv())) {
             if ("version" %in% names(e)) {
-              # "version" is expected to be an active binding, and directly
-              # assigning over it has issues; explicitly `rm` first.
-              rm(list = "version", envir = e)
+              # This is where the column bindings are. Replace the forbidden ones.
+              # They are expected to be active bindings, so directly
+              # assigning has issues; `rm` first.
+              rm(list = forbidden_colnames, envir = e)
               delayedAssign("version", cli::cli_abort(c(
-                "Using `version` in `filter` may produce unexpected results.",
+                "Using `version` in `filter.epi_archive` may produce unexpected results.",
                 ">" = "See if `epix_as_of` or `epix_slide` would work instead.",
                 ">" = "If not, see `?filter.epi_archive` details for how to proceed."
               )), assign.env = e)
+              for (measurement_colname in measurement_colnames) {
+                delayedAssign(measurement_colname, cli::cli_abort(c(
+                  "Using `{format_varname(measurement_colname)}`
+                 in `filter.epi_archive` may produce unexpected results.",
+                  ">" = "See `?filter.epi_archive` details for how to proceed."
+                )), assign.env = e)
+              }
               break
             }
             e <- parent.env(e)
diff --git a/man/filter.epi_archive.Rd b/man/filter.epi_archive.Rd
index b0012bd2..a007bdc5 100644
--- a/man/filter.epi_archive.Rd
+++ b/man/filter.epi_archive.Rd
@@ -21,14 +21,16 @@ details.}
 \code{\link[dplyr:filter]{dplyr::filter}} for \code{epi_archive}s
 }
 \details{
-By default, using the \code{version} column is disabled as it's easy to
-get unexpected results. See if either \code{\link{epix_as_of}} or \code{\link{epix_slide}}
-works as an alternative. If they don't cover your use case, then you can
-set \code{.format_aware = TRUE} to enable usage of the \code{version} column, but be
-careful to:
+By default, using the \code{version} column or measurement columns is disabled as
+it's easy to get unexpected results. See if either \code{\link{epix_as_of}} or
+\code{\link{epix_slide}} works as an alternative. If they don't cover your use case,
+then you can set \code{.format_aware = TRUE} to enable usage of these columns, but
+be careful to:
 \itemize{
-\item Factor in that \code{.data$DT} may be using a "compact" format based on diffing
-consecutive versions; see details of \code{\link{as_epi_archive}}
+\item Factor in that \code{.data$DT} may have been converted into a compact format
+based on diffing consecutive versions, and the last version of each
+observation in \code{.data$DT} will always be carried forward to future
+\code{version}s\verb{; see details of [}as_epi_archive`].
 \item Set \code{clobberable_versions_start} and \code{versions_end} of the result
 appropriately after the \code{filter} call. They will be initialized with the
 same values as in \code{.data}.
@@ -49,13 +51,15 @@ archive_cases_dv_subset \%>\%
 archive_cases_dv_subset \%>\%
   filter(as.POSIXlt(time_value)$wday == 6L)
 
-# Filtering involving versions requires extra care. See epix_as_of and
-# epix_slide instead for some common operations. One semi-common operation
-# that ends up being fairly simple is treating observations as finalized
-# after some amount of time, and ignoring any revisions that were made after
-# that point:
+# Filtering involving the `version` column or measurement columns requires
+# extra care. See epix_as_of and epix_slide instead for some common
+# operations. One semi-common operation that ends up being fairly simple is
+# treating observations as finalized after some amount of time, and ignoring
+# any revisions that were made after that point:
 archive_cases_dv_subset \%>\%
-  filter(version <= time_value + as.difftime(60, units = "days"),
-         .format_aware = TRUE)
+  filter(
+    version <= time_value + as.difftime(60, units = "days"),
+    .format_aware = TRUE
+  )
 
 }

From 319842952ddb4912d6a7650c505a324753838db4 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Thu, 3 Apr 2025 15:19:35 -0700
Subject: [PATCH 04/15] Test filter.epi_archive, fix&tweak some behaviors

---
 R/methods-epi_archive.R                   |  19 ++--
 tests/testthat/test-methods-epi_archive.R | 100 ++++++++++++++++++++++
 2 files changed, 113 insertions(+), 6 deletions(-)

diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index 5357212f..8595e316 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -1042,7 +1042,7 @@ filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
   in_tbl <- tibble::as_tibble(as.list(.data$DT), .name_repair = "minimal")
   if (.format_aware) {
     out_tbl <- in_tbl %>%
-      filter(..., .by = .by)
+      filter(..., .by = {{.by}})
   } else {
     measurement_colnames <- setdiff(names(.data$DT), key_colnames(.data))
     forbidden_colnames <- c("version", measurement_colnames)
@@ -1067,13 +1067,13 @@ filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
                 "Using `version` in `filter.epi_archive` may produce unexpected results.",
                 ">" = "See if `epix_as_of` or `epix_slide` would work instead.",
                 ">" = "If not, see `?filter.epi_archive` details for how to proceed."
-              )), assign.env = e)
+              ), class = "epiprocess__filter_archive__used_version"), assign.env = e)
               for (measurement_colname in measurement_colnames) {
                 delayedAssign(measurement_colname, cli::cli_abort(c(
                   "Using `{format_varname(measurement_colname)}`
                  in `filter.epi_archive` may produce unexpected results.",
                   ">" = "See `?filter.epi_archive` details for how to proceed."
-                )), assign.env = e)
+                ), class = "epiprocess__filter_archive__used_measurement"), assign.env = e)
               }
               break
             }
@@ -1082,7 +1082,7 @@ filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
           TRUE
         },
         ...,
-        .by = .by
+        .by = {{.by}}
       )
   }
   # We could try to re-infer the geo_type, e.g., when filtering from
@@ -1090,8 +1090,15 @@ filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
   # "hrr" -> "hhs" from filtering to hrr 10, or "custom" -> USA-related when
   # working with non-USA data:
   out_geo_type <- .data$geo_type
-  # We might be going from daily to weekly; re-infer:
-  out_time_type <- guess_time_type(out_tbl$time_value)
+  if (.data$time_type == "day") {
+    # We might be going from daily to weekly; re-infer:
+    out_time_type <- guess_time_type(out_tbl$time_value)
+  } else {
+    # We might be filtering weekly to a single time_value; avoid re-inferring to
+    # stay "week". Or in other cases, just skip inferring, as re-inferring is
+    # expected to match the input time_type:
+    out_time_type <- .data$time_type
+  }
   # Even if they narrow down to just a single value of an other_keys column,
   # it's probably still better (& simpler) to treat it as an other_keys column
   # since it still exists in the result:
diff --git a/tests/testthat/test-methods-epi_archive.R b/tests/testthat/test-methods-epi_archive.R
index 45ba6ea1..9c80ffa1 100644
--- a/tests/testthat/test-methods-epi_archive.R
+++ b/tests/testthat/test-methods-epi_archive.R
@@ -128,3 +128,103 @@ test_that("group_vars works as expected", {
     "geo_value"
   )
 })
+
+test_that("filter.epi_archive works as expected", {
+
+  ea2 <- ea2_data %>%
+    as_epi_archive()
+
+  # Some basic output value checks:
+
+  expect_equal(
+    ea2 %>% filter(geo_value == "tn"),
+    new_epi_archive(
+      ea2$DT[FALSE],
+      ea2$geo_type, ea2$time_type, ea2$other_keys,
+      ea2$clobberable_versions_start, ea2$versions_end
+    )
+  )
+
+  expect_equal(
+    ea2 %>% filter(geo_value == "ca", time_value == as.Date("2020-06-02")),
+    new_epi_archive(
+      data.table::data.table(geo_value = "ca", time_value = as.Date("2020-06-02"),
+                             version = as.Date("2020-06-02") + 0:2, cases = 0:2),
+      ea2$geo_type, ea2$time_type, ea2$other_keys,
+      ea2$clobberable_versions_start, ea2$versions_end
+    )
+  )
+
+  # Output geo_type and time_type behavior:
+
+  hrr_day_ea <- tibble(
+    geo_value = c(rep(1, 14), 100),
+    time_value = as.Date("2020-01-01") - 1 + c(1:14, 14),
+    version = time_value + 3,
+    value = 1:15
+  ) %>%
+    as_epi_archive()
+
+  expect_equal(hrr_day_ea$geo_type, "hrr")
+  expect_equal(hrr_day_ea$time_type, "day")
+
+  hrr_week_ea <- hrr_day_ea %>%
+    filter(geo_value == 1, as.POSIXlt(time_value)$wday == 6L)
+
+  expect_equal(hrr_week_ea$geo_type, "hrr")
+  expect_equal(hrr_week_ea$time_type, "week")
+
+  hrr_one_week_ea <- hrr_week_ea %>%
+    filter(time_value == time_value[[1]])
+
+  expect_equal(hrr_one_week_ea$time_type, "week")
+
+  intcustom_day_ea <- hrr_day_ea
+  intcustom_day_ea$geo_type <- "custom"
+
+  intcustom_week_ea <- intcustom_day_ea %>%
+    filter(geo_value == 1, as.POSIXlt(time_value)$wday == 6L)
+
+  expect_equal(intcustom_week_ea$geo_type, "custom")
+  expect_equal(intcustom_week_ea$time_type, "week")
+
+  # Error-raising:
+  expect_error(
+    ea2 %>% filter(version == as.Date("2020-06-02")),
+    class = "epiprocess__filter_archive__used_version"
+  )
+  expect_error(
+    ea2 %>% filter(version <= as.Date("2020-06-02")),
+    class = "epiprocess__filter_archive__used_version"
+  )
+  expect_snapshot(
+    ea2 %>% filter(version <= as.Date("2020-06-02")),
+    error = TRUE, cnd_class = TRUE
+  )
+  expect_error(
+    ea2 %>% filter(time_value >= as.Date("2020-06-02"), cases >= 2L),
+    class = "epiprocess__filter_archive__used_measurement"
+  )
+  expect_snapshot(
+    ea2 %>% filter(time_value >= as.Date("2020-06-02"), cases >= 2L),
+    error = TRUE, cnd_class = TRUE
+  )
+  expect_error(
+    ea2 %>% filter(time_value >= as.Date("2020-06-02"), cases >= 2L),
+    class = "epiprocess__filter_archive__used_measurement"
+  )
+  expect_error(
+    ea2 %>% filter(cases >= median(cases), .by = geo_value),
+    class = "epiprocess__filter_archive__used_measurement"
+  )
+
+  # Escape hatch:
+  expect_equal(
+    ea2 %>%
+      filter(version <= time_value + as.difftime(1, units = "days"),
+             .format_aware = TRUE) %>%
+      .$DT,
+    ea2$DT[version <= time_value + as.difftime(1, units = "days"), ]
+  )
+
+})

From 24bda523f98b6a3c487d90180f37a93853586504 Mon Sep 17 00:00:00 2001
From: brookslogan <brookslogan@users.noreply.github.com>
Date: Thu, 3 Apr 2025 22:22:53 +0000
Subject: [PATCH 05/15] style: styler (GHA)

---
 R/methods-epi_archive.R                   |  4 ++--
 tests/testthat/test-methods-epi_archive.R | 11 ++++++-----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index 8595e316..02cf2cf3 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -1042,7 +1042,7 @@ filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
   in_tbl <- tibble::as_tibble(as.list(.data$DT), .name_repair = "minimal")
   if (.format_aware) {
     out_tbl <- in_tbl %>%
-      filter(..., .by = {{.by}})
+      filter(..., .by = {{ .by }})
   } else {
     measurement_colnames <- setdiff(names(.data$DT), key_colnames(.data))
     forbidden_colnames <- c("version", measurement_colnames)
@@ -1082,7 +1082,7 @@ filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
           TRUE
         },
         ...,
-        .by = {{.by}}
+        .by = {{ .by }}
       )
   }
   # We could try to re-infer the geo_type, e.g., when filtering from
diff --git a/tests/testthat/test-methods-epi_archive.R b/tests/testthat/test-methods-epi_archive.R
index 9c80ffa1..6e08c990 100644
--- a/tests/testthat/test-methods-epi_archive.R
+++ b/tests/testthat/test-methods-epi_archive.R
@@ -130,7 +130,6 @@ test_that("group_vars works as expected", {
 })
 
 test_that("filter.epi_archive works as expected", {
-
   ea2 <- ea2_data %>%
     as_epi_archive()
 
@@ -148,8 +147,10 @@ test_that("filter.epi_archive works as expected", {
   expect_equal(
     ea2 %>% filter(geo_value == "ca", time_value == as.Date("2020-06-02")),
     new_epi_archive(
-      data.table::data.table(geo_value = "ca", time_value = as.Date("2020-06-02"),
-                             version = as.Date("2020-06-02") + 0:2, cases = 0:2),
+      data.table::data.table(
+        geo_value = "ca", time_value = as.Date("2020-06-02"),
+        version = as.Date("2020-06-02") + 0:2, cases = 0:2
+      ),
       ea2$geo_type, ea2$time_type, ea2$other_keys,
       ea2$clobberable_versions_start, ea2$versions_end
     )
@@ -222,9 +223,9 @@ test_that("filter.epi_archive works as expected", {
   expect_equal(
     ea2 %>%
       filter(version <= time_value + as.difftime(1, units = "days"),
-             .format_aware = TRUE) %>%
+        .format_aware = TRUE
+      ) %>%
       .$DT,
     ea2$DT[version <= time_value + as.difftime(1, units = "days"), ]
   )
-
 })

From 17a369cb6f6ab52ed1c4f765c0996a74dcd7d2e6 Mon Sep 17 00:00:00 2001
From: Dmitry Shemetov <dshemetov@ucdavis.edu>
Date: Tue, 15 Apr 2025 10:41:07 -0700
Subject: [PATCH 06/15] test: add environment variable test

---
 tests/testthat/test-methods-epi_archive.R | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tests/testthat/test-methods-epi_archive.R b/tests/testthat/test-methods-epi_archive.R
index 6e08c990..2418af4e 100644
--- a/tests/testthat/test-methods-epi_archive.R
+++ b/tests/testthat/test-methods-epi_archive.R
@@ -189,6 +189,10 @@ test_that("filter.epi_archive works as expected", {
   expect_equal(intcustom_week_ea$geo_type, "custom")
   expect_equal(intcustom_week_ea$time_type, "week")
 
+  # Environment variables should be fine:
+  version <- as.Date("2020-06-02") + 1
+  expect_no_error(ea2 %>% filter(geo_value == "ca", .env$version <= time_value))
+
   # Error-raising:
   expect_error(
     ea2 %>% filter(version == as.Date("2020-06-02")),

From 80d787b9919b323787e05e00994dc349e762af91 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 16 Apr 2025 11:16:12 -0700
Subject: [PATCH 07/15] fix(filter.epi_archive): for+lazy, left-behind bindings

---
 R/methods-epi_archive.R                      | 18 +++++++---
 tests/testthat/_snaps/methods-epi_archive.md | 35 ++++++++++++++++++++
 tests/testthat/test-methods-epi_archive.R    | 16 +++++++--
 3 files changed, 62 insertions(+), 7 deletions(-)
 create mode 100644 tests/testthat/_snaps/methods-epi_archive.md

diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index 02cf2cf3..4b443afa 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -1069,16 +1069,24 @@ filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
                 ">" = "If not, see `?filter.epi_archive` details for how to proceed."
               ), class = "epiprocess__filter_archive__used_version"), assign.env = e)
               for (measurement_colname in measurement_colnames) {
-                delayedAssign(measurement_colname, cli::cli_abort(c(
-                  "Using `{format_varname(measurement_colname)}`
-                 in `filter.epi_archive` may produce unexpected results.",
-                  ">" = "See `?filter.epi_archive` details for how to proceed."
-                ), class = "epiprocess__filter_archive__used_measurement"), assign.env = e)
+                # Record current `measurement_colname` and set up delayed
+                # binding for error in a child environment, so that `for` loop
+                # updating its value and `rm` cleanup don't mess things up:
+                local({
+                  local_measurement_colname <- measurement_colname
+                  delayedAssign(measurement_colname, cli::cli_abort(c(
+                    "Using `{format_varname(local_measurement_colname)}`
+                     in `filter.epi_archive` may produce unexpected results.",
+                    ">" = "See `?filter.epi_archive` details for how to proceed."
+                  ), class = "epiprocess__filter_archive__used_measurement"), assign.env = e)
+                })
               }
               break
             }
             e <- parent.env(e)
           }
+          # Don't mask similarly-named user objects:
+          rm(list = c("e", "measurement_colname"))
           TRUE
         },
         ...,
diff --git a/tests/testthat/_snaps/methods-epi_archive.md b/tests/testthat/_snaps/methods-epi_archive.md
new file mode 100644
index 00000000..200e4202
--- /dev/null
+++ b/tests/testthat/_snaps/methods-epi_archive.md
@@ -0,0 +1,35 @@
+# filter.epi_archive works as expected
+
+    Code
+      ea2 %>% filter(version <= as.Date("2020-06-02"))
+    Condition <rlang_error>
+      Error in `filter()`:
+      i In argument: `version <= as.Date("2020-06-02")`.
+      Caused by error:
+      ! Using `version` in `filter.epi_archive` may produce unexpected results.
+      > See if `epix_as_of` or `epix_slide` would work instead.
+      > If not, see `?filter.epi_archive` details for how to proceed.
+
+---
+
+    Code
+      ea2 %>% filter(time_value >= as.Date("2020-06-02"), cases >= 2L)
+    Condition <rlang_error>
+      Error in `filter()`:
+      i In argument: `cases >= 2L`.
+      Caused by error:
+      ! Using `cases` in `filter.epi_archive` may produce unexpected results.
+      > See `?filter.epi_archive` details for how to proceed.
+
+---
+
+    Code
+      ea2p %>% filter(cases >= median(cases), .by = geo_value)
+    Condition <rlang_error>
+      Error in `filter()`:
+      i In argument: `cases >= median(cases)`.
+      i In group 1: `geo_value = "ca"`.
+      Caused by error:
+      ! Using `cases` in `filter.epi_archive` may produce unexpected results.
+      > See `?filter.epi_archive` details for how to proceed.
+
diff --git a/tests/testthat/test-methods-epi_archive.R b/tests/testthat/test-methods-epi_archive.R
index 2418af4e..f9299d0f 100644
--- a/tests/testthat/test-methods-epi_archive.R
+++ b/tests/testthat/test-methods-epi_archive.R
@@ -191,7 +191,11 @@ test_that("filter.epi_archive works as expected", {
 
   # Environment variables should be fine:
   version <- as.Date("2020-06-02") + 1
-  expect_no_error(ea2 %>% filter(geo_value == "ca", .env$version <= time_value))
+  e <- version
+  expected <- ea2 %>% filter(geo_value == "ca", as.Date("2020-06-02") + 1 <= time_value)
+  expect_equal(ea2 %>% filter(geo_value == "ca", .env$version <= time_value), expected)
+  expect_equal(ea2 %>% filter(geo_value == "ca", e <= time_value), expected)
+  expect_equal(ea2 %>% filter(geo_value == "ca", .env$e <= time_value), expected)
 
   # Error-raising:
   expect_error(
@@ -218,10 +222,18 @@ test_that("filter.epi_archive works as expected", {
     ea2 %>% filter(time_value >= as.Date("2020-06-02"), cases >= 2L),
     class = "epiprocess__filter_archive__used_measurement"
   )
+  ea2p <- ea2_data %>%
+    # to check for `for` + `delayedAssign` mishap in expect_snapshot
+    mutate(deaths = 0) %>%
+    as_epi_archive()
   expect_error(
-    ea2 %>% filter(cases >= median(cases), .by = geo_value),
+    ea2p %>% filter(cases >= median(cases), .by = geo_value),
     class = "epiprocess__filter_archive__used_measurement"
   )
+  expect_snapshot(
+    ea2p %>% filter(cases >= median(cases), .by = geo_value),
+    error = TRUE, cnd_class = TRUE
+  )
 
   # Escape hatch:
   expect_equal(

From 07e54f40c340c57b4f956cf5e491963c408ac5a1 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 16 Apr 2025 12:20:48 -0700
Subject: [PATCH 08/15] fix(filter.epi_archive): avoid other lazy eval traps

---
 R/methods-epi_archive.R                   | 46 +++++++++++++++--------
 tests/testthat/test-methods-epi_archive.R | 19 +++++++++-
 2 files changed, 49 insertions(+), 16 deletions(-)

diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index 4b443afa..9f244aa9 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -1063,30 +1063,46 @@ filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
               # They are expected to be active bindings, so directly
               # assigning has issues; `rm` first.
               rm(list = forbidden_colnames, envir = e)
-              delayedAssign("version", cli::cli_abort(c(
-                "Using `version` in `filter.epi_archive` may produce unexpected results.",
-                ">" = "See if `epix_as_of` or `epix_slide` would work instead.",
-                ">" = "If not, see `?filter.epi_archive` details for how to proceed."
-              ), class = "epiprocess__filter_archive__used_version"), assign.env = e)
+              eval_env <- new.env(parent = asNamespace("epiprocess")) # see (2) below
+              delayedAssign(
+                "version",
+                cli_abort(c(
+                  "Using `version` in `filter.epi_archive` may produce unexpected results.",
+                  ">" = "See if `epix_as_of` or `epix_slide` would work instead.",
+                  ">" = "If not, see `?filter.epi_archive` details for how to proceed."
+                ), class = "epiprocess__filter_archive__used_version"),
+                eval.env = eval_env,
+                assign.env = e
+              )
               for (measurement_colname in measurement_colnames) {
-                # Record current `measurement_colname` and set up delayed
-                # binding for error in a child environment, so that `for` loop
-                # updating its value and `rm` cleanup don't mess things up:
-                local({
-                  local_measurement_colname <- measurement_colname
-                  delayedAssign(measurement_colname, cli::cli_abort(c(
+                # Record current `measurement_colname` and set up execution for
+                # the promise for the error in its own dedicated environment, so
+                # that (1) `for` loop updating its value and `rm` cleanup don't
+                # mess things up. We can also (2) prevent changes to data mask
+                # ancestry (to involve user's quosure env rather than our
+                # quosure env) or contents (from edge case of user binding
+                # functions inside the mask) from potentially interfering by
+                # setting the promise's execution environment to skip over the
+                # data mask.
+                eval_env <- new.env(parent = asNamespace("epiprocess"))
+                eval_env[["local_measurement_colname"]] <- measurement_colname
+                delayedAssign(
+                  measurement_colname,
+                  cli_abort(c(
                     "Using `{format_varname(local_measurement_colname)}`
                      in `filter.epi_archive` may produce unexpected results.",
                     ">" = "See `?filter.epi_archive` details for how to proceed."
-                  ), class = "epiprocess__filter_archive__used_measurement"), assign.env = e)
-                })
+                  ), class = "epiprocess__filter_archive__used_measurement"),
+                  eval.env = eval_env,
+                  assign.env = e
+                )
               }
               break
             }
             e <- parent.env(e)
           }
-          # Don't mask similarly-named user objects:
-          rm(list = c("e", "measurement_colname"))
+          # Don't mask similarly-named user objects in ancestor envs:
+          rm(list = c("e", "measurement_colname", "eval_env"))
           TRUE
         },
         ...,
diff --git a/tests/testthat/test-methods-epi_archive.R b/tests/testthat/test-methods-epi_archive.R
index f9299d0f..dafa5521 100644
--- a/tests/testthat/test-methods-epi_archive.R
+++ b/tests/testthat/test-methods-epi_archive.R
@@ -222,8 +222,10 @@ test_that("filter.epi_archive works as expected", {
     ea2 %>% filter(time_value >= as.Date("2020-06-02"), cases >= 2L),
     class = "epiprocess__filter_archive__used_measurement"
   )
+  # Check for `for` + `delayedAssign` mishap in `expect_snapshot` (we should say
+  # something about `cases` (the relevant colname), not `deaths` (the last
+  # measurement colname)):
   ea2p <- ea2_data %>%
-    # to check for `for` + `delayedAssign` mishap in expect_snapshot
     mutate(deaths = 0) %>%
     as_epi_archive()
   expect_error(
@@ -234,6 +236,21 @@ test_that("filter.epi_archive works as expected", {
     ea2p %>% filter(cases >= median(cases), .by = geo_value),
     error = TRUE, cnd_class = TRUE
   )
+  # Check that we are insulated from other lazy eval traps:
+  expected <- rlang::catch_cnd(ea2p %>% filter(cases >= median(cases), .by = geo_value))
+  expect_class(expected$parent, "epiprocess__filter_archive__used_measurement")
+  with(list(cli_abort = function(...) stop("now, pretend user didn't have cli attached")), {
+    expect_equal(rlang::catch_cnd(ea2p %>% filter(cases >= median(cases), .by = geo_value))$parent$message,
+                 expected$parent$message)
+  })
+  expect_equal(
+    rlang::catch_cnd(ea2p %>% filter({
+      c <- function(...) stop("and that they overwrote `c` to try to debug their own code")
+      cases >= median(cases)
+    }, .by = geo_value))$parent$message,
+    expected$parent$message
+  )
+
 
   # Escape hatch:
   expect_equal(

From 751f547c177f00cf0e4bba82e4cc887dd318bebc Mon Sep 17 00:00:00 2001
From: brookslogan <brookslogan@users.noreply.github.com>
Date: Wed, 16 Apr 2025 19:24:52 +0000
Subject: [PATCH 09/15] style: styler (GHA)

---
 tests/testthat/test-methods-epi_archive.R | 17 +++++++++++------
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/tests/testthat/test-methods-epi_archive.R b/tests/testthat/test-methods-epi_archive.R
index dafa5521..a073594f 100644
--- a/tests/testthat/test-methods-epi_archive.R
+++ b/tests/testthat/test-methods-epi_archive.R
@@ -240,14 +240,19 @@ test_that("filter.epi_archive works as expected", {
   expected <- rlang::catch_cnd(ea2p %>% filter(cases >= median(cases), .by = geo_value))
   expect_class(expected$parent, "epiprocess__filter_archive__used_measurement")
   with(list(cli_abort = function(...) stop("now, pretend user didn't have cli attached")), {
-    expect_equal(rlang::catch_cnd(ea2p %>% filter(cases >= median(cases), .by = geo_value))$parent$message,
-                 expected$parent$message)
+    expect_equal(
+      rlang::catch_cnd(ea2p %>% filter(cases >= median(cases), .by = geo_value))$parent$message,
+      expected$parent$message
+    )
   })
   expect_equal(
-    rlang::catch_cnd(ea2p %>% filter({
-      c <- function(...) stop("and that they overwrote `c` to try to debug their own code")
-      cases >= median(cases)
-    }, .by = geo_value))$parent$message,
+    rlang::catch_cnd(ea2p %>% filter(
+      {
+        c <- function(...) stop("and that they overwrote `c` to try to debug their own code")
+        cases >= median(cases)
+      },
+      .by = geo_value
+    ))$parent$message,
     expected$parent$message
   )
 

From 7fb25fb32c49693eb74c5808bd35c7ed63bf47d2 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 16 Apr 2025 12:28:37 -0700
Subject: [PATCH 10/15] Bump DESCRIPTION version

---
 DESCRIPTION | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/DESCRIPTION b/DESCRIPTION
index c7a651d8..b008956d 100755
--- a/DESCRIPTION
+++ b/DESCRIPTION
@@ -1,7 +1,7 @@
 Type: Package
 Package: epiprocess
 Title: Tools for basic signal processing in epidemiology
-Version: 0.11.5
+Version: 0.11.6
 Authors@R: c(
     person("Jacob", "Bien", role = "ctb"),
     person("Logan", "Brooks", , "lcbrooks+github@andrew.cmu.edu", role = c("aut", "cre")),

From 8f63ad8e11ebed25f71c947e121a26302789735e Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 16 Apr 2025 12:39:01 -0700
Subject: [PATCH 11/15] Address failing checks

---
 R/methods-epi_archive.R | 2 +-
 _pkgdown.yml            | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index 3f2c987a..cff16aed 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -1093,7 +1093,7 @@ filter.epi_archive <- function(.data, ..., .by = NULL, .format_aware = FALSE) {
           # likely just the parent env, but search to make sure, in a way akin
           # to `<<-`:
           e <- environment()
-          while (!identical(e, globalenv()) && !identical(e, emptyenv())) {
+          while (!identical(e, globalenv()) && !identical(e, emptyenv())) { # nolint:vector_logic_linter
             if ("version" %in% names(e)) {
               # This is where the column bindings are. Replace the forbidden ones.
               # They are expected to be active bindings, so directly
diff --git a/_pkgdown.yml b/_pkgdown.yml
index 2953039c..ebd73dc8 100644
--- a/_pkgdown.yml
+++ b/_pkgdown.yml
@@ -72,8 +72,9 @@ reference:
       - epix_as_of
       - epix_as_of_current
       - epix_slide
-      - epix_merge
       - revision_summary
+      - epix_merge
+      - filter.epi_archive
       - epix_fill_through_version
       - epix_truncate_versions_after
       - set_versions_end

From 8b569ff96a9cdb6e006b700d0a15f54430c0fd09 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 16 Apr 2025 12:50:04 -0700
Subject: [PATCH 12/15] docs(filter.epi_archive): suggest alternative for meas
 col filtering

---
 R/methods-epi_archive.R   | 9 ++++++---
 man/filter.epi_archive.Rd | 9 ++++++---
 man/revision_analysis.Rd  | 2 +-
 3 files changed, 13 insertions(+), 7 deletions(-)

diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index cff16aed..33f73141 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -1036,9 +1036,12 @@ dplyr_col_modify.col_modify_recorder_df <- function(data, cols) {
 #'
 #' By default, using the `version` column or measurement columns is disabled as
 #' it's easy to get unexpected results. See if either [`epix_as_of`] or
-#' [`epix_slide`] works as an alternative. If they don't cover your use case,
-#' then you can set `.format_aware = TRUE` to enable usage of these columns, but
-#' be careful to:
+#' [`epix_slide`] works for any version selection you have in mind: for version
+#' selection, see the `version` or `.versions` args, respectively; for
+#' measurement column-based filtering, try `filter`ing after `epix_as_of` or
+#' inside the `.f` in `epix_slide()`. If they don't cover your use case, then
+#' you can set `.format_aware = TRUE` to enable usage of these columns, but be
+#' careful to:
 #' * Factor in that `.data$DT` may have been converted into a compact format
 #'   based on diffing consecutive versions, and the last version of each
 #'   observation in `.data$DT` will always be carried forward to future
diff --git a/man/filter.epi_archive.Rd b/man/filter.epi_archive.Rd
index a007bdc5..5f9d72db 100644
--- a/man/filter.epi_archive.Rd
+++ b/man/filter.epi_archive.Rd
@@ -23,9 +23,12 @@ details.}
 \details{
 By default, using the \code{version} column or measurement columns is disabled as
 it's easy to get unexpected results. See if either \code{\link{epix_as_of}} or
-\code{\link{epix_slide}} works as an alternative. If they don't cover your use case,
-then you can set \code{.format_aware = TRUE} to enable usage of these columns, but
-be careful to:
+\code{\link{epix_slide}} works for any version selection you have in mind: for version
+selection, see the \code{version} or \code{.versions} args, respectively; for
+measurement column-based filtering, try \code{filter}ing after \code{epix_as_of} or
+inside the \code{.f} in \code{epix_slide()}. If they don't cover your use case, then
+you can set \code{.format_aware = TRUE} to enable usage of these columns, but be
+careful to:
 \itemize{
 \item Factor in that \code{.data$DT} may have been converted into a compact format
 based on diffing consecutive versions, and the last version of each
diff --git a/man/revision_analysis.Rd b/man/revision_analysis.Rd
index 23ddf021..1c7336b3 100644
--- a/man/revision_analysis.Rd
+++ b/man/revision_analysis.Rd
@@ -56,7 +56,7 @@ of the \code{versions_end} are removed. \code{min_waiting_period} should charact
 the typical time during which most significant revisions occur. The default
 of 60 days corresponds to a typical near-final value for case counts as
 reported in the context of insurance. To avoid this filtering, either set
-to \code{NULL} or 0. This will be rounded up to the appropriate \code{time_type} if
+to \code{NULL} or 0. A \code{difftime} will be rounded up to the appropriate \code{time_type} if
 necessary (that is 5 days will be rounded to 1 week if the data is weekly).}
 
 \item{within_latest}{double between 0 and 1. Determines the threshold

From 26721facbfec803d8b97b03cd1d20b838632b894 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 16 Apr 2025 13:33:44 -0700
Subject: [PATCH 13/15] tests: `expect_equal` when comparing Dates due to
 inconsistent backing typeof

---
 tests/testthat/test-epi_slide.R | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tests/testthat/test-epi_slide.R b/tests/testthat/test-epi_slide.R
index 0aa4aca7..b9be125d 100644
--- a/tests/testthat/test-epi_slide.R
+++ b/tests/testthat/test-epi_slide.R
@@ -610,7 +610,7 @@ test_that("epi_slide_opt helper `full_date_seq` returns expected date values", {
   before <- 2L
   after <- 1L
 
-  expect_identical(
+  expect_equal(
     full_date_seq(
       epi_data_missing %>%
         mutate(time_value = days) %>%
@@ -627,7 +627,7 @@ test_that("epi_slide_opt helper `full_date_seq` returns expected date values", {
       pad_late_dates = as.Date(c("2022-01-08"))
     )
   )
-  expect_identical(
+  expect_equal(
     full_date_seq(
       epi_data_missing %>%
         mutate(time_value = weeks) %>%
@@ -677,7 +677,7 @@ test_that("epi_slide_opt helper `full_date_seq` returns expected date values", {
   before <- 5L
   after <- 0L
 
-  expect_identical(
+  expect_equal(
     full_date_seq(
       epi_data_missing %>%
         mutate(time_value = days) %>%
@@ -701,7 +701,7 @@ test_that("epi_slide_opt helper `full_date_seq` returns expected date values", {
   before <- 0L
   after <- 3L
 
-  expect_identical(
+  expect_equal(
     full_date_seq(
       epi_data_missing %>%
         mutate(time_value = days) %>%

From e4177567771a1135b97db42f98d69aa40082e7af Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 16 Apr 2025 14:06:46 -0700
Subject: [PATCH 14/15] Relax typeof checks due to Date backing typeof
 inconsistencies

---
 R/archive.R                                  | 41 ++++++++------------
 R/methods-epi_archive.R                      | 11 +-----
 man/epi_archive.Rd                           | 34 ++++++++--------
 tests/testthat/test-archive-version-bounds.R | 15 +++++--
 tests/testthat/test-time-utils.R             |  4 +-
 5 files changed, 48 insertions(+), 57 deletions(-)

diff --git a/R/archive.R b/R/archive.R
index 922371f1..88f8d704 100644
--- a/R/archive.R
+++ b/R/archive.R
@@ -52,13 +52,6 @@ validate_version_bound <- function(version_bound, x, na_ok = FALSE,
         class = "epiprocess__version_bound_mismatched_class"
       )
     }
-    if (!identical(typeof(version_bound), typeof(x[["version"]]))) {
-      cli_abort(
-        "{version_bound_arg} must have the same `typeof` as x$version,
-        which has a `typeof` of {typeof(x$version)}",
-        class = "epiprocess__version_bound_mismatched_typeof"
-      )
-    }
   }
 
   return(invisible(NULL))
@@ -207,23 +200,23 @@ next_after.Date <- function(x) x + 1L
 #'   undergo tiny nonmeaningful revisions and the archive object with the
 #'   default setting is too large.
 #' @param clobberable_versions_start Optional; `length`-1; either a value of the
-#'   same `class` and `typeof` as `x$version`, or an `NA` of any `class` and
-#'   `typeof`: specifically, either (a) the earliest version that could be
-#'   subject to "clobbering" (being overwritten with different update data, but
-#'   using the *same* version tag as the old update data), or (b) `NA`, to
-#'   indicate that no versions are clobberable. There are a variety of reasons
-#'   why versions could be clobberable under routine circumstances, such as (a)
-#'   today's version of one/all of the columns being published after initially
-#'   being filled with `NA` or LOCF, (b) a buggy version of today's data being
-#'   published but then fixed and republished later in the day, or (c) data
-#'   pipeline delays (e.g., publisher uploading, periodic scraping, database
-#'   syncing, periodic fetching, etc.) that make events (a) or (b) reflected
-#'   later in the day (or even on a different day) than expected; potential
-#'   causes vary between different data pipelines. The default value is `NA`,
-#'   which doesn't consider any versions to be clobberable. Another setting that
-#'   may be appropriate for some pipelines is `max_version_with_row_in(x)`.
-#' @param versions_end Optional; length-1, same `class` and `typeof` as
-#'   `x$version`: what is the last version we have observed? The default is
+#'   same `class` as `x$version`, or an `NA` of any `class`: specifically,
+#'   either (a) the earliest version that could be subject to "clobbering"
+#'   (being overwritten with different update data, but using the *same* version
+#'   tag as the old update data), or (b) `NA`, to indicate that no versions are
+#'   clobberable. There are a variety of reasons why versions could be
+#'   clobberable under routine circumstances, such as (a) today's version of
+#'   one/all of the columns being published after initially being filled with
+#'   `NA` or LOCF, (b) a buggy version of today's data being published but then
+#'   fixed and republished later in the day, or (c) data pipeline delays (e.g.,
+#'   publisher uploading, periodic scraping, database syncing, periodic
+#'   fetching, etc.) that make events (a) or (b) reflected later in the day (or
+#'   even on a different day) than expected; potential causes vary between
+#'   different data pipelines. The default value is `NA`, which doesn't consider
+#'   any versions to be clobberable. Another setting that may be appropriate for
+#'   some pipelines is `max_version_with_row_in(x)`.
+#' @param versions_end Optional; length-1, same `class` as `x$version`: what is
+#'   the last version we have observed? The default is
 #'   `max_version_with_row_in(x)`, but values greater than this could also be
 #'   valid, and would indicate that we observed additional versions of the data
 #'   beyond `max(x$version)`, but they all contained empty updates. (The default
diff --git a/R/methods-epi_archive.R b/R/methods-epi_archive.R
index 33f73141..faf3c128 100644
--- a/R/methods-epi_archive.R
+++ b/R/methods-epi_archive.R
@@ -80,19 +80,13 @@ epix_as_of <- function(x, version, min_time_value = -Inf, all_versions = FALSE,
       "`version` must have the same `class` vector as `epi_archive$DT$version`."
     )
   }
-  if (!identical(typeof(version), typeof(x$DT$version))) {
-    cli_abort(
-      "`version` must have the same `typeof` as `epi_archive$DT$version`."
-    )
-  }
   assert_scalar(version, na.ok = FALSE)
   if (version > x$versions_end) {
     cli_abort("`version` must be at most `epi_archive$versions_end`.")
   }
   assert_scalar(min_time_value, na.ok = FALSE)
   min_time_value_inf <- is.infinite(min_time_value) && min_time_value < 0
-  min_time_value_same_type <- typeof(min_time_value) == typeof(x$DT$time_value) &
-    class(min_time_value) == class(x$DT$time_value)
+  min_time_value_same_type <- identical(class(min_time_value), class(x$DT$time_value))
   if (!min_time_value_inf && !min_time_value_same_type) {
     cli_abort("`min_time_value` must be either -Inf or a time_value of the same type and
       class as `epi_archive$time_value`.")
@@ -941,9 +935,6 @@ epix_truncate_versions_after.epi_archive <- function(x, max_version) {
   if (!identical(class(max_version), class(x$DT$version))) {
     cli_abort("`max_version` must have the same `class` as `epi_archive$DT$version`.")
   }
-  if (!identical(typeof(max_version), typeof(x$DT$version))) {
-    cli_abort("`max_version` must have the same `typeof` as `epi_archive$DT$version`.")
-  }
   assert_scalar(max_version, na.ok = FALSE)
   if (max_version > x$versions_end) {
     cli_abort("`max_version` must be at most `epi_archive$versions_end`.")
diff --git a/man/epi_archive.Rd b/man/epi_archive.Rd
index b92cd505..f91834f3 100644
--- a/man/epi_archive.Rd
+++ b/man/epi_archive.Rd
@@ -63,21 +63,21 @@ undergo tiny nonmeaningful revisions and the archive object with the
 default setting is too large.}
 
 \item{clobberable_versions_start}{Optional; \code{length}-1; either a value of the
-same \code{class} and \code{typeof} as \code{x$version}, or an \code{NA} of any \code{class} and
-\code{typeof}: specifically, either (a) the earliest version that could be
-subject to "clobbering" (being overwritten with different update data, but
-using the \emph{same} version tag as the old update data), or (b) \code{NA}, to
-indicate that no versions are clobberable. There are a variety of reasons
-why versions could be clobberable under routine circumstances, such as (a)
-today's version of one/all of the columns being published after initially
-being filled with \code{NA} or LOCF, (b) a buggy version of today's data being
-published but then fixed and republished later in the day, or (c) data
-pipeline delays (e.g., publisher uploading, periodic scraping, database
-syncing, periodic fetching, etc.) that make events (a) or (b) reflected
-later in the day (or even on a different day) than expected; potential
-causes vary between different data pipelines. The default value is \code{NA},
-which doesn't consider any versions to be clobberable. Another setting that
-may be appropriate for some pipelines is \code{max_version_with_row_in(x)}.}
+same \code{class} as \code{x$version}, or an \code{NA} of any \code{class}: specifically,
+either (a) the earliest version that could be subject to "clobbering"
+(being overwritten with different update data, but using the \emph{same} version
+tag as the old update data), or (b) \code{NA}, to indicate that no versions are
+clobberable. There are a variety of reasons why versions could be
+clobberable under routine circumstances, such as (a) today's version of
+one/all of the columns being published after initially being filled with
+\code{NA} or LOCF, (b) a buggy version of today's data being published but then
+fixed and republished later in the day, or (c) data pipeline delays (e.g.,
+publisher uploading, periodic scraping, database syncing, periodic
+fetching, etc.) that make events (a) or (b) reflected later in the day (or
+even on a different day) than expected; potential causes vary between
+different data pipelines. The default value is \code{NA}, which doesn't consider
+any versions to be clobberable. Another setting that may be appropriate for
+some pipelines is \code{max_version_with_row_in(x)}.}
 
 \item{.versions_end}{location based versions_end, used to avoid prefix
 \code{version = issue} from being assigned to \code{versions_end} instead of being
@@ -86,8 +86,8 @@ used to rename columns.}
 \item{...}{used for specifying column names, as in \code{\link[dplyr:rename]{dplyr::rename}}. For
 example \code{version = release_date}}
 
-\item{versions_end}{Optional; length-1, same \code{class} and \code{typeof} as
-\code{x$version}: what is the last version we have observed? The default is
+\item{versions_end}{Optional; length-1, same \code{class} as \code{x$version}: what is
+the last version we have observed? The default is
 \code{max_version_with_row_in(x)}, but values greater than this could also be
 valid, and would indicate that we observed additional versions of the data
 beyond \code{max(x$version)}, but they all contained empty updates. (The default
diff --git a/tests/testthat/test-archive-version-bounds.R b/tests/testthat/test-archive-version-bounds.R
index ee500d30..878cde1c 100644
--- a/tests/testthat/test-archive-version-bounds.R
+++ b/tests/testthat/test-archive-version-bounds.R
@@ -71,14 +71,21 @@ test_that("`validate_version_bound` validate and class checks together allow and
   # Bad:
   expect_error(validate_version_bound(3.5, x_int, TRUE, "vb"), regexp = "must have the same `class`")
   expect_error(validate_version_bound(.Machine$integer.max, x_dbl, TRUE, "vb"), regexp = "must have the same `class`")
-  expect_error(validate_version_bound(
-    `class<-`(list(2), "clazz"),
-    tibble::tibble(version = `class<-`(5L, "clazz")), TRUE, "vb"
-  ), regexp = "must have the same `typeof`", class = "epiprocess__version_bound_mismatched_typeof")
   # Maybe questionable:
   expect_error(validate_version_bound(3, x_int, TRUE, "vb"))
   expect_error(validate_version_bound(3L, x_dbl, TRUE, "vb"))
+  # Maybe questionable, but accept to relax things a bit, as this is happening
+  # with Dates in some R(?) versions. Might need to turn some things into
+  # vec_cast_common, but idea is just make Date stuff work for now:
+  validate_version_bound(
+    `class<-`(list(2), "clazz"),
+    tibble::tibble(version = `class<-`(5L, "clazz")), TRUE, "vb"
+  )
   # Good:
+  validate_version_bound(
+    `class<-`(2, "Date"),
+    tibble::tibble(version = `class<-`(5L, "Date")), TRUE, "vb"
+  )
   validate_version_bound(my_int, x_int, TRUE, "vb")
   validate_version_bound(my_dbl, x_dbl, TRUE, "vb")
   validate_version_bound(my_list, x_list, TRUE, "vb")
diff --git a/tests/testthat/test-time-utils.R b/tests/testthat/test-time-utils.R
index 6fe8d78a..7ddd70c0 100644
--- a/tests/testthat/test-time-utils.R
+++ b/tests/testthat/test-time-utils.R
@@ -17,11 +17,11 @@ test_that("guess_period works", {
   # On Dates:
   daily_dates <- seq(as.Date("2020-01-01"), as.Date("2020-01-15"), by = "day")
   weekly_dates <- seq(as.Date("2020-01-01"), as.Date("2020-01-15"), by = "week")
-  expect_identical(
+  expect_equal(
     daily_dates[[1L]] + guess_period(daily_dates) * (seq_along(daily_dates) - 1L),
     daily_dates
   )
-  expect_identical(
+  expect_equal(
     weekly_dates[[1L]] + guess_period(weekly_dates) * (seq_along(weekly_dates) - 1L),
     weekly_dates
   )

From 332362bd17505cbe35ad5523f1094fe17b170fd7 Mon Sep 17 00:00:00 2001
From: "Logan C. Brooks" <lcbrooks+github@andrew.cmu.edu>
Date: Wed, 16 Apr 2025 14:08:56 -0700
Subject: [PATCH 15/15] Address tidyselect warnings on .data$

---
 R/methods-epi_df.R | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/R/methods-epi_df.R b/R/methods-epi_df.R
index 7870dede..51f6cf33 100644
--- a/R/methods-epi_df.R
+++ b/R/methods-epi_df.R
@@ -532,7 +532,7 @@ sum_groups_epi_df <- function(.x, sum_cols, group_cols = "time_value") {
   if (!"geo_value" %in% group_cols) {
     out <- out %>%
       mutate(geo_value = "total") %>%
-      relocate(.data$geo_value, .before = 1)
+      relocate("geo_value", .before = 1)
   }
 
   # The `geo_type` will be correctly inherited here by the following logic: