From c5799f56a3a1d7ee2de24911f505b6b9716a07bd Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Sun, 24 Jul 2022 09:05:32 +0900 Subject: [PATCH 1/6] Drop non-constant aesthetics more thoroughly --- R/stat-.r | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/R/stat-.r b/R/stat-.r index 9fe0fa07dc..58ac238371 100644 --- a/R/stat-.r +++ b/R/stat-.r @@ -120,13 +120,21 @@ Stat <- ggproto("Stat", self$compute_group(data = group, scales = scales, ...) }) + # record dropped columns + dropped_columns <- new_environment() stats <- mapply(function(new, old) { if (empty(new)) return(data_frame0()) - unique <- uniquecols(old) - missing <- !(names(unique) %in% names(new)) - vec_cbind( + + # ignore the columns that will be overwritten by `new` + old <- old[, !(names(old) %in% names(new)), drop = FALSE] + + # drop columns that are not constant within group + unique_idx <- vapply(old, function(x) length(unique0(x)) == 1, logical(1L)) + env_bind(dropped_columns, !!!set_names(names(old)[!unique_idx])) + + result <- vec_cbind( new, - unique[rep(1, nrow(new)), missing,drop = FALSE] + old[rep(1, nrow(new)), unique_idx, drop = FALSE] ) }, stats, groups, SIMPLIFY = FALSE) @@ -135,7 +143,8 @@ Stat <- ggproto("Stat", # The above code will drop columns that are not constant within groups and not # carried over/recreated by the stat. This can produce unexpected results, # and hence we warn about it. - dropped <- base::setdiff(names(data), base::union(self$dropped_aes, names(data_new))) + dropped <- ls(dropped_columns) + dropped <- dropped[!dropped %in% self$dropped_aes] if (length(dropped) > 0) { cli::cli_warn(c( "The following aesthetics were dropped during statistical transformation: {.field {glue_collapse(dropped, sep = ', ')}}", From 5e032c945c76dbe93c444caecdf0d6f3623f6d2d Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Sun, 24 Jul 2022 09:10:12 +0900 Subject: [PATCH 2/6] Drop --- R/stat-.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/stat-.r b/R/stat-.r index 58ac238371..a0e500bbb3 100644 --- a/R/stat-.r +++ b/R/stat-.r @@ -152,7 +152,7 @@ Stat <- ggproto("Stat", "i" = "Did you forget to specify a {.code group} aesthetic or to convert a numerical variable into a factor?" )) } - data_new + data_new[, !names(data_new) %in% dropped, drop = FALSE] }, compute_group = function(self, data, scales) { From 4a7617301bd2e6f03b194f991e057715488650a6 Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Wed, 24 Aug 2022 00:58:17 +0900 Subject: [PATCH 3/6] List all names including the one that starts with "." --- R/stat-.r | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/R/stat-.r b/R/stat-.r index a0e500bbb3..2ef9d62424 100644 --- a/R/stat-.r +++ b/R/stat-.r @@ -143,7 +143,7 @@ Stat <- ggproto("Stat", # The above code will drop columns that are not constant within groups and not # carried over/recreated by the stat. This can produce unexpected results, # and hence we warn about it. - dropped <- ls(dropped_columns) + dropped <- ls(dropped_columns, all.names = TRUE) dropped <- dropped[!dropped %in% self$dropped_aes] if (length(dropped) > 0) { cli::cli_warn(c( From 37da18e875e2be9f3bb269ba1fec0227a0974be6 Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Wed, 24 Aug 2022 08:11:42 +0900 Subject: [PATCH 4/6] Use simpler way --- R/stat-.r | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/R/stat-.r b/R/stat-.r index 2ef9d62424..b2dc9bd205 100644 --- a/R/stat-.r +++ b/R/stat-.r @@ -121,7 +121,7 @@ Stat <- ggproto("Stat", }) # record dropped columns - dropped_columns <- new_environment() + dropped <- character(0) stats <- mapply(function(new, old) { if (empty(new)) return(data_frame0()) @@ -130,9 +130,9 @@ Stat <- ggproto("Stat", # drop columns that are not constant within group unique_idx <- vapply(old, function(x) length(unique0(x)) == 1, logical(1L)) - env_bind(dropped_columns, !!!set_names(names(old)[!unique_idx])) + dropped <<- c(dropped, names(old)[!unique_idx]) - result <- vec_cbind( + vec_cbind( new, old[rep(1, nrow(new)), unique_idx, drop = FALSE] ) @@ -143,7 +143,7 @@ Stat <- ggproto("Stat", # The above code will drop columns that are not constant within groups and not # carried over/recreated by the stat. This can produce unexpected results, # and hence we warn about it. - dropped <- ls(dropped_columns, all.names = TRUE) + dropped <- unique0(dropped) dropped <- dropped[!dropped %in% self$dropped_aes] if (length(dropped) > 0) { cli::cli_warn(c( From fda735223aeb25c61230d3c9f1a2769c552074f2 Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Wed, 24 Aug 2022 09:12:15 +0900 Subject: [PATCH 5/6] Add more tests --- tests/testthat/test-stats.r | 51 ++++++++++++++++++++++++++++++++++--- 1 file changed, 48 insertions(+), 3 deletions(-) diff --git a/tests/testthat/test-stats.r b/tests/testthat/test-stats.r index 243fcc18c3..2cd71ab089 100644 --- a/tests/testthat/test-stats.r +++ b/tests/testthat/test-stats.r @@ -21,7 +21,10 @@ test_that("error message is thrown when aesthetics are missing", { }) test_that("erroneously dropped aesthetics are found and issue a warning", { - df <- data_frame( + + # case 1) dropped completely + + df1 <- data_frame( x = c( # arbitrary random numbers 0.42986445, 1.11153170, -1.22318013, 0.90982003, 0.46454276, -0.42300004, -1.76139834, -0.75060412, @@ -29,6 +32,48 @@ test_that("erroneously dropped aesthetics are found and issue a warning", { ), g = rep(1:2, each = 5) ) - p <- ggplot(df, aes(x, fill = g)) + geom_density() - expect_warning(ggplot_build(p), "aesthetics were dropped") + p1 <- ggplot(df1, aes(x, fill = g)) + geom_density() + expect_warning(ggplot_build(p1), "aesthetics were dropped") + + # case 2-1) dropped partially + + df2 <- data_frame( + id = c("a", "a", "b", "b", "c"), + colour = c( 0, 1, 10, 10, 20), # a should be dropped + fill = c( 0, 0, 10, 11, 20) # b should be dropped + ) + + p2 <- ggplot(df2, aes(id, colour = colour, fill = fill)) + geom_bar() + expect_warning( + b2 <- ggplot_build(p2), + "The following aesthetics were dropped during statistical transformation: .*colour.*, .*fill.*" + ) + + # colour is dropped because group a's colour is not constant (GeomBar$default_aes$colour is NA) + expect_true(all(is.na(b2$data[[1]]$colour))) + # fill is dropped because group b's fill is not constant + expect_true(all(b2$data[[1]]$fill == GeomBar$default_aes$fill)) + + # case 2-1) dropped partially with NA + + df3 <- data_frame( + id = c("a", "a", "b", "b", "c"), + colour = c( 0, NA, 10, 10, 20), # a should be dropped + fill = c( NA, NA, 10, 10, 20) # a should not be dropped + ) + + p3 <- ggplot(df3, aes(id, colour = colour, fill = fill)) + geom_bar() + + scale_fill_continuous(na.value = "#123") + expect_warning( + b3 <- ggplot_build(p3), + "The following aesthetics were dropped during statistical transformation: .*colour.*" + ) + + # colour is dropped because group a's colour is not constant (GeomBar$default_aes$colour is NA) + expect_true(all(is.na(b3$data[[1]]$colour))) + # fill is NOT dropped. Group a's fill is na.value, but others are mapped. + expect_equal( + b3$data[[1]]$fill == "#123", + c(TRUE, FALSE, FALSE) + ) }) From 73be3838e2bb8efa052a86aaa2e2e579095cd186 Mon Sep 17 00:00:00 2001 From: Hiroaki Yutani Date: Wed, 24 Aug 2022 22:56:25 +0900 Subject: [PATCH 6/6] Add more comments --- R/stat-.r | 48 +++++++++++++++++++++++++++++++++++------------- 1 file changed, 35 insertions(+), 13 deletions(-) diff --git a/R/stat-.r b/R/stat-.r index b2dc9bd205..449d3eb822 100644 --- a/R/stat-.r +++ b/R/stat-.r @@ -120,31 +120,50 @@ Stat <- ggproto("Stat", self$compute_group(data = group, scales = scales, ...) }) - # record dropped columns - dropped <- character(0) + # Record columns that are not constant within groups. We will drop them later. + non_constant_columns <- character(0) + stats <- mapply(function(new, old) { + # In this function, + # + # - `new` is the computed result. All the variables will be picked. + # - `old` is the original data. There are 3 types of variables: + # 1) If the variable is already included in `new`, it's ignored + # because the values of `new` will be used. + # 2) If the variable is not included in `new` and the value is + # constant within the group, it will be picked. + # 3) If the variable is not included in `new` and the value is not + # constant within the group, it will be dropped. We need to record + # the dropped columns to drop it consistently later. + if (empty(new)) return(data_frame0()) - # ignore the columns that will be overwritten by `new` + # First, filter out the columns already included `new` (type 1). old <- old[, !(names(old) %in% names(new)), drop = FALSE] - # drop columns that are not constant within group - unique_idx <- vapply(old, function(x) length(unique0(x)) == 1, logical(1L)) - dropped <<- c(dropped, names(old)[!unique_idx]) + # Then, check whether the rest of the columns have constant values (type 2) + # or not (type 3). + non_constant <- vapply(old, function(x) length(unique0(x)) > 1, logical(1L)) + + # Record the non-constant columns. + non_constant_columns <<- c(non_constant_columns, names(old)[non_constant]) vec_cbind( new, - old[rep(1, nrow(new)), unique_idx, drop = FALSE] + # Note that, while the non-constant columns should be dropped, we don't + # do this here because it can be filled by vec_rbind() later if either + # one of the group has a constant value (see #4394 for the details). + old[rep(1, nrow(new)), , drop = FALSE] ) }, stats, groups, SIMPLIFY = FALSE) - data_new <- vec_rbind(!!!stats) + non_constant_columns <- unique0(non_constant_columns) - # The above code will drop columns that are not constant within groups and not + # We are going to drop columns that are not constant within groups and not # carried over/recreated by the stat. This can produce unexpected results, - # and hence we warn about it. - dropped <- unique0(dropped) - dropped <- dropped[!dropped %in% self$dropped_aes] + # and hence we warn about it (variables in dropped_aes are expected so + # ignored here). + dropped <- non_constant_columns[!non_constant_columns %in% self$dropped_aes] if (length(dropped) > 0) { cli::cli_warn(c( "The following aesthetics were dropped during statistical transformation: {.field {glue_collapse(dropped, sep = ', ')}}", @@ -152,7 +171,10 @@ Stat <- ggproto("Stat", "i" = "Did you forget to specify a {.code group} aesthetic or to convert a numerical variable into a factor?" )) } - data_new[, !names(data_new) %in% dropped, drop = FALSE] + + # Finally, combine the results and drop columns that are not constant. + data_new <- vec_rbind(!!!stats) + data_new[, !names(data_new) %in% non_constant_columns, drop = FALSE] }, compute_group = function(self, data, scales) {