Skip to content

Commit df1b901

Browse files
authored
Discarding boxplot outliers (#5379)
* Add `outliers` param to boxplot * Add test * Redocument * Add news bullet
1 parent 466344a commit df1b901

File tree

4 files changed

+38
-14
lines changed

4 files changed

+38
-14
lines changed

NEWS.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,10 @@
11
# ggplot2 (development version)
22

3+
* `geom_boxplot()` gains an `outliers` argument to switch outliers on or off,
4+
in a manner that does affect the scale range. For hiding outliers in a way that does
5+
not affect the scale range, you can continue to use `outlier.shape = NA`
6+
(@teunbrand, #4892).
7+
38
* Binned scales now treat `NA`s in limits the same way continuous scales do
49
(#5355).
510

@@ -9,6 +14,7 @@
914
deprecated. The `hjust` setting of the `legend.text` and `legend.title`
1015
elements continues to fulfil the role of text alignment (@teunbrand, #5347).
1116

17+
1218
* Integers are once again valid input to theme arguments that expect numeric
1319
input (@teunbrand, #5369).
1420

R/geom-boxplot.R

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -33,19 +33,19 @@
3333
#' @inheritParams geom_bar
3434
#' @param geom,stat Use to override the default connection between
3535
#' `geom_boxplot()` and `stat_boxplot()`.
36+
#' @param outliers Whether to display (`TRUE`) or discard (`FALSE`) outliers
37+
#' from the plot. Hiding or discarding outliers can be useful when, for
38+
#' example, raw data points need to be displayed on top of the boxplot.
39+
#' By discarding outliers, the axis limits will adapt to the box and whiskers
40+
#' only, not the full data range. If outliers need to be hidden and the axes
41+
#' need to show the full data range, please use `outlier.shape = NA` instead.
3642
#' @param outlier.colour,outlier.color,outlier.fill,outlier.shape,outlier.size,outlier.stroke,outlier.alpha
3743
#' Default aesthetics for outliers. Set to `NULL` to inherit from the
3844
#' aesthetics used for the box.
3945
#'
4046
#' In the unlikely event you specify both US and UK spellings of colour, the
4147
#' US spelling will take precedence.
4248
#'
43-
#' Sometimes it can be useful to hide the outliers, for example when overlaying
44-
#' the raw data points on top of the boxplot. Hiding the outliers can be achieved
45-
#' by setting `outlier.shape = NA`. Importantly, this does not remove the outliers,
46-
#' it only hides them, so the range calculated for the y-axis will be the
47-
#' same with outliers shown and outliers hidden.
48-
#'
4949
#' @param notch If `FALSE` (default) make a standard box plot. If
5050
#' `TRUE`, make a notched box plot. Notches are used to compare groups;
5151
#' if the notches of two boxes do not overlap, this suggests that the medians
@@ -109,6 +109,7 @@
109109
geom_boxplot <- function(mapping = NULL, data = NULL,
110110
stat = "boxplot", position = "dodge2",
111111
...,
112+
outliers = TRUE,
112113
outlier.colour = NULL,
113114
outlier.color = NULL,
114115
outlier.fill = NULL,
@@ -133,6 +134,7 @@ geom_boxplot <- function(mapping = NULL, data = NULL,
133134
position$preserve <- "single"
134135
}
135136
}
137+
check_bool(outliers)
136138

137139
layer(
138140
data = data,
@@ -143,6 +145,7 @@ geom_boxplot <- function(mapping = NULL, data = NULL,
143145
show.legend = show.legend,
144146
inherit.aes = inherit.aes,
145147
params = list2(
148+
outliers = outliers,
146149
outlier.colour = outlier.color %||% outlier.colour,
147150
outlier.fill = outlier.fill,
148151
outlier.shape = outlier.shape,
@@ -167,7 +170,7 @@ GeomBoxplot <- ggproto("GeomBoxplot", Geom,
167170

168171
# need to declare `width` here in case this geom is used with a stat that
169172
# doesn't have a `width` parameter (e.g., `stat_identity`).
170-
extra_params = c("na.rm", "width", "orientation"),
173+
extra_params = c("na.rm", "width", "orientation", "outliers"),
171174

172175
setup_params = function(data, params) {
173176
params$flipped_aes <- has_flipped_aes(data, params)
@@ -180,6 +183,10 @@ GeomBoxplot <- ggproto("GeomBoxplot", Geom,
180183
data$width <- data$width %||%
181184
params$width %||% (resolution(data$x, FALSE) * 0.9)
182185

186+
if (isFALSE(params$outliers)) {
187+
data$outliers <- NULL
188+
}
189+
183190
if (!is.null(data$outliers)) {
184191
suppressWarnings({
185192
out_min <- vapply(data$outliers, min, numeric(1))

man/geom_boxplot.Rd

Lines changed: 9 additions & 7 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

tests/testthat/test-geom-boxplot.R

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,15 @@ test_that("geom_boxplot range includes all outliers", {
88

99
expect_true(miny <= min(dat$y))
1010
expect_true(maxy >= max(dat$y))
11+
12+
# Unless specifically directed not to
13+
p <- ggplot_build(ggplot(dat, aes(x, y)) + geom_boxplot(outliers = FALSE))
14+
15+
miny <- p$layout$panel_params[[1]]$y.range[1]
16+
maxy <- p$layout$panel_params[[1]]$y.range[2]
17+
18+
expect_lte(maxy, max(dat$y))
19+
expect_gte(miny, min(dat$y))
1120
})
1221

1322
test_that("geom_boxplot works in both directions", {

0 commit comments

Comments
 (0)