diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 9f2112729a503..6b79ed8028d24 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -5601,53 +5601,102 @@ def clip(self, lower=None, upper=None, axis=None, inplace=False, """ Trim values at input threshold(s). + Elements above/below the upper/lower thresholds will be changed to + upper/lower thresholds. Clipping data is a method for dealing with + out-of-range elements. If some elements are too large or too small, + clipping is one way to transform the data into a reasonable range. + Parameters ---------- - lower : float or array_like, default None - upper : float or array_like, default None - axis : int or string axis name, optional - Align object with lower and upper along the given axis. + lower : float, array-like or None, default None + Lower threshold for clipping. Values smaller than `lower` will be + converted to `lower`. + upper : float, array-like or None, default None + Upper threshold for clipping. Values larger than `upper` will be + converted to `upper`. + axis : {0 or 'index', 1 or 'columns', None}, default None + Apply clip by index (i.e. by rows) or columns. inplace : boolean, default False Whether to perform the operation in place on the data - .. versionadded:: 0.21.0 + .. versionadded:: 0.21.0 + *args, **kwargs + Additional keywords have no effect but might be accepted + for compatibility with numpy. Returns ------- - clipped : Series + `Series` or `DataFrame`. + Original input with those values above/below the + `upper`/`lower` thresholds set to the threshold values. + + References + ---------- + .. [1] Tukey, John W. "The future of data analysis." The annals of + mathematical statistics 33.1 (1962): 1-67. + + See Also + -------- + DataFrame.clip : Trim values at input threshold(s). + Series.clip : Trim values at input threshold(s). + Series.clip_lower : Return copy of the input with values below given + value(s) truncated. 
+ Series.clip_upper : Return copy of input with values above given + value(s) truncated. + DataFrame.clip_lower : Return copy of the input with values below given + value(s) truncated. + DataFrame.clip_upper : Return copy of input with values above given + value(s) truncated. + DataFrame.quantile : Return values at the given quantile over requested + axis, a la numpy.percentile. Examples -------- + >>> df = pd.DataFrame({'a': [-1, -2, -100], + ... 'b': [1, 2, 100]}, + ... index=['foo', 'bar', 'foobar']) >>> df - 0 1 - 0 0.335232 -1.256177 - 1 -1.367855 0.746646 - 2 0.027753 -1.176076 - 3 0.230930 -0.679613 - 4 1.261967 0.570967 - - >>> df.clip(-1.0, 0.5) - 0 1 - 0 0.335232 -1.000000 - 1 -1.000000 0.500000 - 2 0.027753 -1.000000 - 3 0.230930 -0.679613 - 4 0.500000 0.500000 - - >>> t - 0 -0.3 - 1 -0.2 - 2 -0.1 - 3 0.0 - 4 0.1 - dtype: float64 - - >>> df.clip(t, t + 1, axis=0) - 0 1 - 0 0.335232 -0.300000 - 1 -0.200000 0.746646 - 2 0.027753 -0.100000 - 3 0.230930 0.000000 - 4 1.100000 0.570967 + a b + foo -1 1 + bar -2 2 + foobar -100 100 + + >>> df.clip(lower=-10, upper=10) + a b + foo -1 1 + bar -2 2 + foobar -10 10 + + You can clip each column or row with different thresholds by passing + a ``Series`` to the lower/upper argument. Use the axis argument to clip + by columns or rows. + + >>> col_thresh = pd.Series({'a': -5, 'b': 5}) + >>> df.clip(lower=col_thresh, axis='columns') + a b + foo -1 5 + bar -2 5 + foobar -5 100 + + Clip the foo, bar, and foobar rows with lower thresholds 0, 1, and 10. + + >>> row_thresh = pd.Series({'foo': 0, 'bar': 1, 'foobar': 10}) + >>> df.clip(lower=row_thresh, axis='index') + a b + foo 0 1 + bar 1 2 + foobar 10 100 + + Winsorizing [1]_ is a related method, whereby the data are clipped at + the 5th and 95th percentiles. The ``DataFrame.quantile`` method returns + a ``Series`` with column names as index and the quantiles as values. + Use ``axis='columns'`` to apply clipping to columns. 
+ + >>> lower, upper = df.quantile(0.05), df.quantile(0.95) + >>> df.clip(lower=lower, upper=upper, axis='columns') + a b + foo -1.1 1.1 + bar -2.0 2.0 + foobar -90.2 90.2 """ if isinstance(self, ABCPanel): raise NotImplementedError("clip is not supported yet for panels")