diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 931d18dc349f3..7828e05892887 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -125,6 +125,7 @@ Other enhancements - Added ``validate`` argument to :meth:`DataFrame.join` (:issue:`46622`) - A :class:`errors.PerformanceWarning` is now thrown when using ``string[pyarrow]`` dtype with methods that don't dispatch to ``pyarrow.compute`` methods (:issue:`42613`) - Added ``numeric_only`` argument to :meth:`Resampler.sum`, :meth:`Resampler.prod`, :meth:`Resampler.min`, :meth:`Resampler.max`, :meth:`Resampler.first`, and :meth:`Resampler.last` (:issue:`46442`) +- Added validation of boolean kwargs in string series and DataFrame methods, along with tests (:issue:`16714`) .. --------------------------------------------------------------------------- .. _whatsnew_150.notable_bug_fixes: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index ef5e6dd1d6757..073df7b512b88 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -95,6 +95,7 @@ validate_axis_style_args, validate_bool_kwarg, validate_percentile, + validate_bool_kwargs_from_keywords, ) from pandas.core.dtypes.cast import ( @@ -816,6 +817,7 @@ def __init__( NDFrame.__init__(self, mgr) # ---------------------------------------------------------------------- + @validate_bool_kwargs_from_keywords('nan_as_null', 'allow_copy') def __dataframe__( self, nan_as_null: bool = False, allow_copy: bool = True ) -> DataFrameXchg: @@ -986,6 +988,7 @@ def _repr_fits_vertical_(self) -> bool: max_rows = get_option("display.max_rows") return len(self) <= max_rows + @validate_bool_kwargs_from_keywords('ignore_width') def _repr_fits_horizontal_(self, ignore_width: bool = False) -> bool: """ Check if full repr fits in horizontal boundaries imposed by the display @@ -1165,6 +1168,7 @@ def to_string( "references the column, while the value defines the space to use.", ) 
@Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) + @validate_bool_kwargs_from_keywords('index', 'index_names', 'show_dimensions') def to_string( self, buf: FilePath | WriteBuffer[str] | None = None, @@ -1379,7 +1383,7 @@ def iterrows(self) -> Iterable[tuple[Hashable, Series]]: for k, v in zip(self.index, self.values): s = klass(v, index=columns, name=k).__finalize__(self) yield k, s - + @validate_bool_kwargs_from_keywords('index') def itertuples( self, index: bool = True, name: str | None = "Pandas" ) -> Iterable[tuple[Any, ...]]: @@ -1740,6 +1744,7 @@ def create_index(indexlist, namelist): columns = create_index(data["columns"], data["column_names"]) return cls(realdata, index=index, columns=columns, dtype=dtype) + @validate_bool_kwargs_from_keywords('copy') def to_numpy( self, dtype: npt.DTypeLike | None = None, @@ -2011,6 +2016,7 @@ def to_dict(self, orient: str = "dict", into=dict): else: raise ValueError(f"orient '{orient}' not understood") + @validate_bool_kwargs_from_keywords('reauth', 'auth_local_webserver', 'progress_bar') def to_gbq( self, destination_table: str, @@ -2496,6 +2502,7 @@ def to_records( return np.rec.fromarrays(arrays, dtype={"names": names, "formats": formats}) @classmethod + @validate_bool_kwargs_from_keywords('verify_integrity') def _from_arrays( cls, arrays, @@ -2550,6 +2557,7 @@ def _from_arrays( compression_options=_shared_docs["compression_options"] % "path", ) @deprecate_kwarg(old_arg_name="fname", new_arg_name="path") + @validate_bool_kwargs_from_keywords('write_index') def to_stata( self, path: FilePath | WriteBuffer[bytes], @@ -2911,6 +2919,7 @@ def to_parquet( " Ability to use str", ) @Substitution(shared_params=fmt.common_docstring, returns=fmt.return_docstring) + @validate_bool_kwargs_from_keywords('index', 'index_names', 'bold_rows', 'escape', 'notebook', 'render_links') def to_html( self, buf: FilePath | WriteBuffer[str] | None = None, @@ -3001,6 +3010,7 @@ def to_html( 
storage_options=_shared_docs["storage_options"], compression_options=_shared_docs["compression_options"] % "path_or_buffer", ) + @validate_bool_kwargs_from_keywords('index') def to_xml( self, path_or_buffer: FilePath | WriteBuffer[bytes] | WriteBuffer[str] | None = None, @@ -3233,6 +3243,7 @@ def info( show_counts=show_counts, ) + @validate_bool_kwargs_from_keywords('index', 'deep') def memory_usage(self, index: bool = True, deep: bool = False) -> Series: """ Return the memory usage of each column in bytes. @@ -3333,6 +3344,7 @@ def memory_usage(self, index: bool = True, deep: bool = False) -> Series: result = index_memory_usage._append(result) return result + @validate_bool_kwargs_from_keywords('copy') def transpose(self, *args, copy: bool = False) -> DataFrame: """ Transpose index and columns. @@ -3652,6 +3664,7 @@ def _getitem_multilevel(self, key): # loc is neither a slice nor ndarray, so must be an int return self._ixs(loc, axis=1) + @validate_bool_kwargs_from_keywords('takeable') def _get_value(self, index, col, takeable: bool = False) -> Scalar: """ Quickly retrieve single value at passed column and index. @@ -4004,6 +4017,7 @@ def _reset_cacher(self) -> None: # no-op for DataFrame pass + @validate_bool_kwargs_from_keywords('inplace') def _maybe_cache_changed(self, item, value: Series, inplace: bool) -> None: """ The object has called back to us saying maybe it has changed. @@ -4021,6 +4035,7 @@ def _maybe_cache_changed(self, item, value: Series, inplace: bool) -> None: # ---------------------------------------------------------------------- # Unsorted + @validate_bool_kwargs_from_keywords('inplace') def query(self, expr: str, inplace: bool = False, **kwargs): """ Query the columns of a DataFrame with a boolean expression. 
@@ -4186,6 +4201,7 @@ def query(self, expr: str, inplace: bool = False, **kwargs): else: return result + @validate_bool_kwargs_from_keywords('inplace') def eval(self, expr: str, inplace: bool = False, **kwargs): """ Evaluate a string describing operations on DataFrame columns. @@ -4758,6 +4774,7 @@ def _reindex_multi( ) @doc(NDFrame.align, **_shared_doc_kwargs) + @validate_bool_kwargs_from_keywords('copy') def align( self, other, @@ -4844,6 +4861,7 @@ def set_axis( see_also_sub=" or columns", ) @Appender(NDFrame.set_axis.__doc__) + @validate_bool_kwargs_from_keywords('inplace') def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): return super().set_axis(labels, axis=axis, inplace=inplace) @@ -4913,6 +4931,7 @@ def drop( # error: Signature of "drop" incompatible with supertype "NDFrame" # github.com/python/mypy/issues/12387 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) + @validate_bool_kwargs_from_keywords('inplace') def drop( # type: ignore[override] self, labels: Hashable | list[Hashable] = None, @@ -5115,6 +5134,7 @@ def rename( ) -> DataFrame | None: ... + @validate_bool_kwargs_from_keywords('inplace', 'copy') def rename( self, mapper: Renamer | None = None, @@ -5362,6 +5382,7 @@ def fillna( @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "value"]) @doc(NDFrame.fillna, **_shared_doc_kwargs) + @validate_bool_kwargs_from_keywords('inplace') def fillna( self, value: object | ArrayLike | None = None, @@ -5423,6 +5444,7 @@ def pop(self, item: Hashable) -> Series: """ return super().pop(item=item) + @validate_bool_kwargs_from_keywords('inplace') @doc(NDFrame.replace, **_shared_doc_kwargs) def replace( self, @@ -5559,6 +5581,7 @@ def shift( ) @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "keys"]) + @validate_bool_kwargs_from_keywords('inplace', 'drop', 'append', 'verify_integrity') def set_index( self, keys, @@ -5838,6 +5861,7 @@ def reset_index( ... 
@deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "level"]) + @validate_bool_kwargs_from_keywords('inplace', 'drop') def reset_index( self, level: Hashable | Sequence[Hashable] | None = None, @@ -6110,6 +6134,7 @@ def notnull(self) -> DataFrame: return ~self.isna() @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + @validate_bool_kwargs_from_keywords('inplace') def dropna( self, axis: Axis = 0, @@ -6273,6 +6298,7 @@ def dropna( return result @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "subset"]) + @validate_bool_kwargs_from_keywords('inplace', 'ignore_index') def drop_duplicates( self, subset: Hashable | Sequence[Hashable] | None = None, @@ -6519,6 +6545,7 @@ def f(vals) -> tuple[np.ndarray, int]: @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "by"]) @Substitution(**_shared_doc_kwargs) @Appender(NDFrame.sort_values.__doc__) + @validate_bool_kwargs_from_keywords('inplace', 'ignore_index') # error: Signature of "sort_values" incompatible with supertype "NDFrame" def sort_values( # type: ignore[override] self, @@ -6642,6 +6669,7 @@ def sort_index( # error: Signature of "sort_index" incompatible with supertype "NDFrame" @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + @validate_bool_kwargs_from_keywords('inplace', 'sort_remaining', 'ignore_index') def sort_index( # type: ignore[override] self, axis: Axis = 0, @@ -6755,6 +6783,7 @@ def sort_index( # type: ignore[override] key=key, ) + @validate_bool_kwargs_from_keywords('normalize', 'sort', 'ascending', 'dropna') def value_counts( self, subset: Sequence[Hashable] | None = None, @@ -7482,6 +7511,7 @@ def __rdivmod__(self, other) -> tuple[DataFrame, DataFrame]: """, klass=_shared_doc_kwargs["klass"], ) + @validate_bool_kwargs_from_keywords('keep_shape', 'keep_equal') def compare( self, other: DataFrame, @@ -7496,6 +7526,7 @@ def compare( keep_equal=keep_equal, ) + @validate_bool_kwargs_from_keywords('overwrite') def 
combine( self, other: DataFrame, func, fill_value=None, overwrite: bool = True ) -> DataFrame: @@ -7741,6 +7772,7 @@ def combiner(x, y): return combined + @validate_bool_kwargs_from_keywords('overwrite') def update( self, other, @@ -8000,6 +8032,7 @@ def update( """ ) @Appender(_shared_docs["groupby"] % _shared_doc_kwargs) + @validate_bool_kwargs_from_keywords('as_index', 'sort', 'observed', 'dropna') def groupby( self, by=None, @@ -8373,6 +8406,7 @@ def pivot_table( sort=sort, ) + @validate_bool_kwargs_from_keywords('dropna') def stack(self, level: Level = -1, dropna: bool = True): """ Stack the prescribed level(s) from columns to index. @@ -8548,6 +8582,7 @@ def stack(self, level: Level = -1, dropna: bool = True): return result.__finalize__(self, method="stack") + @validate_bool_kwargs_from_keywords('ignore_index') def explode( self, column: IndexLabel, @@ -9003,6 +9038,7 @@ def transform( assert isinstance(result, DataFrame) return result + @validate_bool_kwargs_from_keywords('raw') def apply( self, func: AggFuncType, @@ -9250,6 +9286,7 @@ def infer(x): # ---------------------------------------------------------------------- # Merging / joining methods + @validate_bool_kwargs_from_keywords('ignore_index', 'verify_integrity', 'sort') def append( self, other, @@ -9362,6 +9399,7 @@ def append( return self._append(other, ignore_index, verify_integrity, sort) + @validate_bool_kwargs_from_keywords('ignore_index', 'verify_integrity', 'sort') def _append( self, other, @@ -9421,6 +9459,7 @@ def _append( result = result.reindex(combined_columns, axis=1) return result.__finalize__(self, method="append") + @validate_bool_kwargs_from_keywords('sort') def join( self, other: DataFrame | Series, @@ -9594,6 +9633,7 @@ def join( validate=validate, ) + @validate_bool_kwargs_from_keywords('sort') def _join_compat( self, other: DataFrame | Series, @@ -9677,6 +9717,7 @@ def _join_compat( @Substitution("") @Appender(_merge_doc, indents=2) + 
@validate_bool_kwargs_from_keywords('left_index', 'right_index', 'copy', 'indicator', 'sort') def merge( self, right: DataFrame | Series, @@ -9829,6 +9870,7 @@ def _series_round(ser: Series, decimals: int): # ---------------------------------------------------------------------- # Statistical methods, etc. + @validate_bool_kwargs_from_keywords('numeric_only') def corr( self, method: str | Callable[[np.ndarray, np.ndarray], float] = "pearson", @@ -9942,6 +9984,7 @@ def corr( return self._constructor(correl, index=idx, columns=cols) + @validate_bool_kwargs_from_keywords('numeric_only') def cov( self, min_periods: int | None = None, @@ -10071,6 +10114,7 @@ def cov( return self._constructor(base_cov, index=idx, columns=cols) + @validate_bool_kwargs_from_keywords('numeric_only') def corrwith( self, other, @@ -10235,6 +10279,7 @@ def c(x): # ---------------------------------------------------------------------- # ndarray-like stats methods + @validate_bool_kwargs_from_keywords('numeric_only') def count( self, axis: Axis = 0, level: Level | None = None, numeric_only: bool = False ): @@ -10340,6 +10385,7 @@ def count( return result.astype("int64").__finalize__(self, method="count") + @validate_bool_kwargs_from_keywords('numeric_only') def _count_level(self, level: Level, axis: int = 0, numeric_only: bool = False): if numeric_only: frame = self._get_numeric_data() @@ -10385,6 +10431,7 @@ def _count_level(self, level: Level, axis: int = 0, numeric_only: bool = False): return result + @validate_bool_kwargs_from_keywords('numeric_only') def _reduce( self, op, @@ -10537,6 +10584,7 @@ def _get_data() -> DataFrame: result = self._constructor_sliced(result, index=labels) return result + @validate_bool_kwargs_from_keywords('skipna') def _reduce_axis1(self, name: str, func, skipna: bool) -> Series: """ Special case for _reduce to try to avoid a potentially-expensive transpose. 
@@ -10565,6 +10613,7 @@ def _reduce_axis1(self, name: str, func, skipna: bool) -> Series: res_ser = self._constructor_sliced(result, index=self.index) return res_ser + @validate_bool_kwargs_from_keywords('dropna') def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series: """ Count number of distinct elements in specified axis. @@ -10606,6 +10655,7 @@ def nunique(self, axis: Axis = 0, dropna: bool = True) -> Series: return self.apply(Series.nunique, axis=axis, dropna=dropna) @doc(_shared_docs["idxmin"], numeric_only_default="False") + @validate_bool_kwargs_from_keywords('skipna', 'numeric_only') def idxmin( self, axis: Axis = 0, skipna: bool = True, numeric_only: bool = False ) -> Series: @@ -10630,6 +10680,7 @@ def idxmin( return data._constructor_sliced(result, index=data._get_agg_axis(axis)) @doc(_shared_docs["idxmax"], numeric_only_default="False") + @validate_bool_kwargs_from_keywords('skipna', 'numeric_only') def idxmax( self, axis: Axis = 0, skipna: bool = True, numeric_only: bool = False ) -> Series: @@ -10665,6 +10716,7 @@ def _get_agg_axis(self, axis_num: int) -> Index: else: raise ValueError(f"Axis must be 0 or 1 (got {repr(axis_num)})") + @validate_bool_kwargs_from_keywords('skipna', 'dropna') def mode( self, axis: Axis = 0, numeric_only: bool = False, dropna: bool = True ) -> DataFrame: @@ -10884,6 +10936,7 @@ def quantile( return result.__finalize__(self, method="quantile") @doc(NDFrame.asfreq, **_shared_doc_kwargs) + @validate_bool_kwargs_from_keywords('normalize') def asfreq( self, freq: Frequency, @@ -10933,6 +10986,7 @@ def resample( group_keys=group_keys, ) + @validate_bool_kwargs_from_keywords('copy') def to_timestamp( self, freq: Frequency | None = None, @@ -10971,6 +11025,7 @@ def to_timestamp( setattr(new_obj, axis_name, new_ax) return new_obj + @validate_bool_kwargs_from_keywords('copy') def to_period( self, freq: Frequency | None = None, axis: Axis = 0, copy: bool = True ) -> DataFrame: @@ -11170,6 +11225,7 @@ def _AXIS_NAMES(self) 
-> dict[int, str]: # ---------------------------------------------------------------------- # Internal Interface Methods + @validate_bool_kwargs_from_keywords('copy') def _to_dict_of_blocks(self, copy: bool = True): """ Return a dict of dtype -> Constructor Types that @@ -11264,6 +11320,7 @@ def values(self) -> np.ndarray: return self._mgr.as_array() @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + @validate_bool_kwargs_from_keywords('inplace') def ffill( self: DataFrame, axis: None | Axis = None, @@ -11274,6 +11331,7 @@ def ffill( return super().ffill(axis, inplace, limit, downcast) @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + @validate_bool_kwargs_from_keywords('inplace') def bfill( self: DataFrame, axis: None | Axis = None, @@ -11286,6 +11344,7 @@ def bfill( @deprecate_nonkeyword_arguments( version=None, allowed_args=["self", "lower", "upper"] ) + @validate_bool_kwargs_from_keywords('inplace') def clip( self: DataFrame, lower=None, @@ -11298,6 +11357,7 @@ def clip( return super().clip(lower, upper, axis, inplace, *args, **kwargs) @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "method"]) + @validate_bool_kwargs_from_keywords('inplace') def interpolate( self: DataFrame, method: str = "linear", diff --git a/pandas/core/series.py b/pandas/core/series.py index 1d3509cac0edd..b84b195ddb2b1 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -64,6 +64,7 @@ validate_ascending, validate_bool_kwarg, validate_percentile, + validate_bool_kwargs_from_keywords, ) from pandas.core.dtypes.cast import ( @@ -330,6 +331,7 @@ class Series(base.IndexOpsMixin, NDFrame): # ---------------------------------------------------------------------- # Constructors + @validate_bool_kwargs_from_keywords('copy', 'fastpath') def __init__( self, data=None, @@ -1055,6 +1057,7 @@ def _get_values(self, indexer: slice | npt.NDArray[np.bool_]) -> Series: new_mgr = self._mgr.getitem_mgr(indexer) return 
self._constructor(new_mgr).__finalize__(self) + @validate_bool_kwargs_from_keywords('takeable') def _get_value(self, label, takeable: bool = False): """ Quickly retrieve single value at passed index label. @@ -1201,6 +1204,7 @@ def _set_values(self, key, value) -> None: self._mgr = self._mgr.setitem(indexer=key, value=value) self._maybe_update_cacher() + @validate_bool_kwargs_from_keywords('takeable') def _set_value(self, label, value, takeable: bool = False): """ Quickly set single value at passed label. @@ -1272,6 +1276,7 @@ def _check_is_chained_assignment_possible(self) -> bool: return True return super()._check_is_chained_assignment_possible() + @validate_bool_kwargs_from_keywords('clear', 'verify_is_copy', 'inplace') def _maybe_update_cacher( self, clear: bool = False, verify_is_copy: bool = True, inplace: bool = False ) -> None: @@ -1368,6 +1373,7 @@ def repeat(self, repeats, axis=None) -> Series: ) @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "level"]) + @validate_bool_kwargs_from_keywords('allow_duplicates') def reset_index( self, level=None, @@ -1639,6 +1645,7 @@ def to_string( +----+----------+""" ), ) + @validate_bool_kwargs_from_keywords('index') def to_markdown( self, buf: IO[str] | None = None, @@ -1824,6 +1831,7 @@ def to_frame(self, name: Hashable = lib.no_default) -> DataFrame: df = self._constructor_expanddim(mgr) return df.__finalize__(self, method="to_frame") + @validate_bool_kwargs_from_keywords('inplace') def _set_name(self, name, inplace=False) -> Series: """ Set the Series name. 
@@ -1921,6 +1929,7 @@ def _set_name(self, name, inplace=False) -> Series: Name: Max Speed, dtype: float64 """ ) + @validate_bool_kwargs_from_keywords('as_index', 'sort', 'observed', 'dropna') @Appender(_shared_docs["groupby"] % _shared_doc_kwargs) def groupby( self, @@ -2032,6 +2041,7 @@ def count(self, level=None): self, method="count" ) + @validate_bool_kwargs_from_keywords('dropna') def mode(self, dropna: bool = True) -> Series: """ Return the mode(s) of the Series. @@ -2890,6 +2900,7 @@ def searchsorted( # type: ignore[override] # ------------------------------------------------------------------- # Combination + @validate_bool_kwargs_from_keywords('ignore_index', 'verify_integrity') def append( self, to_append, ignore_index: bool = False, verify_integrity: bool = False ): @@ -2976,6 +2987,7 @@ def append( return self._append(to_append, ignore_index, verify_integrity) + @validate_bool_kwargs_from_keywords('ignore_index', 'verify_integrity') def _append( self, to_append, ignore_index: bool = False, verify_integrity: bool = False ): @@ -3129,6 +3141,7 @@ def _construct_result( """, klass=_shared_doc_kwargs["klass"], ) + @validate_bool_kwargs_from_keywords('keep_shape', 'keep_equal') def compare( self, other: Series, @@ -3371,6 +3384,7 @@ def update(self, other) -> None: # Reindexing, sorting @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + @validate_bool_kwargs_from_keywords('inplace', 'ignore_index') def sort_values( self, axis=0, @@ -3630,6 +3644,7 @@ def sort_index( # error: Signature of "sort_index" incompatible with supertype "NDFrame" @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + @validate_bool_kwargs_from_keywords('inplace', 'sort_remaining', 'ignore_index') def sort_index( # type: ignore[override] self, axis: Axis = 0, @@ -4132,6 +4147,7 @@ def reorder_levels(self, order) -> Series: result.index = result.index.reorder_levels(order) return result + @validate_bool_kwargs_from_keywords('ignore_index') def 
explode(self, ignore_index: bool = False) -> Series: """ Transform each element of a list-like to a row. @@ -4412,6 +4428,7 @@ def transform( ).transform() return result + @validate_bool_kwargs_from_keywords('convert_dtype') def apply( self, func: AggFuncType, @@ -4657,6 +4674,7 @@ def rename( ) -> Series | None: ... + @validate_bool_kwargs_from_keywords('copy', 'inplace') def rename( self, index: Renamer | Hashable | None = None, @@ -4789,6 +4807,7 @@ def set_axis(self, labels, axis: Axis = ..., inplace: bool = ...) -> Series | No see_also_sub="", ) @Appender(NDFrame.set_axis.__doc__) + @validate_bool_kwargs_from_keywords('inplace') def set_axis(self, labels, axis: Axis = 0, inplace: bool = False): return super().set_axis(labels, axis=axis, inplace=inplace) @@ -4857,6 +4876,7 @@ def drop( # error: Signature of "drop" incompatible with supertype "NDFrame" # github.com/python/mypy/issues/12387 @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "labels"]) + @validate_bool_kwargs_from_keywords('inplace') def drop( # type: ignore[override] self, labels: Hashable | list[Hashable] = None, @@ -5152,6 +5172,7 @@ def replace( ) @doc(INFO_DOCSTRING, **series_sub_kwargs) + @validate_bool_kwargs_from_keywords('show_counts') def info( self, verbose: bool | None = None, @@ -5167,6 +5188,7 @@ def info( show_counts=show_counts, ) + @validate_bool_kwargs_from_keywords('inplace') def _replace_single(self, to_replace, method: str, inplace: bool, limit): """ Replaces values in a Series using the fill method specified when no @@ -5196,6 +5218,7 @@ def shift(self, periods=1, freq=None, axis=0, fill_value=None) -> Series: periods=periods, freq=freq, axis=axis, fill_value=fill_value ) + @validate_bool_kwargs_from_keywords('index', 'deep') def memory_usage(self, index: bool = True, deep: bool = False) -> int: """ Return the memory usage of the Series. 
@@ -5430,6 +5453,7 @@ def between(self, left, right, inclusive="both") -> Series: # ---------------------------------------------------------------------- # Convert to types that support pd.NA + @validate_bool_kwargs_from_keywords('infer_objects', 'convert_string', 'convert_integer', 'convert_boolean', 'convert_floating') def _convert_dtypes( self, infer_objects: bool = True, @@ -5577,6 +5601,7 @@ def dropna(self, axis=0, inplace=False, how=None): # error: Cannot determine type of 'asfreq' @doc(NDFrame.asfreq, **_shared_doc_kwargs) # type: ignore[has-type] + @validate_bool_kwargs_from_keywords('normalize') def asfreq( self, freq, @@ -5684,6 +5709,7 @@ def to_period(self, freq=None, copy=True) -> Series: ) @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + @validate_bool_kwargs_from_keywords('inplace') def ffill( self: Series, axis: None | Axis = None, @@ -5694,6 +5720,7 @@ def ffill( return super().ffill(axis, inplace, limit, downcast) @deprecate_nonkeyword_arguments(version=None, allowed_args=["self"]) + @validate_bool_kwargs_from_keywords('inplace') def bfill( self: Series, axis: None | Axis = None, @@ -5706,6 +5733,7 @@ def bfill( @deprecate_nonkeyword_arguments( version=None, allowed_args=["self", "lower", "upper"] ) + @validate_bool_kwargs_from_keywords('inplace') def clip( self: Series, lower=None, @@ -5718,6 +5746,7 @@ def clip( return super().clip(lower, upper, axis, inplace, *args, **kwargs) @deprecate_nonkeyword_arguments(version=None, allowed_args=["self", "method"]) + @validate_bool_kwargs_from_keywords('inplace') def interpolate( self: Series, method: str = "linear", diff --git a/pandas/tests/frame/test_validate.py b/pandas/tests/frame/test_validate.py index e99e0a6863848..02dc0cf8e2605 100644 --- a/pandas/tests/frame/test_validate.py +++ b/pandas/tests/frame/test_validate.py @@ -39,3 +39,10 @@ def test_validate_bool_args(self, dataframe, func, inplace): with pytest.raises(ValueError, match=msg): getattr(dataframe, func)(**kwargs) + 
+@pytest.mark.parametrize('keyword', ('nan_as_null', 'allow_copy', 'ignore_width', 'index', 'index_names', 'show_dimensions', 'copy', 'inplace', 'reauth', 'auth_local_webserver', 'progress_bar', 'verify_integrity', 'write_index', 'bold_rows', 'escape', 'notebook', 'render_links', 'deep', 'takeable', 'drop', 'append', 'ignore_index', 'sort_remaining', 'normalize', 'ascending', 'dropna', 'keep_shape', 'keep_equal', 'overwrite', 'as_index', 'observed', 'sort', 'raw', 'left_index', 'right_index', 'numeric_only', 'skipna')) +def test_set_index_validation(dataframe, func, keyword): + msg = 'For argument "{}" expected type bool'.format(keyword) + kwargs = {keyword: 'hello'} + with pytest.raises(ValueError, match=msg): + getattr(dataframe, func)(**kwargs) \ No newline at end of file diff --git a/pandas/tests/series/test_validate.py b/pandas/tests/series/test_validate.py index 3c867f7582b7d..843f460649fb0 100644 --- a/pandas/tests/series/test_validate.py +++ b/pandas/tests/series/test_validate.py @@ -24,3 +24,10 @@ def test_validate_bool_args(string_series, func, inplace): with pytest.raises(ValueError, match=msg): getattr(string_series, func)(**kwargs) + +@pytest.mark.parametrize('keyword', ('copy', 'fastpath', 'takeable', 'clear', 'verify_is_copy', 'inplace', 'allow_duplicates', 'index', 'as_index', 'sort', 'observed', 'dropna', 'ignore_index', 'verify_integrity', 'keep_shape', 'keep_equal', 'inplace', 'sort_remaining' , 'convert_dtype', 'show_counts', 'deep', 'infer_objects', 'convert_string', 'convert_integer', 'convert_boolean', 'convert_floating', 'normalize')) +def test_set_index_validation(string_series, func, keyword): + msg = 'For argument "{}" expected type bool'.format(keyword) + kwargs = {keyword: 'hello'} + with pytest.raises(ValueError, match=msg): + getattr(string_series, func)(**kwargs) \ No newline at end of file diff --git a/pandas/util/_validators.py b/pandas/util/_validators.py index 8e3de9404fbee..757c84feedb3f 100644 --- a/pandas/util/_validators.py 
def validate_bool_kwargs_from_keywords(*keywords):
    """
    Build a decorator that type-checks selected boolean keyword arguments.

    On every call of the decorated function, each name listed in ``keywords``
    that the caller supplied *by keyword* is passed, together with its value,
    to ``validate_bool_kwarg``. The decorated function is then called with
    the original, unmodified arguments.

    Parameters
    ----------
    *keywords : str
        Names of the keyword arguments to validate. Duplicates are ignored.

    Returns
    -------
    callable
        A decorator. The function it returns has the same signature and
        metadata as the function it wraps (via ``functools.wraps``).

    Raises
    ------
    ValueError
        Propagated from ``validate_bool_kwarg`` at call time when a listed
        keyword argument has an unacceptable type.

    Notes
    -----
    Only ``**kwargs`` are inspected: a value passed positionally is *not*
    validated, and a name in ``keywords`` that the decorated function never
    receives by keyword is silently skipped.

    Examples
    --------
    >>> @validate_bool_kwargs_from_keywords("copy", "inplace")
    ... def method(copy=False, inplace=False):
    ...     return copy, inplace
    >>> method(copy=True)
    (True, False)
    """
    import functools

    # dict.fromkeys drops duplicates while keeping declaration order.
    # Checking in that fixed order (rather than iterating a set of strings)
    # keeps the reported argument deterministic when several are invalid.
    names = tuple(dict.fromkeys(keywords))

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            for name in names:
                if name in kwargs:
                    validate_bool_kwarg(kwargs[name], name)
            return func(*args, **kwargs)

        return wrapper

    return decorator