From 20f4d0b347e1f1beae04e05ed3c6b93fb0225d60 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sat, 18 Mar 2023 22:39:49 +0100 Subject: [PATCH 1/2] CoW: Switch to copy=False everywhere for Series constructor --- pandas/core/algorithms.py | 6 +++--- pandas/core/arrays/_mixins.py | 2 +- pandas/core/arrays/arrow/array.py | 2 +- pandas/core/arrays/categorical.py | 12 ++++++++---- pandas/core/arrays/masked.py | 4 ++-- pandas/core/arrays/sparse/accessor.py | 1 + pandas/core/arrays/sparse/array.py | 2 +- pandas/core/arrays/sparse/scipy_sparse.py | 2 +- pandas/core/groupby/generic.py | 2 +- pandas/core/groupby/groupby.py | 2 +- pandas/core/indexes/accessors.py | 10 ++++++---- pandas/core/reshape/encoding.py | 4 ++-- pandas/core/strings/accessor.py | 4 ++-- pandas/core/strings/object_array.py | 2 +- pandas/core/tools/datetimes.py | 2 +- pandas/core/window/ewm.py | 4 ++-- pandas/core/window/rolling.py | 8 ++++---- pandas/io/stata.py | 4 ++-- pandas/plotting/_matplotlib/boxplot.py | 2 +- 19 files changed, 41 insertions(+), 34 deletions(-) diff --git a/pandas/core/algorithms.py b/pandas/core/algorithms.py index db45f140c268e..eacd6d7803e6e 100644 --- a/pandas/core/algorithms.py +++ b/pandas/core/algorithms.py @@ -838,7 +838,7 @@ def value_counts( if bins is not None: from pandas.core.reshape.tile import cut - values = Series(values) + values = Series(values, copy=False) try: ii = cut(values, bins, include_lowest=True) except TypeError as err: @@ -861,7 +861,7 @@ def value_counts( else: if is_extension_array_dtype(values): # handle Categorical and sparse, - result = Series(values)._values.value_counts(dropna=dropna) + result = Series(values, copy=False)._values.value_counts(dropna=dropna) result.name = name result.index.name = index_name counts = result._values @@ -893,7 +893,7 @@ def value_counts( idx = idx.astype(object) idx.name = index_name - result = Series(counts, index=idx, name=name) + result = Series(counts, index=idx, name=name, copy=False) if sort: result = result.sort_values(ascending=ascending) diff --git a/pandas/core/arrays/_mixins.py b/pandas/core/arrays/_mixins.py index c0cca1852b446..6d4f51e0f9543 100644 --- a/pandas/core/arrays/_mixins.py +++ b/pandas/core/arrays/_mixins.py @@ -447,7 +447,7 @@ def value_counts(self, dropna: bool = True) -> Series: index_arr = self._from_backing_data(np.asarray(result.index._data)) index = Index(index_arr, name=result.index.name) - return Series(result._values, index=index, name=result.name) + return Series(result._values, index=index, name=result.name, copy=False) def _quantile( self, diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index 2313e28950de7..9bbe2f0b1bb65 100644 --- a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -1124,7 +1124,7 @@ def value_counts(self, dropna: bool = True) -> Series: index = Index(type(self)(values)) - return Series(counts, index=index, name="count") + return Series(counts, index=index, name="count", copy=False) @classmethod def _concat_same_type(cls, to_concat) -> Self: diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 9de83933690f4..1a6c7b3416b5d 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -1501,7 +1501,9 @@ def value_counts(self, dropna: bool = True) -> Series: ix = coerce_indexer_dtype(ix, self.dtype.categories) ix = self._from_backing_data(ix) - return Series(count, index=CategoricalIndex(ix), dtype="int64", name="count") + return Series( + count, index=CategoricalIndex(ix), dtype="int64", name="count", copy=False + ) # error: Argument 2 of "_empty" is incompatible with supertype # "NDArrayBackedExtensionArray"; supertype defines the argument type as @@ -1759,7 +1761,9 @@ def _values_for_rank(self): # reorder the categories (so rank can use the float codes) # instead of passing an object array to rank values = np.array( - self.rename_categories(Series(self.categories).rank().values) + self.rename_categories( + Series(self.categories, copy=False).rank().values + ) ) return values @@ -2504,7 +2508,7 @@ def codes(self) -> Series: """ from pandas import Series - return Series(self._parent.codes, index=self._index) + return Series(self._parent.codes, index=self._index, copy=False) def _delegate_method(self, name, *args, **kwargs): from pandas import Series @@ -2512,7 +2516,7 @@ def _delegate_method(self, name, *args, **kwargs): method = getattr(self._parent, name) res = method(*args, **kwargs) if res is not None: - return Series(res, index=self._index, name=self._name) + return Series(res, index=self._index, name=self._name, copy=False) # utility routines diff --git a/pandas/core/arrays/masked.py b/pandas/core/arrays/masked.py index 8591cf2d3a4c5..517db7429b767 100644 --- a/pandas/core/arrays/masked.py +++ b/pandas/core/arrays/masked.py @@ -995,7 +995,7 @@ def value_counts(self, dropna: bool = True) -> Series: ) if dropna: - res = Series(value_counts, index=keys, name="count") + res = Series(value_counts, index=keys, name="count", copy=False) res.index = res.index.astype(self.dtype) res = res.astype("Int64") return res @@ -1011,7 +1011,7 @@ def value_counts(self, dropna: bool = True) -> Series: mask = np.zeros(len(counts), dtype="bool") counts_array = IntegerArray(counts, mask) - return Series(counts_array, index=index, name="count") + return Series(counts_array, index=index, name="count", copy=False) @doc(ExtensionArray.equals) def equals(self, other) -> bool: diff --git a/pandas/core/arrays/sparse/accessor.py b/pandas/core/arrays/sparse/accessor.py index ca1e73d3e6865..1302dd7c89672 100644 --- a/pandas/core/arrays/sparse/accessor.py +++ b/pandas/core/arrays/sparse/accessor.py @@ -219,6 +219,7 @@ def to_dense(self) -> Series: self._parent.array.to_dense(), index=self._parent.index, name=self._parent.name, + copy=False, ) diff --git a/pandas/core/arrays/sparse/array.py b/pandas/core/arrays/sparse/array.py index 77dcfc463ed0c..e489aa0ab79de 100644 --- a/pandas/core/arrays/sparse/array.py +++ b/pandas/core/arrays/sparse/array.py @@ -886,7 +886,7 @@ def value_counts(self, dropna: bool = True) -> Series: index = Index(keys) else: index = keys - return Series(counts, index=index) + return Series(counts, index=index, copy=False) # -------- # Indexing diff --git a/pandas/core/arrays/sparse/scipy_sparse.py b/pandas/core/arrays/sparse/scipy_sparse.py index 1d190ad1919b3..7f6a9c589c486 100644 --- a/pandas/core/arrays/sparse/scipy_sparse.py +++ b/pandas/core/arrays/sparse/scipy_sparse.py @@ -195,7 +195,7 @@ def coo_to_sparse_series( from pandas import SparseDtype try: - ser = Series(A.data, MultiIndex.from_arrays((A.row, A.col))) + ser = Series(A.data, MultiIndex.from_arrays((A.row, A.col)), copy=False) except AttributeError as err: raise TypeError( f"Expected coo_matrix. Got {type(A).__name__} instead." diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index ce4cd3476ec83..20b15f6816c91 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -707,7 +707,7 @@ def value_counts( llab = lambda lab, inc: lab[inc] else: # lab is a Categorical with categories an IntervalIndex - cat_ser = cut(Series(val), bins, include_lowest=True) + cat_ser = cut(Series(val, copy=False), bins, include_lowest=True) cat_obj = cast("Categorical", cat_ser._values) lev = cat_obj.categories lab = lev.take( diff --git a/pandas/core/groupby/groupby.py b/pandas/core/groupby/groupby.py index 94b2d4b28ea53..bbc3824bab455 100644 --- a/pandas/core/groupby/groupby.py +++ b/pandas/core/groupby/groupby.py @@ -1486,7 +1486,7 @@ def _agg_py_fallback( if values.ndim == 1: # For DataFrameGroupBy we only get here with ExtensionArray - ser = Series(values) + ser = Series(values, copy=False) else: # We only get here with values.dtype == object # TODO: special case not needed with ArrayManager diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 85460a04298e6..02532d51f0ec8 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -102,7 +102,9 @@ def _delegate_property_get(self, name): else: index = self._parent.index # return the result as a Series, which is by definition a copy - result = Series(result, index=index, name=self.name).__finalize__(self._parent) + result = Series(result, index=index, name=self.name, copy=False).__finalize__( + self._parent + ) # setting this object will show a SettingWithCopyWarning/Error result._is_copy = ( @@ -130,9 +132,9 @@ def _delegate_method(self, name, *args, **kwargs): if not is_list_like(result): return result - result = Series(result, index=self._parent.index, name=self.name).__finalize__( - self._parent - ) + result = Series( + result, index=self._parent.index, name=self.name, copy=False + ).__finalize__(self._parent) # setting this object will show a SettingWithCopyWarning/Error result._is_copy = ( diff --git a/pandas/core/reshape/encoding.py b/pandas/core/reshape/encoding.py index 92d556a582262..7ac403d2e68fb 100644 --- a/pandas/core/reshape/encoding.py +++ b/pandas/core/reshape/encoding.py @@ -239,7 +239,7 @@ def _get_dummies_1d( from pandas.core.reshape.concat import concat # Series avoids inconsistent NaN handling - codes, levels = factorize_from_iterable(Series(data)) + codes, levels = factorize_from_iterable(Series(data, copy=False)) if dtype is None: dtype = np.dtype(bool) @@ -313,7 +313,7 @@ def get_empty_frame(data) -> DataFrame: fill_value=fill_value, dtype=dtype, ) - sparse_series.append(Series(data=sarr, index=index, name=col)) + sparse_series.append(Series(data=sarr, index=index, name=col, copy=False)) return concat(sparse_series, axis=1, copy=False) diff --git a/pandas/core/strings/accessor.py b/pandas/core/strings/accessor.py index 1c4727fda4e64..1efd890a499ff 100644 --- a/pandas/core/strings/accessor.py +++ b/pandas/core/strings/accessor.py @@ -379,7 +379,7 @@ def _get_series_list(self, others): if isinstance(others, ABCSeries): return [others] elif isinstance(others, ABCIndex): - return [Series(others._values, index=idx, dtype=others.dtype)] + return [Series(others, index=idx, dtype=others.dtype)] elif isinstance(others, ABCDataFrame): return [others[x] for x in others] elif isinstance(others, np.ndarray) and others.ndim == 2: @@ -634,7 +634,7 @@ def cat( else: dtype = self._orig.dtype res_ser = Series( - result, dtype=dtype, index=data.index, name=self._orig.name + result, dtype=dtype, index=data.index, name=self._orig.name, copy=False ) out = res_ser.__finalize__(self._orig, method="str_cat") return out diff --git a/pandas/core/strings/object_array.py b/pandas/core/strings/object_array.py index f8e3f0756dfbd..098f08752ff4d 100644 --- a/pandas/core/strings/object_array.py +++ b/pandas/core/strings/object_array.py @@ -377,7 +377,7 @@ def _str_get_dummies(self, sep: str = "|"): arr = sep + arr.astype(str) + sep tags: set[str] = set() - for ts in Series(arr).str.split(sep): + for ts in Series(arr, copy=False).str.split(sep): tags.update(ts) tags2 = sorted(tags - {""}) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 0265b4404d6ab..5f4ad168d63d0 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -247,7 +247,7 @@ def _maybe_cache( cache_dates = convert_listlike(unique_dates, format) # GH#45319 try: - cache_array = Series(cache_dates, index=unique_dates) + cache_array = Series(cache_dates, index=unique_dates, copy=False) except OutOfBoundsDatetime: return cache_array # GH#39882 and GH#35888 in case of None and NaT we get duplicates diff --git a/pandas/core/window/ewm.py b/pandas/core/window/ewm.py index 34dc49ff4a82a..8dec71b692700 100644 --- a/pandas/core/window/ewm.py +++ b/pandas/core/window/ewm.py @@ -733,7 +733,7 @@ def cov_func(x, y): self.ignore_na, bias, ) - return Series(result, index=x.index, name=x.name) + return Series(result, index=x.index, name=x.name, copy=False) return self._apply_pairwise( self._selected_obj, other, pairwise, cov_func, numeric_only @@ -810,7 +810,7 @@ def _cov(X, Y): x_var = _cov(x_array, x_array) y_var = _cov(y_array, y_array) result = cov / zsqrt(x_var * y_var) - return Series(result, index=x.index, name=x.name) + return Series(result, index=x.index, name=x.name, copy=False) return self._apply_pairwise( self._selected_obj, other, pairwise, cov_func, numeric_only diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index ed0de80e381c3..b344b04b30d73 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -384,7 +384,7 @@ def _insert_on_column(self, result: DataFrame, obj: DataFrame) -> None: if self.on is not None and not self._on.equals(obj.index): name = self._on.name - extra_col = Series(self._on, index=self.obj.index, name=name) + extra_col = Series(self._on, index=self.obj.index, name=name, copy=False) if name in result.columns: # TODO: sure we want to overwrite results? result[name] = extra_col @@ -1418,7 +1418,7 @@ def _generate_cython_apply_func( def apply_func(values, begin, end, min_periods, raw=raw): if not raw: # GH 45912 - values = Series(values, index=self._on) + values = Series(values, index=self._on, copy=False) return window_func(values, begin, end, min_periods) return apply_func @@ -1675,7 +1675,7 @@ def cov_func(x, y): notna(x_array + y_array).astype(np.float64), start, end, 0 ) result = (mean_x_y - mean_x * mean_y) * (count_x_y / (count_x_y - ddof)) - return Series(result, index=x.index, name=x.name) + return Series(result, index=x.index, name=x.name, copy=False) return self._apply_pairwise( self._selected_obj, other, pairwise, cov_func, numeric_only @@ -1732,7 +1732,7 @@ def corr_func(x, y): ) denominator = (x_var * y_var) ** 0.5 result = numerator / denominator - return Series(result, index=x.index, name=x.name) + return Series(result, index=x.index, name=x.name, copy=False) return self._apply_pairwise( self._selected_obj, other, pairwise, corr_func, numeric_only diff --git a/pandas/io/stata.py b/pandas/io/stata.py index 5b6326685d63e..2a625f8dad32a 100644 --- a/pandas/io/stata.py +++ b/pandas/io/stata.py @@ -1989,7 +1989,7 @@ def _do_convert_categoricals( # TODO: if we get a non-copying rename_categories, use that cat_data = cat_data.rename_categories(categories) except ValueError as err: - vc = Series(categories).value_counts() + vc = Series(categories, copy=False).value_counts() repeated_cats = list(vc.index[vc > 1]) repeats = "-" * 80 + "\n" + "\n".join(repeated_cats) # GH 25772 @@ -2006,7 +2006,7 @@ def _do_convert_categoricals( """ raise ValueError(msg) from err # TODO: is the next line needed above in the data(...) method? - cat_series = Series(cat_data, index=data.index) + cat_series = Series(cat_data, index=data.index, copy=False) cat_converted_data.append((col, cat_series)) else: cat_converted_data.append((col, data[col])) diff --git a/pandas/plotting/_matplotlib/boxplot.py b/pandas/plotting/_matplotlib/boxplot.py index b39fc93f4f024..193e233883534 100644 --- a/pandas/plotting/_matplotlib/boxplot.py +++ b/pandas/plotting/_matplotlib/boxplot.py @@ -289,7 +289,7 @@ def _grouped_plot_by_column( ax_values.append(re_plotf) ax.grid(grid) - result = pd.Series(ax_values, index=columns) + result = pd.Series(ax_values, index=columns, copy=False) # Return axes in multiplot case, maybe revisit later # 985 if return_type is None: From ef64f094e470015c4474369f478b129de49b90f7 Mon Sep 17 00:00:00 2001 From: Patrick Hoefler Date: Sun, 2 Apr 2023 21:05:21 +0200 Subject: [PATCH 2/2] Remove --- pandas/core/arrays/categorical.py | 4 ++-- pandas/core/indexes/accessors.py | 12 +++++------- pandas/io/formats/string.py | 2 +- 3 files changed, 8 insertions(+), 10 deletions(-) diff --git a/pandas/core/arrays/categorical.py b/pandas/core/arrays/categorical.py index 1a6c7b3416b5d..fb0b989ee2109 100644 --- a/pandas/core/arrays/categorical.py +++ b/pandas/core/arrays/categorical.py @@ -2508,7 +2508,7 @@ def codes(self) -> Series: """ from pandas import Series - return Series(self._parent.codes, index=self._index, copy=False) + return Series(self._parent.codes, index=self._index) def _delegate_method(self, name, *args, **kwargs): from pandas import Series @@ -2516,7 +2516,7 @@ def _delegate_method(self, name, *args, **kwargs): method = getattr(self._parent, name) res = method(*args, **kwargs) if res is not None: - return Series(res, index=self._index, name=self._name, copy=False) + return Series(res, index=self._index, name=self._name) # utility routines diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index 02532d51f0ec8..52641f7a20abd 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py @@ -101,10 +101,8 @@ def _delegate_property_get(self, name): index = self.orig.index else: index = self._parent.index - # return the result as a Series, which is by definition a copy - result = Series(result, index=index, name=self.name, copy=False).__finalize__( - self._parent - ) + # return the result as a Series + result = Series(result, index=index, name=self.name).__finalize__(self._parent) # setting this object will show a SettingWithCopyWarning/Error result._is_copy = ( @@ -132,9 +130,9 @@ def _delegate_method(self, name, *args, **kwargs): if not is_list_like(result): return result - result = Series( - result, index=self._parent.index, name=self.name, copy=False - ).__finalize__(self._parent) + result = Series(result, index=self._parent.index, name=self.name).__finalize__( + self._parent + ) # setting this object will show a SettingWithCopyWarning/Error result._is_copy = ( diff --git a/pandas/io/formats/string.py b/pandas/io/formats/string.py index 071afc059b166..8ad545270dec1 100644 --- a/pandas/io/formats/string.py +++ b/pandas/io/formats/string.py @@ -166,7 +166,7 @@ def _fit_strcols_to_terminal_width(self, strcols: list[list[str]]) -> str: dif = max_len - width # '+ 1' to avoid too wide repr (GH PR #17023) adj_dif = dif + 1 - col_lens = Series([Series(ele).apply(len).max() for ele in strcols]) + col_lens = Series([Series(ele).str.len().max() for ele in strcols]) n_cols = len(col_lens) counter = 0 while adj_dif > 0 and n_cols > 1: