Skip to content

Commit 7e089aa

Browse files
authored
Merge branch 'main' into issue-50977
2 parents b9ed3a8 + 541d092 commit 7e089aa

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

69 files changed

+837
-623
lines changed

.pre-commit-config.yaml

+1-1
Original file line numberDiff line numberDiff line change
@@ -92,7 +92,7 @@ repos:
9292
args: [--disable=all, --enable=redefined-outer-name]
9393
stages: [manual]
9494
- repo: https://github.com/PyCQA/isort
95-
rev: 5.11.4
95+
rev: 5.12.0
9696
hooks:
9797
- id: isort
9898
- repo: https://github.com/asottile/pyupgrade

LICENSE

+1-1
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ BSD 3-Clause License
33
Copyright (c) 2008-2011, AQR Capital Management, LLC, Lambda Foundry, Inc. and PyData Development Team
44
All rights reserved.
55

6-
Copyright (c) 2011-2022, Open source contributors.
6+
Copyright (c) 2011-2023, Open source contributors.
77

88
Redistribution and use in source and binary forms, with or without
99
modification, are permitted provided that the following conditions are met:

ci/code_checks.sh

+68-2
Original file line numberDiff line numberDiff line change
@@ -83,7 +83,7 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8383
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX04,GL01,GL02,GL03,GL04,GL05,GL06,GL07,GL09,GL10,PR03,PR04,PR05,PR06,PR08,PR09,PR10,RT01,RT02,RT04,RT05,SA02,SA03,SA04,SS01,SS02,SS03,SS04,SS05,SS06
8484
RET=$(($RET + $?)) ; echo $MSG "DONE"
8585

86-
MSG='Partially validate docstrings (EX01)' ; echo $MSG
86+
MSG='Partially validate docstrings (EX01)' ; echo $MSG
8787
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01 --ignore_functions \
8888
pandas.Series.index \
8989
pandas.Series.dtype \
@@ -574,7 +574,73 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
574574
pandas.DataFrame.sparse.to_coo \
575575
pandas.DataFrame.to_gbq \
576576
pandas.DataFrame.style \
577-
pandas.DataFrame.__dataframe__ \
577+
pandas.DataFrame.__dataframe__
578+
RET=$(($RET + $?)) ; echo $MSG "DONE"
579+
580+
MSG='Partially validate docstrings (EX02)' ; echo $MSG
581+
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX02 --ignore_functions \
582+
pandas.DataFrame.copy \
583+
pandas.DataFrame.plot.line \
584+
pandas.DataFrame.std \
585+
pandas.DataFrame.var \
586+
pandas.Index.factorize \
587+
pandas.Period.strftime \
588+
pandas.Series.copy \
589+
pandas.Series.factorize \
590+
pandas.Series.floordiv \
591+
pandas.Series.plot.line \
592+
pandas.Series.rfloordiv \
593+
pandas.Series.sparse.density \
594+
pandas.Series.sparse.npoints \
595+
pandas.Series.sparse.sp_values \
596+
pandas.Series.std \
597+
pandas.Series.var \
598+
pandas.Timestamp.fromtimestamp \
599+
pandas.api.types.infer_dtype \
600+
pandas.api.types.is_bool_dtype \
601+
pandas.api.types.is_categorical_dtype \
602+
pandas.api.types.is_complex_dtype \
603+
pandas.api.types.is_datetime64_any_dtype \
604+
pandas.api.types.is_datetime64_dtype \
605+
pandas.api.types.is_datetime64_ns_dtype \
606+
pandas.api.types.is_datetime64tz_dtype \
607+
pandas.api.types.is_dict_like \
608+
pandas.api.types.is_file_like \
609+
pandas.api.types.is_float_dtype \
610+
pandas.api.types.is_hashable \
611+
pandas.api.types.is_int64_dtype \
612+
pandas.api.types.is_integer_dtype \
613+
pandas.api.types.is_interval_dtype \
614+
pandas.api.types.is_iterator \
615+
pandas.api.types.is_list_like \
616+
pandas.api.types.is_named_tuple \
617+
pandas.api.types.is_numeric_dtype \
618+
pandas.api.types.is_object_dtype \
619+
pandas.api.types.is_period_dtype \
620+
pandas.api.types.is_re \
621+
pandas.api.types.is_re_compilable \
622+
pandas.api.types.is_signed_integer_dtype \
623+
pandas.api.types.is_sparse \
624+
pandas.api.types.is_string_dtype \
625+
pandas.api.types.is_timedelta64_dtype \
626+
pandas.api.types.is_timedelta64_ns_dtype \
627+
pandas.api.types.is_unsigned_integer_dtype \
628+
pandas.core.groupby.DataFrameGroupBy.take \
629+
pandas.core.groupby.SeriesGroupBy.take \
630+
pandas.factorize \
631+
pandas.io.formats.style.Styler.concat \
632+
pandas.io.formats.style.Styler.export \
633+
pandas.io.formats.style.Styler.set_td_classes \
634+
pandas.io.formats.style.Styler.use \
635+
pandas.io.json.build_table_schema \
636+
pandas.merge_ordered \
637+
pandas.option_context \
638+
pandas.plotting.andrews_curves \
639+
pandas.plotting.autocorrelation_plot \
640+
pandas.plotting.lag_plot \
641+
pandas.plotting.parallel_coordinates \
642+
pandas.plotting.radviz \
643+
pandas.tseries.frequencies.to_offset
578644
RET=$(($RET + $?)) ; echo $MSG "DONE"
579645

580646
fi

doc/source/reference/frame.rst

+1
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ Binary operator functions
8383
.. autosummary::
8484
:toctree: api/
8585

86+
DataFrame.__add__
8687
DataFrame.add
8788
DataFrame.sub
8889
DataFrame.mul

doc/source/whatsnew/v2.0.0.rst

+8-3
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,7 @@ Other enhancements
183183
- Added ``copy`` parameter to :meth:`Series.infer_objects` and :meth:`DataFrame.infer_objects`, passing ``False`` will avoid making copies for series or columns that are already non-object or where no better dtype can be inferred (:issue:`50096`)
184184
- :meth:`DataFrame.plot.hist` now recognizes ``xlabel`` and ``ylabel`` arguments (:issue:`49793`)
185185
- :meth:`Series.drop_duplicates` has gained ``ignore_index`` keyword to reset index (:issue:`48304`)
186+
- :meth:`Series.dropna` and :meth:`DataFrame.dropna` have gained ``ignore_index`` keyword to reset index (:issue:`31725`)
186187
- Improved error message in :func:`to_datetime` for non-ISO8601 formats, informing users about the position of the first error (:issue:`50361`)
187188
- Improved error message when trying to align :class:`DataFrame` objects (for example, in :func:`DataFrame.compare`) to clarify that "identically labelled" refers to both index and columns (:issue:`50083`)
188189
- Added :meth:`DatetimeIndex.as_unit` and :meth:`TimedeltaIndex.as_unit` to convert to different resolutions; supported resolutions are "s", "ms", "us", and "ns" (:issue:`50616`)
@@ -619,10 +620,11 @@ Other API changes
619620
new DataFrame (shallow copy) instead of the original DataFrame, consistent with other
620621
methods to get a full slice (for example ``df.loc[:]`` or ``df[:]``) (:issue:`49469`)
621622
- Disallow computing ``cumprod`` for :class:`Timedelta` object; previously this returned incorrect values (:issue:`50246`)
623+
- Instantiating an :class:`Index` with a numeric numpy dtype with data containing :class:`NA` and/or :class:`NaT` now raises a ``ValueError``. Previously a ``TypeError`` was raised (:issue:`51050`)
622624
- Loading a JSON file with duplicate columns using ``read_json(orient='split')`` renames columns to avoid duplicates, as :func:`read_csv` and the other readers do (:issue:`50370`)
623625
- The levels of the index of the :class:`Series` returned from ``Series.sparse.from_coo`` now always have dtype ``int32``. Previously they had dtype ``int64`` (:issue:`50926`)
624626
- :func:`to_datetime` with ``unit`` of either "Y" or "M" will now raise if a sequence contains a non-round ``float`` value, matching the ``Timestamp`` behavior (:issue:`50301`)
625-
-
627+
- The methods :meth:`Series.round`, :meth:`DataFrame.__invert__`, :meth:`Series.__invert__`, :meth:`DataFrame.swapaxes`, :meth:`DataFrame.first`, :meth:`DataFrame.last`, :meth:`Series.first`, :meth:`Series.last` and :meth:`DataFrame.align` will now always return new objects (:issue:`51032`)
626628

627629
.. ---------------------------------------------------------------------------
628630
.. _whatsnew_200.deprecations:
@@ -639,6 +641,7 @@ Deprecations
639641
- :meth:`Index.is_categorical` has been deprecated. Use :func:`pandas.api.types.is_categorical_dtype` instead (:issue:`50042`)
640642
- :meth:`Index.is_object` has been deprecated. Use :func:`pandas.api.types.is_object_dtype` instead (:issue:`50042`)
641643
- :meth:`Index.is_interval` has been deprecated. Use :func:`pandas.api.types.is_interval_dtype` instead (:issue:`50042`)
644+
- Deprecated ``all`` and ``any`` reductions with ``datetime64`` and :class:`DatetimeTZDtype` dtypes, use e.g. ``(obj != pd.Timestamp(0, tz=obj.tz)).all()`` instead (:issue:`34479`)
642645
-
643646

644647
.. ---------------------------------------------------------------------------
@@ -758,7 +761,7 @@ Removal of prior version deprecations/changes
758761
- Disallow passing non-keyword arguments to :meth:`DataFrame.replace`, :meth:`Series.replace` except for ``to_replace`` and ``value`` (:issue:`47587`)
759762
- Disallow passing non-keyword arguments to :meth:`DataFrame.sort_values` except for ``by`` (:issue:`41505`)
760763
- Disallow passing non-keyword arguments to :meth:`Series.sort_values` (:issue:`41505`)
761-
- Disallow passing 2 non-keyword arguments to :meth:`DataFrame.reindex` (:issue:`17966`)
764+
- Disallow passing non-keyword arguments to :meth:`DataFrame.reindex` except for ``labels`` (:issue:`17966`)
762765
- Disallow :meth:`Index.reindex` with non-unique :class:`Index` objects (:issue:`42568`)
763766
- Disallowed constructing :class:`Categorical` with scalar ``data`` (:issue:`38433`)
764767
- Disallowed constructing :class:`CategoricalIndex` without passing ``data`` (:issue:`38944`)
@@ -1965,6 +1968,7 @@ Conversion
19651968
- Bug in :class:`.arrays.ArrowExtensionArray` that would raise ``NotImplementedError`` when passed a sequence of strings or binary (:issue:`49172`)
19661969
- Bug in :meth:`Series.astype` raising ``pyarrow.ArrowInvalid`` when converting from a non-pyarrow string dtype to a pyarrow numeric type (:issue:`50430`)
19671970
- Bug in :meth:`Series.to_numpy` converting to NumPy array before applying ``na_value`` (:issue:`48951`)
1971+
- Bug in :meth:`DataFrame.astype` not copying data when converting to pyarrow dtype (:issue:`50984`)
19681972
- Bug in :func:`to_datetime` was not respecting ``exact`` argument when ``format`` was an ISO8601 format (:issue:`12649`)
19691973
- Bug in :meth:`TimedeltaArray.astype` raising ``TypeError`` when converting to a pyarrow duration type (:issue:`49795`)
19701974
-
@@ -2051,6 +2055,7 @@ Period
20512055
- Bug in adding a :class:`Period` object to an array of :class:`DateOffset` objects incorrectly raising ``TypeError`` (:issue:`50162`)
20522056
- Bug in :class:`Period` where passing a string with finer resolution than nanosecond would result in a ``KeyError`` instead of dropping the extra precision (:issue:`50417`)
20532057
- Bug in parsing strings representing Week-periods e.g. "2017-01-23/2017-01-29" as minute-frequency instead of week-frequency (:issue:`50803`)
2058+
- Bug in :meth:`GroupBy.sum`, :meth:`GroupBy.cumsum`, :meth:`GroupBy.prod`, :meth:`GroupBy.cumprod` with :class:`PeriodDtype` failing to raise ``TypeError`` (:issue:`51040`)
20542059
-
20552060

20562061
Plotting
@@ -2101,7 +2106,7 @@ Sparse
21012106
^^^^^^
21022107
- Bug in :meth:`Series.astype` when converting a ``SparseDtype`` with ``datetime64[ns]`` subtype to ``int64`` dtype raising, inconsistent with the non-sparse behavior (:issue:`49631`, :issue:`50087`)
21032108
- Bug in :meth:`Series.astype` when converting from ``datetime64[ns]`` to ``Sparse[datetime64[ns]]`` incorrectly raising (:issue:`50082`)
2104-
-
2109+
- Bug in :meth:`Series.sparse.to_coo` raising ``SystemError`` when :class:`MultiIndex` contains an ``ExtensionArray`` (:issue:`50996`)
21052110

21062111
ExtensionArray
21072112
^^^^^^^^^^^^^^

pandas/_testing/__init__.py

-2
Original file line numberDiff line numberDiff line change
@@ -100,7 +100,6 @@
100100
get_obj,
101101
)
102102
from pandas._testing.contexts import (
103-
RNGContext,
104103
decompress_file,
105104
ensure_clean,
106105
ensure_safe_environment_variables,
@@ -1135,7 +1134,6 @@ def shares_memory(left, right) -> bool:
11351134
"raise_assert_detail",
11361135
"rands",
11371136
"reset_display_options",
1138-
"RNGContext",
11391137
"raises_chained_assignment_error",
11401138
"round_trip_localpath",
11411139
"round_trip_pathlib",

pandas/_testing/contexts.py

-37
Original file line numberDiff line numberDiff line change
@@ -4,16 +4,13 @@
44
import os
55
from pathlib import Path
66
import tempfile
7-
from types import TracebackType
87
from typing import (
98
IO,
109
Any,
1110
Generator,
1211
)
1312
import uuid
1413

15-
import numpy as np
16-
1714
from pandas.compat import PYPY
1815
from pandas.errors import ChainedAssignmentError
1916

@@ -198,40 +195,6 @@ def use_numexpr(use, min_elements=None) -> Generator[None, None, None]:
198195
set_option("compute.use_numexpr", olduse)
199196

200197

201-
class RNGContext:
202-
"""
203-
Context manager to set the numpy random number generator speed. Returns
204-
to the original value upon exiting the context manager.
205-
206-
Parameters
207-
----------
208-
seed : int
209-
Seed for numpy.random.seed
210-
211-
Examples
212-
--------
213-
with RNGContext(42):
214-
np.random.randn()
215-
"""
216-
217-
def __init__(self, seed) -> None:
218-
self.seed = seed
219-
220-
def __enter__(self) -> None:
221-
222-
self.start_state = np.random.get_state()
223-
np.random.seed(self.seed)
224-
225-
def __exit__(
226-
self,
227-
exc_type: type[BaseException] | None,
228-
exc_value: BaseException | None,
229-
traceback: TracebackType | None,
230-
) -> None:
231-
232-
np.random.set_state(self.start_state)
233-
234-
235198
def raises_chained_assignment_error():
236199

237200
if PYPY:

pandas/conftest.py

-2
Original file line numberDiff line numberDiff line change
@@ -149,8 +149,6 @@ def pytest_collection_modifyitems(items, config) -> None:
149149
ignored_doctest_warnings = [
150150
# Docstring divides by zero to show behavior difference
151151
("missing.mask_zero_div_zero", "divide by zero encountered"),
152-
# Docstring demonstrates the call raises a warning
153-
("_validators.validate_axis_style_args", "Use named arguments"),
154152
]
155153

156154
for item in items:

pandas/core/algorithms.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1197,8 +1197,10 @@ def is_valid_dtype_n_method(dtype: DtypeObj) -> bool:
11971197
nsmallest/nlargest methods
11981198
"""
11991199
return (
1200-
is_numeric_dtype(dtype) and not is_complex_dtype(dtype)
1201-
) or needs_i8_conversion(dtype)
1200+
not is_complex_dtype(dtype)
1201+
if is_numeric_dtype(dtype)
1202+
else needs_i8_conversion(dtype)
1203+
)
12021204

12031205

12041206
class SelectNSeries(SelectN):

pandas/core/arraylike.py

+86
Original file line numberDiff line numberDiff line change
@@ -97,6 +97,92 @@ def _arith_method(self, other, op):
9797

9898
@unpack_zerodim_and_defer("__add__")
9999
def __add__(self, other):
100+
"""
101+
Get Addition of DataFrame and other, column-wise.
102+
103+
Equivalent to ``DataFrame.add(other)``.
104+
105+
Parameters
106+
----------
107+
other : scalar, sequence, Series, dict or DataFrame
108+
Object to be added to the DataFrame.
109+
110+
Returns
111+
-------
112+
DataFrame
113+
The result of adding ``other`` to DataFrame.
114+
115+
See Also
116+
--------
117+
DataFrame.add : Add a DataFrame and another object, with option for index-
118+
or column-oriented addition.
119+
120+
Examples
121+
--------
122+
>>> df = pd.DataFrame({'height': [1.5, 2.6], 'weight': [500, 800]},
123+
... index=['elk', 'moose'])
124+
>>> df
125+
height weight
126+
elk 1.5 500
127+
moose 2.6 800
128+
129+
Adding a scalar affects all rows and columns.
130+
131+
>>> df[['height', 'weight']] + 1.5
132+
height weight
133+
elk 3.0 501.5
134+
moose 4.1 801.5
135+
136+
Each element of a list is added to a column of the DataFrame, in order.
137+
138+
>>> df[['height', 'weight']] + [0.5, 1.5]
139+
height weight
140+
elk 2.0 501.5
141+
moose 3.1 801.5
142+
143+
Keys of a dictionary are aligned to the DataFrame, based on column names;
144+
each value in the dictionary is added to the corresponding column.
145+
146+
>>> df[['height', 'weight']] + {'height': 0.5, 'weight': 1.5}
147+
height weight
148+
elk 2.0 501.5
149+
moose 3.1 801.5
150+
151+
When `other` is a :class:`Series`, the index of `other` is aligned with the
152+
columns of the DataFrame.
153+
154+
>>> s1 = pd.Series([0.5, 1.5], index=['weight', 'height'])
155+
>>> df[['height', 'weight']] + s1
156+
height weight
157+
elk 3.0 500.5
158+
moose 4.1 800.5
159+
160+
Even when the index of `other` is the same as the index of the DataFrame,
161+
the :class:`Series` will not be reoriented. If index-wise alignment is desired,
162+
:meth:`DataFrame.add` should be used with `axis='index'`.
163+
164+
>>> s2 = pd.Series([0.5, 1.5], index=['elk', 'moose'])
165+
>>> df[['height', 'weight']] + s2
166+
elk height moose weight
167+
elk NaN NaN NaN NaN
168+
moose NaN NaN NaN NaN
169+
170+
>>> df[['height', 'weight']].add(s2, axis='index')
171+
height weight
172+
elk 2.0 500.5
173+
moose 4.1 801.5
174+
175+
When `other` is a :class:`DataFrame`, both columns names and the
176+
index are aligned.
177+
178+
>>> other = pd.DataFrame({'height': [0.2, 0.4, 0.6]},
179+
... index=['elk', 'moose', 'deer'])
180+
>>> df[['height', 'weight']] + other
181+
height weight
182+
deer NaN NaN
183+
elk 1.7 NaN
184+
moose 3.0 NaN
185+
"""
100186
return self._arith_method(other, operator.add)
101187

102188
@unpack_zerodim_and_defer("__radd__")

pandas/core/arrays/arrow/array.py

+4
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
from __future__ import annotations
22

3+
from copy import deepcopy
34
from typing import (
45
TYPE_CHECKING,
56
Any,
@@ -220,6 +221,9 @@ def _from_sequence(cls, scalars, *, dtype: Dtype | None = None, copy: bool = Fal
220221
if isinstance(scalars, cls):
221222
scalars = scalars._data
222223
elif not isinstance(scalars, (pa.Array, pa.ChunkedArray)):
224+
if copy and is_array_like(scalars):
225+
# pa array should not get updated when numpy array is updated
226+
scalars = deepcopy(scalars)
223227
try:
224228
scalars = pa.array(scalars, type=pa_dtype, from_pandas=True)
225229
except pa.ArrowInvalid:

pandas/core/arrays/datetimelike.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -2034,11 +2034,12 @@ def ceil(
20342034
# Reductions
20352035

20362036
def any(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool:
2037-
# GH#34479 discussion of desired behavior long-term
2037+
# GH#34479 the nanops call will issue a FutureWarning for non-td64 dtype
20382038
return nanops.nanany(self._ndarray, axis=axis, skipna=skipna, mask=self.isna())
20392039

20402040
def all(self, *, axis: AxisInt | None = None, skipna: bool = True) -> bool:
2041-
# GH#34479 discussion of desired behavior long-term
2041+
# GH#34479 the nanops call will issue a FutureWarning for non-td64 dtype
2042+
20422043
return nanops.nanall(self._ndarray, axis=axis, skipna=skipna, mask=self.isna())
20432044

20442045
# --------------------------------------------------------------

0 commit comments

Comments
 (0)