Skip to content

Commit ceb6823

Browse files
committed
Merge branch 'main' into poc-future-2
2 parents dd1dd44 + f676c5f commit ceb6823

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

41 files changed

+464
-183
lines changed

.github/actions/setup-conda/action.yml

+3-12
Original file line numberDiff line numberDiff line change
@@ -3,23 +3,14 @@ inputs:
33
environment-file:
44
description: Conda environment file to use.
55
default: environment.yml
6-
environment-name:
7-
description: Name to use for the Conda environment
8-
default: test
9-
extra-specs:
10-
description: Extra packages to install
11-
required: false
126
runs:
137
using: composite
148
steps:
159
- name: Install ${{ inputs.environment-file }}
16-
uses: mamba-org/provision-with-micromamba@v15
10+
uses: mamba-org/setup-micromamba@v1
1711
with:
1812
environment-file: ${{ inputs.environment-file }}
19-
environment-name: ${{ inputs.environment-name }}
20-
extra-specs: ${{ inputs.extra-specs }}
21-
channels: conda-forge
22-
channel-priority: 'strict'
13+
environment-name: test
2314
condarc-file: ci/condarc.yml
24-
cache-env: true
15+
cache-environment: true
2516
cache-downloads: true

.github/workflows/package-checks.yml

+3-5
Original file line numberDiff line numberDiff line change
@@ -67,17 +67,15 @@ jobs:
6767
fetch-depth: 0
6868

6969
- name: Set up Python
70-
uses: mamba-org/provision-with-micromamba@v15
70+
uses: mamba-org/setup-micromamba@v1
7171
with:
72-
environment-file: false
7372
environment-name: recipe-test
74-
extra-specs: |
73+
create-args: >-
7574
python=${{ matrix.python-version }}
7675
boa
7776
conda-verify
78-
channels: conda-forge
7977
cache-downloads: true
80-
cache-env: true
78+
cache-environment: true
8179

8280
- name: Build conda package
8381
run: conda mambabuild ci --no-anaconda-upload --verify --strict-verify --output --output-folder .

.github/workflows/unit-tests.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@ jobs:
232232
python -m pip install -U pip wheel setuptools meson[ninja]==1.0.1 meson-python==0.13.1
233233
python -m pip install --no-cache-dir versioneer[toml] cython numpy python-dateutil pytz pytest>=7.0.0 pytest-xdist>=2.2.0 pytest-asyncio>=0.17 hypothesis>=6.46.1
234234
python -m pip install --no-cache-dir --no-build-isolation -e .
235-
python -m pip list
235+
python -m pip list --no-cache-dir
236236
export PANDAS_CI=1
237237
python -m pytest -m 'not slow and not network and not clipboard and not single_cpu' pandas --junitxml=test-data.xml
238238
concurrency:

.pre-commit-config.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ repos:
8383
hooks:
8484
- id: pylint
8585
stages: [manual]
86+
args: [--load-plugins=pylint.extensions.redefined_loop_name]
8687
- id: pylint
8788
alias: redefined-outer-name
8889
name: Redefining name from outer scope

ci/code_checks.sh

-9
Original file line numberDiff line numberDiff line change
@@ -80,12 +80,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
8080

8181
MSG='Partially validate docstrings (EX01)' ; echo $MSG
8282
$BASE_DIR/scripts/validate_docstrings.py --format=actions --errors=EX01 --ignore_functions \
83-
pandas.Series.item \
84-
pandas.Series.pipe \
85-
pandas.Series.mode \
86-
pandas.Series.is_unique \
87-
pandas.Series.is_monotonic_increasing \
88-
pandas.Series.is_monotonic_decreasing \
8983
pandas.Series.backfill \
9084
pandas.Series.bfill \
9185
pandas.Series.ffill \
@@ -319,7 +313,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
319313
pandas.Index.fillna \
320314
pandas.Index.dropna \
321315
pandas.Index.astype \
322-
pandas.Index.item \
323316
pandas.Index.map \
324317
pandas.Index.ravel \
325318
pandas.Index.to_list \
@@ -462,8 +455,6 @@ if [[ -z "$CHECK" || "$CHECK" == "docstrings" ]]; then
462455
pandas.core.groupby.SeriesGroupBy.cumsum \
463456
pandas.core.groupby.SeriesGroupBy.diff \
464457
pandas.core.groupby.SeriesGroupBy.ffill \
465-
pandas.core.groupby.SeriesGroupBy.is_monotonic_increasing \
466-
pandas.core.groupby.SeriesGroupBy.is_monotonic_decreasing \
467458
pandas.core.groupby.SeriesGroupBy.max \
468459
pandas.core.groupby.SeriesGroupBy.median \
469460
pandas.core.groupby.SeriesGroupBy.min \

ci/condarc.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ always_yes: true
1111
# The number seconds conda will wait for your client to establish a
1212
# connection to a remote url resource.
1313
#
14-
remote_connect_timeout_secs: 30.0
14+
remote_connect_timeout_secs: 30
1515

1616
# remote_max_retries (int)
1717
# The maximum number of retries each HTTP connection should attempt.

doc/source/conf.py

+3-3
Original file line numberDiff line numberDiff line change
@@ -237,14 +237,14 @@
237237

238238
html_theme_options = {
239239
"external_links": [],
240-
"footer_items": ["pandas_footer", "sphinx-version"],
240+
"footer_start": ["pandas_footer", "sphinx-version"],
241241
"github_url": "https://github.com/pandas-dev/pandas",
242242
"twitter_url": "https://twitter.com/pandas_dev",
243-
"google_analytics_id": "UA-27880019-2",
243+
"analytics": {"google_analytics_id": "UA-27880019-2"},
244244
"logo": {"image_dark": "https://pandas.pydata.org/static/img/pandas_white.svg"},
245245
"navbar_end": ["version-switcher", "theme-switcher", "navbar-icon-links"],
246246
"switcher": {
247-
"json_url": "/versions.json",
247+
"json_url": "https://pandas.pydata.org/versions.json",
248248
"version_match": switcher_version,
249249
},
250250
"icon_links": [

doc/source/development/contributing_environment.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -225,7 +225,7 @@ To compile pandas with meson, run::
225225
# Build and install pandas
226226
python -m pip install -ve . --no-build-isolation
227227

228-
** Build options **
228+
**Build options**
229229

230230
It is possible to pass options from the pip frontend to the meson backend if you would like to configure your
231231
install. Occasionally, you'll want to use this to adjust the build directory, and/or toggle debug/optimization levels.

doc/source/whatsnew/v2.0.2.rst

+3
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ including other versions of pandas.
1313

1414
Fixed regressions
1515
~~~~~~~~~~~~~~~~~
16+
- Fixed performance regression in :meth:`GroupBy.apply` (:issue:`53195`)
1617
- Fixed regression in :func:`read_sql` dropping columns with duplicated column names (:issue:`53117`)
1718
- Fixed regression in :meth:`DataFrame.loc` losing :class:`MultiIndex` name when enlarging object (:issue:`53053`)
1819
- Fixed regression in :meth:`DataFrame.to_string` printing a backslash at the end of the first row of data, instead of headers, when the DataFrame doesn't fit the line width (:issue:`53054`)
@@ -27,10 +28,12 @@ Bug fixes
2728
- Bug in :func:`api.interchange.from_dataframe` was raising ``IndexError`` on empty categorical data (:issue:`53077`)
2829
- Bug in :func:`api.interchange.from_dataframe` was returning :class:`DataFrame`'s of incorrect sizes when called on slices (:issue:`52824`)
2930
- Bug in :func:`api.interchange.from_dataframe` was unnecessarily raising on bitmasks (:issue:`49888`)
31+
- Bug in :func:`merge` when merging on datetime columns on different resolutions (:issue:`53200`)
3032
- Bug in :func:`to_timedelta` was raising ``ValueError`` with ``pandas.NA`` (:issue:`52909`)
3133
- Bug in :meth:`DataFrame.__getitem__` not preserving dtypes for :class:`MultiIndex` partial keys (:issue:`51895`)
3234
- Bug in :meth:`DataFrame.convert_dtypes` ignores ``convert_*`` keywords when set to False ``dtype_backend="pyarrow"`` (:issue:`52872`)
3335
- Bug in :meth:`Series.describe` treating pyarrow-backed timestamps and timedeltas as categorical data (:issue:`53001`)
36+
- Bug in :meth:`Series.rename` not making a lazy copy when Copy-on-Write is enabled when a scalar is passed to it (:issue:`52450`)
3437
- Bug in :meth:`pd.array` raising for ``NumPy`` array and ``pa.large_string`` or ``pa.large_binary`` (:issue:`52590`)
3538

3639

doc/source/whatsnew/v2.1.0.rst

+6-1
Original file line numberDiff line numberDiff line change
@@ -92,13 +92,13 @@ Other enhancements
9292
- Implemented ``__pandas_priority__`` to allow custom types to take precedence over :class:`DataFrame`, :class:`Series`, :class:`Index`, or :class:`ExtensionArray` for arithmetic operations, :ref:`see the developer guide <extending.pandas_priority>` (:issue:`48347`)
9393
- Improve error message when having incompatible columns using :meth:`DataFrame.merge` (:issue:`51861`)
9494
- Improve error message when setting :class:`DataFrame` with wrong number of columns through :meth:`DataFrame.isetitem` (:issue:`51701`)
95+
- Improved error handling when using :meth:`DataFrame.to_json` with incompatible ``index`` and ``orient`` arguments (:issue:`52143`)
9596
- Improved error message when creating a DataFrame with empty data (0 rows), no index and an incorrect number of columns. (:issue:`52084`)
9697
- Let :meth:`DataFrame.to_feather` accept a non-default :class:`Index` and non-string column names (:issue:`51787`)
9798
- Performance improvement in :func:`read_csv` (:issue:`52632`) with ``engine="c"``
9899
- :meth:`Categorical.from_codes` has gotten a ``validate`` parameter (:issue:`50975`)
99100
- Performance improvement in :func:`concat` with homogeneous ``np.float64`` or ``np.float32`` dtypes (:issue:`52685`)
100101
- Performance improvement in :meth:`DataFrame.filter` when ``items`` is given (:issue:`52941`)
101-
-
102102

103103
.. ---------------------------------------------------------------------------
104104
.. _whatsnew_210.notable_bug_fixes:
@@ -261,6 +261,7 @@ Deprecations
261261
- Deprecated unused "closed" keyword in the :class:`TimedeltaIndex` constructor (:issue:`52628`)
262262
- Deprecated logical operation between two non boolean :class:`Series` with different indexes always coercing the result to bool dtype. In a future version, this will maintain the return type of the inputs. (:issue:`52500`, :issue:`52538`)
263263
- Deprecated allowing ``downcast`` keyword other than ``None``, ``False``, "infer", or a dict with these as values in :meth:`Series.fillna`, :meth:`DataFrame.fillna` (:issue:`40988`)
264+
- Deprecated allowing arbitrary ``fill_value`` in :class:`SparseDtype`, in a future version the ``fill_value`` will need to be compatible with the ``dtype.subtype``, either a scalar that can be held by that subtype or ``NaN`` for integer or bool subtypes (:issue:`23124`)
264265
- Deprecated constructing :class:`SparseArray` from scalar data, pass a sequence instead (:issue:`53039`)
265266
-
266267

@@ -350,6 +351,7 @@ Conversion
350351
- Bug in :meth:`ArrowDtype.numpy_dtype` returning nanosecond units for non-nanosecond ``pyarrow.timestamp`` and ``pyarrow.duration`` types (:issue:`51800`)
351352
- Bug in :meth:`DataFrame.__repr__` incorrectly raising a ``TypeError`` when the dtype of a column is ``np.record`` (:issue:`48526`)
352353
- Bug in :meth:`DataFrame.info` raising ``ValueError`` when ``use_numba`` is set (:issue:`51922`)
354+
- Bug in :meth:`DataFrame.insert` raising ``TypeError`` if ``loc`` is ``np.int64`` (:issue:`53193`)
353355
-
354356

355357
Strings
@@ -413,11 +415,13 @@ Groupby/resample/rolling
413415
the function operated on the whole index rather than each element of the index. (:issue:`51979`)
414416
- Bug in :meth:`DataFrameGroupBy.apply` causing an error to be raised when the input :class:`DataFrame` was subset as a :class:`DataFrame` after groupby (``[['a']]`` and not ``['a']``) and the given callable returned :class:`Series` that were not all indexed the same. (:issue:`52444`)
415417
- Bug in :meth:`GroupBy.groups` with a datetime key in conjunction with another key produced incorrect number of group keys (:issue:`51158`)
418+
- Bug in :meth:`GroupBy.quantile` may implicitly sort the result index with ``sort=False`` (:issue:`53009`)
416419
- Bug in :meth:`GroupBy.var` failing to raise ``TypeError`` when called with datetime64, timedelta64 or :class:`PeriodDtype` values (:issue:`52128`, :issue:`53045`)
417420
-
418421

419422
Reshaping
420423
^^^^^^^^^
424+
- Bug in :func:`crosstab` when ``dropna=False`` would not keep ``np.nan`` in the result (:issue:`10772`)
421425
- Bug in :meth:`DataFrame.agg` and :meth:`Series.agg` on non-unique columns would return incorrect type when dist-like argument passed in (:issue:`51099`)
422426
- Bug in :meth:`DataFrame.stack` losing extension dtypes when columns is a :class:`MultiIndex` and frame contains mixed dtypes (:issue:`45740`)
423427
- Bug in :meth:`DataFrame.transpose` inferring dtype for object column (:issue:`51546`)
@@ -426,6 +430,7 @@ Reshaping
426430

427431
Sparse
428432
^^^^^^
433+
- Bug in :class:`SparseDtype` constructor failing to raise ``TypeError`` when given an incompatible ``dtype`` for its subtype, which must be a ``numpy`` dtype (:issue:`53160`)
429434
- Bug in :meth:`arrays.SparseArray.map` allowed the fill value to be included in the sparse values (:issue:`52095`)
430435
-
431436

environment.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ dependencies:
8989
- gitdb
9090
- natsort # DataFrame.sort_values doctest
9191
- numpydoc
92-
- pydata-sphinx-theme<0.11
92+
- pydata-sphinx-theme
9393
- pytest-cython # doctest
9494
- sphinx
9595
- sphinx-design

pandas/core/arrays/sparse/dtype.py

+43-12
Original file line numberDiff line numberDiff line change
@@ -18,18 +18,23 @@
1818
ExtensionDtype,
1919
register_extension_dtype,
2020
)
21+
from pandas.core.dtypes.cast import can_hold_element
2122
from pandas.core.dtypes.common import (
2223
is_bool_dtype,
2324
is_object_dtype,
2425
is_scalar,
2526
is_string_dtype,
2627
pandas_dtype,
2728
)
29+
from pandas.core.dtypes.dtypes import CategoricalDtype
2830
from pandas.core.dtypes.missing import (
31+
is_valid_na_for_dtype,
2932
isna,
3033
na_value_for_dtype,
3134
)
3235

36+
from pandas.core.construction import ensure_wrapped_if_datetimelike
37+
3338
if TYPE_CHECKING:
3439
from pandas._typing import (
3540
Dtype,
@@ -91,6 +96,9 @@ def __init__(self, dtype: Dtype = np.float64, fill_value: Any = None) -> None:
9196
dtype = pandas_dtype(dtype)
9297
if is_string_dtype(dtype):
9398
dtype = np.dtype("object")
99+
if not isinstance(dtype, np.dtype):
100+
# GH#53160
101+
raise TypeError("SparseDtype subtype must be a numpy dtype")
94102

95103
if fill_value is None:
96104
fill_value = na_value_for_dtype(dtype)
@@ -161,18 +169,41 @@ def _check_fill_value(self):
161169
raise ValueError(
162170
f"fill_value must be a scalar. Got {self._fill_value} instead"
163171
)
164-
# TODO: Right now we can use Sparse boolean array
165-
# with any fill_value. Here was an attempt
166-
# to allow only 3 value: True, False or nan
167-
# but plenty test has failed.
168-
# see pull 44955
169-
# if self._is_boolean and not (
170-
# is_bool(self._fill_value) or isna(self._fill_value)
171-
# ):
172-
# raise ValueError(
173-
# "fill_value must be True, False or nan "
174-
# f"for boolean type. Got {self._fill_value} instead"
175-
# )
172+
173+
# GH#23124 require fill_value and subtype to match
174+
val = self._fill_value
175+
if isna(val):
176+
if not is_valid_na_for_dtype(val, self.subtype):
177+
warnings.warn(
178+
"Allowing arbitrary scalar fill_value in SparseDtype is "
179+
"deprecated. In a future version, the fill_value must be "
180+
"a valid value for the SparseDtype.subtype.",
181+
FutureWarning,
182+
stacklevel=find_stack_level(),
183+
)
184+
elif isinstance(self.subtype, CategoricalDtype):
185+
# TODO: is this even supported? It is reached in
186+
# test_dtype_sparse_with_fill_value_not_present_in_data
187+
if self.subtype.categories is None or val not in self.subtype.categories:
188+
warnings.warn(
189+
"Allowing arbitrary scalar fill_value in SparseDtype is "
190+
"deprecated. In a future version, the fill_value must be "
191+
"a valid value for the SparseDtype.subtype.",
192+
FutureWarning,
193+
stacklevel=find_stack_level(),
194+
)
195+
else:
196+
dummy = np.empty(0, dtype=self.subtype)
197+
dummy = ensure_wrapped_if_datetimelike(dummy)
198+
199+
if not can_hold_element(dummy, val):
200+
warnings.warn(
201+
"Allowing arbitrary scalar fill_value in SparseDtype is "
202+
"deprecated. In a future version, the fill_value must be "
203+
"a valid value for the SparseDtype.subtype.",
204+
FutureWarning,
205+
stacklevel=find_stack_level(),
206+
)
176207

177208
@property
178209
def _is_na_fill_value(self) -> bool:

pandas/core/base.py

+44-2
Original file line numberDiff line numberDiff line change
@@ -357,12 +357,24 @@ def item(self):
357357
Returns
358358
-------
359359
scalar
360-
The first element of Series.
360+
The first element of Series or Index.
361361
362362
Raises
363363
------
364364
ValueError
365-
If the data is not length-1.
365+
If the data is not length = 1.
366+
367+
Examples
368+
--------
369+
>>> s = pd.Series([1])
370+
>>> s.item()
371+
1
372+
373+
For an index:
374+
375+
>>> s = pd.Series([1], index=['a'])
376+
>>> s.index.item()
377+
'a'
366378
"""
367379
if len(self) == 1:
368380
return next(iter(self))
@@ -965,6 +977,16 @@ def is_unique(self) -> bool:
965977
Returns
966978
-------
967979
bool
980+
981+
Examples
982+
--------
983+
>>> s = pd.Series([1, 2, 3])
984+
>>> s.is_unique
985+
True
986+
987+
>>> s = pd.Series([1, 2, 3, 1])
988+
>>> s.is_unique
989+
False
968990
"""
969991
return self.nunique(dropna=False) == len(self)
970992

@@ -976,6 +998,16 @@ def is_monotonic_increasing(self) -> bool:
976998
Returns
977999
-------
9781000
bool
1001+
1002+
Examples
1003+
--------
1004+
>>> s = pd.Series([1, 2, 2])
1005+
>>> s.is_monotonic_increasing
1006+
True
1007+
1008+
>>> s = pd.Series([3, 2, 1])
1009+
>>> s.is_monotonic_increasing
1010+
False
9791011
"""
9801012
from pandas import Index
9811013

@@ -989,6 +1021,16 @@ def is_monotonic_decreasing(self) -> bool:
9891021
Returns
9901022
-------
9911023
bool
1024+
1025+
Examples
1026+
--------
1027+
>>> s = pd.Series([3, 2, 2, 1])
1028+
>>> s.is_monotonic_decreasing
1029+
True
1030+
1031+
>>> s = pd.Series([1, 2, 3])
1032+
>>> s.is_monotonic_decreasing
1033+
False
9921034
"""
9931035
from pandas import Index
9941036

pandas/core/frame.py

+3-2
Original file line numberDiff line numberDiff line change
@@ -4800,9 +4800,10 @@ def insert(
48004800
if not allow_duplicates and column in self.columns:
48014801
# Should this be a different kind of error??
48024802
raise ValueError(f"cannot insert {column}, already exists")
4803-
if not isinstance(loc, int):
4803+
if not is_integer(loc):
48044804
raise TypeError("loc must be int")
4805-
4805+
# convert non stdlib ints to satisfy typing checks
4806+
loc = int(loc)
48064807
if isinstance(value, DataFrame) and len(value.columns) > 1:
48074808
raise ValueError(
48084809
f"Expected a one-dimensional object, got a DataFrame with "

0 commit comments

Comments (0)