Skip to content

DEPR: Remove method and tolerance in Index.get_loc, bump xarray #49630

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 10 commits on Dec 6, 2022
2 changes: 1 addition & 1 deletion ci/deps/actions-38-minimum_versions.yaml
Original file line number Diff line number Diff line change
@@ -54,7 +54,7 @@ dependencies:
- sqlalchemy=1.4.16
- tabulate=0.8.9
- tzdata=2022a
- xarray=0.19.0
- xarray=0.21.0
- xlrd=2.0.1
- xlsxwriter=1.4.3
- zstandard=0.15.2
2 changes: 1 addition & 1 deletion doc/source/getting_started/install.rst
Original file line number Diff line number Diff line change
@@ -333,7 +333,7 @@ Installable with ``pip install "pandas[computation]"``.
Dependency Minimum Version pip extra Notes
========================= ================== =============== =============================================================
SciPy 1.7.1 computation Miscellaneous statistical functions
xarray 0.19.0 computation pandas-like API for N-dimensional data
xarray 0.21.0 computation pandas-like API for N-dimensional data
========================= ================== =============== =============================================================

Excel files
3 changes: 3 additions & 0 deletions doc/source/whatsnew/v2.0.0.rst
Original file line number Diff line number Diff line change
@@ -338,6 +338,8 @@ Optional libraries below the lowest tested version may still work, but are not c
+-----------------+-----------------+---------+
| fastparquet | 0.6.3 | X |
+-----------------+-----------------+---------+
| xarray | 0.21.0 | X |
+-----------------+-----------------+---------+

See :ref:`install.dependencies` and :ref:`install.optional_dependencies` for more.

@@ -520,6 +522,7 @@ Removal of prior version deprecations/changes
- Removed the ``closed`` argument in :meth:`date_range` and :meth:`bdate_range` in favor of ``inclusive`` argument (:issue:`40245`)
- Removed the ``center`` keyword in :meth:`DataFrame.expanding` (:issue:`20647`)
- Removed the ``truediv`` keyword from :func:`eval` (:issue:`29812`)
- Removed the ``method`` and ``tolerance`` arguments in :meth:`Index.get_loc`. Use ``index.get_indexer([label], method=..., tolerance=...)`` instead (:issue:`42269`)
- Removed the ``pandas.datetime`` submodule (:issue:`30489`)
- Removed the ``pandas.np`` submodule (:issue:`30296`)
- Removed ``pandas.util.testing`` in favor of ``pandas.testing`` (:issue:`30745`)
2 changes: 1 addition & 1 deletion pandas/compat/_optional.py
Original file line number Diff line number Diff line change
@@ -41,7 +41,7 @@
"sqlalchemy": "1.4.16",
"tables": "3.6.1",
"tabulate": "0.8.9",
"xarray": "0.19.0",
"xarray": "0.21.0",
"xlrd": "2.0.1",
"xlsxwriter": "1.4.3",
"zstandard": "0.15.2",
67 changes: 12 additions & 55 deletions pandas/core/indexes/base.py
Original file line number Diff line number Diff line change
@@ -3429,27 +3429,13 @@ def _convert_can_do_setop(self, other) -> tuple[Index, Hashable]:
# --------------------------------------------------------------------
# Indexing Methods

def get_loc(self, key, method=None, tolerance=None):
def get_loc(self, key):
"""
Get integer location, slice or boolean mask for requested label.

Parameters
----------
key : label
method : {None, 'pad'/'ffill', 'backfill'/'bfill', 'nearest'}, optional
* default: exact matches only.
* pad / ffill: find the PREVIOUS index value if no exact match.
* backfill / bfill: use NEXT index value if no exact match
* nearest: use the NEAREST index value if no exact match. Tied
distances are broken by preferring the larger index value.

.. deprecated:: 1.4
Use index.get_indexer([item], method=...) instead.

tolerance : int or float, optional
Maximum distance from index value for inexact matches. The value of
the index at the matching location must satisfy the equation
``abs(index[loc] - key) <= tolerance``.

Returns
-------
@@ -3469,46 +3455,17 @@ def get_loc(self, key, method=None, tolerance=None):
>>> non_monotonic_index.get_loc('b')
array([False, True, False, True])
"""
if method is None:
if tolerance is not None:
raise ValueError(
"tolerance argument only valid if using pad, "
"backfill or nearest lookups"
)
casted_key = self._maybe_cast_indexer(key)
try:
return self._engine.get_loc(casted_key)
except KeyError as err:
raise KeyError(key) from err
except TypeError:
# If we have a listlike key, _check_indexing_error will raise
# InvalidIndexError. Otherwise we fall through and re-raise
# the TypeError.
self._check_indexing_error(key)
raise

# GH#42269
warnings.warn(
f"Passing method to {type(self).__name__}.get_loc is deprecated "
"and will raise in a future version. Use "
"index.get_indexer([item], method=...) instead.",
FutureWarning,
stacklevel=find_stack_level(),
)

if is_scalar(key) and isna(key) and not self.hasnans:
raise KeyError(key)

if tolerance is not None:
tolerance = self._convert_tolerance(tolerance, np.asarray(key))

indexer = self.get_indexer([key], method=method, tolerance=tolerance)
if indexer.ndim > 1 or indexer.size > 1:
raise TypeError("get_loc requires scalar valued input")
loc = indexer.item()
if loc == -1:
raise KeyError(key)
return loc
casted_key = self._maybe_cast_indexer(key)
try:
return self._engine.get_loc(casted_key)
except KeyError as err:
raise KeyError(key) from err
except TypeError:
# If we have a listlike key, _check_indexing_error will raise
# InvalidIndexError. Otherwise we fall through and re-raise
# the TypeError.
self._check_indexing_error(key)
raise

_index_shared_docs[
"get_indexer"
11 changes: 3 additions & 8 deletions pandas/core/indexes/datetimes.py
Original file line number Diff line number Diff line change
@@ -556,7 +556,7 @@ def _disallow_mismatched_indexing(self, key) -> None:
except TypeError as err:
raise KeyError(key) from err

def get_loc(self, key, method=None, tolerance=None):
def get_loc(self, key):
"""
Get integer location for requested label

@@ -587,8 +587,7 @@ def get_loc(self, key, method=None, tolerance=None):
try:
return self._partial_date_slice(reso, parsed)
except KeyError as err:
if method is None:
raise KeyError(key) from err
raise KeyError(key) from err

key = parsed

@@ -599,18 +598,14 @@ def get_loc(self, key, method=None, tolerance=None):
)

elif isinstance(key, dt.time):
if method is not None:
raise NotImplementedError(
"cannot yet lookup inexact labels when key is a time object"
)
return self.indexer_at_time(key)

else:
# unrecognized type
raise KeyError(key)

try:
return Index.get_loc(self, key, method, tolerance)
return Index.get_loc(self, key)
except KeyError as err:
raise KeyError(orig_key) from err

9 changes: 1 addition & 8 deletions pandas/core/indexes/multi.py
Original file line number Diff line number Diff line change
@@ -2730,7 +2730,7 @@ def _get_loc_single_level_index(self, level_index: Index, key: Hashable) -> int:
else:
return level_index.get_loc(key)

def get_loc(self, key, method=None):
def get_loc(self, key):
"""
Get location for a label or a tuple of labels.

@@ -2740,7 +2740,6 @@ def get_loc(self, key, method=None):
Parameters
----------
key : label or tuple of labels (one for each level)
method : None

Returns
-------
@@ -2772,12 +2771,6 @@ def get_loc(self, key, method=None):
>>> mi.get_loc(('b', 'e'))
1
"""
if method is not None:
raise NotImplementedError(
"only the default get_loc method is "
"currently supported for MultiIndex"
)

self._check_indexing_error(key)

def _maybe_to_slice(loc):
8 changes: 3 additions & 5 deletions pandas/core/indexes/period.py
Original file line number Diff line number Diff line change
@@ -375,7 +375,7 @@ def _convert_tolerance(self, tolerance, target):

return tolerance

def get_loc(self, key, method=None, tolerance=None):
def get_loc(self, key):
"""
Get integer location for requested label.

@@ -421,10 +421,8 @@ def get_loc(self, key, method=None, tolerance=None):
# the reso < self._resolution_obj case goes
# through _get_string_slice
key = self._cast_partial_indexing_scalar(parsed)
elif method is None:
raise KeyError(key)
else:
key = self._cast_partial_indexing_scalar(parsed)
raise KeyError(key)

elif isinstance(key, Period):
self._disallow_mismatched_indexing(key)
@@ -437,7 +435,7 @@ def get_loc(self, key, method=None, tolerance=None):
raise KeyError(key)

try:
return Index.get_loc(self, key, method, tolerance)
return Index.get_loc(self, key)
except KeyError as err:
raise KeyError(orig_key) from err

20 changes: 9 additions & 11 deletions pandas/core/indexes/range.py
Original file line number Diff line number Diff line change
@@ -328,17 +328,15 @@ def inferred_type(self) -> str:
# Indexing Methods

@doc(Int64Index.get_loc)
def get_loc(self, key, method=None, tolerance=None):
if method is None and tolerance is None:
if is_integer(key) or (is_float(key) and key.is_integer()):
new_key = int(key)
try:
return self._range.index(new_key)
except ValueError as err:
raise KeyError(key) from err
self._check_indexing_error(key)
raise KeyError(key)
return super().get_loc(key, method=method, tolerance=tolerance)
def get_loc(self, key):
if is_integer(key) or (is_float(key) and key.is_integer()):
new_key = int(key)
try:
return self._range.index(new_key)
except ValueError as err:
raise KeyError(key) from err
self._check_indexing_error(key)
raise KeyError(key)

def _get_indexer(
self,
4 changes: 2 additions & 2 deletions pandas/core/indexes/timedeltas.py
Original file line number Diff line number Diff line change
@@ -174,7 +174,7 @@ def _is_comparable_dtype(self, dtype: DtypeObj) -> bool:
# -------------------------------------------------------------------
# Indexing Methods

def get_loc(self, key, method=None, tolerance=None):
def get_loc(self, key):
"""
Get integer location for requested label

@@ -189,7 +189,7 @@ def get_loc(self, key, method=None, tolerance=None):
except TypeError as err:
raise KeyError(key) from err

return Index.get_loc(self, key, method, tolerance)
return Index.get_loc(self, key)

def _parse_with_reso(self, label: str):
# the "with_reso" is a no-op for TimedeltaIndex
88 changes: 0 additions & 88 deletions pandas/tests/indexes/datetimes/test_indexing.py
Original file line number Diff line number Diff line change
@@ -8,8 +8,6 @@
import numpy as np
import pytest

from pandas.errors import InvalidIndexError

import pandas as pd
from pandas import (
DatetimeIndex,
@@ -405,75 +403,6 @@ def test_get_loc_key_unit_mismatch_not_castable(self):

assert key not in dti

@pytest.mark.parametrize("method", [None, "pad", "backfill", "nearest"])
@pytest.mark.filterwarnings("ignore:Passing method:FutureWarning")
def test_get_loc_method_exact_match(self, method):
idx = date_range("2000-01-01", periods=3)
assert idx.get_loc(idx[1], method) == 1
assert idx.get_loc(idx[1].to_pydatetime(), method) == 1
assert idx.get_loc(str(idx[1]), method) == 1

if method is not None:
assert idx.get_loc(idx[1], method, tolerance=pd.Timedelta("0 days")) == 1

@pytest.mark.filterwarnings("ignore:Passing method:FutureWarning")
def test_get_loc(self):
idx = date_range("2000-01-01", periods=3)

assert idx.get_loc("2000-01-01", method="nearest") == 0
assert idx.get_loc("2000-01-01T12", method="nearest") == 1

assert idx.get_loc("2000-01-01T12", method="nearest", tolerance="1 day") == 1
assert (
idx.get_loc("2000-01-01T12", method="nearest", tolerance=pd.Timedelta("1D"))
== 1
)
assert (
idx.get_loc(
"2000-01-01T12", method="nearest", tolerance=np.timedelta64(1, "D")
)
== 1
)
assert (
idx.get_loc("2000-01-01T12", method="nearest", tolerance=timedelta(1)) == 1
)
with pytest.raises(ValueError, match="unit abbreviation w/o a number"):
idx.get_loc("2000-01-01T12", method="nearest", tolerance="foo")
with pytest.raises(KeyError, match="'2000-01-01T03'"):
idx.get_loc("2000-01-01T03", method="nearest", tolerance="2 hours")
with pytest.raises(
ValueError, match="tolerance size must match target index size"
):
idx.get_loc(
"2000-01-01",
method="nearest",
tolerance=[
pd.Timedelta("1day").to_timedelta64(),
pd.Timedelta("1day").to_timedelta64(),
],
)

assert idx.get_loc("2000", method="nearest") == slice(0, 3)
assert idx.get_loc("2000-01", method="nearest") == slice(0, 3)

assert idx.get_loc("1999", method="nearest") == 0
assert idx.get_loc("2001", method="nearest") == 2

with pytest.raises(KeyError, match="'1999'"):
idx.get_loc("1999", method="pad")
with pytest.raises(KeyError, match="'2001'"):
idx.get_loc("2001", method="backfill")

with pytest.raises(KeyError, match="'foobar'"):
idx.get_loc("foobar")
with pytest.raises(InvalidIndexError, match=r"slice\(None, 2, None\)"):
idx.get_loc(slice(2))

idx = DatetimeIndex(["2000-01-01", "2000-01-04"])
assert idx.get_loc("2000-01-02", method="nearest") == 0
assert idx.get_loc("2000-01-03", method="nearest") == 1
assert idx.get_loc("2000-01", method="nearest") == slice(0, 2)

def test_get_loc_time_obj(self):
# time indexing
idx = date_range("2000-01-01", periods=24, freq="H")
@@ -486,11 +415,6 @@ def test_get_loc_time_obj(self):
expected = np.array([])
tm.assert_numpy_array_equal(result, expected, check_dtype=False)

msg = "cannot yet lookup inexact labels when key is a time object"
with pytest.raises(NotImplementedError, match=msg):
with tm.assert_produces_warning(FutureWarning, match="deprecated"):
idx.get_loc(time(12, 30), method="pad")

def test_get_loc_time_obj2(self):
# GH#8667

@@ -525,18 +449,6 @@ def test_get_loc_time_nat(self):
expected = np.array([], dtype=np.intp)
tm.assert_numpy_array_equal(loc, expected)

def test_get_loc_tz_aware(self):
# https://github.com/pandas-dev/pandas/issues/32140
dti = date_range(
Timestamp("2019-12-12 00:00:00", tz="US/Eastern"),
Timestamp("2019-12-13 00:00:00", tz="US/Eastern"),
freq="5s",
)
key = Timestamp("2019-12-12 10:19:25", tz="US/Eastern")
with tm.assert_produces_warning(FutureWarning, match="deprecated"):
result = dti.get_loc(key, method="nearest")
assert result == 7433

def test_get_loc_nat(self):
# GH#20464
index = DatetimeIndex(["1/3/2000", "NaT"])
Loading