From 8d0d11c99b14210383a02579e820bfe84bcdbe71 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 10 Jan 2022 10:40:16 -0800 Subject: [PATCH 1/2] BUG: hash_array TypeError instead of AttributeError on Index GH#42003 --- pandas/core/util/hashing.py | 7 +++++++ pandas/tests/util/test_hashing.py | 9 +++++++++ 2 files changed, 16 insertions(+) diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 02899bac14bb2..3a4fa49009e1f 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -286,6 +286,13 @@ def hash_array( if is_categorical_dtype(dtype): vals = cast("Categorical", vals) return _hash_categorical(vals, encoding, hash_key) + + elif isinstance(vals, ABCIndex): + # GH#42003 + raise TypeError( + "hash_array requires np.ndarray or ExtensionArray, not " + f"{type(vals).__name__}. Use hash_pandas_object instead." + ) elif not isinstance(vals, np.ndarray): # i.e. ExtensionArray vals, _ = vals._values_for_factorize() diff --git a/pandas/tests/util/test_hashing.py b/pandas/tests/util/test_hashing.py index 6eee756f67a2e..c2977b81a9b4a 100644 --- a/pandas/tests/util/test_hashing.py +++ b/pandas/tests/util/test_hashing.py @@ -71,6 +71,15 @@ def test_hash_array_errors(val): hash_array(val) +def test_hash_array_index_exception(): + # GH42003 TypeError instead of AttributeError + obj = pd.DatetimeIndex(["2018-10-28 01:20:00"], tz="Europe/Berlin") + + msg = "Use hash_pandas_object instead" + with pytest.raises(TypeError, match=msg): + hash_array(obj) + + def test_hash_tuples(): tuples = [(1, "one"), (1, "two"), (2, "one")] result = hash_tuples(tuples) From 3f9bd85f384db7dcb2dba1ad51e4d3364146cd10 Mon Sep 17 00:00:00 2001 From: Brock Date: Thu, 13 Jan 2022 11:09:41 -0800 Subject: [PATCH 2/2] check for EA --- pandas/core/util/hashing.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/core/util/hashing.py b/pandas/core/util/hashing.py index 3a4fa49009e1f..892fa83f98755 100644 --- a/pandas/core/util/hashing.py +++ b/pandas/core/util/hashing.py @@ -24,6 +24,7 @@ ) from pandas.core.dtypes.generic import ( ABCDataFrame, + ABCExtensionArray, ABCIndex, ABCMultiIndex, ABCSeries, @@ -287,15 +288,15 @@ def hash_array( vals = cast("Categorical", vals) return _hash_categorical(vals, encoding, hash_key) - elif isinstance(vals, ABCIndex): + elif isinstance(vals, ABCExtensionArray): + vals, _ = vals._values_for_factorize() + + elif not isinstance(vals, np.ndarray): # GH#42003 raise TypeError( "hash_array requires np.ndarray or ExtensionArray, not " f"{type(vals).__name__}. Use hash_pandas_object instead." ) - elif not isinstance(vals, np.ndarray): - # i.e. ExtensionArray - vals, _ = vals._values_for_factorize() return _hash_ndarray(vals, encoding, hash_key, categorize)