From 3e016816c77429f2582624254a54485b13af3912 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" <h.vetinari@gmx.com> Date: Mon, 28 Jan 2019 18:50:31 +0100 Subject: [PATCH 01/21] API: re-enable custom label types in set_index --- pandas/core/frame.py | 25 +++++++++++++++---------- pandas/tests/frame/test_alter_axes.py | 25 +++++++++++++++++++++++++ 2 files changed, 40 insertions(+), 10 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 28c6f3c23a3ce..2630a09500395 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4135,13 +4135,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False, 'array, or a list containing only valid column keys and ' 'one-dimensional arrays.') - if (is_scalar(keys) or isinstance(keys, tuple) - or isinstance(keys, (ABCIndexClass, ABCSeries, np.ndarray))): - # make sure we have a container of keys/arrays we can iterate over - # tuples can appear as valid column keys! + if not isinstance(keys, list): keys = [keys] - elif not isinstance(keys, list): - raise ValueError(err_msg) missing = [] for col in keys: @@ -4150,10 +4145,20 @@ def set_index(self, keys, drop=True, append=False, inplace=False, # tuples are always considered keys, never as list-likes if col not in self: missing.append(col) - elif (not isinstance(col, (ABCIndexClass, ABCSeries, - np.ndarray, list)) - or getattr(col, 'ndim', 1) > 1): - raise ValueError(err_msg) + elif isinstance(col, (ABCIndexClass, ABCSeries, + np.ndarray, list)): + # arrays are fine as long as they are one-dimensional + if getattr(col, 'ndim', 1) > 1: + raise ValueError(err_msg) + else: + # everything else gets tried as a key; see GH 24969 + try: + self[col] + str(col) + except KeyError: + tipo = type(col) + raise ValueError(err_msg, + 'Received column of type {}'.format(tipo)) if missing: raise KeyError('{}'.format(missing)) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index c2355742199dc..524033da6f8c9 100644 --- 
a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -270,6 +270,31 @@ def test_set_index_raise_on_type(self, frame_of_index_cols, box, df.set_index(['A', df['A'], box(df['A'])], drop=drop, append=append) + def test_set_index_custom_label_type(self): + # GH 24969 + + class Thing: + def __init__(self, name, color): + self.name = name + self.color = color + + def __str__(self): + return "<Thing %r>" % (self.name,) + + thing1 = Thing('One', 'red') + thing2 = Thing('Two', 'blue') + df = DataFrame({thing1: [0, 1], thing2: [2, 3]}) + expected = DataFrame({thing1: [0, 1]}, + index=Index([2, 3], name=thing2)) + + # use custom label directly + result = df.set_index(thing2) + tm.assert_frame_equal(result, expected) + + # custom label wrapped in list + result = df.set_index([thing2]) + tm.assert_frame_equal(result, expected) + def test_construction_with_categorical_index(self): ci = tm.makeCategoricalIndex(10) ci.name = 'B' From 1b71e68579c81ce9b8585d284adbf1c7273f9ec9 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" <h.vetinari@gmx.com> Date: Mon, 28 Jan 2019 19:26:29 +0100 Subject: [PATCH 02/21] Fix doc pattern? 
--- pandas/core/frame.py | 1 - pandas/tests/frame/test_alter_axes.py | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 2630a09500395..a36c850fc5633 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4154,7 +4154,6 @@ def set_index(self, keys, drop=True, append=False, inplace=False, # everything else gets tried as a key; see GH 24969 try: self[col] - str(col) except KeyError: tipo = type(col) raise ValueError(err_msg, diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 524033da6f8c9..c68910ef9d099 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -273,7 +273,7 @@ def test_set_index_raise_on_type(self, frame_of_index_cols, box, def test_set_index_custom_label_type(self): # GH 24969 - class Thing: + class Thing(object): def __init__(self, name, color): self.name = name self.color = color From caeb125ad5766ee5425095dadbfe2ce49e747968 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" <h.vetinari@gmx.com> Date: Wed, 30 Jan 2019 00:01:06 +0100 Subject: [PATCH 03/21] Review (TomAugspurger) --- doc/source/whatsnew/v0.24.1.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index de33ce64c1597..f088969afde11 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -73,6 +73,7 @@ Bug Fixes **Reshaping** - Bug in :func:`merge` when merging by index name would sometimes result in an incorrectly numbered index (:issue:`24212`) +- Fixed regression where custom hashable types could not be used as column keys in :meth:`DataFrame.set_index` anymore (:issue:`24969`) **Other** From 8bd53407a1ce0bd2ba27d809d5aaad5c014bca49 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" <h.vetinari@gmx.com> Date: Wed, 30 Jan 2019 00:16:59 +0100 Subject: [PATCH 04/21] Review (jreback) --- pandas/core/frame.py | 9 ++++++- pandas/tests/frame/test_alter_axes.py | 39 ++++++++++++++++++++++++--- 2 files changed, 43 insertions(+), 5 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index a36c850fc5633..22317154a791f 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4153,8 +4153,15 @@ def set_index(self, keys, drop=True, append=False, inplace=False, else: # everything else gets tried as a key; see GH 24969 try: - self[col] + hash(col) and self[col] + except TypeError: + # for unhashable types + tipo = type(col) + raise TypeError(err_msg, + 'Received column of type {}'.format(tipo)) except KeyError: + # for hashable types that are not keys; + # treat as ValueError, not missing key tipo = type(col) raise ValueError(err_msg, 'Received column of type {}'.format(tipo)) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index c68910ef9d099..3df35bc693319 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -261,12 +261,17 @@ def test_set_index_raise_on_type(self, frame_of_index_cols, box, df = frame_of_index_cols msg = 'The parameter "keys" may be a column key, .*' - # forbidden type, e.g. set/tuple/iter - with pytest.raises(ValueError, match=msg): + # forbidden type, e.g. set/iter + + # iter is hashable, hence it is treated as a ValueError + # set is unhashable, hence it is a TypeError + exc = ValueError if box == iter else TypeError + + with pytest.raises(exc, match=msg): df.set_index(box(df['A']), drop=drop, append=append) - # forbidden type in list, e.g. set/tuple/iter - with pytest.raises(ValueError, match=msg): + # forbidden type in list, e.g. 
set/iter + with pytest.raises(exc, match=msg): df.set_index(['A', df['A'], box(df['A'])], drop=drop, append=append) @@ -295,6 +300,32 @@ def __str__(self): result = df.set_index([thing2]) tm.assert_frame_equal(result, expected) + def test_set_index_custom_label_type_raises(self): + # GH 24969 + + # purposefully inherit from something unhashable + class Thing(set): + def __init__(self, name, color): + self.name = name + self.color = color + + def __str__(self): + return "<Thing %r>" % (self.name,) + + thing1 = Thing('One', 'red') + thing2 = Thing('Two', 'blue') + df = DataFrame([[0, 2], [1, 3]], columns=[thing1, thing2]) + + msg = 'The parameter "keys" may be a column key, .*' + + with pytest.raises(TypeError, match=msg): + # use custom label directly + df.set_index(thing2) + + with pytest.raises(TypeError, match=msg): + # custom label wrapped in list + df.set_index([thing2]) + def test_construction_with_categorical_index(self): ci = tm.makeCategoricalIndex(10) ci.name = 'B' From d76ecfb9c304bdaede34ddd8e2c28f13db54f8ba Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" <h.vetinari@gmx.com> Date: Thu, 31 Jan 2019 00:40:28 +0100 Subject: [PATCH 05/21] Review (jreback & jorisvandenbossche) --- doc/source/whatsnew/v0.24.1.rst | 3 ++- pandas/core/frame.py | 19 ++++++++++++------- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index f35b4cdb936c5..4eb1a6ec83845 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -75,12 +75,13 @@ Bug Fixes **Reshaping** - Bug in :meth:`DataFrame.groupby` with :class:`Grouper` when there is a time change (DST) and grouping frequency is ``'1d'`` (:issue:`24972`) -- Fixed regression where custom hashable types could not be used as column keys in :meth:`DataFrame.set_index` anymore (:issue:`24969`) +- Fixed regression where custom hashable types could not be used as column keys in :meth:`DataFrame.set_index` (:issue:`24969`) **Visualization** - Fixed the warning for implicitly registered matplotlib converters not showing. See :ref:`whatsnew_0211.converters` for more (:issue:`24963`). + **Other** - diff --git a/pandas/core/frame.py b/pandas/core/frame.py index b0a03932a9ac4..6f934f2c38a0b 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4154,18 +4154,23 @@ def set_index(self, keys, drop=True, append=False, inplace=False, else: # everything else gets tried as a key; see GH 24969 try: - hash(col) and self[col] + found = col in self.columns + if not found: + # check if this raises KeyError (e.g. 
iter is hashable + # but never a valid key); will be caught below + self[col] + # otherwise the key is missing + missing.append(col) except TypeError: # for unhashable types tipo = type(col) - raise TypeError(err_msg, - 'Received column of type {}'.format(tipo)) + raise TypeError(err_msg + 'Received column of ' + 'type {}'.format(tipo)) except KeyError: - # for hashable types that are not keys; - # treat as ValueError, not missing key + # for hashable types that are not keys tipo = type(col) - raise ValueError(err_msg, - 'Received column of type {}'.format(tipo)) + raise ValueError(err_msg + ' Received column of ' + 'type {}'.format(tipo)) if missing: raise KeyError('{}'.format(missing)) From d2ffb8125610146398c846555caa0bef20cdbbc2 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" <h.vetinari@gmx.com> Date: Thu, 31 Jan 2019 00:42:08 +0100 Subject: [PATCH 06/21] Revert last two commits --- pandas/core/frame.py | 18 +++---------- pandas/tests/frame/test_alter_axes.py | 39 +++------------------------ 2 files changed, 7 insertions(+), 50 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 6f934f2c38a0b..722d3339f578a 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4154,23 +4154,11 @@ def set_index(self, keys, drop=True, append=False, inplace=False, else: # everything else gets tried as a key; see GH 24969 try: - found = col in self.columns - if not found: - # check if this raises KeyError (e.g. 
iter is hashable - # but never a valid key); will be caught below - self[col] - # otherwise the key is missing - missing.append(col) - except TypeError: - # for unhashable types - tipo = type(col) - raise TypeError(err_msg + 'Received column of ' - 'type {}'.format(tipo)) + self[col] except KeyError: - # for hashable types that are not keys tipo = type(col) - raise ValueError(err_msg + ' Received column of ' - 'type {}'.format(tipo)) + raise ValueError(err_msg, + 'Received column of type {}'.format(tipo)) if missing: raise KeyError('{}'.format(missing)) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 3df35bc693319..c68910ef9d099 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -261,17 +261,12 @@ def test_set_index_raise_on_type(self, frame_of_index_cols, box, df = frame_of_index_cols msg = 'The parameter "keys" may be a column key, .*' - # forbidden type, e.g. set/iter - - # iter is hashable, hence it is treated as a ValueError - # set is unhashable, hence it is a TypeError - exc = ValueError if box == iter else TypeError - - with pytest.raises(exc, match=msg): + # forbidden type, e.g. set/tuple/iter + with pytest.raises(ValueError, match=msg): df.set_index(box(df['A']), drop=drop, append=append) - # forbidden type in list, e.g. set/iter - with pytest.raises(exc, match=msg): + # forbidden type in list, e.g. 
set/tuple/iter + with pytest.raises(ValueError, match=msg): df.set_index(['A', df['A'], box(df['A'])], drop=drop, append=append) @@ -300,32 +295,6 @@ def __str__(self): result = df.set_index([thing2]) tm.assert_frame_equal(result, expected) - def test_set_index_custom_label_type_raises(self): - # GH 24969 - - # purposefully inherit from something unhashable - class Thing(set): - def __init__(self, name, color): - self.name = name - self.color = color - - def __str__(self): - return "<Thing %r>" % (self.name,) - - thing1 = Thing('One', 'red') - thing2 = Thing('Two', 'blue') - df = DataFrame([[0, 2], [1, 3]], columns=[thing1, thing2]) - - msg = 'The parameter "keys" may be a column key, .*' - - with pytest.raises(TypeError, match=msg): - # use custom label directly - df.set_index(thing2) - - with pytest.raises(TypeError, match=msg): - # custom label wrapped in list - df.set_index([thing2]) - def test_construction_with_categorical_index(self): ci = tm.makeCategoricalIndex(10) ci.name = 'B' From 5863678045ce09f8f3ddca9321420a2080bbd8bd Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" <h.vetinari@gmx.com> Date: Thu, 31 Jan 2019 09:07:07 +0100 Subject: [PATCH 07/21] Review (jorisvandenbossche) --- pandas/core/frame.py | 17 +++++-- pandas/tests/frame/test_alter_axes.py | 70 +++++++++++++++++++++++++-- 2 files changed, 78 insertions(+), 9 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 722d3339f578a..3ba4f39367b56 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4151,14 +4151,23 @@ def set_index(self, keys, drop=True, append=False, inplace=False, # arrays are fine as long as they are one-dimensional if getattr(col, 'ndim', 1) > 1: raise ValueError(err_msg) + elif is_list_like(col, allow_sets=False): + # various iterators/generators are hashable, but should not + # raise a KeyError + tipo = type(col) + raise ValueError(err_msg + ' Received column of ' + 'type {}'.format(tipo)) else: # everything else gets tried as a key; see GH 24969 try: - self[col] - except KeyError: + found = col in self.columns + except TypeError: tipo = type(col) - raise ValueError(err_msg, - 'Received column of type {}'.format(tipo)) + raise TypeError(err_msg + ' Received column of ' + 'type {}'.format(tipo)) + else: + if not found: + missing.append(col) if missing: raise KeyError('{}'.format(missing)) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index c68910ef9d099..8ca038ab38b63 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -255,21 +255,40 @@ def test_set_index_raise_keys(self, frame_of_index_cols, drop, append): @pytest.mark.parametrize('append', [True, False]) @pytest.mark.parametrize('drop', [True, False]) - @pytest.mark.parametrize('box', [set, iter]) - def test_set_index_raise_on_type(self, frame_of_index_cols, box, - drop, append): + @pytest.mark.parametrize('box', [iter, lambda x: (y for y in x)], + ids=['iter', 'generator']) + def test_set_index_raise_on_type_iter(self, frame_of_index_cols, box, + drop, append): df = 
frame_of_index_cols msg = 'The parameter "keys" may be a column key, .*' - # forbidden type, e.g. set/tuple/iter + # forbidden type, e.g. iter/generator with pytest.raises(ValueError, match=msg): df.set_index(box(df['A']), drop=drop, append=append) - # forbidden type in list, e.g. set/tuple/iter + # forbidden type in list, e.g. iter/generator with pytest.raises(ValueError, match=msg): df.set_index(['A', df['A'], box(df['A'])], drop=drop, append=append) + @pytest.mark.parametrize('append', [True, False]) + @pytest.mark.parametrize('drop', [True, False]) + @pytest.mark.parametrize('box', [set, lambda x: dict(zip(x, x)).keys()], + ids=['set', 'dict-view']) + def test_set_index_raise_on_type_unhashable(self, frame_of_index_cols, box, + drop, append): + df = frame_of_index_cols + + msg = 'The parameter "keys" may be a column key, .*' + # forbidden type that is unhashable, e.g. set/dict-view + with pytest.raises(TypeError, match=msg): + df.set_index(box(df['A']), drop=drop, append=append) + + # forbidden type in list that is unhashable, e.g. 
set/dict-view + with pytest.raises(TypeError, match=msg): + df.set_index(['A', df['A'], box(df['A'])], + drop=drop, append=append) + def test_set_index_custom_label_type(self): # GH 24969 @@ -281,6 +300,10 @@ def __init__(self, name, color): def __str__(self): return "<Thing %r>" % (self.name,) + def __repr__(self): + # necessary for pretty KeyError + return self.__str__() + thing1 = Thing('One', 'red') thing2 = Thing('Two', 'blue') df = DataFrame({thing1: [0, 1], thing2: [2, 3]}) @@ -295,6 +318,43 @@ def __str__(self): result = df.set_index([thing2]) tm.assert_frame_equal(result, expected) + # missing key + thing3 = Thing('Three', 'pink') + msg = "<Thing 'Three'>" + with pytest.raises(KeyError, match=msg): + # missing label directly + df.set_index(thing3) + + with pytest.raises(KeyError, match=msg): + # missing label in list + df.set_index([thing3]) + + def test_set_index_custom_label_type_raises(self): + # GH 24969 + + # purposefully inherit from something unhashable + class Thing(set): + def __init__(self, name, color): + self.name = name + self.color = color + + def __str__(self): + return "<Thing %r>" % (self.name,) + + thing1 = Thing('One', 'red') + thing2 = Thing('Two', 'blue') + df = DataFrame([[0, 2], [1, 3]], columns=[thing1, thing2]) + + msg = 'The parameter "keys" may be a column key, .*' + + with pytest.raises(TypeError, match=msg): + # use custom label directly + df.set_index(thing2) + + with pytest.raises(TypeError, match=msg): + # custom label wrapped in list + df.set_index([thing2]) + def test_construction_with_categorical_index(self): ci = tm.makeCategoricalIndex(10) ci.name = 'B' From 0a7d783c116965e1dc1a3a638ade7f1fbfa33930 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" <h.vetinari@gmx.com> Date: Fri, 1 Feb 2019 00:23:29 +0100 Subject: [PATCH 08/21] Fix hashable listlikes (review jorisvandenbossche) --- pandas/core/frame.py | 9 ++-- pandas/tests/frame/test_alter_axes.py | 62 ++++++++++++++++----------- 2 files changed, 43 insertions(+), 28 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 3ba4f39367b56..0a9dd88ca1024 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4151,12 +4151,13 @@ def set_index(self, keys, drop=True, append=False, inplace=False, # arrays are fine as long as they are one-dimensional if getattr(col, 'ndim', 1) > 1: raise ValueError(err_msg) - elif is_list_like(col, allow_sets=False): + elif is_list_like(col) and not hasattr(col, '__len__'): # various iterators/generators are hashable, but should not - # raise a KeyError + # raise a KeyError; we identify them by their lack of __len__. + # hashable listlikes with __len__ get tested as keys below. tipo = type(col) - raise ValueError(err_msg + ' Received column of ' - 'type {}'.format(tipo)) + raise TypeError(err_msg + ' Received column of ' + 'type {}'.format(tipo)) else: # everything else gets tried as a key; see GH 24969 try: diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 8ca038ab38b63..cbdb181554981 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -255,36 +255,18 @@ def test_set_index_raise_keys(self, frame_of_index_cols, drop, append): @pytest.mark.parametrize('append', [True, False]) @pytest.mark.parametrize('drop', [True, False]) - @pytest.mark.parametrize('box', [iter, lambda x: (y for y in x)], - ids=['iter', 'generator']) - def test_set_index_raise_on_type_iter(self, frame_of_index_cols, box, - drop, append): + @pytest.mark.parametrize('box', [set, iter, lambda x: (y for y in x)], + ids=['set', 'iter', 'generator']) + def test_set_index_raise_on_type(self, frame_of_index_cols, box, + drop, append): df = 
frame_of_index_cols msg = 'The parameter "keys" may be a column key, .*' - # forbidden type, e.g. iter/generator - with pytest.raises(ValueError, match=msg): - df.set_index(box(df['A']), drop=drop, append=append) - - # forbidden type in list, e.g. iter/generator - with pytest.raises(ValueError, match=msg): - df.set_index(['A', df['A'], box(df['A'])], - drop=drop, append=append) - - @pytest.mark.parametrize('append', [True, False]) - @pytest.mark.parametrize('drop', [True, False]) - @pytest.mark.parametrize('box', [set, lambda x: dict(zip(x, x)).keys()], - ids=['set', 'dict-view']) - def test_set_index_raise_on_type_unhashable(self, frame_of_index_cols, box, - drop, append): - df = frame_of_index_cols - - msg = 'The parameter "keys" may be a column key, .*' - # forbidden type that is unhashable, e.g. set/dict-view + # forbidden type, e.g. set/iter/generator with pytest.raises(TypeError, match=msg): df.set_index(box(df['A']), drop=drop, append=append) - # forbidden type in list that is unhashable, e.g. set/dict-view + # forbidden type in list, e.g. set/iter/generator with pytest.raises(TypeError, match=msg): df.set_index(['A', df['A'], box(df['A'])], drop=drop, append=append) @@ -329,6 +311,38 @@ def __repr__(self): # missing label in list df.set_index([thing3]) + def test_set_index_custom_label_hashable_iterable(self): + # GH 24969 + + # actual example discussed in GH 24984 was e.g. for shapely.geometry + # objects (e.g. 
a collection of Points) that can be both hashable and + # iterable; using frozenset as a stand-in for testing here + + thing1 = frozenset(['One', 'red']) + thing2 = frozenset(['Two', 'blue']) + df = DataFrame({thing1: [0, 1], thing2: [2, 3]}) + expected = DataFrame({thing1: [0, 1]}, + index=Index([2, 3], name=thing2)) + + # use custom label directly + result = df.set_index(thing2) + tm.assert_frame_equal(result, expected) + + # custom label wrapped in list + result = df.set_index([thing2]) + tm.assert_frame_equal(result, expected) + + # missing key + thing3 = frozenset(['Three', 'pink']) + msg = r"frozenset\(\{'Three', 'pink'\}\)" + with pytest.raises(KeyError, match=msg): + # missing label directly + df.set_index(thing3) + + with pytest.raises(KeyError, match=msg): + # missing label in list + df.set_index([thing3]) + def test_set_index_custom_label_type_raises(self): # GH 24969 From 794f61d4d77d075b37e40144ed3adbda669ace64 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" <h.vetinari@gmx.com> Date: Fri, 1 Feb 2019 01:20:16 +0100 Subject: [PATCH 09/21] Stabilize repr of frozenset --- pandas/tests/frame/test_alter_axes.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index cbdb181554981..de131e35a7447 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -282,9 +282,8 @@ def __init__(self, name, color): def __str__(self): return "<Thing %r>" % (self.name,) - def __repr__(self): - # necessary for pretty KeyError - return self.__str__() + # necessary for pretty KeyError + __repr__ = __str__ thing1 = Thing('One', 'red') thing2 = Thing('Two', 'blue') @@ -318,8 +317,15 @@ def test_set_index_custom_label_hashable_iterable(self): # objects (e.g. 
a collection of Points) that can be both hashable and # iterable; using frozenset as a stand-in for testing here - thing1 = frozenset(['One', 'red']) - thing2 = frozenset(['Two', 'blue']) + class Thing(frozenset): + # need to stabilize repr for KeyError (due to random order in sets) + def __repr__(self): + tmp = sorted(list(self)) + # double curly brace prints one brace in format string + return "frozenset({{{}}})".format(', '.join(map(repr, tmp))) + + thing1 = Thing(['One', 'red']) + thing2 = Thing(['Two', 'blue']) df = DataFrame({thing1: [0, 1], thing2: [2, 3]}) expected = DataFrame({thing1: [0, 1]}, index=Index([2, 3], name=thing2)) @@ -333,7 +339,7 @@ def test_set_index_custom_label_hashable_iterable(self): tm.assert_frame_equal(result, expected) # missing key - thing3 = frozenset(['Three', 'pink']) + thing3 = Thing(['Three', 'pink']) msg = r"frozenset\(\{'Three', 'pink'\}\)" with pytest.raises(KeyError, match=msg): # missing label directly From c58e8b64a0d016a8551998b0fefa5d0bcea0b22c Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" <h.vetinari@gmx.com> Date: Fri, 1 Feb 2019 20:05:10 +0100 Subject: [PATCH 10/21] Review (WillAyd) --- pandas/core/frame.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 988da53d7e811..9dd5b0750c200 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4148,13 +4148,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False, missing = [] for col in keys: - if (is_scalar(col) or isinstance(col, tuple)): - # if col is a valid column key, everything is fine - # tuples are always considered keys, never as list-likes - if col not in self: - missing.append(col) - elif isinstance(col, (ABCIndexClass, ABCSeries, - np.ndarray, list)): + if isinstance(col, (ABCIndexClass, ABCSeries, np.ndarray, list)): # arrays are fine as long as they are one-dimensional if getattr(col, 'ndim', 1) > 1: raise ValueError(err_msg) From 29fa8c005ad1a5d3663e1b29006bb99b045e560f Mon Sep 17 00:00:00 2001 From: "H. Vetinari" <h.vetinari@gmx.com> Date: Fri, 1 Feb 2019 20:25:39 +0100 Subject: [PATCH 11/21] Unambiguous KeyError message --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 9dd5b0750c200..3799d9b91d652 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4172,7 +4172,7 @@ def set_index(self, keys, drop=True, append=False, inplace=False, missing.append(col) if missing: - raise KeyError('{}'.format(missing)) + raise KeyError('None of {} are in the columns'.format(missing)) if inplace: frame = self From 5590433a434bdc3f613a0919227a448a316cd5f8 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" <h.vetinari@gmx.com> Date: Sun, 3 Feb 2019 21:56:45 +0100 Subject: [PATCH 12/21] Remove redundant whatsnew --- doc/source/whatsnew/v0.24.1.rst | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/source/whatsnew/v0.24.1.rst b/doc/source/whatsnew/v0.24.1.rst index bd1f124a415b4..be0a2eb682e87 100644 --- a/doc/source/whatsnew/v0.24.1.rst +++ b/doc/source/whatsnew/v0.24.1.rst @@ -68,7 +68,6 @@ Bug Fixes **Reshaping** - Bug in :meth:`DataFrame.groupby` with :class:`Grouper` when there is a time change (DST) and grouping frequency is ``'1d'`` (:issue:`24972`) -- Fixed regression where custom hashable types could not be used as column keys in :meth:`DataFrame.set_index` (:issue:`24969`) **Visualization** From 2c4eaeaabe81e8287b0dc7e410fb7cc66f6c88be Mon Sep 17 00:00:00 2001 From: "H. Vetinari" <h.vetinari@gmx.com> Date: Sat, 9 Feb 2019 21:01:48 +0100 Subject: [PATCH 13/21] Review (jorisvandenbossche) --- pandas/compat/__init__.py | 2 ++ pandas/core/frame.py | 7 +++---- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/pandas/compat/__init__.py b/pandas/compat/__init__.py index d7ca7f8963f70..4036af85b7212 100644 --- a/pandas/compat/__init__.py +++ b/pandas/compat/__init__.py @@ -137,6 +137,7 @@ def lfilter(*args, **kwargs): reload = reload Hashable = collections.abc.Hashable Iterable = collections.abc.Iterable + Iterator = collections.abc.Iterator Mapping = collections.abc.Mapping MutableMapping = collections.abc.MutableMapping Sequence = collections.abc.Sequence @@ -199,6 +200,7 @@ def get_range_parameters(data): Hashable = collections.Hashable Iterable = collections.Iterable + Iterator = collections.Iterator Mapping = collections.Mapping MutableMapping = collections.MutableMapping Sequence = collections.Sequence diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c7252834b76c4..33b62826c5075 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -34,7 +34,7 @@ from pandas import compat from pandas.compat import (range, map, 
zip, lmap, lzip, StringIO, u, - PY36, raise_with_traceback, + PY36, raise_with_traceback, Iterator, string_and_binary_types) from pandas.compat.numpy import function as nv from pandas.core.dtypes.cast import ( @@ -4130,10 +4130,9 @@ def set_index(self, keys, drop=True, append=False, inplace=False, # arrays are fine as long as they are one-dimensional if getattr(col, 'ndim', 1) > 1: raise ValueError(err_msg) - elif is_list_like(col) and not hasattr(col, '__len__'): + elif isinstance(col, Iterator): # various iterators/generators are hashable, but should not - # raise a KeyError; we identify them by their lack of __len__. - # hashable listlikes with __len__ get tested as keys below. + # raise a KeyError; other list-likes get tested as keys below. tipo = type(col) raise TypeError(err_msg + ' Received column of ' 'type {}'.format(tipo)) From ea10359f00ebafd3bef71d24fd342ef5db459b47 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" <h.vetinari@gmx.com> Date: Sun, 17 Feb 2019 14:06:49 +0100 Subject: [PATCH 14/21] Review (jreback) --- doc/source/whatsnew/v0.25.0.rst | 1 + pandas/core/frame.py | 18 ++++++++---------- pandas/tests/frame/test_alter_axes.py | 14 ++++++++------ 3 files changed, 17 insertions(+), 16 deletions(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 686c5ad0165e7..b9b7461838dcc 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -21,6 +21,7 @@ Other Enhancements - :meth:`Timestamp.replace` now supports the ``fold`` argument to disambiguate DST transition times (:issue:`25017`) - :meth:`DataFrame.at_time` and :meth:`Series.at_time` now support :meth:`datetime.time` objects with timezones (:issue:`24043`) +- :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`) - .. 
_whatsnew_0250.api_breaking: diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 59c4f2b2a9568..c8056091e1dac 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4109,24 +4109,19 @@ def set_index(self, keys, drop=True, append=False, inplace=False, missing = [] for col in keys: - if isinstance(col, (ABCIndexClass, ABCSeries, np.ndarray, list)): + if isinstance(col, (ABCIndexClass, ABCSeries, np.ndarray, + list, Iterator)): # arrays are fine as long as they are one-dimensional - if getattr(col, 'ndim', 1) > 1: + # iterators get converted to list below + if getattr(col, 'ndim', 1) != 1: raise ValueError(err_msg) - elif isinstance(col, Iterator): - # various iterators/generators are hashable, but should not - # raise a KeyError; other list-likes get tested as keys below. - tipo = type(col) - raise TypeError(err_msg + ' Received column of ' - 'type {}'.format(tipo)) else: # everything else gets tried as a key; see GH 24969 try: found = col in self.columns except TypeError: - tipo = type(col) raise TypeError(err_msg + ' Received column of ' - 'type {}'.format(tipo)) + 'type {}'.format(type(col))) else: if not found: missing.append(col) @@ -4162,6 +4157,9 @@ def set_index(self, keys, drop=True, append=False, inplace=False, elif isinstance(col, (list, np.ndarray)): arrays.append(col) names.append(None) + elif isinstance(col, Iterator): + arrays.append(list(col)) + names.append(None) # from here, col can only be a column label else: arrays.append(frame[col]._values) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index aee2384718602..aabf666ea0e26 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -178,10 +178,10 @@ def test_set_index_pass_arrays(self, frame_of_index_cols, # MultiIndex constructor does not work directly on Series -> lambda # We also emulate a "constructor" for the label -> lambda # also test index name if append=True (name is duplicate here for 
A) - @pytest.mark.parametrize('box2', [Series, Index, np.array, list, + @pytest.mark.parametrize('box2', [Series, Index, np.array, list, iter, lambda x: MultiIndex.from_arrays([x]), lambda x: x.name]) - @pytest.mark.parametrize('box1', [Series, Index, np.array, list, + @pytest.mark.parametrize('box1', [Series, Index, np.array, list, iter, lambda x: MultiIndex.from_arrays([x]), lambda x: x.name]) @pytest.mark.parametrize('append, index_name', [(True, None), @@ -195,6 +195,9 @@ def test_set_index_pass_arrays_duplicate(self, frame_of_index_cols, drop, keys = [box1(df['A']), box2(df['A'])] result = df.set_index(keys, drop=drop, append=append) + # if either box is iter, it has been consumed; re-read + keys = [box1(df['A']), box2(df['A'])] + # need to adapt first drop for case that both keys are 'A' -- # cannot drop the same column twice; # use "is" because == would give ambiguous Boolean error for containers @@ -255,18 +258,17 @@ def test_set_index_raise_keys(self, frame_of_index_cols, drop, append): @pytest.mark.parametrize('append', [True, False]) @pytest.mark.parametrize('drop', [True, False]) - @pytest.mark.parametrize('box', [set, iter, lambda x: (y for y in x)], - ids=['set', 'iter', 'generator']) + @pytest.mark.parametrize('box', [set], ids=['set']) def test_set_index_raise_on_type(self, frame_of_index_cols, box, drop, append): df = frame_of_index_cols msg = 'The parameter "keys" may be a column key, .*' - # forbidden type, e.g. set/iter/generator + # forbidden type, e.g. set with pytest.raises(TypeError, match=msg): df.set_index(box(df['A']), drop=drop, append=append) - # forbidden type in list, e.g. set/iter/generator + # forbidden type in list, e.g. set with pytest.raises(TypeError, match=msg): df.set_index(['A', df['A'], box(df['A'])], drop=drop, append=append) From ca178954b2ee4fd05196140674e1e8c0b1eb95c8 Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" <h.vetinari@gmx.com> Date: Sun, 17 Feb 2019 16:13:26 +0100 Subject: [PATCH 15/21] Retrigger after connectivity issues From f4deacce184a5ea5a89825a4e5f50d5655acc580 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" <h.vetinari@gmx.com> Date: Mon, 18 Feb 2019 20:30:16 +0100 Subject: [PATCH 16/21] Review (jorisvandenbossche) --- pandas/core/frame.py | 9 +++++++++ pandas/tests/frame/test_alter_axes.py | 24 ++++++++++++++++++++++++ 2 files changed, 33 insertions(+) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index c8056091e1dac..dea81c9077e74 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4167,6 +4167,15 @@ def set_index(self, keys, drop=True, append=False, inplace=False, if drop: to_remove.append(col) + if len(arrays[-1]) != len(self): + # check newest element against length of calling frame; + # ensure_index_from_sequences does not raise if append is False + raise ValueError('Length mismatch: Expected {len_self} rows, ' + 'received array of length {len_col}'.format( + len_self=len(self), + len_col=len(arrays[-1]) + )) + index = ensure_index_from_sequences(arrays, names) if verify_integrity and not index.is_unique: diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index aabf666ea0e26..61e4768e930fe 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -256,6 +256,30 @@ def test_set_index_raise_keys(self, frame_of_index_cols, drop, append): df.set_index(['A', df['A'], tuple(df['A'])], drop=drop, append=append) + # MultiIndex constructor does not work directly on Series -> lambda + @pytest.mark.parametrize('box', [Series, Index, np.array, iter, + lambda x: MultiIndex.from_arrays([x])], + ids=['Series', 'Index', 'np.array', + 'iter', 'MultiIndex']) + @pytest.mark.parametrize('length', [4, 6], ids=['too_short', 'too_long']) + @pytest.mark.parametrize('append', [True, False]) + @pytest.mark.parametrize('drop', [True, False]) + def 
test_set_index_raise_on_len(self, frame_of_index_cols, box, length, + drop, append): + df = frame_of_index_cols # has length 5 + + values = np.random.randint(0, 10, (length,)) + + msg = 'Length mismatch: Expected 5 rows, received array of length.*' + + # wrong length directly + with pytest.raises(ValueError, match=msg): + df.set_index(box(values), drop=drop, append=append) + + # wrong length in list + with pytest.raises(ValueError, match=msg): + df.set_index(['A', df.A, box(values)], drop=drop, append=append) + @pytest.mark.parametrize('append', [True, False]) @pytest.mark.parametrize('drop', [True, False]) @pytest.mark.parametrize('box', [set], ids=['set']) From 9bfcfde175f6c78c2f7415139a561e2972cbe89b Mon Sep 17 00:00:00 2001 From: "H. Vetinari" <h.vetinari@gmx.com> Date: Mon, 18 Feb 2019 20:39:48 +0100 Subject: [PATCH 17/21] move test for easier diff --- pandas/tests/frame/test_alter_axes.py | 34 +++++++++++++-------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index 61e4768e930fe..f31fe6fee9949 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -256,6 +256,23 @@ def test_set_index_raise_keys(self, frame_of_index_cols, drop, append): df.set_index(['A', df['A'], tuple(df['A'])], drop=drop, append=append) + @pytest.mark.parametrize('append', [True, False]) + @pytest.mark.parametrize('drop', [True, False]) + @pytest.mark.parametrize('box', [set], ids=['set']) + def test_set_index_raise_on_type(self, frame_of_index_cols, box, + drop, append): + df = frame_of_index_cols + + msg = 'The parameter "keys" may be a column key, .*' + # forbidden type, e.g. set + with pytest.raises(TypeError, match=msg): + df.set_index(box(df['A']), drop=drop, append=append) + + # forbidden type in list, e.g. 
set + with pytest.raises(TypeError, match=msg): + df.set_index(['A', df['A'], box(df['A'])], + drop=drop, append=append) + # MultiIndex constructor does not work directly on Series -> lambda @pytest.mark.parametrize('box', [Series, Index, np.array, iter, lambda x: MultiIndex.from_arrays([x])], @@ -280,23 +297,6 @@ def test_set_index_raise_on_len(self, frame_of_index_cols, box, length, with pytest.raises(ValueError, match=msg): df.set_index(['A', df.A, box(values)], drop=drop, append=append) - @pytest.mark.parametrize('append', [True, False]) - @pytest.mark.parametrize('drop', [True, False]) - @pytest.mark.parametrize('box', [set], ids=['set']) - def test_set_index_raise_on_type(self, frame_of_index_cols, box, - drop, append): - df = frame_of_index_cols - - msg = 'The parameter "keys" may be a column key, .*' - # forbidden type, e.g. set - with pytest.raises(TypeError, match=msg): - df.set_index(box(df['A']), drop=drop, append=append) - - # forbidden type in list, e.g. set - with pytest.raises(TypeError, match=msg): - df.set_index(['A', df['A'], box(df['A'])], - drop=drop, append=append) - def test_set_index_custom_label_type(self): # GH 24969 From ca2ac6078e528a6a1b6a72da1a1cbe2a08d70f9c Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" <h.vetinari@gmx.com> Date: Tue, 19 Feb 2019 20:19:41 +0100 Subject: [PATCH 18/21] Review (jreback) --- doc/source/whatsnew/v0.25.0.rst | 2 +- pandas/tests/frame/test_alter_axes.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v0.25.0.rst b/doc/source/whatsnew/v0.25.0.rst index 70defbc070f73..797ec2b6c3a83 100644 --- a/doc/source/whatsnew/v0.25.0.rst +++ b/doc/source/whatsnew/v0.25.0.rst @@ -21,7 +21,7 @@ Other Enhancements - :meth:`Timestamp.replace` now supports the ``fold`` argument to disambiguate DST transition times (:issue:`25017`) - :meth:`DataFrame.at_time` and :meth:`Series.at_time` now support :meth:`datetime.time` objects with timezones (:issue:`24043`) -- :meth:`DataFrame.set_index` now works for instances of ``abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`) +- :meth:`DataFrame.set_index` now works for instances of ``collections.abc.Iterator``, provided their output is of the same length as the calling frame (:issue:`22484`, :issue:`24984`) - .. _whatsnew_0250.api_breaking: diff --git a/pandas/tests/frame/test_alter_axes.py b/pandas/tests/frame/test_alter_axes.py index f31fe6fee9949..a25e893e08900 100644 --- a/pandas/tests/frame/test_alter_axes.py +++ b/pandas/tests/frame/test_alter_axes.py @@ -283,6 +283,7 @@ def test_set_index_raise_on_type(self, frame_of_index_cols, box, @pytest.mark.parametrize('drop', [True, False]) def test_set_index_raise_on_len(self, frame_of_index_cols, box, length, drop, append): + # GH 24984 df = frame_of_index_cols # has length 5 values = np.random.randint(0, 10, (length,)) From 87bd0a6f7b9377b24b2a185f8a658f9c23f5beac Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" <h.vetinari@gmx.com> Date: Tue, 19 Feb 2019 22:20:52 +0100 Subject: [PATCH 19/21] Add 'conda list' for azure/posix after activate 'pandas-dev' --- ci/incremental/setup_conda_environment.cmd | 1 + ci/incremental/setup_conda_environment.sh | 3 +++ ci/install_travis.sh | 1 + 3 files changed, 5 insertions(+) diff --git a/ci/incremental/setup_conda_environment.cmd b/ci/incremental/setup_conda_environment.cmd index c104d78591384..1893954c570c8 100644 --- a/ci/incremental/setup_conda_environment.cmd +++ b/ci/incremental/setup_conda_environment.cmd @@ -16,6 +16,7 @@ conda remove --all -q -y -n pandas-dev conda env create --file=ci\deps\azure-windows-%CONDA_PY%.yaml call activate pandas-dev +@rem Display pandas-dev environment (for debugging) conda list if %errorlevel% neq 0 exit /b %errorlevel% diff --git a/ci/incremental/setup_conda_environment.sh b/ci/incremental/setup_conda_environment.sh index f174c17a614d8..6903553abcfe9 100755 --- a/ci/incremental/setup_conda_environment.sh +++ b/ci/incremental/setup_conda_environment.sh @@ -23,6 +23,9 @@ set +v source activate pandas-dev set -v +# Display pandas-dev environment (for debugging) +conda list + # remove any installed pandas package # w/o removing anything else echo diff --git a/ci/install_travis.sh b/ci/install_travis.sh index d1a940f119228..5169a55ae6e91 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -100,6 +100,7 @@ pip list --format columns |grep pandas echo "[running setup.py develop]" python setup.py develop || exit 1 +# Display pandas-dev environment (for debugging) echo echo "[show environment]" conda list From 759b369a8a27db1cb21da454ea718aa33b6431c3 Mon Sep 17 00:00:00 2001 From: "H. Vetinari" <h.vetinari@gmx.com> Date: Wed, 20 Feb 2019 18:24:34 +0100 Subject: [PATCH 20/21] Revert "Add 'conda list' for azure/posix after activate 'pandas-dev'" This reverts commit 87bd0a6f7b9377b24b2a185f8a658f9c23f5beac. 
--- ci/incremental/setup_conda_environment.cmd | 1 - ci/incremental/setup_conda_environment.sh | 3 --- ci/install_travis.sh | 1 - 3 files changed, 5 deletions(-) diff --git a/ci/incremental/setup_conda_environment.cmd b/ci/incremental/setup_conda_environment.cmd index 1893954c570c8..c104d78591384 100644 --- a/ci/incremental/setup_conda_environment.cmd +++ b/ci/incremental/setup_conda_environment.cmd @@ -16,7 +16,6 @@ conda remove --all -q -y -n pandas-dev conda env create --file=ci\deps\azure-windows-%CONDA_PY%.yaml call activate pandas-dev -@rem Display pandas-dev environment (for debugging) conda list if %errorlevel% neq 0 exit /b %errorlevel% diff --git a/ci/incremental/setup_conda_environment.sh b/ci/incremental/setup_conda_environment.sh index 6903553abcfe9..f174c17a614d8 100755 --- a/ci/incremental/setup_conda_environment.sh +++ b/ci/incremental/setup_conda_environment.sh @@ -23,9 +23,6 @@ set +v source activate pandas-dev set -v -# Display pandas-dev environment (for debugging) -conda list - # remove any installed pandas package # w/o removing anything else echo diff --git a/ci/install_travis.sh b/ci/install_travis.sh index 5169a55ae6e91..d1a940f119228 100755 --- a/ci/install_travis.sh +++ b/ci/install_travis.sh @@ -100,7 +100,6 @@ pip list --format columns |grep pandas echo "[running setup.py develop]" python setup.py develop || exit 1 -# Display pandas-dev environment (for debugging) echo echo "[show environment]" conda list From ecc7d03973975d4af91927e2117fefa02891fcbb Mon Sep 17 00:00:00 2001 From: "H. 
Vetinari" <h.vetinari@gmx.com> Date: Thu, 21 Feb 2019 08:19:17 +0100 Subject: [PATCH 21/21] Reflect change in docstring --- pandas/core/frame.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index dace327126aa2..608e5c53ec094 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -4025,7 +4025,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False, This parameter can be either a single column key, a single array of the same length as the calling DataFrame, or a list containing an arbitrary combination of column keys and arrays. Here, "array" - encompasses :class:`Series`, :class:`Index` and ``np.ndarray``. + encompasses :class:`Series`, :class:`Index`, ``np.ndarray``, and + instances of :class:`collections.abc.Iterator`. drop : bool, default True Delete columns to be used as the new index. append : bool, default False @@ -4169,8 +4170,8 @@ def set_index(self, keys, drop=True, append=False, inplace=False, to_remove.append(col) if len(arrays[-1]) != len(self): - # check newest element against length of calling frame; - # ensure_index_from_sequences does not raise if append is False + # check newest element against length of calling frame, since + # ensure_index_from_sequences would not raise for append=False. raise ValueError('Length mismatch: Expected {len_self} rows, ' 'received array of length {len_col}'.format( len_self=len(self),