From 6b13318f12edbabf13f43b372f3a608bddb0d5e0 Mon Sep 17 00:00:00 2001 From: y-p Date: Thu, 8 Nov 2012 20:55:36 +0200 Subject: [PATCH 01/11] TST: printing df.columns should not raise exception when labels are unicode --- pandas/tests/test_index.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index c1d0894f9bfef..2b83e7108a094 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -847,6 +847,10 @@ def test_int_name_format(self): repr(s) repr(df) + def test_print_unicode_columns(self): + df=pd.DataFrame({u"\u05d0":[1,2,3],"\u05d1":[4,5,6],"c":[7,8,9]}) + print(df.columns) # should not raise UnicodeDecodeError + class TestMultiIndex(unittest.TestCase): def setUp(self): From 07921941ca9b2bce86eb91c683dfae3c3665b59a Mon Sep 17 00:00:00 2001 From: y-p Date: Thu, 8 Nov 2012 23:33:40 +0200 Subject: [PATCH 02/11] BUG: printing df.columns should not raise exception when labels are unicode --- pandas/core/format.py | 3 ++- pandas/core/index.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/pandas/core/format.py b/pandas/core/format.py index aae911ba807ef..4505e6153a9a3 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -268,7 +268,8 @@ def to_string(self, force_unicode=False): if len(frame.columns) == 0 or len(frame.index) == 0: info_line = (u'Empty %s\nColumns: %s\nIndex: %s' % (type(self.frame).__name__, - frame.columns, frame.index)) + com.pprint_thing(frame.columns), + com.pprint_thing(frame.index))) text = info_line else: strcols = self._to_str_columns(force_unicode) diff --git a/pandas/core/index.py b/pandas/core/index.py index 1ba78c698a1b5..65b60941fbbfc 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -209,9 +209,10 @@ def __str__(self): try: return np.array_repr(self.values) except UnicodeError: - converted = u','.join(unicode(x) for x in self.values) - return u'%s([%s], dtype=''%s'')' % (type(self).__name__, converted, + 
converted = u','.join(com.pprint_thing(x) for x in self.values) + result = u'%s([%s], dtype=''%s'')' % (type(self).__name__, converted, str(self.values.dtype)) + return com.console_encode(result) def _mpl_repr(self): # how to represent ourselves to matplotlib From 83c377ee763f1173eb4c7d9fe38a27d0ec623c80 Mon Sep 17 00:00:00 2001 From: y-p Date: Fri, 9 Nov 2012 01:33:26 +0200 Subject: [PATCH 03/11] BUG: pprint_thing() should not realize lazy things The semantics of str()/unicode() do not realize iterators, and neither should pprint_thing. --- pandas/core/common.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/pandas/core/common.py b/pandas/core/common.py index 7bbbaab49e864..46c28e8af52ac 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -1140,6 +1140,9 @@ def pprint_thing(thing, _nest_lvl=0): from pandas.core.format import print_config if thing is None: result = '' + elif (py3compat.PY3 and hasattr(thing,'__next__')) or \ + hasattr(thing,'next'): + return unicode(thing) elif (isinstance(thing, dict) and _nest_lvl < print_config.pprint_nest_depth): result = _pprint_dict(thing, _nest_lvl) From f6ca3e3fe11b0c5679ab2a3bb8764280ac084651 Mon Sep 17 00:00:00 2001 From: y-p Date: Thu, 8 Nov 2012 21:08:30 +0200 Subject: [PATCH 04/11] TST: df.ix[:,unicode] should not die with UnicodeEncodeError --- pandas/tests/test_internals.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 57ccfff23e5de..e9c0b2ae980d6 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -408,6 +408,13 @@ def test_get_numeric_data(self): self.assertEqual(rs.ix[0, 'bool'], not df.ix[0, 'bool']) + def test_missing_unicode_key(self): + df=DataFrame({"a":[1]}) + try: + df.ix[:,u"\u05d0"] # should not raise UnicodeEncodeError + except KeyError: + pass # this is the expected exception + if __name__ == '__main__': # unittest.main() import nose From 
51877c8ffb9625e4132e332e2ff943ac787e00ad Mon Sep 17 00:00:00 2001 From: y-p Date: Thu, 8 Nov 2012 21:09:35 +0200 Subject: [PATCH 05/11] BUG: df.ix[:,unicode] should not die with UnicodeEncodeError --- pandas/core/internals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 10a85c5592514..24e88fd43ab0b 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -935,7 +935,7 @@ def _find_block(self, item): def _check_have(self, item): if item not in self.items: - raise KeyError('no item named %s' % str(item)) + raise KeyError('no item named %s' % com.pprint_thing(item)) def reindex_axis(self, new_axis, method=None, axis=0, copy=True): new_axis = _ensure_index(new_axis) From 209fb0b67a1b18c31735e42751529d6d52e729ac Mon Sep 17 00:00:00 2001 From: y-p Date: Thu, 8 Nov 2012 21:28:24 +0200 Subject: [PATCH 06/11] TST: MultiIndex repr should properly encode unicode labels --- pandas/tests/test_index.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index 2b83e7108a094..b94840d0dfd85 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -1675,6 +1675,10 @@ def test_tolist(self): exp = list(self.index.values) self.assertEqual(result, exp) + def test_repr_with_unicode_data(self): + d={"a":[u"\u05d0",2,3],"b":[4,5,6],"c":[7,8,9]} + index=pd.DataFrame(d).set_index(["a","b"]).index + self.assertFalse("\\u" in repr(index)) # we don't want unicode-escaped def test_get_combined_index(): from pandas.core.index import _get_combined_index From 46ef8a606456044ecf048b2374defa047fcc9cdb Mon Sep 17 00:00:00 2001 From: y-p Date: Fri, 9 Nov 2012 01:33:13 +0200 Subject: [PATCH 07/11] BUG: MultiIndex repr should properly encode unicode labels --- pandas/core/index.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pandas/core/index.py b/pandas/core/index.py index 65b60941fbbfc..291502c406018 
100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -1321,11 +1321,15 @@ def __repr__(self): self[-50:].values]) else: values = self.values - summary = np.array2string(values, max_line_width=70) + + summary = com.pprint_thing(values) np.set_printoptions(threshold=options['threshold']) - return output % summary + if py3compat.PY3: + return output % summary + else: + return com.console_encode(output % summary) def __len__(self): return len(self.labels[0]) From 4cc097080da0b3197a10605d013e8f38eba5af71 Mon Sep 17 00:00:00 2001 From: y-p Date: Thu, 8 Nov 2012 22:55:33 +0200 Subject: [PATCH 08/11] TST: nested exceptions clobber the exception context, must reraise with named arg --- pandas/tests/test_frame.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 5c5fd1902c4cc..57799c6455fee 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1295,6 +1295,21 @@ def test_iget_value(self): expected = self.frame.get_value(row, col) assert_almost_equal(result, expected) + def test_nested_exception(self): + # Ignore the strange way of triggering the problem + # (which may get fixed), it's just a way to trigger + # the issue or reraising an outer exception without + # a named argument + df=DataFrame({"a":[1,2,3],"b":[4,5,6],"c":[7,8,9]}).set_index(["a","b"]) + l=list(df.index) + l[0]=["a","b"] + df.index=l + + try: + print df + except Exception,e: + self.assertNotEqual(type(e),UnboundLocalError) + _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() From f2db4c170850741314df93b82b2395011eaa6453 Mon Sep 17 00:00:00 2001 From: y-p Date: Thu, 8 Nov 2012 22:20:02 +0200 Subject: [PATCH 09/11] BUG: nested exceptions clobber the exception context, must reraise with named arg --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5a000485d85a4..4889285d5879c 100644 --- 
a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3919,7 +3919,7 @@ def _apply_standard(self, func, axis, ignore_failures=False): except (NameError, UnboundLocalError): # pragma: no cover # no k defined yet pass - raise + raise e if len(results) > 0 and _is_sequence(results[0]): if not isinstance(results[0], Series): From 2d755914b18980c676bfe023c8cb6efe4ee5cc20 Mon Sep 17 00:00:00 2001 From: y-p Date: Fri, 9 Nov 2012 00:48:37 +0200 Subject: [PATCH 10/11] BUG: use pprint_thing() rather than str() --- pandas/core/frame.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 4889285d5879c..2c3bc9a31c9b6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3915,7 +3915,8 @@ def _apply_standard(self, func, axis, ignore_failures=False): try: if hasattr(e, 'args'): k = res_index[i] - e.args = e.args + ('occurred at index %s' % str(k),) + e.args = e.args + ('occurred at index %s' % + com.pprint_thing(k),) except (NameError, UnboundLocalError): # pragma: no cover # no k defined yet pass From acfa4aba4be4d4b81656484664645e9708a4c2bf Mon Sep 17 00:00:00 2001 From: y-p Date: Fri, 9 Nov 2012 00:49:59 +0200 Subject: [PATCH 11/11] BUG: use pprint_thing() rather than str() in Block.repr() --- pandas/core/internals.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 24e88fd43ab0b..cd1ca8838d65d 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -49,9 +49,10 @@ def set_ref_items(self, ref_items, maybe_rename=True): self.ref_items = ref_items def __repr__(self): - shape = ' x '.join([str(s) for s in self.shape]) + shape = ' x '.join([com.pprint_thing(s) for s in self.shape]) name = type(self).__name__ - return '%s: %s, %s, dtype %s' % (name, self.items, shape, self.dtype) + result = '%s: %s, %s, dtype %s' % (name, self.items, shape, self.dtype) + return com.console_encode(result) # repr must return 
byte-string def __contains__(self, item): return item in self.items