diff --git a/pandas/core/common.py b/pandas/core/common.py index 7bbbaab49e864..46c28e8af52ac 100644 --- a/pandas/core/common.py +++ b/pandas/core/common.py @@ -1140,6 +1140,9 @@ def pprint_thing(thing, _nest_lvl=0): from pandas.core.format import print_config if thing is None: result = '' + elif (py3compat.PY3 and hasattr(thing,'__next__')) or \ + hasattr(thing,'next'): + return unicode(thing) elif (isinstance(thing, dict) and _nest_lvl < print_config.pprint_nest_depth): result = _pprint_dict(thing, _nest_lvl) diff --git a/pandas/core/format.py b/pandas/core/format.py index aae911ba807ef..4505e6153a9a3 100644 --- a/pandas/core/format.py +++ b/pandas/core/format.py @@ -268,7 +268,8 @@ def to_string(self, force_unicode=False): if len(frame.columns) == 0 or len(frame.index) == 0: info_line = (u'Empty %s\nColumns: %s\nIndex: %s' % (type(self.frame).__name__, - frame.columns, frame.index)) + com.pprint_thing(frame.columns), + com.pprint_thing(frame.index))) text = info_line else: strcols = self._to_str_columns(force_unicode) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 5a000485d85a4..2c3bc9a31c9b6 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3915,11 +3915,12 @@ def _apply_standard(self, func, axis, ignore_failures=False): try: if hasattr(e, 'args'): k = res_index[i] - e.args = e.args + ('occurred at index %s' % str(k),) + e.args = e.args + ('occurred at index %s' % + com.pprint_thing(k),) except (NameError, UnboundLocalError): # pragma: no cover # no k defined yet pass - raise + raise e if len(results) > 0 and _is_sequence(results[0]): if not isinstance(results[0], Series): diff --git a/pandas/core/index.py b/pandas/core/index.py index 1ba78c698a1b5..291502c406018 100644 --- a/pandas/core/index.py +++ b/pandas/core/index.py @@ -209,9 +209,10 @@ def __str__(self): try: return np.array_repr(self.values) except UnicodeError: - converted = u','.join(unicode(x) for x in self.values) - return u'%s([%s], dtype=''%s'')' % 
(type(self).__name__, converted, + converted = u','.join(com.pprint_thing(x) for x in self.values) + result = u'%s([%s], dtype=''%s'')' % (type(self).__name__, converted, str(self.values.dtype)) + return com.console_encode(result) def _mpl_repr(self): # how to represent ourselves to matplotlib @@ -1320,11 +1321,15 @@ def __repr__(self): self[-50:].values]) else: values = self.values - summary = np.array2string(values, max_line_width=70) + + summary = com.pprint_thing(values) np.set_printoptions(threshold=options['threshold']) - return output % summary + if py3compat.PY3: + return output % summary + else: + return com.console_encode(output % summary) def __len__(self): return len(self.labels[0]) diff --git a/pandas/core/internals.py b/pandas/core/internals.py index 10a85c5592514..cd1ca8838d65d 100644 --- a/pandas/core/internals.py +++ b/pandas/core/internals.py @@ -49,9 +49,10 @@ def set_ref_items(self, ref_items, maybe_rename=True): self.ref_items = ref_items def __repr__(self): - shape = ' x '.join([str(s) for s in self.shape]) + shape = ' x '.join([com.pprint_thing(s) for s in self.shape]) name = type(self).__name__ - return '%s: %s, %s, dtype %s' % (name, self.items, shape, self.dtype) + result = '%s: %s, %s, dtype %s' % (name, self.items, shape, self.dtype) + return com.console_encode(result) # repr must return byte-string def __contains__(self, item): return item in self.items @@ -935,7 +936,7 @@ def _find_block(self, item): def _check_have(self, item): if item not in self.items: - raise KeyError('no item named %s' % str(item)) + raise KeyError('no item named %s' % com.pprint_thing(item)) def reindex_axis(self, new_axis, method=None, axis=0, copy=True): new_axis = _ensure_index(new_axis) diff --git a/pandas/tests/test_frame.py b/pandas/tests/test_frame.py index 5c5fd1902c4cc..57799c6455fee 100644 --- a/pandas/tests/test_frame.py +++ b/pandas/tests/test_frame.py @@ -1295,6 +1295,21 @@ def test_iget_value(self): expected = self.frame.get_value(row, col) 
assert_almost_equal(result, expected) + def test_nested_exception(self): + # Ignore the strange way of triggering the problem + # (which may get fixed), it's just a way to trigger + # the issue of reraising an outer exception without + # a named argument + df=DataFrame({"a":[1,2,3],"b":[4,5,6],"c":[7,8,9]}).set_index(["a","b"]) + l=list(df.index) + l[0]=["a","b"] + df.index=l + + try: + print df + except Exception,e: + self.assertNotEqual(type(e),UnboundLocalError) + _seriesd = tm.getSeriesData() _tsd = tm.getTimeSeriesData() diff --git a/pandas/tests/test_index.py b/pandas/tests/test_index.py index c1d0894f9bfef..b94840d0dfd85 100644 --- a/pandas/tests/test_index.py +++ b/pandas/tests/test_index.py @@ -847,6 +847,10 @@ def test_int_name_format(self): repr(s) repr(df) + def test_print_unicode_columns(self): + df=pd.DataFrame({u"\u05d0":[1,2,3],"\u05d1":[4,5,6],"c":[7,8,9]}) + print(df.columns) # should not raise UnicodeDecodeError + class TestMultiIndex(unittest.TestCase): def setUp(self): @@ -1671,6 +1675,10 @@ def test_tolist(self): exp = list(self.index.values) self.assertEqual(result, exp) + def test_repr_with_unicode_data(self): + d={"a":[u"\u05d0",2,3],"b":[4,5,6],"c":[7,8,9]} + index=pd.DataFrame(d).set_index(["a","b"]).index + self.assertFalse("\\u" in repr(index)) # we don't want unicode-escaped def test_get_combined_index(): from pandas.core.index import _get_combined_index diff --git a/pandas/tests/test_internals.py b/pandas/tests/test_internals.py index 57ccfff23e5de..e9c0b2ae980d6 100644 --- a/pandas/tests/test_internals.py +++ b/pandas/tests/test_internals.py @@ -408,6 +408,13 @@ def test_get_numeric_data(self): self.assertEqual(rs.ix[0, 'bool'], not df.ix[0, 'bool']) + def test_missing_unicode_key(self): + df=DataFrame({"a":[1]}) + try: + df.ix[:,u"\u05d0"] # should not raise UnicodeEncodeError + except KeyError: + pass # this is the expected exception + if __name__ == '__main__': # unittest.main() import nose