Skip to content

PR: unicode, mostly #2201

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from Nov 9, 2012
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions pandas/core/common.py
Original file line number Diff line number Diff line change
Expand Up @@ -1140,6 +1140,9 @@ def pprint_thing(thing, _nest_lvl=0):
from pandas.core.format import print_config
if thing is None:
result = ''
elif (py3compat.PY3 and hasattr(thing,'__next__')) or \
hasattr(thing,'next'):
return unicode(thing)
elif (isinstance(thing, dict) and
_nest_lvl < print_config.pprint_nest_depth):
result = _pprint_dict(thing, _nest_lvl)
Expand Down
3 changes: 2 additions & 1 deletion pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,8 @@ def to_string(self, force_unicode=False):
if len(frame.columns) == 0 or len(frame.index) == 0:
info_line = (u'Empty %s\nColumns: %s\nIndex: %s'
% (type(self.frame).__name__,
frame.columns, frame.index))
com.pprint_thing(frame.columns),
com.pprint_thing(frame.index)))
text = info_line
else:
strcols = self._to_str_columns(force_unicode)
Expand Down
5 changes: 3 additions & 2 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -3915,11 +3915,12 @@ def _apply_standard(self, func, axis, ignore_failures=False):
try:
if hasattr(e, 'args'):
k = res_index[i]
e.args = e.args + ('occurred at index %s' % str(k),)
e.args = e.args + ('occurred at index %s' %
com.pprint_thing(k),)
except (NameError, UnboundLocalError): # pragma: no cover
# no k defined yet
pass
raise
raise e

if len(results) > 0 and _is_sequence(results[0]):
if not isinstance(results[0], Series):
Expand Down
13 changes: 9 additions & 4 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -209,9 +209,10 @@ def __str__(self):
try:
return np.array_repr(self.values)
except UnicodeError:
converted = u','.join(unicode(x) for x in self.values)
return u'%s([%s], dtype=''%s'')' % (type(self).__name__, converted,
converted = u','.join(com.pprint_thing(x) for x in self.values)
result = u'%s([%s], dtype=''%s'')' % (type(self).__name__, converted,
str(self.values.dtype))
return com.console_encode(result)

def _mpl_repr(self):
# how to represent ourselves to matplotlib
Expand Down Expand Up @@ -1320,11 +1321,15 @@ def __repr__(self):
self[-50:].values])
else:
values = self.values
summary = np.array2string(values, max_line_width=70)

summary = com.pprint_thing(values)

np.set_printoptions(threshold=options['threshold'])

return output % summary
if py3compat.PY3:
return output % summary
else:
return com.console_encode(output % summary)

def __len__(self):
return len(self.labels[0])
Expand Down
7 changes: 4 additions & 3 deletions pandas/core/internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,9 +49,10 @@ def set_ref_items(self, ref_items, maybe_rename=True):
self.ref_items = ref_items

def __repr__(self):
# Builds a one-line summary of the form
# "<class name>: <items>, <dims joined by ' x '>, dtype <dtype>".
# NOTE(review): this text is a diff hunk flattened without +/- markers, so
# superseded and replacement lines appear to sit back to back -- confirm
# against the repository before treating it as runnable code.
# presumably the superseded line: each dimension formatted with str()
shape = ' x '.join([str(s) for s in self.shape])
# presumably its replacement: each dimension formatted with com.pprint_thing
shape = ' x '.join([com.pprint_thing(s) for s in self.shape])
name = type(self).__name__
# presumably the superseded return: hands back the formatted string as-is
return '%s: %s, %s, dtype %s' % (name, self.items, shape, self.dtype)
# presumably the replacement: format first, then pass the text through
# com.console_encode before returning it
result = '%s: %s, %s, dtype %s' % (name, self.items, shape, self.dtype)
return com.console_encode(result) # repr must return byte-string

def __contains__(self, item):
return item in self.items
Expand Down Expand Up @@ -935,7 +936,7 @@ def _find_block(self, item):

def _check_have(self, item):
# Raises KeyError('no item named <item>') when `item` is not in self.items;
# otherwise falls through and returns None.
# NOTE(review): this text is a diff hunk flattened without +/- markers, so the
# two raise statements below are presumably the superseded line followed by
# its replacement -- confirm against the repository.
if item not in self.items:
# presumably the superseded line: label formatted with str()
raise KeyError('no item named %s' % str(item))
# presumably its replacement: label formatted with com.pprint_thing
raise KeyError('no item named %s' % com.pprint_thing(item))

def reindex_axis(self, new_axis, method=None, axis=0, copy=True):
new_axis = _ensure_index(new_axis)
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/test_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1295,6 +1295,21 @@ def test_iget_value(self):
expected = self.frame.get_value(row, col)
assert_almost_equal(result, expected)

def test_nested_exception(self):
    """Printing a frame with a corrupted index must not raise UnboundLocalError.

    The frame is indexed on columns "a" and "b", then the first index
    entry is replaced by a list before the frame is printed. Any
    exception other than UnboundLocalError (or no exception at all)
    lets the test pass.
    """
    # Ignore the strange way of triggering the problem (which may get
    # fixed); it is just a way to trigger the issue of re-raising an
    # outer exception without a named argument.
    df = DataFrame({"a": [1, 2, 3],
                    "b": [4, 5, 6],
                    "c": [7, 8, 9]}).set_index(["a", "b"])
    labels = list(df.index)
    labels[0] = ["a", "b"]
    df.index = labels

    # `print(df)` with a single argument and the bare `except` clauses
    # below parse identically on Python 2 and Python 3, unlike the
    # Python-2-only `print df` / `except Exception, e` spellings.
    try:
        print(df)
    except UnboundLocalError:
        self.fail("printing the frame raised UnboundLocalError")
    except Exception:
        # Deliberate: only UnboundLocalError counts as a failure here.
        pass

_seriesd = tm.getSeriesData()
_tsd = tm.getTimeSeriesData()

Expand Down
8 changes: 8 additions & 0 deletions pandas/tests/test_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -847,6 +847,10 @@ def test_int_name_format(self):
repr(s)
repr(df)

def test_print_unicode_columns(self):
df=pd.DataFrame({u"\u05d0":[1,2,3],"\u05d1":[4,5,6],"c":[7,8,9]})
print(df.columns) # should not raise UnicodeDecodeError

class TestMultiIndex(unittest.TestCase):

def setUp(self):
Expand Down Expand Up @@ -1671,6 +1675,10 @@ def test_tolist(self):
exp = list(self.index.values)
self.assertEqual(result, exp)

def test_repr_with_unicode_data(self):
d={"a":[u"\u05d0",2,3],"b":[4,5,6],"c":[7,8,9]}
index=pd.DataFrame(d).set_index(["a","b"]).index
self.assertFalse("\\u" in repr(index)) # we don't want unicode-escaped

def test_get_combined_index():
from pandas.core.index import _get_combined_index
Expand Down
7 changes: 7 additions & 0 deletions pandas/tests/test_internals.py
Original file line number Diff line number Diff line change
Expand Up @@ -408,6 +408,13 @@ def test_get_numeric_data(self):

self.assertEqual(rs.ix[0, 'bool'], not df.ix[0, 'bool'])

def test_missing_unicode_key(self):
    """Looking up a missing unicode column must not raise UnicodeEncodeError.

    A KeyError from the lookup is the expected outcome and is swallowed;
    any other exception propagates and fails the test.
    """
    frame = DataFrame({"a": [1]})
    missing = u"\u05d0"
    try:
        frame.ix[:, missing]  # should not raise UnicodeEncodeError
    except KeyError:
        # this is the expected exception
        pass

if __name__ == '__main__':
# unittest.main()
import nose
Expand Down