Skip to content
Merged
67 changes: 26 additions & 41 deletions pandas/core/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
string representation of NAN to use, default 'NaN'
formatters : list or dict of one-parameter functions, optional
formatter functions to apply to columns' elements by position or name,
default None
default None; if the result is a string, it must be a unicode string.
float_format : one-parameter function, optional
formatter function to apply to columns' elements if they are floats
default None
Expand All @@ -62,7 +62,7 @@ class SeriesFormatter(object):
def __init__(self, series, buf=None, header=True, length=True,
na_rep='NaN', name=False, float_format=None):
self.series = series
self.buf = buf if buf is not None else StringIO()
self.buf = buf if buf is not None else StringIO(u"")
self.name = name
self.na_rep = na_rep
self.length = length
Expand Down Expand Up @@ -112,7 +112,7 @@ def to_string(self):
series = self.series

if len(series) == 0:
return ''
return u''

fmt_index, have_header = self._get_formatted_index()
fmt_values = self._get_formatted_values()
Expand All @@ -135,9 +135,7 @@ def to_string(self):
if footer:
result.append(footer)

if py3compat.PY3:
return unicode(u'\n'.join(result))
return com.console_encode(u'\n'.join(result))
return unicode(u'\n'.join(result))

if py3compat.PY3: # pragma: no cover
_encode_diff = lambda x: 0
Expand Down Expand Up @@ -200,10 +198,15 @@ def __init__(self, frame, buf=None, columns=None, col_space=None,
else:
self.columns = frame.columns

def _to_str_columns(self, force_unicode=False):
def _to_str_columns(self, force_unicode=None):
"""
Render a DataFrame to a list of columns (as lists of strings).
"""
import warnings
if force_unicode is not None: # pragma: no cover
warnings.warn("force_unicode is deprecated, it will have no effect",
FutureWarning)

# may include levels names also
str_index = self._get_formatted_index()
str_columns = self._get_formatted_column_labels()
Expand Down Expand Up @@ -237,32 +240,17 @@ def _to_str_columns(self, force_unicode=False):
if self.index:
strcols.insert(0, str_index)

if not py3compat.PY3:
if force_unicode:
def make_unicode(x):
if isinstance(x, unicode):
return x
return x.decode('utf-8')
strcols = map(lambda col: map(make_unicode, col), strcols)
else:
# Generally everything is plain strings, which has ascii
# encoding. Problem is when there is a char with value over
# 127. Everything then gets converted to unicode.
try:
map(lambda col: map(str, col), strcols)
except UnicodeError:
def make_unicode(x):
if isinstance(x, unicode):
return x
return x.decode('utf-8')
strcols = map(lambda col: map(make_unicode, col), strcols)

return strcols

def to_string(self, force_unicode=False):
def to_string(self, force_unicode=None):
"""
Render a DataFrame to a console-friendly tabular output.
"""
import warnings
if force_unicode is not None: # pragma: no cover
warnings.warn("force_unicode is deprecated, it will have no effect",
FutureWarning)

frame = self.frame

if len(frame.columns) == 0 or len(frame.index) == 0:
Expand All @@ -272,15 +260,20 @@ def to_string(self, force_unicode=False):
com.pprint_thing(frame.index)))
text = info_line
else:
strcols = self._to_str_columns(force_unicode)
strcols = self._to_str_columns()
text = adjoin(1, *strcols)

self.buf.writelines(text)

def to_latex(self, force_unicode=False, column_format=None):
def to_latex(self, force_unicode=None, column_format=None):
"""
Render a DataFrame to a LaTeX tabular environment output.
"""
import warnings
if force_unicode is not None: # pragma: no cover
warnings.warn("force_unicode is deprecated, it will have no effect",
FutureWarning)

frame = self.frame

if len(frame.columns) == 0 or len(frame.index) == 0:
Expand All @@ -289,7 +282,7 @@ def to_latex(self, force_unicode=False, column_format=None):
frame.columns, frame.index))
strcols = [[info_line]]
else:
strcols = self._to_str_columns(force_unicode)
strcols = self._to_str_columns()

if column_format is None:
column_format = '|l|%s|' % '|'.join('c' for _ in strcols)
Expand Down Expand Up @@ -726,18 +719,10 @@ def __init__(self, values, digits=7, formatter=None, na_rep='NaN',
self.justify = justify

def get_result(self):
if self._have_unicode():
fmt_values = self._format_strings(use_unicode=True)
else:
fmt_values = self._format_strings(use_unicode=False)

fmt_values = self._format_strings()
return _make_fixed_width(fmt_values, self.justify)

def _have_unicode(self):
mask = lib.map_infer(self.values, lambda x: isinstance(x, unicode))
return mask.any()

def _format_strings(self, use_unicode=False):
def _format_strings(self):
if self.float_format is None:
float_format = print_config.float_format
if float_format is None:
Expand Down
56 changes: 42 additions & 14 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -612,20 +612,51 @@ def _need_info_repr_(self):
else:
return False

def __repr__(self):
def __str__(self):
"""
Return a string representation for a particular DataFrame.

Invoked by str(df) in both py2/py3.
Yields a bytestring in py2 (via __bytes__) and a unicode string
in py3 (via __unicode__).
"""

# str() must return the native string type of the running interpreter
if py3compat.PY3:
return self.__unicode__()
return self.__bytes__()

def __bytes__(self):
"""
Return a bytestring representation for a particular DataFrame.

Invoked by bytes(df) in py3 only.
Yields a bytestring in both py2/py3: the text produced by
__unicode__ is passed through com.console_encode.
"""
return com.console_encode(self.__unicode__())

def __unicode__(self):
"""
Return a string representation for a particular DataFrame

Invoked by unicode(df) in py2 only. Yields a Unicode String in both py2/py3.
"""
buf = StringIO()
buf = StringIO(u"")
if self._need_info_repr_():
self.info(buf=buf, verbose=self._verbose_info)
else:
self.to_string(buf=buf)

value = buf.getvalue()
assert type(value) == unicode

if py3compat.PY3:
return unicode(value)
return com.console_encode(value)
return value

def __repr__(self):
"""
Return a string representation for a particular DataFrame.

Delegates to str(self), so it yields a bytestring in py2 and a
unicode string in py3.
"""
return str(self)

def _repr_html_(self):
"""
Expand Down Expand Up @@ -1379,19 +1410,21 @@ def to_excel(self, excel_writer, sheet_name='sheet1', na_rep='',
def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
header=True, index=True, na_rep='NaN', formatters=None,
float_format=None, sparsify=None, nanRep=None,
index_names=True, justify=None, force_unicode=False):
index_names=True, justify=None, force_unicode=None):
"""
Render a DataFrame to a console-friendly tabular output.
"""
import warnings
if force_unicode is not None: # pragma: no cover
warnings.warn("force_unicode is deprecated, it will have no effect",
FutureWarning)

if nanRep is not None: # pragma: no cover
import warnings
warnings.warn("nanRep is deprecated, use na_rep",
FutureWarning)
na_rep = nanRep

if colSpace is not None: # pragma: no cover
import warnings
warnings.warn("colSpace is deprecated, use col_space",
FutureWarning)
col_space = colSpace
Expand All @@ -1404,15 +1437,10 @@ def to_string(self, buf=None, columns=None, col_space=None, colSpace=None,
justify=justify,
index_names=index_names,
header=header, index=index)
formatter.to_string(force_unicode=force_unicode)
formatter.to_string()

if buf is None:
result = formatter.buf.getvalue()
if not force_unicode:
try:
result = str(result)
except ValueError:
pass
return result

@Appender(fmt.docstring_to_string, indents=1)
Expand Down
100 changes: 79 additions & 21 deletions pandas/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -132,12 +132,48 @@ def __array_finalize__(self, obj):
def _shallow_copy(self):
return self.view()

def __repr__(self):
def __str__(self):
"""
Return a string representation for a particular Index

Invoked by str(index) in both py2/py3.
Yields Bytestring in Py2, Unicode String in py3.
"""

if py3compat.PY3:
prepr = com.pprint_thing(self)
return self.__unicode__()
return self.__bytes__()

def __bytes__(self):
"""
Return a bytestring representation for a particular Index.

Invoked by bytes(index) in py3 only.
Yields a bytestring in both py2/py3: the text produced by
__unicode__ is passed through com.console_encode.
"""
return com.console_encode(self.__unicode__())

def __unicode__(self):
"""
Return a string representation for a particular Index

Invoked by unicode(index) in py2 only. Yields a Unicode String in both py2/py3.
"""
if len(self) > 6 and len(self) > np.get_printoptions()['threshold']:
data = self[:3].tolist() + ["..."] + self[-3:].tolist()
else:
prepr = com.pprint_thing_encoded(self)
return 'Index(%s, dtype=%s)' % (prepr, self.dtype)
data = self

prepr = com.pprint_thing(data)
return '%s(%s, dtype=%s)' % (type(self).__name__, prepr, self.dtype)

def __repr__(self):
"""
Return a string representation for a particular Index.

Delegates to str(self), so it yields a bytestring in py2 and a
unicode string in py3.
"""
return str(self)

def astype(self, dtype):
return Index(self.values.astype(dtype), name=self.name,
Expand Down Expand Up @@ -207,15 +243,6 @@ def summary(self, name=None):
name = type(self).__name__
return '%s: %s entries%s' % (name, len(self), index_summary)

def __str__(self):
try:
return np.array_repr(self.values)
except UnicodeError:
converted = u','.join(com.pprint_thing(x) for x in self.values)
result = u'%s([%s], dtype=''%s'')' % (type(self).__name__, converted,
str(self.values.dtype))
return com.console_encode(result)

def _mpl_repr(self):
# how to represent ourselves to matplotlib
return self.values
Expand Down Expand Up @@ -394,8 +421,8 @@ def format(self, name=False):
result = []
for dt in self:
if dt.time() != zero_time or dt.tzinfo is not None:
return header + ['%s' % x for x in self]
result.append('%d-%.2d-%.2d' % (dt.year, dt.month, dt.day))
return header + [u'%s' % x for x in self]
result.append(u'%d-%.2d-%.2d' % (dt.year, dt.month, dt.day))
return header + result

values = self.values
Expand Down Expand Up @@ -1319,7 +1346,33 @@ def _array_values(self):
def dtype(self):
return np.dtype('O')

def __repr__(self):
def __str__(self):
"""
Return a string representation for a particular MultiIndex.

Invoked by str(index) in both py2/py3.
Yields a bytestring in py2 (via __bytes__) and a unicode string
in py3 (via __unicode__).
"""

# str() must return the native string type of the running interpreter
if py3compat.PY3:
return self.__unicode__()
return self.__bytes__()

def __bytes__(self):
"""
Return a bytestring representation for a particular MultiIndex.

Invoked by bytes(index) in py3 only.
Yields a bytestring in both py2/py3: the text produced by
__unicode__ is passed through com.console_encode.
"""
return com.console_encode(self.__unicode__())

def __unicode__(self):
"""
Return a string representation for a particular Index

Invoked by unicode(index) in py2 only. Yields a Unicode String in both py2/py3.
"""
output = 'MultiIndex\n%s'

options = np.get_printoptions()
Expand All @@ -1335,10 +1388,15 @@ def __repr__(self):

np.set_printoptions(threshold=options['threshold'])

if py3compat.PY3:
return output % summary
else:
return com.console_encode(output % summary)
return output % summary

def __repr__(self):
"""
Return a string representation for a particular MultiIndex.

Delegates to str(self), so it yields a bytestring in py2 and a
unicode string in py3.
"""
return str(self)

def __len__(self):
return len(self.labels[0])
Expand Down Expand Up @@ -1496,7 +1554,7 @@ def format(self, space=2, sparsify=None, adjoin=True, names=False,
formatted = lev.take(lab).format()
else:
# weird all NA case
formatted = [str(x) for x in com.take_1d(lev.values, lab)]
formatted = [com.pprint_thing(x) for x in com.take_1d(lev.values, lab)]
stringified_levels.append(formatted)

result_levels = []
Expand Down
Loading