Skip to content

BUG GH11600 - MultiIndex column level names lost when to_sparse() called #11606

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions doc/source/whatsnew/v0.17.1.txt
Original file line number Diff line number Diff line change
Expand Up @@ -156,3 +156,4 @@ Bug Fixes
- Bug in the link-time error caused by C ``inline`` functions on FreeBSD 10+ (with ``clang``) (:issue:`10510`)
- Bug in ``DataFrame.to_csv`` in passing through arguments for formatting ``MultiIndexes``, including ``date_format`` (:issue:`7791`)
- Bug in ``DataFrame.join()`` with ``how='right'`` producing a ``TypeError`` (:issue:`11519`)
- Bug in ``DataFrame.to_sparse()`` losing column names for ``MultiIndexes`` (:issue:`11600`)
2 changes: 1 addition & 1 deletion pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -1191,7 +1191,7 @@ def to_sparse(self, fill_value=None, kind='block'):
y : SparseDataFrame
"""
from pandas.core.sparse import SparseDataFrame
return SparseDataFrame(self._series, index=self.index,
return SparseDataFrame(self._series, index=self.index, columns=self.columns,
default_kind=kind,
default_fill_value=fill_value)

Expand Down
2 changes: 1 addition & 1 deletion pandas/sparse/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,7 +246,7 @@ def to_dense(self):
df : DataFrame
"""
data = dict((k, v.to_dense()) for k, v in compat.iteritems(self))
return DataFrame(data, index=self.index)
return DataFrame(data, index=self.index,columns=self.columns)

def astype(self, dtype):
raise NotImplementedError
Expand Down
20 changes: 19 additions & 1 deletion pandas/sparse/tests/test_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import pandas as pd
dec = np.testing.dec

from pandas.util.testing import (assert_almost_equal, assert_series_equal,
from pandas.util.testing import (assert_almost_equal, assert_series_equal, assert_index_equal,
assert_frame_equal, assert_panel_equal, assertRaisesRegexp,
assert_numpy_array_equal, assert_attr_equal)
from numpy.testing import assert_equal
Expand Down Expand Up @@ -770,6 +770,24 @@ def test_combine_first(self):
assert_sp_series_equal(result, result2)
assert_sp_series_equal(result, expected)

class TestSparseHandlingMultiIndexes(tm.TestCase):

def setUp(self):
miindex = pd.MultiIndex.from_product([["x","y"], ["10","20"]],names=['row-foo', 'row-bar'])
micol = pd.MultiIndex.from_product([['a','b','c'], ["1","2"]],names=['col-foo', 'col-bar'])
dense_multiindex_frame = pd.DataFrame(index=miindex, columns=micol).sortlevel().sortlevel(axis=1)
self.dense_multiindex_frame = dense_multiindex_frame.fillna(value=3.14)

def test_to_sparse_preserve_multiindex_names_columns(self):
sparse_multiindex_frame = self.dense_multiindex_frame.to_sparse().copy()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You don't need the ``.copy()`` call here.

assert_index_equal(sparse_multiindex_frame.columns,self.dense_multiindex_frame.columns)

def test_round_trip_preserve_multiindex_names(self):
sparse_multiindex_frame = self.dense_multiindex_frame.to_sparse()
round_trip_multiindex_frame = sparse_multiindex_frame.to_dense()
assert_frame_equal(self.dense_multiindex_frame,round_trip_multiindex_frame,
check_column_type=True,check_names=True)


class TestSparseSeriesScipyInteraction(tm.TestCase):
# Issue 8048: add SparseSeries coo methods
Expand Down
34 changes: 20 additions & 14 deletions pandas/tseries/holiday.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
import warnings

from pandas import DateOffset, DatetimeIndex, Series, Timestamp
from pandas.compat import add_metaclass
from datetime import datetime, timedelta
Expand Down Expand Up @@ -192,10 +194,10 @@ def dates(self, start_date, end_date, return_name=False):
"""
start_date = Timestamp(start_date)
end_date = Timestamp(end_date)
filter_start_date = start_date

filter_start_date = start_date
filter_end_date = end_date

if self.year is not None:
dt = Timestamp(datetime(self.year, self.month, self.day))
if return_name:
Expand All @@ -208,22 +210,22 @@ def dates(self, start_date, end_date, return_name=False):
if self.days_of_week is not None:
holiday_dates = holiday_dates[np.in1d(holiday_dates.dayofweek,
self.days_of_week)]

if self.start_date is not None:
filter_start_date = max(self.start_date.tz_localize(filter_start_date.tz), filter_start_date)
if self.end_date is not None:
filter_end_date = min(self.end_date.tz_localize(filter_end_date.tz), filter_end_date)
holiday_dates = holiday_dates[(holiday_dates >= filter_start_date) &
holiday_dates = holiday_dates[(holiday_dates >= filter_start_date) &
(holiday_dates <= filter_end_date)]
if return_name:
return Series(self.name, index=holiday_dates)
return holiday_dates


def _reference_dates(self, start_date, end_date):
"""
Get reference dates for the holiday.

Return reference dates for the holiday also returning the year
prior to the start_date and year following the end_date. This ensures
that any offsets to be applied will yield the holidays within
Expand All @@ -238,13 +240,13 @@ def _reference_dates(self, start_date, end_date):
year_offset = DateOffset(years=1)
reference_start_date = Timestamp(
datetime(start_date.year-1, self.month, self.day))

reference_end_date = Timestamp(
datetime(end_date.year+1, self.month, self.day))
# Don't process unnecessary holidays
dates = DatetimeIndex(start=reference_start_date, end=reference_end_date,
dates = DatetimeIndex(start=reference_start_date, end=reference_end_date,
freq=year_offset, tz=start_date.tz)

return dates

def _apply_rule(self, dates):
Expand All @@ -269,7 +271,11 @@ def _apply_rule(self, dates):
else:
offsets = self.offset
for offset in offsets:
dates += offset

# if we are adding a non-vectorized value
# ignore the PerformanceWarnings:
with warnings.catch_warnings(record=True):
dates += offset
return dates

holiday_calendars = {}
Expand Down Expand Up @@ -327,12 +333,12 @@ def __init__(self, name=None, rules=None):

if rules is not None:
self.rules = rules

def rule_from_name(self, name):
for rule in self.rules:
if rule.name == name:
return rule

return None

def holidays(self, start=None, end=None, return_name=False):
Expand Down
51 changes: 24 additions & 27 deletions pandas/tseries/tests/test_timeseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -2637,35 +2637,32 @@ def test_datetime64_with_DateOffset(self):
assert_func(klass([x - op for x in s]), s - op)


# split by fast/slow path to test perf warning
off = {False:
['YearBegin', ('YearBegin', {'month': 5}),
'YearEnd', ('YearEnd', {'month': 5}),
'MonthBegin', 'MonthEnd', 'Week', ('Week', {'weekday': 3}),
'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin'],
PerformanceWarning:
['CustomBusinessDay', 'CDay', 'CBMonthEnd','CBMonthBegin',
'BMonthBegin', 'BMonthEnd', 'BusinessHour', 'BYearBegin',
'BYearEnd','BQuarterBegin', ('LastWeekOfMonth', {'weekday':2}),
('FY5253Quarter', {'qtr_with_extra_week': 1, 'startingMonth': 1,
'weekday': 2, 'variation': 'nearest'}),
('FY5253',{'weekday': 0, 'startingMonth': 2, 'variation': 'nearest'}),
('WeekOfMonth', {'weekday': 2, 'week': 2}), 'Easter',
('DateOffset', {'day': 4}), ('DateOffset', {'month': 5})]}
# assert these are equal on a piecewise basis
offsets = ['YearBegin', ('YearBegin', {'month': 5}),
'YearEnd', ('YearEnd', {'month': 5}),
'MonthBegin', 'MonthEnd', 'Week', ('Week', {'weekday': 3}),
'BusinessDay', 'BDay', 'QuarterEnd', 'QuarterBegin',
'CustomBusinessDay', 'CDay', 'CBMonthEnd','CBMonthBegin',
'BMonthBegin', 'BMonthEnd', 'BusinessHour', 'BYearBegin',
'BYearEnd','BQuarterBegin', ('LastWeekOfMonth', {'weekday':2}),
('FY5253Quarter', {'qtr_with_extra_week': 1, 'startingMonth': 1,
'weekday': 2, 'variation': 'nearest'}),
('FY5253',{'weekday': 0, 'startingMonth': 2, 'variation': 'nearest'}),
('WeekOfMonth', {'weekday': 2, 'week': 2}), 'Easter',
('DateOffset', {'day': 4}), ('DateOffset', {'month': 5})]

for normalize in (True, False):
for warning, offsets in off.items():
for do in offsets:
if isinstance(do, tuple):
do, kwargs = do
else:
do = do
kwargs = {}
op = getattr(pd.offsets,do)(5, normalize=normalize, **kwargs)
with tm.assert_produces_warning(warning):
assert_func(klass([x + op for x in s]), s + op)
assert_func(klass([x - op for x in s]), s - op)
assert_func(klass([op + x for x in s]), op + s)
for do in offsets:
if isinstance(do, tuple):
do, kwargs = do
else:
do = do
kwargs = {}
op = getattr(pd.offsets,do)(5, normalize=normalize, **kwargs)
assert_func(klass([x + op for x in s]), s + op)
assert_func(klass([x - op for x in s]), s - op)
assert_func(klass([op + x for x in s]), op + s)

# def test_add_timedelta64(self):
# rng = date_range('1/1/2000', periods=5)
# delta = rng.values[3] - rng.values[1]
Expand Down