From 0fab9fae7951175f4b2417fa6a7efce5298dbf54 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 14 Apr 2020 15:38:15 -0700 Subject: [PATCH 1/4] BUG: tz_localize needs to invalidate freq --- pandas/core/arrays/datetimes.py | 11 ++++++++- .../tests/indexes/datetimes/test_timezones.py | 23 +++++++++++++++++++ pandas/tests/series/test_arithmetic.py | 1 + 3 files changed, 34 insertions(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index f5cc0817e8bd7..ed1f74b064af9 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -973,7 +973,16 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): ) new_dates = new_dates.view(DT64NS_DTYPE) dtype = tz_to_dtype(tz) - return self._simple_new(new_dates, dtype=dtype, freq=self.freq) + + freq = None + if timezones.is_utc(tz) or (len(self) == 1 and not isna(new_dates[0])): + # we can preserve freq + # TODO: Also for fixed-offsets + freq = self.freq + elif tz is None and self.tz is None: + # no-op + freq = self.freq + return self._simple_new(new_dates, dtype=dtype, freq=freq) # ---------------------------------------------------------------- # Conversion Methods - Vectorized analogues of Timestamp methods diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index fbddf765be79c..8628ce7ade212 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -1161,3 +1161,26 @@ def test_iteration_preserves_nanoseconds(self, tz): ) for i, ts in enumerate(index): assert ts == index[i] + + +def test_tz_localize_invalidates_freq(): + # we only preserve freq in unambiguous cases + + # if localized to US/Eastern, this crosses a DST transition + dti = date_range("2014-03-08 23:00", "2014-03-09 09:00", freq="H") + assert dti.freq == "H" + + result = dti.tz_localize(None) # no-op + assert result.freq == "H" + + result = dti.tz_localize("UTC") # unambiguous freq preservation + assert result.freq == "H" + + result = dti.tz_localize("US/Eastern", nonexistent="shift_forward") + assert result.freq is None + assert result.inferred_freq is None # i.e. we are not _too_ strict here + + # Case where we _can_ keep freq because we're length==1 + dti2 = dti[:1] + result = dti2.tz_localize("US/Eastern") + assert result.freq == "H" diff --git a/pandas/tests/series/test_arithmetic.py b/pandas/tests/series/test_arithmetic.py index a6385240537ca..16163ee76ba63 100644 --- a/pandas/tests/series/test_arithmetic.py +++ b/pandas/tests/series/test_arithmetic.py @@ -378,6 +378,7 @@ def test_ser_cmp_result_names(self, names, op): # datetime64tz dtype dti = dti.tz_localize("US/Central") + dti._set_freq("infer") # freq not preserved by tz_localize ser = Series(dti).rename(names[1]) result = op(ser, dti) assert result.name == names[2] From 6b9e08e75740df7f3c65da55e1e908fcb4e3cb91 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 14 Apr 2020 15:54:14 -0700 Subject: [PATCH 2/4] whatsnew --- doc/source/whatsnew/v1.1.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index d0e3e5c96dc3a..b2a87ea5fefe6 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -412,7 +412,7 @@ Datetimelike - Bug where :meth:`PeriodIndex` raised when passed a :class:`Series` of strings (:issue:`26109`) - Bug in :class:`Timestamp` arithmetic when adding or subtracting a ``np.ndarray`` with ``timedelta64`` dtype (:issue:`33296`) - Bug in :meth:`DatetimeIndex.to_period` not infering the frequency when called with no arguments (:issue:`33358`) - +- Bug in :meth:`DatetimeIndex.tz_localize` incorrectly retaining ``freq`` in some cases where the original freq is no longer valid (:issue:`30511`) Timedelta ^^^^^^^^^ From 94b6c71d1b51555aa734892164465bcb08f88410 Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 14 Apr 2020 16:30:15 -0700 Subject: [PATCH 3/4] update doctest --- pandas/core/arrays/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index ed1f74b064af9..e68817fa37922 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -886,7 +886,7 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): DatetimeIndex(['2018-03-01 09:00:00-05:00', '2018-03-02 09:00:00-05:00', '2018-03-03 09:00:00-05:00'], - dtype='datetime64[ns, US/Eastern]', freq='D') + dtype='datetime64[ns, US/Eastern]', freq=None) With the ``tz=None``, we can remove the time zone information while keeping the local time (not converted to UTC): From 20015e408b2842681d1cc4790eaa3d2a4187a38c Mon Sep 17 00:00:00 2001 From: jbrockmendel Date: Tue, 14 Apr 2020 17:28:48 -0700 Subject: [PATCH 4/4] troubleshoot doctest --- pandas/core/arrays/datetimes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index e68817fa37922..2d58138d56ad9 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -894,7 +894,7 @@ def tz_localize(self, tz, ambiguous="raise", nonexistent="raise"): >>> tz_aware.tz_localize(None) DatetimeIndex(['2018-03-01 09:00:00', '2018-03-02 09:00:00', '2018-03-03 09:00:00'], - dtype='datetime64[ns]', freq='D') + dtype='datetime64[ns]', freq=None) Be careful with DST changes. When there is sequential data, pandas can infer the DST time: