diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 251faeea28dc6..4c13dc2756a0e 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -384,6 +384,7 @@ Datetimelike - Bug in :class:`Timestamp` construction when passing datetime components as positional arguments and ``tzinfo`` as a keyword argument incorrectly raising (:issue:`31929`) - Bug in :meth:`Index.astype` when casting from object dtype to ``timedelta64[ns]`` dtype incorrectly casting ``np.datetime64("NaT")`` values to ``np.timedelta64("NaT")`` instead of raising (:issue:`45722`) - Bug in :meth:`SeriesGroupBy.value_counts` index when passing categorical column (:issue:`44324`) +- Bug in :meth:`DatetimeIndex.tz_localize` localizing to UTC failing to make a copy of the underlying data (:issue:`46460`) - Timedelta diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index bc57cb8aaed83..ff77566e1d559 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -64,8 +64,6 @@ cdef int64_t tz_localize_to_utc_single( return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True) elif is_fixed_offset(tz): - # TODO: in this case we should be able to use get_utcoffset, - # that returns None for e.g. 
'dateutil//usr/share/zoneinfo/Etc/GMT-9' _, deltas, _ = get_dst_info(tz) delta = deltas[0] return val - delta @@ -121,9 +119,10 @@ timedelta-like} Py_ssize_t delta_idx_offset, delta_idx, pos_left, pos_right int64_t *tdata int64_t v, left, right, val, v_left, v_right, new_local, remaining_mins - int64_t first_delta + int64_t first_delta, delta int64_t shift_delta = 0 - ndarray[int64_t] trans, result, result_a, result_b, dst_hours + ndarray[int64_t] trans, result_a, result_b, dst_hours + int64_t[::1] result npy_datetimestruct dts bint infer_dst = False, is_dst = False, fill = False bint shift_forward = False, shift_backward = False @@ -132,7 +131,7 @@ timedelta-like} # Vectorized version of DstTzInfo.localize if is_utc(tz) or tz is None: - return vals + return vals.copy() result = np.empty(n, dtype=np.int64) @@ -143,7 +142,18 @@ timedelta-like} result[i] = NPY_NAT else: result[i] = v - _tz_localize_using_tzinfo_api(v, tz, to_utc=True) - return result + return result.base # to return underlying ndarray + + elif is_fixed_offset(tz): + _, deltas, _ = get_dst_info(tz) + delta = deltas[0] + for i in range(n): + v = vals[i] + if v == NPY_NAT: + result[i] = NPY_NAT + else: + result[i] = v - delta + return result.base # to return underlying ndarray # silence false-positive compiler warning ambiguous_array = np.empty(0, dtype=bool) @@ -298,7 +308,7 @@ timedelta-like} stamp = _render_tstamp(val) raise pytz.NonExistentTimeError(stamp) - return result + return result.base # .base to get underlying ndarray cdef inline Py_ssize_t bisect_right_i8(int64_t *data, diff --git a/pandas/core/arrays/datetimes.py b/pandas/core/arrays/datetimes.py index 35ae9623d353d..9ffe33e0cf38e 100644 --- a/pandas/core/arrays/datetimes.py +++ b/pandas/core/arrays/datetimes.py @@ -456,9 +456,13 @@ def _generate_range( endpoint_tz = start.tz if start is not None else end.tz if tz is not None and endpoint_tz is None: - i8values = tzconversion.tz_localize_to_utc( - i8values, tz, ambiguous=ambiguous, 
nonexistent=nonexistent - ) + + if not timezones.is_utc(tz): + # short-circuit tz_localize_to_utc which would make + # an unnecessary copy with UTC but be a no-op. + i8values = tzconversion.tz_localize_to_utc( + i8values, tz, ambiguous=ambiguous, nonexistent=nonexistent + ) # i8values is localized datetime64 array -> have to convert # start/end as well to compare @@ -2126,6 +2130,8 @@ def _sequence_to_dt64ns( if tz is not None: # Convert tz-naive to UTC tz = timezones.maybe_get_tz(tz) + # TODO: if tz is UTC, are there situations where we *don't* want a + # copy? tz_localize_to_utc always makes one. data = tzconversion.tz_localize_to_utc( data.view("i8"), tz, ambiguous=ambiguous ) diff --git a/pandas/tests/indexes/datetimes/test_timezones.py b/pandas/tests/indexes/datetimes/test_timezones.py index 51bc054010aca..a07f21f785828 100644 --- a/pandas/tests/indexes/datetimes/test_timezones.py +++ b/pandas/tests/indexes/datetimes/test_timezones.py @@ -327,6 +327,17 @@ def test_tz_convert_unsorted(self, tzstr): # ------------------------------------------------------------- # DatetimeIndex.tz_localize + def test_tz_localize_utc_copies(self, utc_fixture): + # GH#46460 + times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"] + index = DatetimeIndex(times) + + res = index.tz_localize(utc_fixture) + assert not tm.shares_memory(res, index) + + res2 = index._data.tz_localize(utc_fixture) + assert not tm.shares_memory(index._data, res2) + def test_dti_tz_localize_nonexistent_raise_coerce(self): # GH#13057 times = ["2015-03-08 01:00", "2015-03-08 02:00", "2015-03-08 03:00"] diff --git a/pandas/tests/tslibs/test_conversion.py b/pandas/tests/tslibs/test_conversion.py index 41eb7ae85d032..d0864ae8e1b7b 100644 --- a/pandas/tests/tslibs/test_conversion.py +++ b/pandas/tests/tslibs/test_conversion.py @@ -50,6 +50,18 @@ def _compare_local_to_utc(tz_didx, naive_didx): tm.assert_numpy_array_equal(result, expected) +def test_tz_localize_to_utc_copies(): + # GH#46460: tz_localize_to_utc with tz=UTC or tz=None must return a copy, never the input buffer + arr =
np.arange(5, dtype="i8") + result = tzconversion.tz_localize_to_utc(arr, tz=UTC) + tm.assert_numpy_array_equal(result, arr) + assert not np.shares_memory(arr, result) + + result = tzconversion.tz_localize_to_utc(arr, tz=None) + tm.assert_numpy_array_equal(result, arr) + assert not np.shares_memory(arr, result) + + def test_tz_convert_single_matches_tz_convert_hourly(tz_aware_fixture): tz = tz_aware_fixture tz_didx = date_range("2014-03-01", "2015-01-10", freq="H", tz=tz)