diff --git a/asv_bench/benchmarks/timeseries.py b/asv_bench/benchmarks/timeseries.py index 6c9f8ee77e5ad..b494dbd8a38fa 100644 --- a/asv_bench/benchmarks/timeseries.py +++ b/asv_bench/benchmarks/timeseries.py @@ -336,15 +336,33 @@ def time_infer_quarter(self): class ToDatetimeFormat: def setup(self): - self.s = Series(["19MAY11", "19MAY11:00:00:00"] * 100000) + N = 100000 + self.s = Series(["19MAY11", "19MAY11:00:00:00"] * N) self.s2 = self.s.str.replace(":\\S+$", "") + self.same_offset = ["10/11/2018 00:00:00.045-07:00"] * N + self.diff_offset = [ + f"10/11/2018 00:00:00.045-0{offset}:00" for offset in range(10) + ] * int(N / 10) + def time_exact(self): to_datetime(self.s2, format="%d%b%y") def time_no_exact(self): to_datetime(self.s, format="%d%b%y", exact=False) + def time_same_offset(self): + to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z") + + def time_different_offset(self): + to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z") + + def time_same_offset_to_utc(self): + to_datetime(self.same_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True) + + def time_different_offset_to_utc(self): + to_datetime(self.diff_offset, format="%m/%d/%Y %H:%M:%S.%f%z", utc=True) + class ToDatetimeCache: diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst index 692df075f25cb..83df71520ba2f 100644 --- a/doc/source/whatsnew/v1.1.0.rst +++ b/doc/source/whatsnew/v1.1.0.rst @@ -58,6 +58,22 @@ For example: For more on working with fold, see :ref:`Fold subsection ` in the user guide. +.. _whatsnew_110.to_datetime_multiple_tzname_tzoffset_support: + +Parsing timezone-aware format with different timezones in to_datetime +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +:func:`to_datetime` now supports parsing formats containing timezone names (``%Z``) and UTC offsets (``%z``) from different timezones then converting them to UTC by setting ``utc=True``. This would return a :class:`DatetimeIndex` with timezone at UTC as opposed to an :class:`Index` with ``object`` dtype if ``utc=True`` is not set (:issue:`32792`). + +For example: + +.. ipython:: python + + tz_strs = ["2010-01-01 12:00:00 +0100", "2010-01-01 12:00:00 -0100", + "2010-01-01 12:00:00 +0300", "2010-01-01 12:00:00 +0400"] + pd.to_datetime(tz_strs, format='%Y-%m-%d %H:%M:%S %z', utc=True) + pd.to_datetime(tz_strs, format='%Y-%m-%d %H:%M:%S %z') + .. _whatsnew_110.enhancements.other: Other enhancements diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index 7414165ab5711..b922e1b318622 100644 --- a/pandas/core/tools/datetimes.py +++ b/pandas/core/tools/datetimes.py @@ -229,13 +229,12 @@ def _return_parsed_timezone_results(result, timezones, tz, name): ------- tz_result : Index-like of parsed dates with timezone """ - if tz is not None: - raise ValueError( - "Cannot pass a tz argument when parsing strings with timezone information." - ) tz_results = np.array( [Timestamp(res).tz_localize(zone) for res, zone in zip(result, timezones)] ) + if tz is not None: + # Convert to the same tz + tz_results = np.array([tz_result.tz_convert(tz) for tz_result in tz_results]) from pandas import Index return Index(tz_results, name=name) diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index 6689021392a92..a751182dbf7af 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -323,8 +323,25 @@ def test_to_datetime_parse_tzname_or_tzoffset(self, fmt, dates, expected_dates): expected = pd.Index(expected_dates) tm.assert_equal(result, expected) - with pytest.raises(ValueError): - pd.to_datetime(dates, format=fmt, utc=True) + def test_to_datetime_parse_tzname_or_tzoffset_different_tz_to_utc(self): + # GH 32792 + dates = [ + "2010-01-01 12:00:00 +0100", + "2010-01-01 12:00:00 -0100", + "2010-01-01 12:00:00 +0300", + "2010-01-01 12:00:00 +0400", + ] + expected_dates = [ + "2010-01-01 11:00:00+00:00", + "2010-01-01 13:00:00+00:00", + "2010-01-01 09:00:00+00:00", + "2010-01-01 08:00:00+00:00", + ] + fmt = "%Y-%m-%d %H:%M:%S %z" + + result = pd.to_datetime(dates, format=fmt, utc=True) + expected = pd.DatetimeIndex(expected_dates) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize( "offset", ["+0", "-1foo", "UTCbar", ":10", "+01:000:01", ""]