From 3cc7fac5c5ba8b0b63a1c0528084897f3df29163 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Mon, 14 Mar 2022 14:31:04 +0100 Subject: [PATCH 01/16] Added comments and removed useless variable declaration --- pandas/_libs/tslibs/period.pyx | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 54cae834d7024..bd2de821f9ff9 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1243,10 +1243,13 @@ cdef str _period_strftime(int64_t value, int freq, bytes fmt): char *formatted bytes pat, brepl list found_pat = [False] * len(extra_fmts) - int year, quarter + int quarter str result, repl get_date_info(value, freq, &dts) + + # Find our additional directives in the pattern and replace them with + # placeholders that are not processed by c_strftime for i in range(len(extra_fmts)): pat = extra_fmts[i][0] brepl = extra_fmts[i][1] @@ -1254,6 +1257,7 @@ cdef str _period_strftime(int64_t value, int freq, bytes fmt): fmt = fmt.replace(pat, brepl) found_pat[i] = True + # Execute c_strftime to process the usual datetime directives formatted = c_strftime(&dts, fmt) result = util.char_to_string(formatted) From b5085fe6b775e51c1f378197aeaed1e0030e3fde Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Mon, 14 Mar 2022 15:59:02 +0100 Subject: [PATCH 02/16] Fixed #46252. Added comments and updated strftime doc for maintenance. 
Added a `TestPeriodIndexFormat` class --- pandas/_libs/tslibs/period.pyx | 52 ++++++++++++------ pandas/tests/io/formats/test_format.py | 73 +++++++++++++++++++++++++- 2 files changed, 108 insertions(+), 17 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index bd2de821f9ff9..e4044cd6f06ff 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1236,6 +1236,8 @@ cdef list extra_fmts = [(b"%q", b"^`AB`^"), cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^", "^`GH`^", "^`IJ`^", "^`KL`^"] +cdef inline int idx_first_nonfiscal_fmt = 3 + cdef str _period_strftime(int64_t value, int freq, bytes fmt): cdef: Py_ssize_t i @@ -1243,7 +1245,7 @@ cdef str _period_strftime(int64_t value, int freq, bytes fmt): char *formatted bytes pat, brepl list found_pat = [False] * len(extra_fmts) - int quarter + int quarter, us, ps str result, repl get_date_info(value, freq, &dts) @@ -1263,23 +1265,33 @@ cdef str _period_strftime(int64_t value, int freq, bytes fmt): result = util.char_to_string(formatted) free(formatted) + # Now we will fill the placeholders corresponding to our additional directives + # First prepare the contents + if any(found_pat[idx_first_nonfiscal_fmt:]): + # Save these to local vars as dts can be modified by get_yq below + us = dts.us + ps = dts.ps + if any(found_pat[0:idx_first_nonfiscal_fmt]): + # Note: this modifies `dts` in-place so that year becomes fiscal year + # However it looses the us and ps + quarter = get_yq(value, freq, &dts) + + # Now do the filling per se for i in range(len(extra_fmts)): if found_pat[i]: - quarter = get_yq(value, freq, &dts) - - if i == 0: - repl = str(quarter) - elif i == 1: # %f, 2-digit year + if i == 0: # %q, 1-digit quarter. 
+ repl = f"{quarter}" + elif i == 1: # %f, 2-digit 'Fiscal' year repl = f"{(dts.year % 100):02d}" - elif i == 2: + elif i == 2: # %F, 'Fiscal' year with a century repl = str(dts.year) - elif i == 3: - repl = f"{(value % 1_000):03d}" - elif i == 4: - repl = f"{(value % 1_000_000):06d}" - elif i == 5: - repl = f"{(value % 1_000_000_000):09d}" + elif i == 3: # %l, milliseconds + repl = f"{(us // 1_000):03d}" + elif i == 4: # %u, microseconds + repl = f"{(us):06d}" + elif i == 5: # %n, nanoseconds + repl = f"{((us * 1000) + (ps // 1000)):09d}" result = result.replace(str_extra_fmts[i], repl) @@ -2332,7 +2344,8 @@ cdef class _Period(PeriodMixin): containing one or several directives. The method recognizes the same directives as the :func:`time.strftime` function of the standard Python distribution, as well as the specific additional directives ``%f``, - ``%F``, ``%q``. (formatting & docs originally from scikits.timeries). + ``%F``, ``%q``, ``%l``, ``%u``, ``%n``. + (formatting & docs originally from scikits.timeseries). +-----------+--------------------------------+-------+ | Directive | Meaning | Notes | @@ -2379,11 +2392,20 @@ cdef class _Period(PeriodMixin): | | AM or PM. | | +-----------+--------------------------------+-------+ | ``%q`` | Quarter as a decimal number | | - | | [01,04] | | + | | [1,4] | | +-----------+--------------------------------+-------+ | ``%S`` | Second as a decimal number | \(4) | | | [00,61]. | | +-----------+--------------------------------+-------+ + | ``%l`` | Millisecond as a decimal number| | + | | [000,999]. | | + +-----------+--------------------------------+-------+ + | ``%u`` | Microsecond as a decimal number| | + | | [000000,999999]. | | + +-----------+--------------------------------+-------+ + | ``%n`` | Nanosecond as a decimal number | | + | | [000000000,999999999]. 
| | + +-----------+--------------------------------+-------+ | ``%U`` | Week number of the year | \(5) | | | (Sunday as the first day of | | | | the week) as a decimal number | | diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index adcaeba5cfd8d..3dc26591a7137 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1,8 +1,10 @@ """ Test output formatting for Series/DataFrame, including to_string & reprs """ - -from datetime import datetime +from datetime import ( + datetime, + time, +) from io import StringIO import itertools from operator import methodcaller @@ -47,6 +49,13 @@ use_32bit_repr = is_platform_windows() or not IS64 +def get_local_am_pm(): + """Return the AM and PM strings returned by strftime in current locale""" + am_local = time(1).strftime("%p") + pm_local = time(13).strftime("%p") + return am_local, pm_local + + @pytest.fixture(params=["string", "pathlike", "buffer"]) def filepath_or_buffer_id(request): """ @@ -3167,6 +3176,66 @@ def test_str(self): assert str(NaT) == "NaT" +class TestPeriodIndexFormat: + def test_period(self): + """Basic test for period formatting with default format.""" + p = pd.PeriodIndex([datetime(2003, 1, 1, 12), None], freq="H") + # format is equivalent to strftime(None) + formatted = p.format() + assert formatted[0] == p[0].strftime(None) + assert formatted[0] == "2003-01-01 12:00" # default: minutes not shown + assert formatted[1] == "NaT" + + p = pd.period_range("2003-01-01 12:01:01.123456789", periods=2, freq="n") + # format is equivalent to strftime(None) + formatted = p.format() + assert formatted[0] == p[0].strftime(None) + assert formatted[0] == "2003-01-01 12:01:01.123456789" + assert formatted[1] == "2003-01-01 12:01:01.123456790" + + def test_period_custom(self): + # GH46252 + + # Get locale-specific reference + am_local, pm_local = get_local_am_pm() + + # 3 digits + p = pd.period_range("2003-01-01 12:01:01.123", periods=2, 
freq="l") + formatted = p.format(date_format="%y %I:%M:%S%p (ms=%l us=%u ns=%n)") + assert formatted[0] == f"03 12:01:01{pm_local} (ms=123 us=123000 ns=123000000)" + assert formatted[1] == f"03 12:01:01{pm_local} (ms=124 us=124000 ns=124000000)" + + # 6 digits + p = pd.period_range("2003-01-01 12:01:01.123456", periods=2, freq="u") + formatted = p.format(date_format="%y %I:%M:%S%p (ms=%l us=%u ns=%n)") + assert formatted[0] == f"03 12:01:01{pm_local} (ms=123 us=123456 ns=123456000)" + assert formatted[1] == f"03 12:01:01{pm_local} (ms=123 us=123457 ns=123457000)" + + # 9 digits + p = pd.period_range("2003-01-01 12:01:01.123456789", periods=2, freq="n") + formatted = p.format(date_format="%y %I:%M:%S%p (ms=%l us=%u ns=%n)") + assert formatted[0] == f"03 12:01:01{pm_local} (ms=123 us=123456 ns=123456789)" + assert formatted[1] == f"03 12:01:01{pm_local} (ms=123 us=123456 ns=123456790)" + + def test_period_tz(self): + """Test formatting periods created from a datetime with timezone.""" + + # This timestamp is in 2013 in Europe/Paris but is 2012 in UTC + dt = pd.to_datetime(["2013-01-01 00:00:00+01:00"], utc=True) + + # Converting to a period looses the timezone information + # Since tz is currently set as utc, we'll see 2012 + with tm.assert_produces_warning(UserWarning, match="will drop timezone"): + p = dt.to_period(freq="H") + assert p.format()[0] == "2012-12-31 23:00" + + # If tz is currently set as paris before conversion, we'll see 2013 + dt = dt.tz_convert("Europe/Paris") + with tm.assert_produces_warning(UserWarning, match="will drop timezone"): + p = dt.to_period(freq="H") + assert p.format()[0] == "2013-01-01 00:00" + + class TestDatetimeIndexFormat: def test_datetime(self): formatted = pd.to_datetime([datetime(2003, 1, 1, 12), NaT]).format() From 6460a4fd19da80c7381bab9385f3d5c923d9edaf Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Mon, 14 Mar 2022 15:59:53 +0100 Subject: [PATCH 03/16] Code readability: replaced `dt` variables with `p` for "period" --- 
pandas/core/arrays/period.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 2368af0de1bf3..d7c78a4ee2ac2 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -642,17 +642,17 @@ def _format_native_types( values = self.astype(object) if date_format: - formatter = lambda dt: dt.strftime(date_format) + formatter = lambda p: p.strftime(date_format) else: - formatter = lambda dt: str(dt) + formatter = lambda p: str(p) if self._hasna: mask = self._isnan values[mask] = na_rep imask = ~mask - values[imask] = np.array([formatter(dt) for dt in values[imask]]) + values[imask] = np.array([formatter(p) for p in values[imask]]) else: - values = np.array([formatter(dt) for dt in values]) + values = np.array([formatter(p) for p in values]) return values # ------------------------------------------------------------------ From ace76482d42cb6191a7b77dcf9ca7fb1ef4836c4 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Mon, 14 Mar 2022 16:02:49 +0100 Subject: [PATCH 04/16] Improved doc for `PeriodIndex.strftime`, that is a doc shared with `DatetimeIndex.strftime`. Added a specific mention about %r and %R directives --- pandas/core/arrays/datetimelike.py | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index 7a7bad02fde72..d018541615ae4 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1589,6 +1589,14 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: of the string format can be found in `python string format doc <%(URL)s>`__. + Formats supported by the C `strftime` API but not by the python string format + doc (such as `"%%R"`, `"%%r"`) are not officially supported and should be + preferably replaced with their supported equivalents (such as `"%%H:%%M"`, + `"%%I:%%M:%%S %%p"`). 
+ + Note that `PeriodIndex` supports additional directives, detailed in + `Period.strftime`. + Parameters ---------- date_format : str @@ -1605,6 +1613,8 @@ def strftime(self, date_format: str) -> npt.NDArray[np.object_]: DatetimeIndex.normalize : Return DatetimeIndex with times to midnight. DatetimeIndex.round : Round the DatetimeIndex to the specified freq. DatetimeIndex.floor : Floor the DatetimeIndex to the specified freq. + Timestamp.strftime : Format a single Timestamp. + Period.strftime : Format a single Period. Examples -------- From 98daeb24a058368bb3ee2d00a211bca9ef5063e0 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Mon, 14 Mar 2022 16:26:42 +0100 Subject: [PATCH 05/16] Fixed build issue (uninit-ed var). Improved test --- pandas/_libs/tslibs/period.pyx | 10 ++++++---- pandas/tests/io/formats/test_format.py | 10 ++++++---- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index e4044cd6f06ff..9d223a961d225 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1266,15 +1266,17 @@ cdef str _period_strftime(int64_t value, int freq, bytes fmt): free(formatted) # Now we will fill the placeholders corresponding to our additional directives + # First prepare the contents - if any(found_pat[idx_first_nonfiscal_fmt:]): - # Save these to local vars as dts can be modified by get_yq below - us = dts.us - ps = dts.ps + # Save these to local vars as dts can be modified by get_yq below + us = dts.us + ps = dts.ps if any(found_pat[0:idx_first_nonfiscal_fmt]): # Note: this modifies `dts` in-place so that year becomes fiscal year # However it looses the us and ps quarter = get_yq(value, freq, &dts) + else: + quarter = 0 # Now do the filling per se for i in range(len(extra_fmts)): diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 3dc26591a7137..b43dac0575292 100644 --- a/pandas/tests/io/formats/test_format.py +++ 
b/pandas/tests/io/formats/test_format.py @@ -3180,16 +3180,18 @@ class TestPeriodIndexFormat: def test_period(self): """Basic test for period formatting with default format.""" p = pd.PeriodIndex([datetime(2003, 1, 1, 12), None], freq="H") - # format is equivalent to strftime(None) + # default formatting formatted = p.format() - assert formatted[0] == p[0].strftime(None) assert formatted[0] == "2003-01-01 12:00" # default: minutes not shown assert formatted[1] == "NaT" + # format is equivalent to strftime(None)... + assert formatted[0] == p.strftime(None)[0] + assert p.strftime(None)[1] is np.nan # ...except for NaTs + # Same test with nanoseconds freq p = pd.period_range("2003-01-01 12:01:01.123456789", periods=2, freq="n") - # format is equivalent to strftime(None) formatted = p.format() - assert formatted[0] == p[0].strftime(None) + assert (formatted == p.strftime(None)).all() assert formatted[0] == "2003-01-01 12:01:01.123456789" assert formatted[1] == "2003-01-01 12:01:01.123456790" From 82994928d476c369105bd3d3705c52e882883ff9 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Mon, 14 Mar 2022 16:31:47 +0100 Subject: [PATCH 06/16] What's new update. 
--- doc/source/whatsnew/v1.5.0.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 8dac952874f89..9ae438ea6dd39 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -433,6 +433,7 @@ I/O Period ^^^^^^ - Bug in subtraction of :class:`Period` from :class:`PeriodArray` returning wrong results (:issue:`45999`) +- Bug in :meth:`Period.strftime`, :meth:`PeriodIndex.strftime` and :meth:`PeriodIndex.format`, directives `%l` `%u` and `%n` were giving wrong results (:issue:`46252`) - Plotting From 8378c8a447001b2d815e375e5a84a418b98e4a53 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Mon, 14 Mar 2022 16:39:31 +0100 Subject: [PATCH 07/16] whats new --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 9ae438ea6dd39..95c84fc534e89 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -433,7 +433,7 @@ I/O Period ^^^^^^ - Bug in subtraction of :class:`Period` from :class:`PeriodArray` returning wrong results (:issue:`45999`) -- Bug in :meth:`Period.strftime`, :meth:`PeriodIndex.strftime` and :meth:`PeriodIndex.format`, directives `%l` `%u` and `%n` were giving wrong results (:issue:`46252`) +- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, directives `%l` and `%u` were giving wrong results (:issue:`46252`) - Plotting From e05fa017d3a037ed8b65832799449e3eceb563b3 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Mon, 14 Mar 2022 16:49:08 +0100 Subject: [PATCH 08/16] Fixed pre-commit hook: backticks in rst --- doc/source/whatsnew/v1.5.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.5.0.rst b/doc/source/whatsnew/v1.5.0.rst index 95c84fc534e89..73b6be6fd051a 100644 --- a/doc/source/whatsnew/v1.5.0.rst +++ b/doc/source/whatsnew/v1.5.0.rst @@ -433,7 +433,7 @@ I/O Period 
^^^^^^ - Bug in subtraction of :class:`Period` from :class:`PeriodArray` returning wrong results (:issue:`45999`) -- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, directives `%l` and `%u` were giving wrong results (:issue:`46252`) +- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, directives ``%l`` and ``%u`` were giving wrong results (:issue:`46252`) - Plotting From d3c319b27c86c81a275d5fdf76dce798fa34031d Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Mon, 14 Mar 2022 22:40:40 +0100 Subject: [PATCH 09/16] code review: renamed variable --- pandas/core/arrays/period.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index d7c78a4ee2ac2..3ce57a6650feb 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -642,17 +642,17 @@ def _format_native_types( values = self.astype(object) if date_format: - formatter = lambda p: p.strftime(date_format) + formatter = lambda per: per.strftime(date_format) else: - formatter = lambda p: str(p) + formatter = lambda per: str(per) if self._hasna: mask = self._isnan values[mask] = na_rep imask = ~mask - values[imask] = np.array([formatter(p) for p in values[imask]]) + values[imask] = np.array([formatter(per) for per in values[imask]]) else: - values = np.array([formatter(p) for p in values]) + values = np.array([formatter(per) for per in values]) return values # ------------------------------------------------------------------ From 7b9c52ee86b570b2fc21e71ddb717dc31d944c31 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Mon, 14 Mar 2022 23:00:10 +0100 Subject: [PATCH 10/16] Removed the locale related test, will be covered separately (issue #46319) --- pandas/tests/io/formats/test_format.py | 28 +++++++++----------------- 1 file changed, 9 insertions(+), 19 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 
b43dac0575292..2b8411031c18a 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -49,13 +49,6 @@ use_32bit_repr = is_platform_windows() or not IS64 -def get_local_am_pm(): - """Return the AM and PM strings returned by strftime in current locale""" - am_local = time(1).strftime("%p") - pm_local = time(13).strftime("%p") - return am_local, pm_local - - @pytest.fixture(params=["string", "pathlike", "buffer"]) def filepath_or_buffer_id(request): """ @@ -3198,26 +3191,23 @@ def test_period(self): def test_period_custom(self): # GH46252 - # Get locale-specific reference - am_local, pm_local = get_local_am_pm() - # 3 digits p = pd.period_range("2003-01-01 12:01:01.123", periods=2, freq="l") - formatted = p.format(date_format="%y %I:%M:%S%p (ms=%l us=%u ns=%n)") - assert formatted[0] == f"03 12:01:01{pm_local} (ms=123 us=123000 ns=123000000)" - assert formatted[1] == f"03 12:01:01{pm_local} (ms=124 us=124000 ns=124000000)" + formatted = p.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") + assert formatted[0] == "03 12:01:01 (ms=123 us=123000 ns=123000000)" + assert formatted[1] == "03 12:01:01 (ms=124 us=124000 ns=124000000)" # 6 digits p = pd.period_range("2003-01-01 12:01:01.123456", periods=2, freq="u") - formatted = p.format(date_format="%y %I:%M:%S%p (ms=%l us=%u ns=%n)") - assert formatted[0] == f"03 12:01:01{pm_local} (ms=123 us=123456 ns=123456000)" - assert formatted[1] == f"03 12:01:01{pm_local} (ms=123 us=123457 ns=123457000)" + formatted = p.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") + assert formatted[0] == "03 12:01:01 (ms=123 us=123456 ns=123456000)" + assert formatted[1] == "03 12:01:01 (ms=123 us=123457 ns=123457000)" # 9 digits p = pd.period_range("2003-01-01 12:01:01.123456789", periods=2, freq="n") - formatted = p.format(date_format="%y %I:%M:%S%p (ms=%l us=%u ns=%n)") - assert formatted[0] == f"03 12:01:01{pm_local} (ms=123 us=123456 ns=123456789)" - assert formatted[1] == f"03 
12:01:01{pm_local} (ms=123 us=123456 ns=123456790)" + formatted = p.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") + assert formatted[0] == "03 12:01:01 (ms=123 us=123456 ns=123456789)" + assert formatted[1] == "03 12:01:01 (ms=123 us=123456 ns=123456790)" def test_period_tz(self): """Test formatting periods created from a datetime with timezone.""" From bfbe09451f089ec3130ba4d9d7778b3de47124e9 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Tue, 15 Mar 2022 11:34:58 +0100 Subject: [PATCH 11/16] Fixed useless import --- pandas/tests/io/formats/test_format.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 2b8411031c18a..38a2103da7431 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -1,10 +1,7 @@ """ Test output formatting for Series/DataFrame, including to_string & reprs """ -from datetime import ( - datetime, - time, -) +from datetime import datetime from io import StringIO import itertools from operator import methodcaller From 46e9f673f3b44958e75110c51735bb9082c46045 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Wed, 16 Mar 2022 16:52:10 +0100 Subject: [PATCH 12/16] Renamed `p` into `per` and removed the docstrings in tests as per code review --- pandas/tests/io/formats/test_format.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 38a2103da7431..5ab73bb840d06 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3167,26 +3167,26 @@ def test_str(self): class TestPeriodIndexFormat: - def test_period(self): - """Basic test for period formatting with default format.""" - p = pd.PeriodIndex([datetime(2003, 1, 1, 12), None], freq="H") - # default formatting - formatted = p.format() + def 
test_period_format_and_strftime_default(self): + per = pd.PeriodIndex([datetime(2003, 1, 1, 12), None], freq="H") + + # Default formatting + formatted = per.format() assert formatted[0] == "2003-01-01 12:00" # default: minutes not shown assert formatted[1] == "NaT" # format is equivalent to strftime(None)... - assert formatted[0] == p.strftime(None)[0] - assert p.strftime(None)[1] is np.nan # ...except for NaTs + assert formatted[0] == per.strftime(None)[0] + assert per.strftime(None)[1] is np.nan # ...except for NaTs # Same test with nanoseconds freq - p = pd.period_range("2003-01-01 12:01:01.123456789", periods=2, freq="n") - formatted = p.format() - assert (formatted == p.strftime(None)).all() + per = pd.period_range("2003-01-01 12:01:01.123456789", periods=2, freq="n") + formatted = per.format() + assert (formatted == per.strftime(None)).all() assert formatted[0] == "2003-01-01 12:01:01.123456789" assert formatted[1] == "2003-01-01 12:01:01.123456790" def test_period_custom(self): - # GH46252 + # GH#46252 custom formatting directives %l (ms) and %u (us) # 3 digits p = pd.period_range("2003-01-01 12:01:01.123", periods=2, freq="l") @@ -3207,7 +3207,7 @@ def test_period_custom(self): assert formatted[1] == "03 12:01:01 (ms=123 us=123456 ns=123456790)" def test_period_tz(self): - """Test formatting periods created from a datetime with timezone.""" + # Formatting periods created from a datetime with timezone. 
# This timestamp is in 2013 in Europe/Paris but is 2012 in UTC dt = pd.to_datetime(["2013-01-01 00:00:00+01:00"], utc=True) From fc6465493e4609996c2006b309a2349adf9e7f5e Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Wed, 16 Mar 2022 16:58:46 +0100 Subject: [PATCH 13/16] Changed type of us, ps into `int32_t` as per code review --- pandas/_libs/tslibs/period.pyx | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index 9d223a961d225..e60a607b2ca19 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -9,6 +9,7 @@ from cpython.object cimport ( PyObject_RichCompareBool, ) from numpy cimport ( + int32_t, int64_t, ndarray, ) @@ -1245,7 +1246,8 @@ cdef str _period_strftime(int64_t value, int freq, bytes fmt): char *formatted bytes pat, brepl list found_pat = [False] * len(extra_fmts) - int quarter, us, ps + int quarter + int32_t us, ps str result, repl get_date_info(value, freq, &dts) From d91e12c20719cd10ccc36dadada6cda856ed98f4 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Wed, 16 Mar 2022 16:59:15 +0100 Subject: [PATCH 14/16] inlined constant index as per code review --- pandas/_libs/tslibs/period.pyx | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/_libs/tslibs/period.pyx b/pandas/_libs/tslibs/period.pyx index e60a607b2ca19..99fb42efd0dee 100644 --- a/pandas/_libs/tslibs/period.pyx +++ b/pandas/_libs/tslibs/period.pyx @@ -1237,8 +1237,6 @@ cdef list extra_fmts = [(b"%q", b"^`AB`^"), cdef list str_extra_fmts = ["^`AB`^", "^`CD`^", "^`EF`^", "^`GH`^", "^`IJ`^", "^`KL`^"] -cdef inline int idx_first_nonfiscal_fmt = 3 - cdef str _period_strftime(int64_t value, int freq, bytes fmt): cdef: Py_ssize_t i @@ -1273,7 +1271,7 @@ cdef str _period_strftime(int64_t value, int freq, bytes fmt): # Save these to local vars as dts can be modified by get_yq below us = dts.us ps = dts.ps - if any(found_pat[0:idx_first_nonfiscal_fmt]): + if 
any(found_pat[0:3]): # Note: this modifies `dts` in-place so that year becomes fiscal year # However it looses the us and ps quarter = get_yq(value, freq, &dts) From df8d6e1434a416b6efe5da872dedf05161e5cbc5 Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Wed, 16 Mar 2022 17:36:35 +0100 Subject: [PATCH 15/16] Discarding single-letter variable, as per code review --- pandas/tests/io/formats/test_format.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index 5ab73bb840d06..d1e1d7dc99ab3 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3189,20 +3189,20 @@ def test_period_custom(self): # GH#46252 custom formatting directives %l (ms) and %u (us) # 3 digits - p = pd.period_range("2003-01-01 12:01:01.123", periods=2, freq="l") - formatted = p.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") + per = pd.period_range("2003-01-01 12:01:01.123", periods=2, freq="l") + formatted = per.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") assert formatted[0] == "03 12:01:01 (ms=123 us=123000 ns=123000000)" assert formatted[1] == "03 12:01:01 (ms=124 us=124000 ns=124000000)" # 6 digits - p = pd.period_range("2003-01-01 12:01:01.123456", periods=2, freq="u") - formatted = p.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") + per = pd.period_range("2003-01-01 12:01:01.123456", periods=2, freq="u") + formatted = per.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") assert formatted[0] == "03 12:01:01 (ms=123 us=123456 ns=123456000)" assert formatted[1] == "03 12:01:01 (ms=123 us=123457 ns=123457000)" # 9 digits - p = pd.period_range("2003-01-01 12:01:01.123456789", periods=2, freq="n") - formatted = p.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") + per = pd.period_range("2003-01-01 12:01:01.123456789", periods=2, freq="n") + formatted = per.format(date_format="%y %I:%M:%S (ms=%l us=%u ns=%n)") 
assert formatted[0] == "03 12:01:01 (ms=123 us=123456 ns=123456789)" assert formatted[1] == "03 12:01:01 (ms=123 us=123456 ns=123456790)" From 6eecc1bf51d883ce24bd8a72659f03845f65a2aa Mon Sep 17 00:00:00 2001 From: Sylvain MARIE Date: Wed, 16 Mar 2022 17:37:55 +0100 Subject: [PATCH 16/16] Discarding single-letter variable, as per code review --- pandas/tests/io/formats/test_format.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index d1e1d7dc99ab3..9ab3e4cf6afac 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -3215,14 +3215,14 @@ def test_period_tz(self): # Converting to a period looses the timezone information # Since tz is currently set as utc, we'll see 2012 with tm.assert_produces_warning(UserWarning, match="will drop timezone"): - p = dt.to_period(freq="H") - assert p.format()[0] == "2012-12-31 23:00" + per = dt.to_period(freq="H") + assert per.format()[0] == "2012-12-31 23:00" # If tz is currently set as paris before conversion, we'll see 2013 dt = dt.tz_convert("Europe/Paris") with tm.assert_produces_warning(UserWarning, match="will drop timezone"): - p = dt.to_period(freq="H") - assert p.format()[0] == "2013-01-01 00:00" + per = dt.to_period(freq="H") + assert per.format()[0] == "2013-01-01 00:00" class TestDatetimeIndexFormat: