diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 62fff2ace1627..933e10a35118c 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -50,28 +50,26 @@ from pandas._libs.tslibs.np_datetime cimport ( from pandas._libs.tslibs.np_datetime import OutOfBoundsDatetime -from pandas._libs.tslibs.timezones cimport ( - get_utcoffset, - is_utc, -) -from pandas._libs.tslibs.util cimport ( - is_datetime64_object, - is_float_object, - is_integer_object, -) - -from pandas._libs.tslibs.parsing import parse_datetime_string - from pandas._libs.tslibs.nattype cimport ( NPY_NAT, c_NaT as NaT, c_nat_strings as nat_strings, ) +from pandas._libs.tslibs.parsing cimport parse_datetime_string from pandas._libs.tslibs.timestamps cimport _Timestamp +from pandas._libs.tslibs.timezones cimport ( + get_utcoffset, + is_utc, +) from pandas._libs.tslibs.tzconversion cimport ( Localizer, tz_localize_to_utc_single, ) +from pandas._libs.tslibs.util cimport ( + is_datetime64_object, + is_float_object, + is_integer_object, +) # ---------------------------------------------------------------------- # Constants @@ -550,8 +548,10 @@ cdef _TSObject convert_str_to_tsobject(str ts, tzinfo tz, str unit, return obj dt = parse_datetime_string( - ts, dayfirst=dayfirst, yearfirst=yearfirst + ts, dayfirst=dayfirst, yearfirst=yearfirst, out_bestunit=&out_bestunit ) + reso = get_supported_reso(out_bestunit) + return convert_datetime_to_tsobject(dt, tz, nanos=0, reso=reso) return convert_datetime_to_tsobject(dt, tz) diff --git a/pandas/_libs/tslibs/parsing.pxd b/pandas/_libs/tslibs/parsing.pxd index 25667f00e42b5..8809c81b530d0 100644 --- a/pandas/_libs/tslibs/parsing.pxd +++ b/pandas/_libs/tslibs/parsing.pxd @@ -1,3 +1,14 @@ +from cpython.datetime cimport datetime + +from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT + cpdef str get_rule_month(str source) cpdef quarter_to_myear(int year, int quarter, str freq) + +cdef datetime parse_datetime_string( + str date_string, + bint dayfirst, + bint yearfirst, + NPY_DATETIMEUNIT* out_bestunit +) diff --git a/pandas/_libs/tslibs/parsing.pyi b/pandas/_libs/tslibs/parsing.pyi index c5d53f77762f9..83a5b0085f0b4 100644 --- a/pandas/_libs/tslibs/parsing.pyi +++ b/pandas/_libs/tslibs/parsing.pyi @@ -6,7 +6,7 @@ from pandas._typing import npt class DateParseError(ValueError): ... -def parse_datetime_string( +def py_parse_datetime_string( date_string: str, dayfirst: bool = ..., yearfirst: bool = ..., diff --git a/pandas/_libs/tslibs/parsing.pyx b/pandas/_libs/tslibs/parsing.pyx index e84b5114df074..c6d8e0e8eb4ee 100644 --- a/pandas/_libs/tslibs/parsing.pyx +++ b/pandas/_libs/tslibs/parsing.pyx @@ -264,14 +264,26 @@ cdef bint _does_string_look_like_time(str parse_string): return 0 <= hour <= 23 and 0 <= minute <= 59 -def parse_datetime_string( +def py_parse_datetime_string( + str date_string, bint dayfirst=False, bint yearfirst=False +): + # Python-accessible version for testing (we can't just make + # parse_datetime_string cpdef bc it has a pointer argument) + cdef: + NPY_DATETIMEUNIT out_bestunit + + return parse_datetime_string(date_string, dayfirst, yearfirst, &out_bestunit) + + +cdef datetime parse_datetime_string( # NB: This will break with np.str_ (GH#32264) even though # isinstance(npstrobj, str) evaluates to True, so caller must ensure # the argument is *exactly* 'str' str date_string, - bint dayfirst=False, - bint yearfirst=False, -) -> datetime: + bint dayfirst, + bint yearfirst, + NPY_DATETIMEUNIT* out_bestunit +): """ Parse datetime string, only returns datetime. Also cares special handling matching time patterns. @@ -287,7 +299,6 @@ def parse_datetime_string( cdef: datetime dt - NPY_DATETIMEUNIT out_bestunit bint is_quarter = 0 if not _does_string_look_like_datetime(date_string): @@ -299,13 +310,13 @@ def parse_datetime_string( yearfirst=yearfirst) return dt - dt = _parse_delimited_date(date_string, dayfirst, &out_bestunit) + dt = _parse_delimited_date(date_string, dayfirst, out_bestunit) if dt is not None: return dt try: dt = _parse_dateabbr_string( - date_string, _DEFAULT_DATETIME, None, &out_bestunit, &is_quarter + date_string, _DEFAULT_DATETIME, None, out_bestunit, &is_quarter ) return dt except DateParseError: @@ -315,7 +326,7 @@ def parse_datetime_string( dt = dateutil_parse(date_string, default=_DEFAULT_DATETIME, dayfirst=dayfirst, yearfirst=yearfirst, - ignoretz=False, out_bestunit=&out_bestunit) + ignoretz=False, out_bestunit=out_bestunit) return dt diff --git a/pandas/tests/io/parser/test_parse_dates.py b/pandas/tests/io/parser/test_parse_dates.py index 9aebadd833506..d985800d943bd 100644 --- a/pandas/tests/io/parser/test_parse_dates.py +++ b/pandas/tests/io/parser/test_parse_dates.py @@ -18,7 +18,7 @@ import pytz from pandas._libs.tslibs import parsing -from pandas._libs.tslibs.parsing import parse_datetime_string +from pandas._libs.tslibs.parsing import py_parse_datetime_string from pandas.compat.pyarrow import ( pa_version_under6p0, pa_version_under7p0, @@ -1760,7 +1760,7 @@ def test_hypothesis_delimited_date( date_string = test_datetime.strftime(date_format.replace(" ", delimiter)) except_out_dateutil, result = _helper_hypothesis_delimited_date( - parse_datetime_string, date_string, dayfirst=dayfirst + py_parse_datetime_string, date_string, dayfirst=dayfirst ) except_in_dateutil, expected = _helper_hypothesis_delimited_date( du_parse, diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index c6ceb2fcb0ebd..8129985ef9bea 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -34,6 +34,24 @@ def test_construct_from_string_invalid_raises(self): with pytest.raises(ValueError, match="gives an invalid tzoffset"): Timestamp("200622-12-31") + def test_constructor_str_infer_reso(self): + # non-iso8601 path + + # _parse_delimited_date path + ts = Timestamp("01/30/2023") + assert ts.unit == "s" + + # _parse_dateabbr_string path + ts = Timestamp("2015Q1") + assert ts.unit == "s" + + # dateutil_parse path + ts = Timestamp("2016-01-01 1:30:01 PM") + assert ts.unit == "s" + + ts = Timestamp("2016 June 3 15:25:01.345") + assert ts.unit == "ms" + def test_constructor_from_iso8601_str_with_offset_reso(self): # GH#49737 ts = Timestamp("2016-01-01 04:05:06-01:00")