Skip to content

BUG: parsing nanoseconds incorrect resolution #46811

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 8 commits into from
Apr 27, 2022
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/source/whatsnew/v1.5.0.rst
Original file line number Diff line number Diff line change
@@ -581,6 +581,8 @@ Period
^^^^^^
- Bug in subtraction of :class:`Period` from :class:`PeriodArray` returning wrong results (:issue:`45999`)
- Bug in :meth:`Period.strftime` and :meth:`PeriodIndex.strftime`, directives ``%l`` and ``%u`` were giving wrong results (:issue:`46252`)
- Bug in inferring an incorrect ``freq`` when passing a string to :class:`Period` with microseconds that are a multiple of 1000 (:issue:`46811`)
- Bug in constructing a :class:`Period` from a :class:`Timestamp` or ``np.datetime64`` object with non-zero nanoseconds and ``freq="ns"`` incorrectly truncating the nanoseconds (:issue:`46811`)
-

Plotting
7 changes: 5 additions & 2 deletions pandas/_libs/tslib.pyx
Original file line number Diff line number Diff line change
@@ -27,6 +27,7 @@ cnp.import_array()
import pytz

from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
check_dts_bounds,
dt64_to_dtstruct,
dtstruct_to_dt64,
@@ -75,6 +76,7 @@ def _test_parse_iso8601(ts: str):
cdef:
_TSObject obj
int out_local = 0, out_tzoffset = 0
NPY_DATETIMEUNIT out_bestunit

obj = _TSObject()

@@ -83,7 +85,7 @@ def _test_parse_iso8601(ts: str):
elif ts == 'today':
return Timestamp.now().normalize()

string_to_dts(ts, &obj.dts, &out_local, &out_tzoffset, True)
string_to_dts(ts, &obj.dts, &out_bestunit, &out_local, &out_tzoffset, True)
obj.value = dtstruct_to_dt64(&obj.dts)
check_dts_bounds(&obj.dts)
if out_local == 1:
@@ -428,6 +430,7 @@ cpdef array_to_datetime(
ndarray[int64_t] iresult
ndarray[object] oresult
npy_datetimestruct dts
NPY_DATETIMEUNIT out_bestunit
bint utc_convert = bool(utc)
bint seen_integer = False
bint seen_string = False
@@ -516,7 +519,7 @@ cpdef array_to_datetime(
continue

string_to_dts_failed = string_to_dts(
val, &dts, &out_local,
val, &dts, &out_bestunit, &out_local,
&out_tzoffset, False
)
if string_to_dts_failed:
3 changes: 2 additions & 1 deletion pandas/_libs/tslibs/conversion.pyx
Original file line number Diff line number Diff line change
@@ -586,6 +586,7 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
int out_local = 0, out_tzoffset = 0, string_to_dts_failed
datetime dt
int64_t ival
NPY_DATETIMEUNIT out_bestunit

if len(ts) == 0 or ts in nat_strings:
ts = NaT
@@ -604,7 +605,7 @@ cdef _TSObject _convert_str_to_tsobject(object ts, tzinfo tz, str unit,
# equiv: datetime.today().replace(tzinfo=tz)
else:
string_to_dts_failed = string_to_dts(
ts, &dts, &out_local,
ts, &dts, &out_bestunit, &out_local,
&out_tzoffset, False
)
if not string_to_dts_failed:
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/np_datetime.pxd
Original file line number Diff line number Diff line change
@@ -90,6 +90,7 @@ cdef NPY_DATETIMEUNIT get_datetime64_unit(object obj) nogil
cdef int string_to_dts(
str val,
npy_datetimestruct* dts,
NPY_DATETIMEUNIT* out_bestunit,
int* out_local,
int* out_tzoffset,
bint want_exc,
4 changes: 3 additions & 1 deletion pandas/_libs/tslibs/np_datetime.pyx
Original file line number Diff line number Diff line change
@@ -46,6 +46,7 @@ cdef extern from "src/datetime/np_datetime.h":
cdef extern from "src/datetime/np_datetime_strings.h":
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
npy_datetimestruct *out,
NPY_DATETIMEUNIT *out_bestunit,
int *out_local, int *out_tzoffset)


@@ -255,6 +256,7 @@ cdef inline int64_t pydate_to_dt64(date val, npy_datetimestruct *dts):
cdef inline int string_to_dts(
str val,
npy_datetimestruct* dts,
NPY_DATETIMEUNIT* out_bestunit,
int* out_local,
int* out_tzoffset,
bint want_exc,
@@ -265,7 +267,7 @@ cdef inline int string_to_dts(

buf = get_c_string_buf_and_size(val, &length)
return parse_iso_8601_datetime(buf, length, want_exc,
dts, out_local, out_tzoffset)
dts, out_bestunit, out_local, out_tzoffset)


cpdef ndarray astype_overflowsafe(
37 changes: 37 additions & 0 deletions pandas/_libs/tslibs/parsing.pyx
Original file line number Diff line number Diff line change
@@ -53,6 +53,11 @@ from pandas._libs.tslibs.nattype cimport (
c_NaT as NaT,
c_nat_strings as nat_strings,
)
from pandas._libs.tslibs.np_datetime cimport (
NPY_DATETIMEUNIT,
npy_datetimestruct,
string_to_dts,
)
from pandas._libs.tslibs.offsets cimport is_offset_object
from pandas._libs.tslibs.util cimport (
get_c_string_buf_and_size,
@@ -350,6 +355,11 @@ cdef parse_datetime_string_with_reso(
"""
cdef:
object parsed, reso
bint string_to_dts_failed
npy_datetimestruct dts
NPY_DATETIMEUNIT out_bestunit
int out_local
int out_tzoffset

if not _does_string_look_like_datetime(date_string):
raise ValueError('Given date string not likely a datetime.')
@@ -358,6 +368,33 @@ cdef parse_datetime_string_with_reso(
if parsed is not None:
return parsed, reso

# Try iso8601 first, as it handles nanoseconds
# TODO: does this render some/all of parse_delimited_date redundant?
string_to_dts_failed = string_to_dts(
date_string, &dts, &out_bestunit, &out_local,
&out_tzoffset, False
)
if not string_to_dts_failed:
if dts.ps != 0 or out_local:
# TODO: the not-out_local case we could do without Timestamp;
# avoid circular import
from pandas import Timestamp
parsed = Timestamp(date_string)
else:
parsed = datetime(dts.year, dts.month, dts.day, dts.hour, dts.min, dts.sec, dts.us)
reso = {
NPY_DATETIMEUNIT.NPY_FR_Y: "year",
NPY_DATETIMEUNIT.NPY_FR_M: "month",
NPY_DATETIMEUNIT.NPY_FR_D: "day",
NPY_DATETIMEUNIT.NPY_FR_h: "hour",
NPY_DATETIMEUNIT.NPY_FR_m: "minute",
NPY_DATETIMEUNIT.NPY_FR_s: "second",
NPY_DATETIMEUNIT.NPY_FR_ms: "millisecond",
NPY_DATETIMEUNIT.NPY_FR_us: "microsecond",
NPY_DATETIMEUNIT.NPY_FR_ns: "nanosecond",
}[out_bestunit]
return parsed, reso

try:
return _parse_dateabbr_string(date_string, _DEFAULT_DATETIME, freq)
except DateParseError:
3 changes: 3 additions & 0 deletions pandas/_libs/tslibs/period.pyx
Original file line number Diff line number Diff line change
@@ -2584,10 +2584,13 @@ class Period(_Period):
dt = value
if freq is None:
raise ValueError('Must supply freq for datetime value')
if isinstance(dt, Timestamp):
nanosecond = dt.nanosecond
elif util.is_datetime64_object(value):
dt = Timestamp(value)
if freq is None:
raise ValueError('Must supply freq for datetime value')
nanosecond = dt.nanosecond
elif PyDate_Check(value):
dt = datetime(year=value.year, month=value.month, day=value.day)
if freq is None:
29 changes: 28 additions & 1 deletion pandas/_libs/tslibs/src/datetime/np_datetime_strings.c
Original file line number Diff line number Diff line change
@@ -68,11 +68,13 @@ This file implements string parsing and creation for NumPy datetime.
*/
int parse_iso_8601_datetime(const char *str, int len, int want_exc,
npy_datetimestruct *out,
NPY_DATETIMEUNIT *out_bestunit,
int *out_local, int *out_tzoffset) {
int year_leap = 0;
int i, numdigits;
const char *substr;
int sublen;
NPY_DATETIMEUNIT bestunit = NPY_FR_GENERIC;

/* If year-month-day are separated by a valid separator,
* months/days without leading zeroes will be parsed
@@ -137,6 +139,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
if (out_local != NULL) {
*out_local = 0;
}
bestunit = NPY_FR_Y;
goto finish;
}

@@ -182,6 +185,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,

/* Next character must be the separator, start of day, or end of string */
if (sublen == 0) {
bestunit = NPY_FR_M;
/* Forbid YYYYMM. Parsed instead as YYMMDD by someone else. */
if (!has_ymd_sep) {
goto parse_error;
@@ -231,6 +235,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
if (out_local != NULL) {
*out_local = 0;
}
bestunit = NPY_FR_D;
goto finish;
}

@@ -269,6 +274,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
if (!hour_was_2_digits) {
goto parse_error;
}
bestunit = NPY_FR_h;
goto finish;
}

@@ -310,6 +316,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
}

if (sublen == 0) {
bestunit = NPY_FR_m;
goto finish;
}

@@ -354,6 +361,7 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
++substr;
--sublen;
} else {
bestunit = NPY_FR_s;
goto parse_timezone;
}

@@ -370,6 +378,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
}

if (sublen == 0 || !isdigit(*substr)) {
if (numdigits > 3) {
bestunit = NPY_FR_us;
} else {
bestunit = NPY_FR_ms;
}
goto parse_timezone;
}

@@ -386,6 +399,11 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
}

if (sublen == 0 || !isdigit(*substr)) {
if (numdigits > 3) {
bestunit = NPY_FR_ps;
} else {
bestunit = NPY_FR_ns;
}
goto parse_timezone;
}

@@ -401,8 +419,14 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
}
}

if (numdigits > 3) {
bestunit = NPY_FR_as;
} else {
bestunit = NPY_FR_fs;
}

parse_timezone:
/* trim any whitespace between time/timeezone */
/* trim any whitespace between time/timezone */
while (sublen > 0 && isspace(*substr)) {
++substr;
--sublen;
@@ -521,6 +545,9 @@ int parse_iso_8601_datetime(const char *str, int len, int want_exc,
}

finish:
if (out_bestunit != NULL) {
*out_bestunit = bestunit;
}
return 0;

parse_error:
1 change: 1 addition & 0 deletions pandas/_libs/tslibs/src/datetime/np_datetime_strings.h
Original file line number Diff line number Diff line change
@@ -56,6 +56,7 @@ This file implements string parsing and creation for NumPy datetime.
int
parse_iso_8601_datetime(const char *str, int len, int want_exc,
npy_datetimestruct *out,
NPY_DATETIMEUNIT *out_bestunit,
int *out_local,
int *out_tzoffset);

19 changes: 18 additions & 1 deletion pandas/tests/scalar/period/test_period.py
Original file line number Diff line number Diff line change
@@ -113,6 +113,21 @@ def test_construction(self):
with pytest.raises(TypeError, match="pass as a string instead"):
Period("1982", freq=("Min", 1))

def test_construction_from_timestamp_nanos(self):
    # GH#46811 a Period built with freq="ns" must keep the nanosecond
    #  component of the value it is constructed from
    stamp = Timestamp("2022-04-20 09:23:24.123456789")

    # Timestamp -> Period -> Timestamp has to be lossless,
    #  i.e. the trailing 789 must survive
    assert Period(stamp, freq="ns").to_timestamp() == stamp

    # identical round-trip, this time starting from the datetime64 object
    stamp64 = stamp.asm8
    assert Period(stamp64, freq="ns").to_timestamp().asm8 == stamp64

def test_construction_bday(self):

# Biz day construction, roll forward if non-weekday
@@ -324,8 +339,10 @@ def test_constructor_infer_freq(self):
p = Period("2007-01-01 07:10:15.123")
assert p.freq == "L"

# We see that there are 6 digits after the decimal, so get microsecond
# even though the last three digits are all zeros.
p = Period("2007-01-01 07:10:15.123000")
assert p.freq == "L"
assert p.freq == "U"

p = Period("2007-01-01 07:10:15.123400")
assert p.freq == "U"
6 changes: 6 additions & 0 deletions pandas/tests/tslibs/test_parsing.py
Original file line number Diff line number Diff line change
@@ -23,6 +23,12 @@ def test_parse_time_string():
assert parsed == parsed_lower


def test_parse_time_string_nanosecond_reso():
    # GH#46811 a string carrying nine fractional-second digits should be
    #  reported with nanosecond resolution
    result = parse_time_string("2022-04-20 09:19:19.123456789")
    _, resolution = result
    assert resolution == "nanosecond"


def test_parse_time_string_invalid_type():
# Raise on invalid input, don't just return it
msg = "Argument 'arg' has incorrect type (expected str, got tuple)"