-
-
Notifications
You must be signed in to change notification settings - Fork 18.5k
Bug fix - extension array with 2d datetime64 #46140
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e46c85f
0a51d25
c6bf639
f857db8
2be9fd2
c368578
d286682
6eea976
3875560
00a7e7e
065001f
654b01a
55f4908
fed0694
a77eb63
5c6370e
54e1316
e24e05d
7fed730
75e6228
a8c3b46
dee7364
3d7556b
8100431
95b423d
7451562
f71e11f
d927e42
7e5739b
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -1621,18 +1621,23 @@ def __init__( | |
|
||
def _format_strings(self) -> list[str]: | ||
"""we by definition have DO NOT have a TZ""" | ||
values = self.values | ||
|
||
values = self.values | ||
if not isinstance(values, DatetimeIndex): | ||
values = DatetimeIndex(values) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. OK, so there is a deeper bug here in that DatetimeIndex(values) isn't raising when passed a 2D array. |
||
|
||
if self.formatter is not None and callable(self.formatter): | ||
return [self.formatter(x) for x in values] | ||
fmt_values = np.array([self.formatter(x) for x in values]) | ||
else: | ||
fmt_values = values._data._format_native_types( | ||
na_rep=self.nat_rep, date_format=self.date_format | ||
) | ||
|
||
fmt_values = values._data._format_native_types( | ||
na_rep=self.nat_rep, date_format=self.date_format | ||
) | ||
return fmt_values.tolist() | ||
if fmt_values.ndim > 1: | ||
nested_formatter = GenericArrayFormatter(fmt_values) | ||
return list(nested_formatter.get_result()) | ||
|
||
return list(fmt_values) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Why is anything necessary between L1630-L1640? We know we have a DatetimeIndex, which is 1D. |
||
|
||
|
||
class ExtensionArrayFormatter(GenericArrayFormatter): | ||
|
@@ -1811,9 +1816,12 @@ def _format_strings(self) -> list[str]: | |
formatter = self.formatter or get_format_datetime64( | ||
ido, date_format=self.date_format | ||
) | ||
fmt_values = [formatter(x) for x in values] | ||
fmt_values = np.frompyfunc(formatter, 1, 1)(values) | ||
if fmt_values.ndim > 1: | ||
nested_formatter = GenericArrayFormatter(fmt_values) | ||
return list(nested_formatter.get_result()) | ||
|
||
return fmt_values | ||
return list(fmt_values) | ||
|
||
|
||
class Timedelta64Formatter(GenericArrayFormatter): | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -3158,6 +3158,97 @@ def format_func(x): | |
result = formatter.get_result() | ||
assert result == ["10:10", "12:12"] | ||
|
||
def test_datetime64formatter_2d_array(self): | ||
# GH#38390 | ||
x = date_range("2018-01-01", periods=10, freq="H").to_numpy() | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Nitpick: please avoid one-letter variable names. For date_range I like "dti". |
||
|
||
formatter = fmt.Datetime64Formatter(x.reshape((5, 2))) | ||
result = formatter.get_result() | ||
assert len(result) == 5 | ||
assert result[0].strip() == "[2018-01-01 00:00:00, 2018-01-01 01:00:00]" | ||
assert result[4].strip() == "[2018-01-01 08:00:00, 2018-01-01 09:00:00]" | ||
|
||
formatter = fmt.Datetime64Formatter(x.reshape((2, 5))) | ||
result = formatter.get_result() | ||
assert len(result) == 2 | ||
assert result[0].strip() == "[2018-01-01 00:00:00, 2018-01-01 01:00:00, 201..." | ||
assert result[1].strip() == "[2018-01-01 05:00:00, 2018-01-01 06:00:00, 201..." | ||
|
||
def test_datetime64formatter_3d_array(self): | ||
# GH#38390 | ||
x = date_range("2018-01-01", periods=10, freq="H").to_numpy() | ||
|
||
formatter = fmt.Datetime64Formatter(x.reshape((10, 1, 1))) | ||
result = formatter.get_result() | ||
assert len(result) == 10 | ||
assert result[0].strip() == "[[2018-01-01 00:00:00]]" | ||
assert result[9].strip() == "[[2018-01-01 09:00:00]]" | ||
|
||
def test_datetime64formatter_3d_array_format_func(self): | ||
# GH#38390 | ||
x = date_range("2018-01-01", periods=24, freq="H").to_numpy() | ||
|
||
def format_func(t): | ||
return t.strftime("%H-%m") | ||
|
||
formatter = fmt.Datetime64Formatter(x.reshape((4, 2, 3)), formatter=format_func) | ||
result = formatter.get_result() | ||
assert len(result) == 4 | ||
assert result[0].strip() == "[[00-01, 01-01, 02-01], [03-01, 04-01, 05-01]]" | ||
assert result[3].strip() == "[[18-01, 19-01, 20-01], [21-01, 22-01, 23-01]]" | ||
|
||
|
||
class TestDatetime64TZFormatter: | ||
weikhor marked this conversation as resolved.
Show resolved
Hide resolved
|
||
def test_mixed(self): | ||
# GH#38390 | ||
utc = dateutil.tz.tzutc() | ||
x = Series( | ||
[ | ||
datetime(2013, 1, 1, tzinfo=utc), | ||
datetime(2013, 1, 1, 12, tzinfo=utc), | ||
NaT, | ||
] | ||
) | ||
result = fmt.Datetime64TZFormatter(x).get_result() | ||
assert len(result) == 3 | ||
assert result[0].strip() == "2013-01-01 00:00:00+00:00" | ||
assert result[1].strip() == "2013-01-01 12:00:00+00:00" | ||
assert result[2].strip() == "NaT" | ||
|
||
def test_datetime64formatter_1d_array(self): | ||
# GH#38390 | ||
x = date_range("2018-01-01", periods=3, freq="H", tz="US/Pacific").to_numpy() | ||
formatter = fmt.Datetime64TZFormatter(x) | ||
result = formatter.get_result() | ||
assert len(result) == 3 | ||
assert result[0].strip() == "2018-01-01 00:00:00-08:00" | ||
assert result[1].strip() == "2018-01-01 01:00:00-08:00" | ||
assert result[2].strip() == "2018-01-01 02:00:00-08:00" | ||
|
||
def test_datetime64formatter_2d_array(self): | ||
# GH#38390 | ||
x = date_range("2018-01-01", periods=10, freq="H", tz="US/Pacific").to_numpy() | ||
formatter = fmt.Datetime64TZFormatter(x.reshape((5, 2))) | ||
result = formatter.get_result() | ||
assert len(result) == 5 | ||
assert result[0].strip() == "[2018-01-01 00:00:00-08:00, 2018-01-01 01:00:0..." | ||
assert result[4].strip() == "[2018-01-01 08:00:00-08:00, 2018-01-01 09:00:0..." | ||
|
||
def test_datetime64formatter_2d_array_format_func(self): | ||
# GH#38390 | ||
x = date_range("2018-01-01", periods=16, freq="H", tz="US/Pacific").to_numpy() | ||
|
||
def format_func(t): | ||
return t.strftime("%H-%m %Z") | ||
|
||
formatter = fmt.Datetime64TZFormatter( | ||
x.reshape((4, 2, 2)), formatter=format_func | ||
) | ||
result = formatter.get_result() | ||
assert len(result) == 4 | ||
assert result[0].strip() == "[[00-01 PST, 01-01 PST], [02-01 PST, 03-01 PST]]" | ||
assert result[3].strip() == "[[12-01 PST, 13-01 PST], [14-01 PST, 15-01 PST]]" | ||
|
||
|
||
class TestNaTFormatting: | ||
def test_repr(self): | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is the scenario in which we get here with non-1D values? Could we just assert that the values are 1D at the top and not worry about the rest?