Skip to content

Bug fix - extension array with 2d datetime64 #46140

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
e46c85f
Update format.py
Feb 24, 2022
0a51d25
Update test_format.py
Feb 24, 2022
c6bf639
pre commit
Feb 24, 2022
f857db8
Update test_format.py
Feb 24, 2022
2be9fd2
incompatible types in assignment
Feb 25, 2022
c368578
Merge branch 'bug_ExtensionArray_with_2D_datetime64' of https://githu…
Feb 25, 2022
d286682
add
Feb 26, 2022
6eea976
Update astype.py
Feb 26, 2022
3875560
Incompatible return value type
Feb 26, 2022
00a7e7e
change to list(fmt_values)
Feb 27, 2022
065001f
add
Mar 3, 2022
654b01a
add
Mar 3, 2022
55f4908
add
Mar 5, 2022
fed0694
Update test_format.py
Mar 5, 2022
a77eb63
add ndim condition
Mar 5, 2022
5c6370e
Merge branch 'main' into bug_ExtensionArray_with_2D_datetime64
Mar 5, 2022
54e1316
2d case
Mar 5, 2022
e24e05d
Merge branch 'bug_ExtensionArray_with_2D_datetime64' of https://githu…
Mar 5, 2022
7fed730
change numpy type
Mar 5, 2022
75e6228
Merge branch 'main' into bug_ExtensionArray_with_2D_datetime64
Mar 6, 2022
a8c3b46
Merge branch 'main' into bug_ExtensionArray_with_2D_datetime64
Mar 8, 2022
dee7364
tidy
Mar 8, 2022
3d7556b
Merge branch 'main' into bug_ExtensionArray_with_2D_datetime64
Mar 9, 2022
8100431
Merge branch 'main' into bug_ExtensionArray_with_2D_datetime64
Mar 9, 2022
95b423d
nested format
Mar 10, 2022
7451562
expression and variable
Mar 10, 2022
f71e11f
Update format.py
Mar 10, 2022
d927e42
Merge branch 'main' into bug_ExtensionArray_with_2D_datetime64
Mar 15, 2022
7e5739b
Merge branch 'main' into bug_ExtensionArray_with_2D_datetime64
Mar 16, 2022
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 16 additions & 8 deletions pandas/io/formats/format.py
Original file line number Diff line number Diff line change
Expand Up @@ -1621,18 +1621,23 @@ def __init__(

def _format_strings(self) -> list[str]:
"""we by definition have DO NOT have a TZ"""
values = self.values
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

What is the scenario in which we get here with non-1D values? Could we just assert that the values are 1D at the top and not worry about the rest?


values = self.values
if not isinstance(values, DatetimeIndex):
values = DatetimeIndex(values)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OK, so there is a deeper bug here in that DatetimeIndex(values) isn't raising when passed a 2D array.


if self.formatter is not None and callable(self.formatter):
return [self.formatter(x) for x in values]
fmt_values = np.array([self.formatter(x) for x in values])
else:
fmt_values = values._data._format_native_types(
na_rep=self.nat_rep, date_format=self.date_format
)

fmt_values = values._data._format_native_types(
na_rep=self.nat_rep, date_format=self.date_format
)
return fmt_values.tolist()
if fmt_values.ndim > 1:
nested_formatter = GenericArrayFormatter(fmt_values)
return list(nested_formatter.get_result())

return list(fmt_values)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why is anything necessary between L1630-L1640? We know we have a DatetimeIndex, which is 1D.



class ExtensionArrayFormatter(GenericArrayFormatter):
Expand Down Expand Up @@ -1811,9 +1816,12 @@ def _format_strings(self) -> list[str]:
formatter = self.formatter or get_format_datetime64(
ido, date_format=self.date_format
)
fmt_values = [formatter(x) for x in values]
fmt_values = np.frompyfunc(formatter, 1, 1)(values)
if fmt_values.ndim > 1:
nested_formatter = GenericArrayFormatter(fmt_values)
return list(nested_formatter.get_result())

return fmt_values
return list(fmt_values)


class Timedelta64Formatter(GenericArrayFormatter):
Expand Down
91 changes: 91 additions & 0 deletions pandas/tests/io/formats/test_format.py
Original file line number Diff line number Diff line change
Expand Up @@ -3158,6 +3158,97 @@ def format_func(x):
result = formatter.get_result()
assert result == ["10:10", "12:12"]

def test_datetime64formatter_2d_array(self):
# GH#38390
x = date_range("2018-01-01", periods=10, freq="H").to_numpy()
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nitpick: please avoid one-letter variable names. For a date_range result, I like "dti".


formatter = fmt.Datetime64Formatter(x.reshape((5, 2)))
result = formatter.get_result()
assert len(result) == 5
assert result[0].strip() == "[2018-01-01 00:00:00, 2018-01-01 01:00:00]"
assert result[4].strip() == "[2018-01-01 08:00:00, 2018-01-01 09:00:00]"

formatter = fmt.Datetime64Formatter(x.reshape((2, 5)))
result = formatter.get_result()
assert len(result) == 2
assert result[0].strip() == "[2018-01-01 00:00:00, 2018-01-01 01:00:00, 201..."
assert result[1].strip() == "[2018-01-01 05:00:00, 2018-01-01 06:00:00, 201..."

def test_datetime64formatter_3d_array(self):
# GH#38390
x = date_range("2018-01-01", periods=10, freq="H").to_numpy()

formatter = fmt.Datetime64Formatter(x.reshape((10, 1, 1)))
result = formatter.get_result()
assert len(result) == 10
assert result[0].strip() == "[[2018-01-01 00:00:00]]"
assert result[9].strip() == "[[2018-01-01 09:00:00]]"

def test_datetime64formatter_3d_array_format_func(self):
# GH#38390
x = date_range("2018-01-01", periods=24, freq="H").to_numpy()

def format_func(t):
return t.strftime("%H-%m")

formatter = fmt.Datetime64Formatter(x.reshape((4, 2, 3)), formatter=format_func)
result = formatter.get_result()
assert len(result) == 4
assert result[0].strip() == "[[00-01, 01-01, 02-01], [03-01, 04-01, 05-01]]"
assert result[3].strip() == "[[18-01, 19-01, 20-01], [21-01, 22-01, 23-01]]"


class TestDatetime64TZFormatter:
def test_mixed(self):
# GH#38390
utc = dateutil.tz.tzutc()
x = Series(
[
datetime(2013, 1, 1, tzinfo=utc),
datetime(2013, 1, 1, 12, tzinfo=utc),
NaT,
]
)
result = fmt.Datetime64TZFormatter(x).get_result()
assert len(result) == 3
assert result[0].strip() == "2013-01-01 00:00:00+00:00"
assert result[1].strip() == "2013-01-01 12:00:00+00:00"
assert result[2].strip() == "NaT"

def test_datetime64formatter_1d_array(self):
# GH#38390
x = date_range("2018-01-01", periods=3, freq="H", tz="US/Pacific").to_numpy()
formatter = fmt.Datetime64TZFormatter(x)
result = formatter.get_result()
assert len(result) == 3
assert result[0].strip() == "2018-01-01 00:00:00-08:00"
assert result[1].strip() == "2018-01-01 01:00:00-08:00"
assert result[2].strip() == "2018-01-01 02:00:00-08:00"

def test_datetime64formatter_2d_array(self):
# GH#38390
x = date_range("2018-01-01", periods=10, freq="H", tz="US/Pacific").to_numpy()
formatter = fmt.Datetime64TZFormatter(x.reshape((5, 2)))
result = formatter.get_result()
assert len(result) == 5
assert result[0].strip() == "[2018-01-01 00:00:00-08:00, 2018-01-01 01:00:0..."
assert result[4].strip() == "[2018-01-01 08:00:00-08:00, 2018-01-01 09:00:0..."

def test_datetime64formatter_2d_array_format_func(self):
# GH#38390
x = date_range("2018-01-01", periods=16, freq="H", tz="US/Pacific").to_numpy()

def format_func(t):
return t.strftime("%H-%m %Z")

formatter = fmt.Datetime64TZFormatter(
x.reshape((4, 2, 2)), formatter=format_func
)
result = formatter.get_result()
assert len(result) == 4
assert result[0].strip() == "[[00-01 PST, 01-01 PST], [02-01 PST, 03-01 PST]]"
assert result[3].strip() == "[[12-01 PST, 13-01 PST], [14-01 PST, 15-01 PST]]"


class TestNaTFormatting:
def test_repr(self):
Expand Down