Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion pandas/core/describe.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,12 @@ def describe_numeric_1d(series: Series, percentiles: Sequence[float]) -> Series:
+ series.quantile(percentiles).tolist()
+ [series.max()]
)
-    return Series(d, index=stat_index, name=series.name)
+
+    result = Series(d, index=stat_index, name=series.name)
+    if is_numeric_dtype(series):
+        # GH#48340 - don't rely on inference, always return float on numeric data
+        result = result.astype(float)
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Nit: Can we just pass `dtype=...` when creating the Series, instead of calling `astype` afterwards?

+    return result


def describe_categorical_1d(
Expand Down
23 changes: 23 additions & 0 deletions pandas/tests/series/methods/test_describe.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import numpy as np
import pytest

from pandas import (
Period,
Expand Down Expand Up @@ -149,3 +150,25 @@ def test_datetime_is_numeric_includes_datetime(self):
index=["count", "mean", "min", "25%", "50%", "75%", "max"],
)
tm.assert_series_equal(result, expected)

@pytest.mark.parametrize(
"dtype", ["int32", "int64", "uint32", "uint64", "float32", "float64"]
)
def test_numeric_result_is_float(self, dtype):
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Maybe use `any_real_numpy_dtype` instead? I'm not sure whether it fits what you want to test here.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks - I also tried complex and found this was buggy in that case. Fixed and added all numeric dtypes.

# GH#48340 - describe should always return dtype float on numeric input
ser = Series([0, 1], dtype=dtype)
result = ser.describe()
expected = Series(
[
2.0,
0.5,
ser.std(),
0,
0.25,
0.5,
0.75,
1.0,
],
index=["count", "mean", "std", "min", "25%", "50%", "75%", "max"],
)
tm.assert_series_equal(result, expected)