Skip to content

Commit 7742e36

Browse files
committed
Merge remote-tracking branch 'upstream/master' into remove-sparse
2 parents f1afc8f + 372a9a0 commit 7742e36

File tree

21 files changed

+239
-454
lines changed

21 files changed

+239
-454
lines changed

MANIFEST.in

+4
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ graft pandas
1515
global-exclude *.bz2
1616
global-exclude *.csv
1717
global-exclude *.dta
18+
global-exclude *.feather
1819
global-exclude *.gz
1920
global-exclude *.h5
2021
global-exclude *.html
@@ -24,7 +25,10 @@ global-exclude *.pickle
2425
global-exclude *.png
2526
global-exclude *.pyc
2627
global-exclude *.pyd
28+
global-exclude *.ods
29+
global-exclude *.odt
2730
global-exclude *.sas7bdat
31+
global-exclude *.sav
2832
global-exclude *.so
2933
global-exclude *.xls
3034
global-exclude *.xlsm

doc/source/user_guide/io.rst

+1-1
Original file line numberDiff line numberDiff line change
@@ -4860,7 +4860,7 @@ The above example creates a partitioned dataset that may look like:
48604860
from shutil import rmtree
48614861
try:
48624862
rmtree('test')
4863-
except Exception:
4863+
except OSError:
48644864
pass
48654865
48664866
.. _io.sql:

doc/source/whatsnew/v1.0.0.rst

+11-7
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,15 @@ Other enhancements
3737
(:issue:`28368`)
3838
-
3939

40+
41+
Build Changes
42+
^^^^^^^^^^^^^
43+
44+
Pandas has added a `pyproject.toml <https://www.python.org/dev/peps/pep-0517/>`_ file and will no longer include
45+
cythonized files in the source distribution uploaded to PyPI (:issue:`28341`, :issue:`20775`). If you're installing
46+
a built distribution (wheel) or via conda, this shouldn't have any effect on you. If you're building pandas from
47+
source, you should no longer need to install Cython into your build environment before calling ``pip install pandas``.
48+
4049
.. _whatsnew_1000.api_breaking:
4150

4251
Backwards incompatible API changes
@@ -116,6 +125,7 @@ Performance improvements
116125
Bug fixes
117126
~~~~~~~~~
118127

128+
- Bug in :meth:`DataFrame.to_html` when using ``formatters=<list>`` and ``max_cols`` together. (:issue:`25955`)
119129

120130
Categorical
121131
^^^^^^^^^^^
@@ -180,6 +190,7 @@ Indexing
180190
^^^^^^^^
181191

182192
- Bug in assignment using a reverse slicer (:issue:`26939`)
193+
- Bug in :meth:`DataFrame.explode` where rows were duplicated when the index contained duplicate values (:issue:`28010`)
183194
- Bug in reindexing a :class:`PeriodIndex` with another type of index that contained a :class:`Period` (:issue:`28323`, :issue:`28337`)
184195
- Fix assignment of a column via ``.loc`` with a NumPy non-nanosecond datetime type (:issue:`27395`)
185196

@@ -235,13 +246,6 @@ Sparse
235246
-
236247
-
237248

238-
239-
Build Changes
240-
^^^^^^^^^^^^^
241-
- Fixed pyqt development dependency issue because of different pyqt package name in conda and PyPI (:issue:`26838`)
242-
- Added a `pyproject.toml <https://www.python.org/dev/peps/pep-0517/>`_ file (:issue:`20775`)
243-
244-
245249
ExtensionArray
246250
^^^^^^^^^^^^^^
247251

pandas/core/frame.py

+43-19
Original file line numberDiff line numberDiff line change
@@ -5253,7 +5253,6 @@ def reorder_levels(self, order, axis=0):
52535253

52545254
def _combine_frame(self, other, func, fill_value=None, level=None):
52555255
this, other = self.align(other, join="outer", level=level, copy=False)
5256-
new_index, new_columns = this.index, this.columns
52575256

52585257
if fill_value is None:
52595258
# since _arith_op may be called in a loop, avoid function call
@@ -5271,38 +5270,62 @@ def _arith_op(left, right):
52715270

52725271
if ops.should_series_dispatch(this, other, func):
52735272
# iterate over columns
5274-
return ops.dispatch_to_series(this, other, _arith_op)
5273+
new_data = ops.dispatch_to_series(this, other, _arith_op)
52755274
else:
52765275
with np.errstate(all="ignore"):
5277-
result = _arith_op(this.values, other.values)
5278-
result = dispatch_fill_zeros(func, this.values, other.values, result)
5279-
return self._constructor(
5280-
result, index=new_index, columns=new_columns, copy=False
5281-
)
5276+
res_values = _arith_op(this.values, other.values)
5277+
new_data = dispatch_fill_zeros(func, this.values, other.values, res_values)
5278+
return this._construct_result(other, new_data, _arith_op)
52825279

52835280
def _combine_match_index(self, other, func, level=None):
52845281
left, right = self.align(other, join="outer", axis=0, level=level, copy=False)
52855282
# at this point we have `left.index.equals(right.index)`
52865283

52875284
if left._is_mixed_type or right._is_mixed_type:
52885285
# operate column-wise; avoid costly object-casting in `.values`
5289-
return ops.dispatch_to_series(left, right, func)
5286+
new_data = ops.dispatch_to_series(left, right, func)
52905287
else:
52915288
# fastpath --> operate directly on values
52925289
with np.errstate(all="ignore"):
52935290
new_data = func(left.values.T, right.values).T
5294-
return self._constructor(
5295-
new_data, index=left.index, columns=self.columns, copy=False
5296-
)
5291+
return left._construct_result(other, new_data, func)
52975292

52985293
def _combine_match_columns(self, other: Series, func, level=None):
52995294
left, right = self.align(other, join="outer", axis=1, level=level, copy=False)
53005295
# at this point we have `left.columns.equals(right.index)`
5301-
return ops.dispatch_to_series(left, right, func, axis="columns")
5296+
new_data = ops.dispatch_to_series(left, right, func, axis="columns")
5297+
return left._construct_result(right, new_data, func)
53025298

53035299
def _combine_const(self, other, func):
53045300
# scalar other or np.ndim(other) == 0
5305-
return ops.dispatch_to_series(self, other, func)
5301+
new_data = ops.dispatch_to_series(self, other, func)
5302+
return self._construct_result(other, new_data, func)
5303+
5304+
def _construct_result(self, other, result, func):
5305+
"""
5306+
Wrap the result of an arithmetic, comparison, or logical operation.
5307+
5308+
Parameters
5309+
----------
5310+
other : object
5311+
result : DataFrame
5312+
func : binary operator
5313+
5314+
Returns
5315+
-------
5316+
DataFrame
5317+
5318+
Notes
5319+
-----
5320+
`func` is included for compat with SparseDataFrame signature, is not
5321+
needed here.
5322+
"""
5323+
out = self._constructor(result, index=self.index, copy=False)
5324+
# Pin columns instead of passing to constructor for compat with
5325+
# non-unique columns case
5326+
out.columns = self.columns
5327+
return out
5328+
# TODO: finalize? we do for SparseDataFrame
53065329

53075330
def combine(self, other, func, fill_value=None, overwrite=True):
53085331
"""
@@ -6206,12 +6229,13 @@ def explode(self, column: Union[str, Tuple]) -> "DataFrame":
62066229
if not self.columns.is_unique:
62076230
raise ValueError("columns must be unique")
62086231

6209-
result = self[column].explode()
6210-
return (
6211-
self.drop([column], axis=1)
6212-
.join(result)
6213-
.reindex(columns=self.columns, copy=False)
6214-
)
6232+
df = self.reset_index(drop=True)
6233+
result = df[column].explode()
6234+
result = df.drop([column], axis=1).join(result)
6235+
result.index = self.index.take(result.index)
6236+
result = result.reindex(columns=self.columns, copy=False)
6237+
6238+
return result
62156239

62166240
def unstack(self, level=-1, fill_value=None):
62176241
"""

pandas/core/generic.py

+9-26
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,6 @@
3333
from pandas.util._decorators import Appender, Substitution, rewrite_axis_style_signature
3434
from pandas.util._validators import validate_bool_kwarg, validate_fillna_kwargs
3535

36-
from pandas.core.dtypes.cast import maybe_promote, maybe_upcast_putmask
3736
from pandas.core.dtypes.common import (
3837
ensure_int64,
3938
ensure_object,
@@ -968,15 +967,12 @@ def squeeze(self, axis=None):
968967
1
969968
"""
970969
axis = self._AXIS_NAMES if axis is None else (self._get_axis_number(axis),)
971-
try:
972-
return self.iloc[
973-
tuple(
974-
0 if i in axis and len(a) == 1 else slice(None)
975-
for i, a in enumerate(self.axes)
976-
)
977-
]
978-
except Exception:
979-
return self
970+
return self.iloc[
971+
tuple(
972+
0 if i in axis and len(a) == 1 else slice(None)
973+
for i, a in enumerate(self.axes)
974+
)
975+
]
980976

981977
def swaplevel(self, i=-2, j=-1, axis=0):
982978
"""
@@ -9042,22 +9038,9 @@ def _where(
90429038
# try to not change dtype at first (if try_quick)
90439039
if try_quick:
90449040

9045-
try:
9046-
new_other = com.values_from_object(self)
9047-
new_other = new_other.copy()
9048-
new_other[icond] = other
9049-
other = new_other
9050-
except Exception:
9051-
try_quick = False
9052-
9053-
# let's create a new (if we failed at the above
9054-
# or not try_quick
9055-
if not try_quick:
9056-
9057-
dtype, fill_value = maybe_promote(other.dtype)
9058-
new_other = np.empty(len(icond), dtype=dtype)
9059-
new_other.fill(fill_value)
9060-
maybe_upcast_putmask(new_other, icond, other)
9041+
new_other = com.values_from_object(self)
9042+
new_other = new_other.copy()
9043+
new_other[icond] = other
90619044
other = new_other
90629045

90639046
else:

pandas/core/internals/blocks.py

+31-30
Original file line numberDiff line numberDiff line change
@@ -600,41 +600,42 @@ def _astype(self, dtype, copy=False, errors="raise", **kwargs):
600600
return self.copy()
601601
return self
602602

603-
try:
604-
# force the copy here
605-
if self.is_extension:
606-
values = self.values.astype(dtype)
607-
else:
608-
if issubclass(dtype.type, str):
603+
# force the copy here
604+
if self.is_extension:
605+
# TODO: Should we try/except this astype?
606+
values = self.values.astype(dtype)
607+
else:
608+
if issubclass(dtype.type, str):
609609

610-
# use native type formatting for datetime/tz/timedelta
611-
if self.is_datelike:
612-
values = self.to_native_types()
613-
614-
# astype formatting
615-
else:
616-
values = self.get_values()
610+
# use native type formatting for datetime/tz/timedelta
611+
if self.is_datelike:
612+
values = self.to_native_types()
617613

614+
# astype formatting
618615
else:
619-
values = self.get_values(dtype=dtype)
620-
621-
# _astype_nansafe works fine with 1-d only
622-
vals1d = values.ravel()
623-
values = astype_nansafe(vals1d, dtype, copy=True, **kwargs)
616+
values = self.get_values()
624617

625-
# TODO(extension)
626-
# should we make this attribute?
627-
if isinstance(values, np.ndarray):
628-
values = values.reshape(self.shape)
618+
else:
619+
values = self.get_values(dtype=dtype)
629620

630-
except Exception:
631-
# e.g. astype_nansafe can fail on object-dtype of strings
632-
# trying to convert to float
633-
if errors == "raise":
634-
raise
635-
newb = self.copy() if copy else self
636-
else:
637-
newb = make_block(values, placement=self.mgr_locs, ndim=self.ndim)
621+
# _astype_nansafe works fine with 1-d only
622+
vals1d = values.ravel()
623+
try:
624+
values = astype_nansafe(vals1d, dtype, copy=True, **kwargs)
625+
except (ValueError, TypeError):
626+
# e.g. astype_nansafe can fail on object-dtype of strings
627+
# trying to convert to float
628+
if errors == "raise":
629+
raise
630+
newb = self.copy() if copy else self
631+
return newb
632+
633+
# TODO(extension)
634+
# should we make this attribute?
635+
if isinstance(values, np.ndarray):
636+
values = values.reshape(self.shape)
637+
638+
newb = make_block(values, placement=self.mgr_locs, ndim=self.ndim)
638639

639640
if newb.is_numeric and self.is_numeric:
640641
if newb.shape != self.shape:

pandas/core/nanops.py

+4-2
Original file line numberDiff line numberDiff line change
@@ -1300,10 +1300,12 @@ def _ensure_numeric(x):
13001300
elif not (is_float(x) or is_integer(x) or is_complex(x)):
13011301
try:
13021302
x = float(x)
1303-
except Exception:
1303+
except ValueError:
1304+
# e.g. "1+1j" or "foo"
13041305
try:
13051306
x = complex(x)
1306-
except Exception:
1307+
except ValueError:
1308+
# e.g. "foo"
13071309
raise TypeError(
13081310
"Could not convert {value!s} to numeric".format(value=x)
13091311
)

pandas/core/ops/__init__.py

+5-8
Original file line numberDiff line numberDiff line change
@@ -512,12 +512,7 @@ def column_op(a, b):
512512
raise NotImplementedError(right)
513513

514514
new_data = expressions.evaluate(column_op, str_rep, left, right)
515-
516-
result = left._constructor(new_data, index=left.index, copy=False)
517-
# Pin columns instead of passing to constructor for compat with
518-
# non-unique columns case
519-
result.columns = left.columns
520-
return result
515+
return new_data
521516

522517

523518
def dispatch_to_extension_op(
@@ -1056,7 +1051,8 @@ def f(self, other, axis=default_axis, level=None):
10561051
# Another DataFrame
10571052
if not self._indexed_same(other):
10581053
self, other = self.align(other, "outer", level=level, copy=False)
1059-
return dispatch_to_series(self, other, na_op, str_rep)
1054+
new_data = dispatch_to_series(self, other, na_op, str_rep)
1055+
return self._construct_result(other, new_data, na_op)
10601056

10611057
elif isinstance(other, ABCSeries):
10621058
return _combine_series_frame(
@@ -1086,7 +1082,8 @@ def f(self, other):
10861082
raise ValueError(
10871083
"Can only compare identically-labeled DataFrame objects"
10881084
)
1089-
return dispatch_to_series(self, other, func, str_rep)
1085+
new_data = dispatch_to_series(self, other, func, str_rep)
1086+
return self._construct_result(other, new_data, func)
10901087

10911088
elif isinstance(other, ABCSeries):
10921089
return _combine_series_frame(

pandas/core/series.py

-5
Original file line numberDiff line numberDiff line change
@@ -1277,11 +1277,6 @@ def _set_with(self, key, value):
12771277

12781278
if is_scalar(key):
12791279
key = [key]
1280-
elif not isinstance(key, (list, Series, np.ndarray)):
1281-
try:
1282-
key = list(key)
1283-
except Exception:
1284-
key = [key]
12851280

12861281
if isinstance(key, Index):
12871282
key_type = key.inferred_type

pandas/io/formats/format.py

+7
Original file line numberDiff line numberDiff line change
@@ -657,6 +657,13 @@ def _chk_truncate(self) -> None:
657657
frame = concat(
658658
(frame.iloc[:, :col_num], frame.iloc[:, -col_num:]), axis=1
659659
)
660+
# truncate formatter
661+
if isinstance(self.formatters, (list, tuple)):
662+
truncate_fmt = self.formatters
663+
self.formatters = [
664+
*truncate_fmt[:col_num],
665+
*truncate_fmt[-col_num:],
666+
]
660667
self.tr_col_num = col_num
661668
if truncate_v:
662669
# cast here since if truncate_v is True, max_rows_adj is not None

0 commit comments

Comments
 (0)