Skip to content

Commit 96032af

Browse files
committed
2 parents e009d61 + 752cd42 commit 96032af

File tree

92 files changed

+1867
-788
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

92 files changed

+1867
-788
lines changed

Makefile

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,3 +32,9 @@ check:
3232
--included-file-extensions="py" \
3333
--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored \
3434
pandas/
35+
36+
python3 scripts/validate_unwanted_patterns.py \
37+
--validation-type="private_import_across_module" \
38+
--included-file-extensions="py" \
39+
--excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/
40+
pandas/

asv_bench/benchmarks/groupby.py

Lines changed: 32 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -627,49 +627,63 @@ def time_first(self):
627627

628628

629629
class TransformEngine:
630-
def setup(self):
630+
631+
param_names = ["parallel"]
632+
params = [[True, False]]
633+
634+
def setup(self, parallel):
631635
N = 10 ** 3
632636
data = DataFrame(
633637
{0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
634638
columns=[0, 1],
635639
)
640+
self.parallel = parallel
636641
self.grouper = data.groupby(0)
637642

638-
def time_series_numba(self):
643+
def time_series_numba(self, parallel):
639644
def function(values, index):
640645
return values * 5
641646

642-
self.grouper[1].transform(function, engine="numba")
647+
self.grouper[1].transform(
648+
function, engine="numba", engine_kwargs={"parallel": self.parallel}
649+
)
643650

644-
def time_series_cython(self):
651+
def time_series_cython(self, parallel):
645652
def function(values):
646653
return values * 5
647654

648655
self.grouper[1].transform(function, engine="cython")
649656

650-
def time_dataframe_numba(self):
657+
def time_dataframe_numba(self, parallel):
651658
def function(values, index):
652659
return values * 5
653660

654-
self.grouper.transform(function, engine="numba")
661+
self.grouper.transform(
662+
function, engine="numba", engine_kwargs={"parallel": self.parallel}
663+
)
655664

656-
def time_dataframe_cython(self):
665+
def time_dataframe_cython(self, parallel):
657666
def function(values):
658667
return values * 5
659668

660669
self.grouper.transform(function, engine="cython")
661670

662671

663672
class AggEngine:
664-
def setup(self):
673+
674+
param_names = ["parallel"]
675+
params = [[True, False]]
676+
677+
def setup(self, parallel):
665678
N = 10 ** 3
666679
data = DataFrame(
667680
{0: [str(i) for i in range(100)] * N, 1: list(range(100)) * N},
668681
columns=[0, 1],
669682
)
683+
self.parallel = parallel
670684
self.grouper = data.groupby(0)
671685

672-
def time_series_numba(self):
686+
def time_series_numba(self, parallel):
673687
def function(values, index):
674688
total = 0
675689
for i, value in enumerate(values):
@@ -679,9 +693,11 @@ def function(values, index):
679693
total += value * 2
680694
return total
681695

682-
self.grouper[1].agg(function, engine="numba")
696+
self.grouper[1].agg(
697+
function, engine="numba", engine_kwargs={"parallel": self.parallel}
698+
)
683699

684-
def time_series_cython(self):
700+
def time_series_cython(self, parallel):
685701
def function(values):
686702
total = 0
687703
for i, value in enumerate(values):
@@ -693,7 +709,7 @@ def function(values):
693709

694710
self.grouper[1].agg(function, engine="cython")
695711

696-
def time_dataframe_numba(self):
712+
def time_dataframe_numba(self, parallel):
697713
def function(values, index):
698714
total = 0
699715
for i, value in enumerate(values):
@@ -703,9 +719,11 @@ def function(values, index):
703719
total += value * 2
704720
return total
705721

706-
self.grouper.agg(function, engine="numba")
722+
self.grouper.agg(
723+
function, engine="numba", engine_kwargs={"parallel": self.parallel}
724+
)
707725

708-
def time_dataframe_cython(self):
726+
def time_dataframe_cython(self, parallel):
709727
def function(values):
710728
total = 0
711729
for i, value in enumerate(values):

ci/build39.sh

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,9 @@
33

44
sudo apt-get install build-essential gcc xvfb
55
pip install --no-deps -U pip wheel setuptools
6-
pip install python-dateutil pytz pytest pytest-xdist hypothesis
6+
pip install numpy python-dateutil pytz pytest pytest-xdist hypothesis
77
pip install cython --pre # https://github.com/cython/cython/issues/3395
88

9-
git clone https://github.com/numpy/numpy
10-
cd numpy
11-
python setup.py build_ext --inplace
12-
python setup.py install
13-
cd ..
14-
rm -rf numpy
15-
169
python setup.py build_ext -inplace
1710
python -m pip install --no-build-isolation -e .
1811

ci/code_checks.sh

Lines changed: 11 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -116,11 +116,19 @@ if [[ -z "$CHECK" || "$CHECK" == "lint" ]]; then
116116
fi
117117
RET=$(($RET + $?)) ; echo $MSG "DONE"
118118

119-
MSG='Check for use of private module attribute access' ; echo $MSG
119+
MSG='Check for import of private attributes across modules' ; echo $MSG
120120
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
121-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
121+
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored --format="##[error]{source_path}:{line_number}:{msg}" pandas/
122122
else
123-
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
123+
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_import_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored pandas/
124+
fi
125+
RET=$(($RET + $?)) ; echo $MSG "DONE"
126+
127+
MSG='Check for use of private functions across modules' ; echo $MSG
128+
if [[ "$GITHUB_ACTIONS" == "true" ]]; then
129+
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ --format="##[error]{source_path}:{line_number}:{msg}" pandas/
130+
else
131+
$BASE_DIR/scripts/validate_unwanted_patterns.py --validation-type="private_function_across_module" --included-file-extensions="py" --excluded-file-paths=pandas/tests,asv_bench/,pandas/_vendored,doc/ pandas/
124132
fi
125133
RET=$(($RET + $?)) ; echo $MSG "DONE"
126134

doc/source/user_guide/indexing.rst

Lines changed: 12 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -313,8 +313,10 @@ Selection by label
313313
314314
.. warning::
315315

316-
Starting in 0.21.0, pandas will show a ``FutureWarning`` if indexing with a list with missing labels. In the future
317-
this will raise a ``KeyError``. See :ref:`list-like Using loc with missing keys in a list is Deprecated <indexing.deprecate_loc_reindex_listlike>`.
316+
.. versionchanged:: 1.0.0
317+
318+
Pandas will raise a ``KeyError`` if indexing with a list with missing labels. See :ref:`list-like Using loc with
319+
missing keys in a list is Deprecated <indexing.deprecate_loc_reindex_listlike>`.
318320

319321
pandas provides a suite of methods in order to have **purely label based indexing**. This is a strict inclusion based protocol.
320322
Every label asked for must be in the index, or a ``KeyError`` will be raised.
@@ -578,8 +580,9 @@ IX indexer is deprecated
578580

579581
.. warning::
580582

581-
Starting in 0.20.0, the ``.ix`` indexer is deprecated, in favor of the more strict ``.iloc``
582-
and ``.loc`` indexers.
583+
.. versionchanged:: 1.0.0
584+
585+
The ``.ix`` indexer was removed, in favor of the more strict ``.iloc`` and ``.loc`` indexers.
583586

584587
``.ix`` offers a lot of magic on the inference of what the user wants to do. To wit, ``.ix`` can decide
585588
to index *positionally* OR via *labels* depending on the data type of the index. This has caused quite a
@@ -636,11 +639,13 @@ Indexing with list with missing labels is deprecated
636639

637640
.. warning::
638641

639-
Starting in 0.21.0, using ``.loc`` or ``[]`` with a list with one or more missing labels, is deprecated, in favor of ``.reindex``.
642+
.. versionchanged:: 1.0.0
643+
644+
Using ``.loc`` or ``[]`` with a list with one or more missing labels will no longer reindex, in favor of ``.reindex``.
640645

641646
In prior versions, using ``.loc[list-of-labels]`` would work as long as *at least 1* of the keys was found (otherwise it
642-
would raise a ``KeyError``). This behavior is deprecated and will show a warning message pointing to this section. The
643-
recommended alternative is to use ``.reindex()``.
647+
would raise a ``KeyError``). This behavior was changed and will now raise a ``KeyError`` if at least one label is missing.
648+
The recommended alternative is to use ``.reindex()``.
644649

645650
For example.
646651

doc/source/user_guide/io.rst

Lines changed: 4 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -930,7 +930,7 @@ take full advantage of the flexibility of the date parsing API:
930930
.. ipython:: python
931931
932932
df = pd.read_csv('tmp.csv', header=None, parse_dates=date_spec,
933-
date_parser=pd.io.date_converters.parse_date_time)
933+
date_parser=pd.to_datetime)
934934
df
935935
936936
Pandas will try to call the ``date_parser`` function in three different ways. If
@@ -942,11 +942,6 @@ an exception is raised, the next one is tried:
942942
2. If #1 fails, ``date_parser`` is called with all the columns
943943
concatenated row-wise into a single array (e.g., ``date_parser(['2013 1', '2013 2'])``).
944944

945-
3. If #2 fails, ``date_parser`` is called once for every row with one or more
946-
string arguments from the columns indicated with `parse_dates`
947-
(e.g., ``date_parser('2013', '1')`` for the first row, ``date_parser('2013', '2')``
948-
for the second, etc.).
949-
950945
Note that performance-wise, you should try these methods of parsing dates in order:
951946

952947
1. Try to infer the format using ``infer_datetime_format=True`` (see section below).
@@ -958,14 +953,6 @@ Note that performance-wise, you should try these methods of parsing dates in ord
958953
For optimal performance, this should be vectorized, i.e., it should accept arrays
959954
as arguments.
960955

961-
You can explore the date parsing functionality in
962-
`date_converters.py <https://github.com/pandas-dev/pandas/blob/master/pandas/io/date_converters.py>`__
963-
and add your own. We would love to turn this module into a community supported
964-
set of date/time parsers. To get you started, ``date_converters.py`` contains
965-
functions to parse dual date and time columns, year/month/day columns,
966-
and year/month/day/hour/minute/second columns. It also contains a
967-
``generic_parser`` function so you can curry it with a function that deals with
968-
a single date rather than the entire array.
969956

970957
.. ipython:: python
971958
:suppress:
@@ -3024,19 +3011,12 @@ It is often the case that users will insert columns to do temporary computations
30243011
in Excel and you may not want to read in those columns. ``read_excel`` takes
30253012
a ``usecols`` keyword to allow you to specify a subset of columns to parse.
30263013

3027-
.. deprecated:: 0.24.0
3014+
.. versionchanged:: 1.0.0
30283015

3029-
Passing in an integer for ``usecols`` has been deprecated. Please pass in a list
3016+
Passing in an integer for ``usecols`` will no longer work. Please pass in a list
30303017
of ints from 0 to ``usecols`` inclusive instead.
30313018

3032-
If ``usecols`` is an integer, then it is assumed to indicate the last column
3033-
to be parsed.
3034-
3035-
.. code-block:: python
3036-
3037-
pd.read_excel('path_to_file.xls', 'Sheet1', usecols=2)
3038-
3039-
You can also specify a comma-delimited set of Excel columns and ranges as a string:
3019+
You can specify a comma-delimited set of Excel columns and ranges as a string:
30403020

30413021
.. code-block:: python
30423022

doc/source/user_guide/timeseries.rst

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -327,11 +327,11 @@ which can be specified. These are computed from the starting point specified by
327327
that was discussed :ref:`above<timeseries.converting.format>`). The
328328
available units are listed on the documentation for :func:`pandas.to_datetime`.
329329

330+
.. versionchanged:: 1.0.0
331+
330332
Constructing a :class:`Timestamp` or :class:`DatetimeIndex` with an epoch timestamp
331-
with the ``tz`` argument specified will currently localize the epoch timestamps to UTC
332-
first then convert the result to the specified time zone. However, this behavior
333-
is :ref:`deprecated <whatsnew_0240.deprecations.integer_tz>`, and if you have
334-
epochs in wall time in another timezone, it is recommended to read the epochs
333+
with the ``tz`` argument specified will raise a ``ValueError``. If you have
334+
epochs in wall time in another timezone, you can read the epochs
335335
as timezone-naive timestamps and then localize to the appropriate timezone:
336336

337337
.. ipython:: python

doc/source/whatsnew/v1.1.3.rst

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,10 @@ including other versions of pandas.
1414

1515
Fixed regressions
1616
~~~~~~~~~~~~~~~~~
17+
- Fixed regression in :meth:`DataFrame.agg`, :meth:`DataFrame.apply`, :meth:`Series.agg`, and :meth:`Series.apply` where internal suffix is exposed to the users when no relabelling is applied (:issue:`36189`)
18+
- Fixed regression in :class:`IntegerArray` unary plus and minus operations raising a ``TypeError`` (:issue:`36063`)
19+
- Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a tuple (:issue:`35534`)
20+
- Fixed regression in :meth:`Series.__getitem__` incorrectly raising when the input was a frozenset (:issue:`35747`)
1721
-
1822

1923
.. ---------------------------------------------------------------------------
@@ -22,7 +26,8 @@ Fixed regressions
2226

2327
Bug fixes
2428
~~~~~~~~~
25-
-
29+
- Bug in :meth:`Series.str.startswith` and :meth:`Series.str.endswith` with ``category`` dtype not propagating ``na`` parameter (:issue:`36241`)
30+
- Bug in :class:`Series` constructor where integer overflow would occur for sufficiently large scalar inputs when an index was provided (:issue:`36291`)
2631

2732
.. ---------------------------------------------------------------------------
2833

doc/source/whatsnew/v1.2.0.rst

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ Other enhancements
104104
- :meth:`DataFrame.applymap` now supports ``na_action`` (:issue:`23803`)
105105
- :class:`Index` with object dtype supports division and multiplication (:issue:`34160`)
106106
- :meth:`DataFrame.explode` and :meth:`Series.explode` now support exploding of sets (:issue:`35614`)
107-
-
107+
- `Styler` now allows direct CSS class name addition to individual data cells (:issue:`36159`)
108108

109109
.. _whatsnew_120.api_breaking.python:
110110

@@ -195,7 +195,7 @@ Deprecations
195195
~~~~~~~~~~~~
196196
- Deprecated parameter ``inplace`` in :meth:`MultiIndex.set_codes` and :meth:`MultiIndex.set_levels` (:issue:`35626`)
197197
- Deprecated parameter ``dtype`` of method :meth:`~Index.copy` on all index classes. Use the :meth:`Index.astype` method instead for changing dtype (:issue:`35853`)
198-
-
198+
- Date parser functions :func:`~pandas.io.date_converters.parse_date_time`, :func:`~pandas.io.date_converters.parse_date_fields`, :func:`~pandas.io.date_converters.parse_all_fields` and :func:`~pandas.io.date_converters.generic_parser` from ``pandas.io.date_converters`` are deprecated and will be removed in a future version; use :func:`to_datetime` instead (:issue:`35741`)
199199

200200
.. ---------------------------------------------------------------------------
201201
@@ -205,8 +205,9 @@ Deprecations
205205
Performance improvements
206206
~~~~~~~~~~~~~~~~~~~~~~~~
207207

208+
- Performance improvements when creating Series with dtype ``str`` or :class:`StringDtype` from array with many string elements (:issue:`36304`, :issue:`36317`)
208209
- Performance improvement in :meth:`GroupBy.agg` with the ``numba`` engine (:issue:`35759`)
209-
-
210+
- Performance improvement in :meth:`GroupBy.transform` with the ``numba`` engine (:issue:`36240`)
210211

211212
.. ---------------------------------------------------------------------------
212213
@@ -228,7 +229,9 @@ Datetimelike
228229
- Bug in :class:`DateOffset` where attributes reconstructed from pickle files differ from original objects when input values exceed normal ranges (e.g months=12) (:issue:`34511`)
229230
- Bug in :meth:`DatetimeIndex.get_slice_bound` where ``datetime.date`` objects were not accepted or naive :class:`Timestamp` with a tz-aware :class:`DatetimeIndex` (:issue:`35690`)
230231
- Bug in :meth:`DatetimeIndex.slice_locs` where ``datetime.date`` objects were not accepted (:issue:`34077`)
231-
- Bug in :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, and :meth:`Series.searchsorted` with ``datetime64`` or ``timedelta64`` dtype placement of ``NaT`` values being inconsistent with ``NumPy`` (:issue:`36176`)
232+
- Bug in :meth:`DatetimeIndex.searchsorted`, :meth:`TimedeltaIndex.searchsorted`, :meth:`PeriodIndex.searchsorted`, and :meth:`Series.searchsorted` with ``datetime64``, ``timedelta64`` or ``Period`` dtype placement of ``NaT`` values being inconsistent with ``NumPy`` (:issue:`36176`, :issue:`36254`)
233+
- Inconsistency in :class:`DatetimeArray`, :class:`TimedeltaArray`, and :class:`PeriodArray` setitem casting arrays of strings to datetimelike scalars but not scalar strings (:issue:`36261`)
234+
-
232235

233236
Timedelta
234237
^^^^^^^^^
@@ -308,6 +311,7 @@ Groupby/resample/rolling
308311

309312
- Bug in :meth:`DataFrameGroupBy.count` and :meth:`SeriesGroupBy.sum` returning ``NaN`` for missing categories when grouped on multiple ``Categoricals``. Now returning ``0`` (:issue:`35028`)
310313
- Bug in :meth:`DataFrameGroupBy.apply` that would sometimes throw an erroneous ``ValueError`` if the grouping axis had duplicate entries (:issue:`16646`)
314+
- Bug in :meth:`DataFrame.resample` that would throw a ``ValueError`` when resampling from "D" to "24H" over a transition into daylight saving time (DST) (:issue:`35219`)
311315
- Bug when combining methods :meth:`DataFrame.groupby` with :meth:`DataFrame.resample` and :meth:`DataFrame.interpolate` raising a ``TypeError`` (:issue:`35325`)
312316
- Bug in :meth:`DataFrameGroupBy.apply` where a non-nuisance grouping column would be dropped from the output columns if another groupby method was called before ``.apply()`` (:issue:`34656`)
313317
- Bug in :meth:`DataFrameGroupby.apply` would drop a :class:`CategoricalIndex` when grouped on. (:issue:`35792`)
@@ -323,6 +327,7 @@ Reshaping
323327
- Bug in :meth:`DataFrame.pivot_table` with ``aggfunc='count'`` or ``aggfunc='sum'`` returning ``NaN`` for missing categories when pivoted on a ``Categorical``. Now returning ``0`` (:issue:`31422`)
324328
- Bug in :func:`union_indexes` where input index names are not preserved in some cases. Affects :func:`concat` and :class:`DataFrame` constructor (:issue:`13475`)
325329
- Bug in :func:`crosstab` when using multiple columns with ``margins=True`` and ``normalize=True`` (:issue:`35144`)
330+
- Bug in :meth:`DataFrame.agg` with ``func={'name':<FUNC>}`` incorrectly raising ``TypeError`` when ``DataFrame.columns==['Name']`` (:issue:`36212`)
326331
-
327332

328333
Sparse
@@ -342,6 +347,7 @@ Other
342347
^^^^^
343348
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` incorrectly raising ``AssertionError`` instead of ``ValueError`` when invalid parameter combinations are passed (:issue:`36045`)
344349
- Bug in :meth:`DataFrame.replace` and :meth:`Series.replace` with numeric values and string ``to_replace`` (:issue:`34789`)
350+
- Bug in :meth:`Series.transform` would give incorrect results or raise when the argument ``func`` was a dictionary (:issue:`35811`)
345351
-
346352

347353
.. ---------------------------------------------------------------------------

pandas/_libs/index.pyx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -260,7 +260,7 @@ cdef class IndexEngine:
260260
def get_indexer_non_unique(self, targets):
261261
"""
262262
Return an indexer suitable for taking from a non unique index
263-
return the labels in the same order ast the target
263+
return the labels in the same order as the target
264264
and a missing indexer into the targets (which correspond
265265
to the -1 indices in the results)
266266
"""

pandas/_libs/lib.pyx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -655,6 +655,10 @@ cpdef ndarray[object] ensure_string_array(
655655

656656
for i in range(n):
657657
val = result[i]
658+
659+
if isinstance(val, str):
660+
continue
661+
658662
if not checknull(val):
659663
result[i] = str(val)
660664
else:

0 commit comments

Comments
 (0)