
Commit b2046a6

Merge remote-tracking branch 'upstream/master' into bisect
2 parents 8262d59 + c322b24 commit b2046a6


222 files changed, +10217 -8930 lines changed


.pre-commit-config.yaml

Lines changed: 27 additions & 17 deletions
@@ -60,11 +60,11 @@ repos:
         entry: |
             (?x)
             # Check for imports from pandas.core.common instead of `import pandas.core.common as com`
-            from\ pandas\.core\.common\ import|
-            from\ pandas\.core\ import\ common|
+            from\ pandas\.core\.common\ import
+            |from\ pandas\.core\ import\ common

             # Check for imports from collections.abc instead of `from collections import abc`
-            from\ collections\.abc\ import
+            |from\ collections\.abc\ import

     -   id: non-standard-numpy.random-related-imports
         name: Check for non-standard numpy.random-related imports excluding pandas/_testing.py
@@ -73,24 +73,24 @@ repos:
         entry: |
             (?x)
             # Check for imports from np.random.<method> instead of `from numpy import random` or `from numpy.random import <method>`
-            from\ numpy\ import\ random|
-            from\ numpy.random\ import
+            from\ numpy\ import\ random
+            |from\ numpy.random\ import
         types: [python]
     -   id: non-standard-imports-in-tests
         name: Check for non-standard imports in test suite
         language: pygrep
         entry: |
             (?x)
             # Check for imports from pandas._testing instead of `import pandas._testing as tm`
-            from\ pandas\._testing\ import|
-            from\ pandas\ import\ _testing\ as\ tm|
+            from\ pandas\._testing\ import
+            |from\ pandas\ import\ _testing\ as\ tm

             # No direct imports from conftest
-            conftest\ import|
-            import\ conftest
+            |conftest\ import
+            |import\ conftest

             # Check for use of pandas.testing instead of tm
-            pd\.testing\.
+            |pd\.testing\.
         types: [python]
         files: ^pandas/tests/
     -   id: incorrect-code-directives
@@ -148,9 +148,8 @@ repos:
         name: Check for outdated annotation syntax and missing error codes
         entry: |
             (?x)
-            \#\ type:\ (?!ignore)|
-            \#\ type:\s?ignore(?!\[)|
-            \)\ ->\ \"
+            \#\ type:\ (?!ignore)
+            |\#\ type:\s?ignore(?!\[)
         language: pygrep
         types: [python]
     -   id: np-bool
@@ -166,9 +165,15 @@ repos:
         files: ^pandas/tests/
         exclude: |
             (?x)^
-            pandas/tests/io/excel/test_writers\.py|
-            pandas/tests/io/pytables/common\.py|
-            pandas/tests/io/pytables/test_store\.py$
+            pandas/tests/io/excel/test_writers\.py
+            |pandas/tests/io/pytables/common\.py
+            |pandas/tests/io/pytables/test_store\.py$
+    -   id: no-pandas-api-types
+        name: Check code for instances of pd.api.types
+        entry: (pd|pandas)\.api\.types\.
+        language: pygrep
+        types: [python]
+        files: ^pandas/tests/
 -   repo: https://github.com/asottile/yesqa
     rev: v1.2.2
     hooks:
@@ -184,4 +189,9 @@ repos:
     hooks:
     -   id: codespell
         types_or: [python, rst, markdown]
-        files: ^pandas/core/
+        files: ^pandas/
+        exclude: ^pandas/tests/
+-   repo: https://github.com/MarcoGorelli/no-string-hints
+    rev: v0.1.5
+    hooks:
+    -   id: no-string-hints
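One way to read the leading-pipe restructuring above (an illustrative sketch, not a rationale recorded in this commit): in a verbose pygrep pattern, a trailing `|` on the final branch leaves an empty alternative that matches every file, while putting `|` at the start of each continuation line keeps the pattern equivalent and makes that mistake harder to reintroduce when branches are edited.

    import re

    # Trailing "|" adds an empty alternative, so this pattern matches any input at all.
    trailing = re.compile(
        r"""(?x)
        from\ pandas\.core\.common\ import|
        from\ pandas\.core\ import\ common|
        """
    )
    print(bool(trailing.search("print('hello')")))  # True -- the empty branch matches

    # Leading "|" keeps exactly the two intended branches.
    leading = re.compile(
        r"""(?x)
        from\ pandas\.core\.common\ import
        |from\ pandas\.core\ import\ common
        """
    )
    print(bool(leading.search("print('hello')")))                  # False
    print(bool(leading.search("from pandas.core import common")))  # True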

asv_bench/benchmarks/io/csv.py

Lines changed: 48 additions & 0 deletions
@@ -76,6 +76,54 @@ def time_frame(self, obs):
         self.data.to_csv(self.fname)


+class ToCSVIndexes(BaseIO):
+
+    fname = "__test__.csv"
+
+    @staticmethod
+    def _create_df(rows, cols):
+        index_cols = {
+            "index1": np.random.randint(0, rows, rows),
+            "index2": np.full(rows, 1, dtype=np.int),
+            "index3": np.full(rows, 1, dtype=np.int),
+        }
+        data_cols = {
+            f"col{i}": np.random.uniform(0, 100000.0, rows) for i in range(cols)
+        }
+        df = DataFrame({**index_cols, **data_cols})
+        return df
+
+    def setup(self):
+        ROWS = 100000
+        COLS = 5
+        # For tests using .head(), create an initial dataframe with this many times
+        # more rows
+        HEAD_ROW_MULTIPLIER = 10
+
+        self.df_standard_index = self._create_df(ROWS, COLS)
+
+        self.df_custom_index_then_head = (
+            self._create_df(ROWS * HEAD_ROW_MULTIPLIER, COLS)
+            .set_index(["index1", "index2", "index3"])
+            .head(ROWS)
+        )
+
+        self.df_head_then_custom_index = (
+            self._create_df(ROWS * HEAD_ROW_MULTIPLIER, COLS)
+            .head(ROWS)
+            .set_index(["index1", "index2", "index3"])
+        )
+
+    def time_standard_index(self):
+        self.df_standard_index.to_csv(self.fname)
+
+    def time_multiindex(self):
+        self.df_head_then_custom_index.to_csv(self.fname)
+
+    def time_head_of_multiindex(self):
+        self.df_custom_index_then_head.to_csv(self.fname)
+
+
 class StringIORewind:
     def data(self, stringio_object):
         stringio_object.seek(0)
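For readers who want to exercise the new benchmark's I/O path outside of asv, a standalone sketch is below; sizes are scaled down, and np.int (deprecated since NumPy 1.20 and removed in 1.24) is replaced with np.int64, so this is not a verbatim copy of the class above.

    import numpy as np
    from pandas import DataFrame

    rows, cols = 1_000, 5
    index_cols = {
        "index1": np.random.randint(0, rows, rows),
        "index2": np.full(rows, 1, dtype=np.int64),  # np.int in the diff is deprecated upstream
        "index3": np.full(rows, 1, dtype=np.int64),
    }
    data_cols = {f"col{i}": np.random.uniform(0, 100000.0, rows) for i in range(cols)}
    df = DataFrame({**index_cols, **data_cols})

    # The three timed variants: default RangeIndex, .head() before set_index,
    # and set_index before .head().
    df.to_csv("__test__.csv")
    df.head(rows // 2).set_index(["index1", "index2", "index3"]).to_csv("__test__.csv")
    df.set_index(["index1", "index2", "index3"]).head(rows // 2).to_csv("__test__.csv")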

asv_bench/benchmarks/reshape.py

Lines changed: 2 additions & 1 deletion
@@ -5,6 +5,7 @@

 import pandas as pd
 from pandas import DataFrame, MultiIndex, date_range, melt, wide_to_long
+from pandas.api.types import CategoricalDtype


 class Melt:
@@ -196,7 +197,7 @@ def setup(self):
         categories = list(string.ascii_letters[:12])
         s = pd.Series(
             np.random.choice(categories, size=1000000),
-            dtype=pd.api.types.CategoricalDtype(categories),
+            dtype=CategoricalDtype(categories),
         )
         self.s = s
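For context, a minimal sketch (not part of the commit) showing that the two spellings name the same dtype class, so the benchmark change is purely an import-style cleanup:

    import pandas as pd
    from pandas.api.types import CategoricalDtype

    categories = list("abcdefghijkl")
    assert CategoricalDtype(categories) == pd.api.types.CategoricalDtype(categories)

    s = pd.Series(["a", "b", "a"], dtype=CategoricalDtype(categories))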

asv_bench/benchmarks/series_methods.py

Lines changed: 33 additions & 12 deletions
@@ -25,17 +25,32 @@ def time_constructor(self, data):

 class IsIn:

-    params = ["int64", "uint64", "object"]
+    params = ["int64", "uint64", "object", "Int64"]
     param_names = ["dtype"]

     def setup(self, dtype):
-        self.s = Series(np.random.randint(1, 10, 100000)).astype(dtype)
+        N = 10000
+        self.s = Series(np.random.randint(1, 10, N)).astype(dtype)
         self.values = [1, 2]

     def time_isin(self, dtypes):
         self.s.isin(self.values)


+class IsInBoolean:
+
+    params = ["boolean", "bool"]
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        N = 10000
+        self.s = Series(np.random.randint(0, 2, N)).astype(dtype)
+        self.values = [True, False]
+
+    def time_isin(self, dtypes):
+        self.s.isin(self.values)
+
+
 class IsInDatetime64:
     def setup(self):
         dti = date_range(
@@ -59,21 +74,27 @@ def time_isin_empty(self):


 class IsInFloat64:
-    def setup(self):
-        self.small = Series([1, 2], dtype=np.float64)
-        self.many_different_values = np.arange(10 ** 6, dtype=np.float64)
-        self.few_different_values = np.zeros(10 ** 7, dtype=np.float64)
-        self.only_nans_values = np.full(10 ** 7, np.nan, dtype=np.float64)

-    def time_isin_many_different(self):
+    params = [np.float64, "Float64"]
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        N_many = 10 ** 5
+        N_few = 10 ** 6
+        self.small = Series([1, 2], dtype=dtype)
+        self.many_different_values = np.arange(N_many, dtype=np.float64)
+        self.few_different_values = np.zeros(N_few, dtype=np.float64)
+        self.only_nans_values = np.full(N_few, np.nan, dtype=np.float64)
+
+    def time_isin_many_different(self, dtypes):
         # runtime is dominated by creation of the lookup-table
         self.small.isin(self.many_different_values)

-    def time_isin_few_different(self):
+    def time_isin_few_different(self, dtypes):
         # runtime is dominated by creation of the lookup-table
         self.small.isin(self.few_different_values)

-    def time_isin_nan_values(self):
+    def time_isin_nan_values(self, dtypes):
         # runtime is dominated by creation of the lookup-table
         self.small.isin(self.few_different_values)

@@ -114,7 +135,7 @@ def time_isin_long_series_long_values_floats(self):

 class IsInLongSeriesLookUpDominates:
     params = [
-        ["int64", "int32", "float64", "float32", "object"],
+        ["int64", "int32", "float64", "float32", "object", "Int64", "Float64"],
         [5, 1000],
         ["random_hits", "random_misses", "monotone_hits", "monotone_misses"],
     ]
@@ -141,7 +162,7 @@ def time_isin(self, dtypes, MaxNumber, series_type):

 class IsInLongSeriesValuesDominate:
     params = [
-        ["int64", "int32", "float64", "float32", "object"],
+        ["int64", "int32", "float64", "float32", "object", "Int64", "Float64"],
         ["random", "monotone"],
     ]
     param_names = ["dtype", "series_type"]
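A short sketch (not from the commit) of the call the new parametrizations exercise — Series.isin on the nullable "Int64", "Float64", and "boolean" dtypes rather than their NumPy counterparts:

    import pandas as pd

    s_int = pd.Series([1, 2, None], dtype="Int64")             # masked (nullable) integers
    s_bool = pd.Series([True, False, None], dtype="boolean")   # masked booleans

    s_int.isin([1, 2])
    s_bool.isin([True])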

doc/source/_static/css/pandas.css

Lines changed: 7 additions & 0 deletions
@@ -1,3 +1,10 @@
+/* Override some aspects of the pydata-sphinx-theme */
+
+:root {
+  /* Use softer blue from bootstrap's default info color */
+  --color-info: 23, 162, 184;
+}
+
 /* Getting started index page */

 .intro-card {

doc/source/conf.py

Lines changed: 1 addition & 1 deletion
@@ -164,7 +164,7 @@

 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
-# language = None
+language = "en"

 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:

doc/source/getting_started/comparison/comparison_with_spreadsheets.rst

Lines changed: 21 additions & 7 deletions
@@ -89,13 +89,6 @@ Both `Excel <https://support.microsoft.com/en-us/office/import-data-from-externa
 and :ref:`pandas <10min_tut_02_read_write>` can import data from various sources in various
 formats.

-Excel files
-'''''''''''
-
-Excel opens `various Excel file formats <https://support.microsoft.com/en-us/office/file-formats-that-are-supported-in-excel-0943ff2c-6014-4e8d-aaea-b83d51d46247>`_
-by double-clicking them, or using `the Open menu <https://support.microsoft.com/en-us/office/open-files-from-the-file-menu-97f087d8-3136-4485-8e86-c5b12a8c4176>`_.
-In pandas, you use :ref:`special methods for reading and writing from/to Excel files <io.excel>`.
-
 CSV
 '''

@@ -125,6 +118,27 @@ would be:
    # alternatively, read_table is an alias to read_csv with tab delimiter
    tips = pd.read_table("tips.csv", header=None)

+Excel files
+'''''''''''
+
+Excel opens `various Excel file formats <https://support.microsoft.com/en-us/office/file-formats-that-are-supported-in-excel-0943ff2c-6014-4e8d-aaea-b83d51d46247>`_
+by double-clicking them, or using `the Open menu <https://support.microsoft.com/en-us/office/open-files-from-the-file-menu-97f087d8-3136-4485-8e86-c5b12a8c4176>`_.
+In pandas, you use :ref:`special methods for reading and writing from/to Excel files <io.excel>`.
+
+Let's first :ref:`create a new Excel file <io.excel_writer>` based on the ``tips`` dataframe in the above example:
+
+.. code-block:: python
+
+   tips.to_excel("./tips.xlsx")
+
+Should you wish to subsequently access the data in the ``tips.xlsx`` file, you can read it into your module using
+
+.. code-block:: python
+
+   tips_df = pd.read_excel("./tips.xlsx", index_col=0)
+
+You have just read in an Excel file using pandas!
+

 Limiting output
 ~~~~~~~~~~~~~~~
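Pulling the relocated section together, the documented round trip amounts to the following sketch (it assumes a local tips.csv and an installed Excel engine such as openpyxl):

    import pandas as pd

    tips = pd.read_csv("tips.csv")                        # the tips dataset from the CSV section
    tips.to_excel("./tips.xlsx")                          # write; needs an Excel writer engine
    tips_df = pd.read_excel("./tips.xlsx", index_col=0)   # read it back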

doc/source/reference/extensions.rst

Lines changed: 1 addition & 0 deletions
@@ -48,6 +48,7 @@ objects.
    api.extensions.ExtensionArray.equals
    api.extensions.ExtensionArray.factorize
    api.extensions.ExtensionArray.fillna
+   api.extensions.ExtensionArray.isin
    api.extensions.ExtensionArray.isna
    api.extensions.ExtensionArray.ravel
    api.extensions.ExtensionArray.repeat
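The newly listed method can be reached from any extension-backed array; a minimal sketch using the nullable integer array (illustrative, not taken from the commit):

    import pandas as pd

    arr = pd.array([1, 2, pd.NA], dtype="Int64")   # an ExtensionArray (IntegerArray)
    arr.isin([1, 2])                               # the ExtensionArray.isin documented above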

doc/source/user_guide/io.rst

Lines changed: 1 addition & 1 deletion
@@ -2853,7 +2853,7 @@ See the :ref:`cookbook<cookbook.excel>` for some advanced strategies.
 The `xlrd <https://xlrd.readthedocs.io/en/latest/>`__ package is now only for reading
 old-style ``.xls`` files.

-Previously, the default argument ``engine=None`` to :func:`~pandas.read_excel`
+Before pandas 1.2.0, the default argument ``engine=None`` to :func:`~pandas.read_excel`
 would result in using the ``xlrd`` engine in many cases, including new
 Excel 2007+ (``.xlsx``) files.
 If `openpyxl <https://openpyxl.readthedocs.io/en/stable/>`__ is installed,
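A sketch of making the engine explicit instead of relying on the default resolution described above (file names are placeholders; the named engines must be installed):

    import pandas as pd

    new_format = pd.read_excel("data.xlsx", engine="openpyxl")   # Excel 2007+ files
    old_format = pd.read_excel("legacy.xls", engine="xlrd")      # old-style .xls files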

doc/source/user_guide/timeseries.rst

Lines changed: 1 addition & 8 deletions
@@ -2605,17 +2605,10 @@ For example, to localize and convert a naive stamp to time zone aware.
    s_naive.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")

 Time zone information can also be manipulated using the ``astype`` method.
-This method can localize and convert time zone naive timestamps or
-convert time zone aware timestamps.
+This method can convert between different timezone-aware dtypes.

 .. ipython:: python

-   # localize and convert a naive time zone
-   s_naive.astype("datetime64[ns, US/Eastern]")
-
-   # make an aware tz naive
-   s_aware.astype("datetime64[ns]")
-
    # convert to a new time zone
    s_aware.astype("datetime64[ns, CET]")
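The removed examples used ``astype`` for naive-to-aware and aware-to-naive conversions; a short sketch of the explicit ``tz_localize``/``tz_convert`` alternatives next to the aware-to-aware ``astype`` case the section keeps:

    import pandas as pd

    s_naive = pd.Series(pd.date_range("2021-01-01", periods=3))
    s_aware = s_naive.dt.tz_localize("UTC")

    s_naive.dt.tz_localize("US/Eastern")      # localize a naive series explicitly
    s_aware.dt.tz_convert("US/Eastern")       # convert an aware series
    s_aware.dt.tz_localize(None)              # drop the time zone explicitly

    s_aware.astype("datetime64[ns, CET]")     # aware-to-aware conversion, as kept in the docs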

doc/source/whatsnew/index.rst

Lines changed: 1 addition & 0 deletions
@@ -24,6 +24,7 @@ Version 1.2
 .. toctree::
    :maxdepth: 2

+   v1.2.2
    v1.2.1
    v1.2.0

doc/source/whatsnew/v1.2.0.rst

Lines changed: 11 additions & 1 deletion
@@ -286,6 +286,8 @@ Other enhancements
 - Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`)
 - Calling a NumPy ufunc on a ``DataFrame`` with extension types now preserves the extension types when possible (:issue:`23743`)
 - Calling a binary-input NumPy ufunc on multiple ``DataFrame`` objects now aligns, matching the behavior of binary operations and ufuncs on ``Series`` (:issue:`23743`).
+  This change has been reverted in pandas 1.2.1, and the behaviour to not align DataFrames
+  is deprecated instead, see the :ref:`the 1.2.1 release notes <whatsnew_121.ufunc_deprecation>`.
 - Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`)
 - :meth:`DataFrame.to_parquet` now supports :class:`MultiIndex` for columns in parquet format (:issue:`34777`)
 - :func:`read_parquet` gained a ``use_nullable_dtypes=True`` option to use nullable dtypes that use ``pd.NA`` as missing value indicator where possible for the resulting DataFrame (default is ``False``, and only applicable for ``engine="pyarrow"``) (:issue:`31242`)
@@ -536,6 +538,14 @@ Deprecations
 - The ``inplace`` parameter of :meth:`Categorical.remove_unused_categories` is deprecated and will be removed in a future version (:issue:`37643`)
 - The ``null_counts`` parameter of :meth:`DataFrame.info` is deprecated and replaced by ``show_counts``. It will be removed in a future version (:issue:`37999`)

+**Calling NumPy ufuncs on non-aligned DataFrames**
+
+Calling NumPy ufuncs on non-aligned DataFrames changed behaviour in pandas
+1.2.0 (to align the inputs before calling the ufunc), but this change is
+reverted in pandas 1.2.1. The behaviour to not align is now deprecated instead,
+see the :ref:`the 1.2.1 release notes <whatsnew_121.ufunc_deprecation>` for
+more details.
+
 .. ---------------------------------------------------------------------------

@@ -736,7 +746,7 @@ I/O
 - Parse missing values using :func:`read_json` with ``dtype=False`` to ``NaN`` instead of ``None`` (:issue:`28501`)
 - :meth:`read_fwf` was inferring compression with ``compression=None`` which was not consistent with the other ``read_*`` functions (:issue:`37909`)
 - :meth:`DataFrame.to_html` was ignoring ``formatters`` argument for ``ExtensionDtype`` columns (:issue:`36525`)
-- Bumped minimum xarray version to 0.12.3 to avoid reference to the removed ``Panel`` class (:issue:`27101`)
+- Bumped minimum xarray version to 0.12.3 to avoid reference to the removed ``Panel`` class (:issue:`27101`, :issue:`37983`)
 - :meth:`DataFrame.to_csv` was re-opening file-like handles that also implement ``os.PathLike`` (:issue:`38125`)
 - Bug in the conversion of a sliced ``pyarrow.Table`` with missing values to a DataFrame (:issue:`38525`)
 - Bug in :func:`read_sql_table` raising a ``sqlalchemy.exc.OperationalError`` when column names contained a percentage sign (:issue:`37517`)
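A sketch of the behaviour those two notes describe (indices here are invented for illustration): on pandas 1.2.1+, calling a binary ufunc on non-aligned DataFrames is deprecated, and aligning explicitly beforehand gives the result the deprecation points toward:

    import numpy as np
    import pandas as pd

    df1 = pd.DataFrame({"a": [1, 2]}, index=[0, 1])
    df2 = pd.DataFrame({"a": [10, 20]}, index=[1, 2])

    np.add(df1, df2)              # non-aligned inputs: the deprecated path described above

    left, right = df1.align(df2)  # align explicitly first
    np.add(left, right)           # operates on matching labels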
