
Commit b2046a6

Merge remote-tracking branch 'upstream/master' into bisect
2 parents 8262d59 + c322b24 commit b2046a6


222 files changed, +10217 -8930 lines changed


.pre-commit-config.yaml

Lines changed: 27 additions & 17 deletions
@@ -60,11 +60,11 @@ repos:
         entry: |
             (?x)
             # Check for imports from pandas.core.common instead of `import pandas.core.common as com`
-            from\ pandas\.core\.common\ import|
-            from\ pandas\.core\ import\ common|
+            from\ pandas\.core\.common\ import
+            |from\ pandas\.core\ import\ common

             # Check for imports from collections.abc instead of `from collections import abc`
-            from\ collections\.abc\ import
+            |from\ collections\.abc\ import

     -   id: non-standard-numpy.random-related-imports
         name: Check for non-standard numpy.random-related imports excluding pandas/_testing.py
@@ -73,24 +73,24 @@ repos:
         entry: |
             (?x)
             # Check for imports from np.random.<method> instead of `from numpy import random` or `from numpy.random import <method>`
-            from\ numpy\ import\ random|
-            from\ numpy.random\ import
+            from\ numpy\ import\ random
+            |from\ numpy.random\ import
         types: [python]
     -   id: non-standard-imports-in-tests
         name: Check for non-standard imports in test suite
         language: pygrep
         entry: |
             (?x)
             # Check for imports from pandas._testing instead of `import pandas._testing as tm`
-            from\ pandas\._testing\ import|
-            from\ pandas\ import\ _testing\ as\ tm|
+            from\ pandas\._testing\ import
+            |from\ pandas\ import\ _testing\ as\ tm

             # No direct imports from conftest
-            conftest\ import|
-            import\ conftest
+            |conftest\ import
+            |import\ conftest

             # Check for use of pandas.testing instead of tm
-            pd\.testing\.
+            |pd\.testing\.
         types: [python]
         files: ^pandas/tests/
     -   id: incorrect-code-directives
@@ -148,9 +148,8 @@ repos:
         name: Check for outdated annotation syntax and missing error codes
         entry: |
             (?x)
-            \#\ type:\ (?!ignore)|
-            \#\ type:\s?ignore(?!\[)|
-            \)\ ->\ \"
+            \#\ type:\ (?!ignore)
+            |\#\ type:\s?ignore(?!\[)
         language: pygrep
         types: [python]
     -   id: np-bool
@@ -166,9 +165,15 @@ repos:
         files: ^pandas/tests/
         exclude: |
             (?x)^
-            pandas/tests/io/excel/test_writers\.py|
-            pandas/tests/io/pytables/common\.py|
-            pandas/tests/io/pytables/test_store\.py$
+            pandas/tests/io/excel/test_writers\.py
+            |pandas/tests/io/pytables/common\.py
+            |pandas/tests/io/pytables/test_store\.py$
+    -   id: no-pandas-api-types
+        name: Check code for instances of pd.api.types
+        entry: (pd|pandas)\.api\.types\.
+        language: pygrep
+        types: [python]
+        files: ^pandas/tests/
 -   repo: https://github.com/asottile/yesqa
     rev: v1.2.2
     hooks:
@@ -184,4 +189,9 @@ repos:
     hooks:
     -   id: codespell
         types_or: [python, rst, markdown]
-        files: ^pandas/core/
+        files: ^pandas/
+        exclude: ^pandas/tests/
+-   repo: https://github.com/MarcoGorelli/no-string-hints
+    rev: v0.1.5
+    hooks:
+    -   id: no-string-hints
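One way to read the leading-pipe restructuring above (an illustrative sketch, not a rationale recorded in this commit): in a verbose pygrep pattern, a trailing `|` on the final branch leaves an empty alternative that matches every file, while putting `|` at the start of each continuation line keeps the pattern equivalent and makes that mistake harder to reintroduce when branches are edited.

    import re

    # Trailing "|" adds an empty alternative, so this pattern matches any input at all.
    trailing = re.compile(
        r"""(?x)
        from\ pandas\.core\.common\ import|
        from\ pandas\.core\ import\ common|
        """
    )
    print(bool(trailing.search("print('hello')")))  # True -- the empty branch matches

    # Leading "|" keeps exactly the two intended branches.
    leading = re.compile(
        r"""(?x)
        from\ pandas\.core\.common\ import
        |from\ pandas\.core\ import\ common
        """
    )
    print(bool(leading.search("print('hello')")))                  # False
    print(bool(leading.search("from pandas.core import common")))  # True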

asv_bench/benchmarks/io/csv.py

Lines changed: 48 additions & 0 deletions
@@ -76,6 +76,54 @@ def time_frame(self, obs):
         self.data.to_csv(self.fname)


+class ToCSVIndexes(BaseIO):
+
+    fname = "__test__.csv"
+
+    @staticmethod
+    def _create_df(rows, cols):
+        index_cols = {
+            "index1": np.random.randint(0, rows, rows),
+            "index2": np.full(rows, 1, dtype=np.int),
+            "index3": np.full(rows, 1, dtype=np.int),
+        }
+        data_cols = {
+            f"col{i}": np.random.uniform(0, 100000.0, rows) for i in range(cols)
+        }
+        df = DataFrame({**index_cols, **data_cols})
+        return df
+
+    def setup(self):
+        ROWS = 100000
+        COLS = 5
+        # For tests using .head(), create an initial dataframe with this many times
+        # more rows
+        HEAD_ROW_MULTIPLIER = 10
+
+        self.df_standard_index = self._create_df(ROWS, COLS)
+
+        self.df_custom_index_then_head = (
+            self._create_df(ROWS * HEAD_ROW_MULTIPLIER, COLS)
+            .set_index(["index1", "index2", "index3"])
+            .head(ROWS)
+        )
+
+        self.df_head_then_custom_index = (
+            self._create_df(ROWS * HEAD_ROW_MULTIPLIER, COLS)
+            .head(ROWS)
+            .set_index(["index1", "index2", "index3"])
+        )
+
+    def time_standard_index(self):
+        self.df_standard_index.to_csv(self.fname)
+
+    def time_multiindex(self):
+        self.df_head_then_custom_index.to_csv(self.fname)
+
+    def time_head_of_multiindex(self):
+        self.df_custom_index_then_head.to_csv(self.fname)
+
+
 class StringIORewind:
     def data(self, stringio_object):
         stringio_object.seek(0)
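For readers who want to exercise the new benchmark's I/O path outside of asv, a standalone sketch is below; sizes are scaled down, and np.int (deprecated since NumPy 1.20 and removed in 1.24) is replaced with np.int64, so this is not a verbatim copy of the class above.

    import numpy as np
    from pandas import DataFrame

    rows, cols = 1_000, 5
    index_cols = {
        "index1": np.random.randint(0, rows, rows),
        "index2": np.full(rows, 1, dtype=np.int64),  # np.int in the diff is deprecated upstream
        "index3": np.full(rows, 1, dtype=np.int64),
    }
    data_cols = {f"col{i}": np.random.uniform(0, 100000.0, rows) for i in range(cols)}
    df = DataFrame({**index_cols, **data_cols})

    # The three timed variants: default RangeIndex, .head() before set_index,
    # and set_index before .head().
    df.to_csv("__test__.csv")
    df.head(rows // 2).set_index(["index1", "index2", "index3"]).to_csv("__test__.csv")
    df.set_index(["index1", "index2", "index3"]).head(rows // 2).to_csv("__test__.csv")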

asv_bench/benchmarks/reshape.py

Lines changed: 2 additions & 1 deletion
@@ -5,6 +5,7 @@

 import pandas as pd
 from pandas import DataFrame, MultiIndex, date_range, melt, wide_to_long
+from pandas.api.types import CategoricalDtype


 class Melt:
@@ -196,7 +197,7 @@ def setup(self):
         categories = list(string.ascii_letters[:12])
         s = pd.Series(
             np.random.choice(categories, size=1000000),
-            dtype=pd.api.types.CategoricalDtype(categories),
+            dtype=CategoricalDtype(categories),
         )
         self.s = s
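For context, a minimal sketch (not part of the commit) showing that the two spellings name the same dtype class, so the benchmark change is purely an import-style cleanup:

    import pandas as pd
    from pandas.api.types import CategoricalDtype

    categories = list("abcdefghijkl")
    assert CategoricalDtype(categories) == pd.api.types.CategoricalDtype(categories)

    s = pd.Series(["a", "b", "a"], dtype=CategoricalDtype(categories))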

asv_bench/benchmarks/series_methods.py

Lines changed: 33 additions & 12 deletions
@@ -25,17 +25,32 @@ def time_constructor(self, data):

 class IsIn:

-    params = ["int64", "uint64", "object"]
+    params = ["int64", "uint64", "object", "Int64"]
     param_names = ["dtype"]

     def setup(self, dtype):
-        self.s = Series(np.random.randint(1, 10, 100000)).astype(dtype)
+        N = 10000
+        self.s = Series(np.random.randint(1, 10, N)).astype(dtype)
         self.values = [1, 2]

     def time_isin(self, dtypes):
         self.s.isin(self.values)


+class IsInBoolean:
+
+    params = ["boolean", "bool"]
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        N = 10000
+        self.s = Series(np.random.randint(0, 2, N)).astype(dtype)
+        self.values = [True, False]
+
+    def time_isin(self, dtypes):
+        self.s.isin(self.values)
+
+
 class IsInDatetime64:
     def setup(self):
         dti = date_range(
@@ -59,21 +74,27 @@ def time_isin_empty(self):


 class IsInFloat64:
-    def setup(self):
-        self.small = Series([1, 2], dtype=np.float64)
-        self.many_different_values = np.arange(10 ** 6, dtype=np.float64)
-        self.few_different_values = np.zeros(10 ** 7, dtype=np.float64)
-        self.only_nans_values = np.full(10 ** 7, np.nan, dtype=np.float64)

-    def time_isin_many_different(self):
+    params = [np.float64, "Float64"]
+    param_names = ["dtype"]
+
+    def setup(self, dtype):
+        N_many = 10 ** 5
+        N_few = 10 ** 6
+        self.small = Series([1, 2], dtype=dtype)
+        self.many_different_values = np.arange(N_many, dtype=np.float64)
+        self.few_different_values = np.zeros(N_few, dtype=np.float64)
+        self.only_nans_values = np.full(N_few, np.nan, dtype=np.float64)
+
+    def time_isin_many_different(self, dtypes):
         # runtime is dominated by creation of the lookup-table
         self.small.isin(self.many_different_values)

-    def time_isin_few_different(self):
+    def time_isin_few_different(self, dtypes):
         # runtime is dominated by creation of the lookup-table
         self.small.isin(self.few_different_values)

-    def time_isin_nan_values(self):
+    def time_isin_nan_values(self, dtypes):
         # runtime is dominated by creation of the lookup-table
         self.small.isin(self.few_different_values)

@@ -114,7 +135,7 @@ def time_isin_long_series_long_values_floats(self):

 class IsInLongSeriesLookUpDominates:
     params = [
-        ["int64", "int32", "float64", "float32", "object"],
+        ["int64", "int32", "float64", "float32", "object", "Int64", "Float64"],
         [5, 1000],
         ["random_hits", "random_misses", "monotone_hits", "monotone_misses"],
     ]
@@ -141,7 +162,7 @@ def time_isin(self, dtypes, MaxNumber, series_type):

 class IsInLongSeriesValuesDominate:
     params = [
-        ["int64", "int32", "float64", "float32", "object"],
+        ["int64", "int32", "float64", "float32", "object", "Int64", "Float64"],
         ["random", "monotone"],
     ]
     param_names = ["dtype", "series_type"]
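A short sketch (not from the commit) of the call the new parametrizations exercise — Series.isin on the nullable "Int64", "Float64", and "boolean" dtypes rather than their NumPy counterparts:

    import pandas as pd

    s_int = pd.Series([1, 2, None], dtype="Int64")             # masked (nullable) integers
    s_bool = pd.Series([True, False, None], dtype="boolean")   # masked booleans

    s_int.isin([1, 2])
    s_bool.isin([True])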

doc/source/_static/css/pandas.css

Lines changed: 7 additions & 0 deletions
@@ -1,3 +1,10 @@
+/* Override some aspects of the pydata-sphinx-theme */
+
+:root {
+  /* Use softer blue from bootstrap's default info color */
+  --color-info: 23, 162, 184;
+}
+
 /* Getting started index page */

 .intro-card {

doc/source/conf.py

Lines changed: 1 addition & 1 deletion
@@ -164,7 +164,7 @@

 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
-# language = None
+language = "en"

 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:

doc/source/getting_started/comparison/comparison_with_spreadsheets.rst

Lines changed: 21 additions & 7 deletions
@@ -89,13 +89,6 @@ Both `Excel <https://support.microsoft.com/en-us/office/import-data-from-externa
 and :ref:`pandas <10min_tut_02_read_write>` can import data from various sources in various
 formats.

-Excel files
-'''''''''''
-
-Excel opens `various Excel file formats <https://support.microsoft.com/en-us/office/file-formats-that-are-supported-in-excel-0943ff2c-6014-4e8d-aaea-b83d51d46247>`_
-by double-clicking them, or using `the Open menu <https://support.microsoft.com/en-us/office/open-files-from-the-file-menu-97f087d8-3136-4485-8e86-c5b12a8c4176>`_.
-In pandas, you use :ref:`special methods for reading and writing from/to Excel files <io.excel>`.
-
 CSV
 '''

@@ -125,6 +118,27 @@ would be:
    # alternatively, read_table is an alias to read_csv with tab delimiter
    tips = pd.read_table("tips.csv", header=None)

+Excel files
+'''''''''''
+
+Excel opens `various Excel file formats <https://support.microsoft.com/en-us/office/file-formats-that-are-supported-in-excel-0943ff2c-6014-4e8d-aaea-b83d51d46247>`_
+by double-clicking them, or using `the Open menu <https://support.microsoft.com/en-us/office/open-files-from-the-file-menu-97f087d8-3136-4485-8e86-c5b12a8c4176>`_.
+In pandas, you use :ref:`special methods for reading and writing from/to Excel files <io.excel>`.
+
+Let's first :ref:`create a new Excel file <io.excel_writer>` based on the ``tips`` dataframe in the above example:
+
+.. code-block:: python
+
+   tips.to_excel("./tips.xlsx")
+
+Should you wish to subsequently access the data in the ``tips.xlsx`` file, you can read it into your module using
+
+.. code-block:: python
+
+   tips_df = pd.read_excel("./tips.xlsx", index_col=0)
+
+You have just read in an Excel file using pandas!
+

 Limiting output
 ~~~~~~~~~~~~~~~
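Pulling the relocated section together, the documented round trip amounts to the following sketch (it assumes a local tips.csv and an installed Excel engine such as openpyxl):

    import pandas as pd

    tips = pd.read_csv("tips.csv")                        # the tips dataset from the CSV section
    tips.to_excel("./tips.xlsx")                          # write; needs an Excel writer engine
    tips_df = pd.read_excel("./tips.xlsx", index_col=0)   # read it back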

doc/source/reference/extensions.rst

Lines changed: 1 addition & 0 deletions
@@ -48,6 +48,7 @@ objects.
    api.extensions.ExtensionArray.equals
    api.extensions.ExtensionArray.factorize
    api.extensions.ExtensionArray.fillna
+   api.extensions.ExtensionArray.isin
    api.extensions.ExtensionArray.isna
    api.extensions.ExtensionArray.ravel
    api.extensions.ExtensionArray.repeat
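The newly listed method can be reached from any extension-backed array; a minimal sketch using the nullable integer array (illustrative, not taken from the commit):

    import pandas as pd

    arr = pd.array([1, 2, pd.NA], dtype="Int64")   # an ExtensionArray (IntegerArray)
    arr.isin([1, 2])                               # the ExtensionArray.isin documented above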

doc/source/user_guide/io.rst

Lines changed: 1 addition & 1 deletion
@@ -2853,7 +2853,7 @@ See the :ref:`cookbook<cookbook.excel>` for some advanced strategies.
 The `xlrd <https://xlrd.readthedocs.io/en/latest/>`__ package is now only for reading
 old-style ``.xls`` files.

-Previously, the default argument ``engine=None`` to :func:`~pandas.read_excel`
+Before pandas 1.2.0, the default argument ``engine=None`` to :func:`~pandas.read_excel`
 would result in using the ``xlrd`` engine in many cases, including new
 Excel 2007+ (``.xlsx``) files.
 If `openpyxl <https://openpyxl.readthedocs.io/en/stable/>`__ is installed,
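A sketch of making the engine explicit instead of relying on the default resolution described above (file names are placeholders; the named engines must be installed):

    import pandas as pd

    new_format = pd.read_excel("data.xlsx", engine="openpyxl")   # Excel 2007+ files
    old_format = pd.read_excel("legacy.xls", engine="xlrd")      # old-style .xls files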

doc/source/user_guide/timeseries.rst

Lines changed: 1 addition & 8 deletions
@@ -2605,17 +2605,10 @@ For example, to localize and convert a naive stamp to time zone aware.
    s_naive.dt.tz_localize("UTC").dt.tz_convert("US/Eastern")

 Time zone information can also be manipulated using the ``astype`` method.
-This method can localize and convert time zone naive timestamps or
-convert time zone aware timestamps.
+This method can convert between different timezone-aware dtypes.

 .. ipython:: python

-   # localize and convert a naive time zone
-   s_naive.astype("datetime64[ns, US/Eastern]")
-
-   # make an aware tz naive
-   s_aware.astype("datetime64[ns]")
-
    # convert to a new time zone
    s_aware.astype("datetime64[ns, CET]")
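The removed examples used ``astype`` for naive-to-aware and aware-to-naive conversions; a short sketch of the explicit ``tz_localize``/``tz_convert`` alternatives next to the aware-to-aware ``astype`` case the section keeps:

    import pandas as pd

    s_naive = pd.Series(pd.date_range("2021-01-01", periods=3))
    s_aware = s_naive.dt.tz_localize("UTC")

    s_naive.dt.tz_localize("US/Eastern")      # localize a naive series explicitly
    s_aware.dt.tz_convert("US/Eastern")       # convert an aware series
    s_aware.dt.tz_localize(None)              # drop the time zone explicitly

    s_aware.astype("datetime64[ns, CET]")     # aware-to-aware conversion, as kept in the docs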

doc/source/whatsnew/index.rst

Lines changed: 1 addition & 0 deletions
@@ -24,6 +24,7 @@ Version 1.2
 .. toctree::
    :maxdepth: 2

+   v1.2.2
    v1.2.1
    v1.2.0

doc/source/whatsnew/v1.2.0.rst

Lines changed: 11 additions & 1 deletion
@@ -286,6 +286,8 @@ Other enhancements
 - Added methods :meth:`IntegerArray.prod`, :meth:`IntegerArray.min`, and :meth:`IntegerArray.max` (:issue:`33790`)
 - Calling a NumPy ufunc on a ``DataFrame`` with extension types now preserves the extension types when possible (:issue:`23743`)
 - Calling a binary-input NumPy ufunc on multiple ``DataFrame`` objects now aligns, matching the behavior of binary operations and ufuncs on ``Series`` (:issue:`23743`).
+  This change has been reverted in pandas 1.2.1, and the behaviour to not align DataFrames
+  is deprecated instead, see the :ref:`the 1.2.1 release notes <whatsnew_121.ufunc_deprecation>`.
 - Where possible :meth:`RangeIndex.difference` and :meth:`RangeIndex.symmetric_difference` will return :class:`RangeIndex` instead of :class:`Int64Index` (:issue:`36564`)
 - :meth:`DataFrame.to_parquet` now supports :class:`MultiIndex` for columns in parquet format (:issue:`34777`)
 - :func:`read_parquet` gained a ``use_nullable_dtypes=True`` option to use nullable dtypes that use ``pd.NA`` as missing value indicator where possible for the resulting DataFrame (default is ``False``, and only applicable for ``engine="pyarrow"``) (:issue:`31242`)
@@ -536,6 +538,14 @@ Deprecations
 - The ``inplace`` parameter of :meth:`Categorical.remove_unused_categories` is deprecated and will be removed in a future version (:issue:`37643`)
 - The ``null_counts`` parameter of :meth:`DataFrame.info` is deprecated and replaced by ``show_counts``. It will be removed in a future version (:issue:`37999`)

+**Calling NumPy ufuncs on non-aligned DataFrames**
+
+Calling NumPy ufuncs on non-aligned DataFrames changed behaviour in pandas
+1.2.0 (to align the inputs before calling the ufunc), but this change is
+reverted in pandas 1.2.1. The behaviour to not align is now deprecated instead,
+see the :ref:`the 1.2.1 release notes <whatsnew_121.ufunc_deprecation>` for
+more details.
+
 .. ---------------------------------------------------------------------------

@@ -736,7 +746,7 @@ I/O
 - Parse missing values using :func:`read_json` with ``dtype=False`` to ``NaN`` instead of ``None`` (:issue:`28501`)
 - :meth:`read_fwf` was inferring compression with ``compression=None`` which was not consistent with the other ``read_*`` functions (:issue:`37909`)
 - :meth:`DataFrame.to_html` was ignoring ``formatters`` argument for ``ExtensionDtype`` columns (:issue:`36525`)
-- Bumped minimum xarray version to 0.12.3 to avoid reference to the removed ``Panel`` class (:issue:`27101`)
+- Bumped minimum xarray version to 0.12.3 to avoid reference to the removed ``Panel`` class (:issue:`27101`, :issue:`37983`)
 - :meth:`DataFrame.to_csv` was re-opening file-like handles that also implement ``os.PathLike`` (:issue:`38125`)
 - Bug in the conversion of a sliced ``pyarrow.Table`` with missing values to a DataFrame (:issue:`38525`)
 - Bug in :func:`read_sql_table` raising a ``sqlalchemy.exc.OperationalError`` when column names contained a percentage sign (:issue:`37517`)
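A sketch of the behaviour those two notes describe (indices here are invented for illustration): on pandas 1.2.1+, calling a binary ufunc on non-aligned DataFrames is deprecated, and aligning explicitly beforehand gives the result the deprecation points toward:

    import numpy as np
    import pandas as pd

    df1 = pd.DataFrame({"a": [1, 2]}, index=[0, 1])
    df2 = pd.DataFrame({"a": [10, 20]}, index=[1, 2])

    np.add(df1, df2)              # non-aligned inputs: the deprecated path described above

    left, right = df1.align(df2)  # align explicitly first
    np.add(left, right)           # operates on matching labels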
