data-apis · honno · Feb 28, 2023 · Feb 14, 2023 · Feb 14, 2023 · Feb 28, 2023
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -1,5 +1,8 @@
 name: Run tests
-on: [push]
+on:
+  push: null
+  schedule:
+    - cron: '0 0 * * 0'  # i.e. every week
 jobs:
   test:
     runs-on: ubuntu-latest
@@ -10,14 +13,17 @@ jobs:
       uses: actions/setup-python@v2
       with:
         python-version: 3.8
+    - name: Upgrade pip
+      run: |
+        pip install pip --upgrade
     - name: Install dependencies
       run: |
         pip install -r requirements.txt
     - name: Install dataframe libraries
       run: |
         pip install ray git+https://github.com/modin-project/modin
         pip install vaex  # use stable as no nightly builds and long build time
-        pip install pandas --no-deps --ignore-installed  # use stable as erroneous nightly builds and long build time
+        pip install --pre --extra-index-url https://pypi.anaconda.org/scipy-wheels-nightly/simple pandas --ignore-installed --no-deps
     - name: Run tests
       run: |
         pytest tests/ -v --ci
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -9,14 +9,14 @@ repos:
   hooks:
   - id: pyupgrade
 - repo: https://github.com/timothycrosley/isort
-  rev: 5.9.2
+  rev: 5.12.0
   hooks:
   - id: isort
 - repo: https://github.com/psf/black
   rev: 22.3.0
   hooks:
   - id: black
-- repo: https://gitlab.com/pycqa/flake8
+- repo: https://github.com/pycqa/flake8
   rev: 3.9.2
   hooks:
   - id: flake8

diff --git a/tests/conftest.py b/tests/conftest.py
@@ -39,25 +39,16 @@ def pytest_configure(config):
 
 
 ci_xfail_ids = [
-    # https://github.com/vaexio/vaex/pull/2150
-    "tests/test_signatures.py::test_column_method[vaex-size]",
     # https://github.com/rapidsai/cudf/issues/11320
     "test_signatures.py::test_buffer_method[cudf-__dlpack__]",
     "test_signatures.py::test_buffer_method[cudf-__dlpack_device__]",
     # https://github.com/vaexio/vaex/issues/2083
     # https://github.com/vaexio/vaex/issues/2093
     # https://github.com/vaexio/vaex/issues/2113
-    # https://github.com/vaexio/vaex/pull/2150
-    "test_from_dataframe.py::test_from_dataframe_roundtrip[pandas-vaex]",
-    "test_from_dataframe.py::test_from_dataframe_roundtrip[vaex-modin]",
     "test_from_dataframe.py::test_from_dataframe_roundtrip[modin-vaex]",
     "test_from_dataframe.py::test_from_dataframe_roundtrip[vaex-pandas]",
-    # https://github.com/vaexio/vaex/pull/2150
-    "test_column_object.py::test_size[vaex]",
     # https://github.com/rapidsai/cudf/issues/11389
     "test_column_object.py::test_dtype[cudf]",
-    # https://github.com/vaexio/vaex/pull/2150
-    "test_column_object.py::test_describe_categorical_on_categorical[vaex]",
     # Raises RuntimeError, which is technically correct, but the spec will
     # require TypeError soon.
     # See https://github.com/data-apis/dataframe-api/pull/74
@@ -75,6 +66,12 @@ def pytest_configure(config):
     # https://github.com/vaexio/vaex/issues/2118
     # https://github.com/vaexio/vaex/issues/2139
     "test_column_object.py::test_dtype[vaex]",
+    # SEGFAULT
+    "test_from_dataframe.py::test_from_dataframe_roundtrip[pandas-vaex]",
+    # modin flakiness
+    "test_from_dataframe.py::test_from_dataframe_roundtrip[modin-pandas]",
+    "test_from_dataframe.py::test_from_dataframe_roundtrip[modin-modin]",
+    "test_meta.py::test_frame_equal[modin]",
 ]
 assert not any(case in ci_xfail_ids for case in ci_skip_ids)  # sanity check
 

diff --git a/tests/wrappers.py b/tests/wrappers.py
@@ -1,6 +1,7 @@
 import re
 from copy import copy
 from typing import Any, Callable, Dict, List, NamedTuple, Set, Tuple
+from unittest.mock import MagicMock
 
 import numpy as np
 import pytest
@@ -79,7 +80,8 @@ def __repr__(self) -> str:
         return f"LibraryInfo(<{self.name}>)"
 
 
-libinfo_params = []
+unskipped_params = []
+skipped_params = []
 
 
 # pandas
@@ -89,7 +91,9 @@ def __repr__(self) -> str:
     import pandas as pd
     from pandas.api.interchange import from_dataframe as pandas_from_dataframe
 except ImportError as e:
-    libinfo_params.append(pytest.param("pandas", marks=pytest.mark.skip(reason=e.msg)))
+    skipped_params.append(
+        pytest.param(None, id="pandas", marks=pytest.mark.skip(reason=e.msg))
+    )
 else:
 
     def pandas_mock_to_toplevel(mock_df: MockDataFrame) -> pd.DataFrame:
@@ -112,7 +116,7 @@ def pandas_mock_to_toplevel(mock_df: MockDataFrame) -> pd.DataFrame:
         from_dataframe=pandas_from_dataframe,
         frame_equal=lambda df1, df2: df1.equals(df2),
     )
-    libinfo_params.append(pytest.param(pandas_libinfo, id=pandas_libinfo.name))
+    unskipped_params.append(pytest.param(pandas_libinfo, id=pandas_libinfo.name))
 
 
 # vaex
@@ -122,7 +126,9 @@ def pandas_mock_to_toplevel(mock_df: MockDataFrame) -> pd.DataFrame:
     import vaex
     from vaex.dataframe_protocol import from_dataframe_to_vaex as vaex_from_dataframe
 except ImportError as e:
-    libinfo_params.append(pytest.param("vaex", marks=pytest.mark.skip(reason=e.msg)))
+    skipped_params.append(
+        pytest.param(None, id="vaex", marks=pytest.mark.skip(reason=e.msg))
+    )
 else:
 
     def vaex_mock_to_toplevel(mock_df: MockDataFrame) -> TopLevelDataFrame:
@@ -172,30 +178,24 @@ def vaex_frame_equal(df1, df2) -> bool:
         allow_zero_cols=False,
         allow_zero_rows=False,
     )
-    libinfo_params.append(pytest.param(vaex_libinfo, id=vaex_libinfo.name))
+    unskipped_params.append(pytest.param(vaex_libinfo, id=vaex_libinfo.name))
 
 
 # modin
 # -----
 
 
 try:
-    import modin  # noqa: F401
+    # hacks: monkey-patch pandas before importing modin ------------------------
+    import pandas
 
-    try:
-        import pandas
-        from pandas.core import base
-        from pandas.errors import DataError
-    except ImportError:
-        pass
-    else:
-        # One issue modin has with pandas upstream is an outdated import of an
-        # exception class, so we try monkey-patching the class to the old path.
-        setattr(base, "DataError", DataError)
-        # modin also hard checks for supported pandas versions, so we
-        # monkey-patch a supported version.
-        setattr(pandas, "__version__", "1.4.3")
+    setattr(pandas, "__getattr__", MagicMock())
+    if not hasattr(pandas.DataFrame, "mad"):
+        setattr(pandas.DataFrame, "mad", MagicMock())
+    setattr(pandas.core.indexing, "__getattr__", MagicMock())
+    # ------------------------------------------------------------ end of hacks.
 
+    import modin  # noqa: F401
     import ray
 
     # Without local_mode=True, ray does not use our monkey-patched pandas
@@ -208,7 +208,9 @@ def vaex_frame_equal(df1, df2) -> bool:
     from modin import pandas as mpd
     from modin.pandas.utils import from_dataframe as modin_from_dataframe
 except ImportError as e:
-    libinfo_params.append(pytest.param("modin", marks=pytest.mark.skip(reason=e.msg)))
+    skipped_params.append(
+        pytest.param(None, id="modin", marks=pytest.mark.skip(reason=e.msg))
+    )
 else:
 
     def modin_mock_to_toplevel(mock_df: MockDataFrame) -> mpd.DataFrame:
@@ -259,15 +261,15 @@ def modin_frame_equal(df1: mpd.DataFrame, df2: mpd.DataFrame) -> bool:
         # https://github.com/modin-project/modin/issues/4643
         allow_zero_rows=False,
     )
-    libinfo_params.append(pytest.param(modin_libinfo, id=modin_libinfo.name))
+    unskipped_params.append(pytest.param(modin_libinfo, id=modin_libinfo.name))
 
 
 # cuDF
 # -----
 
 
 try:
-    # cudf has a few issues with upstream pandas that we "fix" with a few hacks
+    # hacks: monkey-patch pandas/pyarrow before importing cudf -----------------
     try:
         import pandas
         import pyarrow
@@ -294,11 +296,14 @@ def register_extension_type(*a, **kw):
         setattr(pyarrow, "register_extension_type", register_extension_type)
         setattr(datetimes, "_guess_datetime_format", guess_datetime_format)
         setattr(pandas, "__version__", "1.4.3")
+    # ------------------------------------------------------------ end of hacks.
 
     import cudf
     from cudf.core.df_protocol import from_dataframe as cudf_from_dataframe
 except ImportError as e:
-    libinfo_params.append(pytest.param("cudf", marks=pytest.mark.skip(reason=e.msg)))
+    skipped_params.append(
+        pytest.param(None, id="cudf", marks=pytest.mark.skip(reason=e.msg))
+    )
 else:
 
     def cudf_mock_to_toplevel(mock_df: MockDataFrame) -> cudf.DataFrame:
@@ -332,12 +337,24 @@ def cudf_mock_to_toplevel(mock_df: MockDataFrame) -> cudf.DataFrame:
             NominalDtype.UTF8,
         },
     )
-    libinfo_params.append(pytest.param(cudf_libinfo, id=cudf_libinfo.name))
+    unskipped_params.append(pytest.param(cudf_libinfo, id=cudf_libinfo.name))
 
+libinfo_params = skipped_params + unskipped_params
+ids = [p.id for p in libinfo_params]
+assert len(set(ids)) == len(ids), f"ids: {ids}"  # sanity check
 
 libname_to_libinfo: Dict[str, LibraryInfo] = {}
 for param in libinfo_params:
     if not any(m.name.startswith("skip") for m in param.marks):
         libinfo = param.values[0]
         assert isinstance(libinfo, LibraryInfo)  # for mypy
         libname_to_libinfo[libinfo.name] = libinfo
+
+
+if __name__ == "__main__":
+    print(f"Wrapped libraries: {[p.id for p in unskipped_params]}")
+    if skipped_params:
+        print("Skipped libraries:")
+        for param in skipped_params:
+            skip_mark = next(m for m in param.marks if m.name == "skip")
+            print(f"    {param.id}; reason={skip_mark.kwargs['reason']}")