diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f82f209..34ddf6d 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,5 +1,8 @@ name: Run tests -on: [push] +on: + push: null + schedule: + - cron: '0 0 * * 0' # i.e. every week jobs: test: runs-on: ubuntu-latest @@ -10,6 +13,9 @@ jobs: uses: actions/setup-python@v2 with: python-version: 3.8 + - name: Upgrade pip + run: | + pip install pip --upgrade - name: Install dependencies run: | pip install -r requirements.txt @@ -17,7 +23,7 @@ jobs: run: | pip install ray git+https://github.com/modin-project/modin pip install vaex # use stable as no nightly builds and long build time - pip install pandas --no-deps --ignore-installed # use stable as erroneous nightly builds and long build time + pip install --pre --extra-index https://pypi.anaconda.org/scipy-wheels-nightly/simple pandas --ignore-installed --no-deps - name: Run tests run: | pytest tests/ -v --ci diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d17865e..0f80c48 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -9,14 +9,14 @@ repos: hooks: - id: pyupgrade - repo: https://github.com/timothycrosley/isort - rev: 5.9.2 + rev: 5.12.0 hooks: - id: isort - repo: https://github.com/psf/black rev: 22.3.0 hooks: - id: black -- repo: https://gitlab.com/pycqa/flake8 +- repo: https://github.com/pycqa/flake8 rev: 3.9.2 hooks: - id: flake8 diff --git a/tests/conftest.py b/tests/conftest.py index b4fc2fc..0dcebc4 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -39,25 +39,16 @@ def pytest_configure(config): ci_xfail_ids = [ - # https://github.com/vaexio/vaex/pull/2150 - "tests/test_signatures.py::test_column_method[vaex-size]", # https://github.com/rapidsai/cudf/issues/11320 "test_signatures.py::test_buffer_method[cudf-__dlpack__]", "test_signatures.py::test_buffer_method[cudf-__dlpack_device__]", # https://github.com/vaexio/vaex/issues/2083 # 
https://github.com/vaexio/vaex/issues/2093 # https://github.com/vaexio/vaex/issues/2113 - # https://github.com/vaexio/vaex/pull/2150 - "test_from_dataframe.py::test_from_dataframe_roundtrip[pandas-vaex]", - "test_from_dataframe.py::test_from_dataframe_roundtrip[vaex-modin]", "test_from_dataframe.py::test_from_dataframe_roundtrip[modin-vaex]", "test_from_dataframe.py::test_from_dataframe_roundtrip[vaex-pandas]", - # https://github.com/vaexio/vaex/pull/2150 - "test_column_object.py::test_size[vaex]", # https://github.com/rapidsai/cudf/issues/11389 "test_column_object.py::test_dtype[cudf]", - # https://github.com/vaexio/vaex/pull/2150 - "test_column_object.py::test_describe_categorical_on_categorical[vaex]", # Raises RuntimeError, which is technically correct, but the spec will # require TypeError soon. # See https://github.com/data-apis/dataframe-api/pull/74 @@ -75,6 +66,12 @@ def pytest_configure(config): # https://github.com/vaexio/vaex/issues/2118 # https://github.com/vaexio/vaex/issues/2139 "test_column_object.py::test_dtype[vaex]", + # SEGFAULT + "test_from_dataframe.py::test_from_dataframe_roundtrip[pandas-vaex]", + # modin flakiness + "test_from_dataframe.py::test_from_dataframe_roundtrip[modin-pandas]", + "test_from_dataframe.py::test_from_dataframe_roundtrip[modin-modin]", + "test_meta.py::test_frame_equal[modin]", ] assert not any(case in ci_xfail_ids for case in ci_skip_ids) # sanity check diff --git a/tests/wrappers.py b/tests/wrappers.py index b5f81ee..d977519 100644 --- a/tests/wrappers.py +++ b/tests/wrappers.py @@ -1,6 +1,7 @@ import re from copy import copy from typing import Any, Callable, Dict, List, NamedTuple, Set, Tuple +from unittest.mock import MagicMock import numpy as np import pytest @@ -79,7 +80,8 @@ def __repr__(self) -> str: return f"LibraryInfo(<{self.name}>)" -libinfo_params = [] +unskipped_params = [] +skipped_params = [] # pandas @@ -89,7 +91,9 @@ def __repr__(self) -> str: import pandas as pd from pandas.api.interchange import 
from_dataframe as pandas_from_dataframe except ImportError as e: - libinfo_params.append(pytest.param("pandas", marks=pytest.mark.skip(reason=e.msg))) + skipped_params.append( + pytest.param(None, id="pandas", marks=pytest.mark.skip(reason=e.msg)) + ) else: def pandas_mock_to_toplevel(mock_df: MockDataFrame) -> pd.DataFrame: @@ -112,7 +116,7 @@ def pandas_mock_to_toplevel(mock_df: MockDataFrame) -> pd.DataFrame: from_dataframe=pandas_from_dataframe, frame_equal=lambda df1, df2: df1.equals(df2), ) - libinfo_params.append(pytest.param(pandas_libinfo, id=pandas_libinfo.name)) + unskipped_params.append(pytest.param(pandas_libinfo, id=pandas_libinfo.name)) # vaex @@ -122,7 +126,9 @@ def pandas_mock_to_toplevel(mock_df: MockDataFrame) -> pd.DataFrame: import vaex from vaex.dataframe_protocol import from_dataframe_to_vaex as vaex_from_dataframe except ImportError as e: - libinfo_params.append(pytest.param("vaex", marks=pytest.mark.skip(reason=e.msg))) + skipped_params.append( + pytest.param(None, id="vaex", marks=pytest.mark.skip(reason=e.msg)) + ) else: def vaex_mock_to_toplevel(mock_df: MockDataFrame) -> TopLevelDataFrame: @@ -172,7 +178,7 @@ def vaex_frame_equal(df1, df2) -> bool: allow_zero_cols=False, allow_zero_rows=False, ) - libinfo_params.append(pytest.param(vaex_libinfo, id=vaex_libinfo.name)) + unskipped_params.append(pytest.param(vaex_libinfo, id=vaex_libinfo.name)) # modin @@ -180,22 +186,16 @@ def vaex_frame_equal(df1, df2) -> bool: try: - import modin # noqa: F401 + # ethereal hacks! ---------------------------------------------------------- + import pandas - try: - import pandas - from pandas.core import base - from pandas.errors import DataError - except ImportError: - pass - else: - # One issue modin has with pandas upstream is an outdated import of an - # exception class, so we try monkey-patching the class to the old path. 
- setattr(base, "DataError", DataError) - # modin also hard checks for supported pandas versions, so we - # monkey-patch a supported version. - setattr(pandas, "__version__", "1.4.3") + setattr(pandas, "__getattr__", MagicMock()) + if not hasattr(pandas.DataFrame, "mad"): + setattr(pandas.DataFrame, "mad", MagicMock()) + setattr(pandas.core.indexing, "__getattr__", MagicMock()) + # ------------------------------------------------------------ end of hacks. + import modin # noqa: F401 import ray # Without local_mode=True, ray does not use our monkey-patched pandas @@ -208,7 +208,9 @@ def vaex_frame_equal(df1, df2) -> bool: from modin import pandas as mpd from modin.pandas.utils import from_dataframe as modin_from_dataframe except ImportError as e: - libinfo_params.append(pytest.param("modin", marks=pytest.mark.skip(reason=e.msg))) + skipped_params.append( + pytest.param(None, id="modin", marks=pytest.mark.skip(reason=e.msg)) + ) else: def modin_mock_to_toplevel(mock_df: MockDataFrame) -> mpd.DataFrame: @@ -259,7 +261,7 @@ def modin_frame_equal(df1: mpd.DataFrame, df2: mpd.DataFrame) -> bool: # https://github.com/modin-project/modin/issues/4643 allow_zero_rows=False, ) - libinfo_params.append(pytest.param(modin_libinfo, id=modin_libinfo.name)) + unskipped_params.append(pytest.param(modin_libinfo, id=modin_libinfo.name)) # cuDF @@ -267,7 +269,7 @@ def modin_frame_equal(df1: mpd.DataFrame, df2: mpd.DataFrame) -> bool: try: - # cudf has a few issues with upstream pandas that we "fix" with a few hacks + # ethereal hacks! ---------------------------------------------------------- try: import pandas import pyarrow @@ -294,11 +296,14 @@ def register_extension_type(*a, **kw): setattr(pyarrow, "register_extension_type", register_extension_type) setattr(datetimes, "_guess_datetime_format", guess_datetime_format) setattr(pandas, "__version__", "1.4.3") + # ------------------------------------------------------------ end of hacks. 
import cudf from cudf.core.df_protocol import from_dataframe as cudf_from_dataframe except ImportError as e: - libinfo_params.append(pytest.param("cudf", marks=pytest.mark.skip(reason=e.msg))) + skipped_params.append( + pytest.param(None, id="cudf", marks=pytest.mark.skip(reason=e.msg)) + ) else: def cudf_mock_to_toplevel(mock_df: MockDataFrame) -> cudf.DataFrame: @@ -332,8 +337,11 @@ def cudf_mock_to_toplevel(mock_df: MockDataFrame) -> cudf.DataFrame: NominalDtype.UTF8, }, ) - libinfo_params.append(pytest.param(cudf_libinfo, id=cudf_libinfo.name)) + unskipped_params.append(pytest.param(cudf_libinfo, id=cudf_libinfo.name)) +libinfo_params = skipped_params + unskipped_params +ids = [p.id for p in libinfo_params] +assert len(set(ids)) == len(ids), f"ids: {ids}" # sanity check libname_to_libinfo: Dict[str, LibraryInfo] = {} for param in libinfo_params: @@ -341,3 +349,12 @@ def cudf_mock_to_toplevel(mock_df: MockDataFrame) -> cudf.DataFrame: libinfo = param.values[0] assert isinstance(libinfo, LibraryInfo) # for mypy libname_to_libinfo[libinfo.name] = libinfo + + +if __name__ == "__main__": + print(f"Wrapped libraries: {[p.id for p in unskipped_params]}") + if len(skipped_params) > 0: + print("Skipped libraries:") + for p in skipped_params: + m = next(m for m in p.marks if m.name == "skip") + print(f" {p.id}; reason={m.kwargs['reason']}")