diff --git a/.github/workflows/main.yaml b/.github/workflows/main.yaml index f1b0e614f..5ee46239a 100644 --- a/.github/workflows/main.yaml +++ b/.github/workflows/main.yaml @@ -8,56 +8,47 @@ on: jobs: linux: - name: ${{ matrix.TOXENV }}-pytest + name: ${{ matrix.PY }}-pytest runs-on: ubuntu-latest strategy: fail-fast: false matrix: - TOXENV: [py38, py39, py310, s3fs, gcsfs] + PY: ["3.8", "3.9", "3.10"] env: - TOXENV: ${{ matrix.TOXENV }} CIRUN: true steps: - name: Checkout uses: actions/checkout@v2 - - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v2 + - name: Setup conda + uses: mamba-org/provision-with-micromamba@main with: - auto-update-conda: true - auto-activate-base: false - activate-environment: test_env environment-file: ci/environment-py38.yml + extra-specs: python=${{ matrix.PY }} - name: Run Tests shell: bash -l {0} run: | - tox -v + pytest -v win: - name: ${{ matrix.TOXENV }}-pytest-win + name: pytest-win runs-on: windows-2019 strategy: fail-fast: false - matrix: - TOXENV: [py39] env: - TOXENV: ${{ matrix.TOXENV }} CIRUN: true steps: - name: Checkout uses: actions/checkout@v2 - - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v2 + - name: Setup conda + uses: mamba-org/provision-with-micromamba@main with: - auto-update-conda: true - auto-activate-base: false - activate-environment: test_env environment-file: ci/environment-win.yml - name: Run Tests @@ -81,12 +72,9 @@ jobs: - name: Checkout uses: actions/checkout@v2 - - name: Setup Miniconda - uses: conda-incubator/setup-miniconda@v2 + - name: Setup conda + uses: mamba-org/provision-with-micromamba@main with: - auto-update-conda: true - auto-activate-base: false - activate-environment: test_env environment-file: ci/environment-downstream.yml - name: Local install @@ -97,10 +85,13 @@ jobs: git tag -a 3000 -m "fake" pip install -e . 
+ - name: Clone s3fs + shell: bash -l {0} + run: git clone https://github.com/fsspec/s3fs + - name: Install s3fs shell: bash -l {0} run: | - git clone https://github.com/fsspec/s3fs pip install -e ./s3fs --no-deps - name: Run fsspec tests @@ -117,3 +108,40 @@ jobs: shell: bash -l {0} run: | pytest -v dask/dask/bytes + + fsspec_friends: + name: ${{ matrix.FRIEND }}-pytest + runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + FRIEND: [gcsfs, s3fs] + + env: + CIRUN: true + BOTO_CONFIG: /dev/null + AWS_ACCESS_KEY_ID: foobar_key + AWS_SECRET_ACCESS_KEY: foobar_secret + + steps: + - name: Checkout + uses: actions/checkout@v2 + + - name: Setup conda + uses: mamba-org/provision-with-micromamba@main + with: + environment-file: ci/environment-friends.yml + + - name: Clone + shell: bash -l {0} + run: git clone https://github.com/fsspec/${{ matrix.FRIEND }} + + - name: Install + shell: bash -l {0} + run: | + pip install -e . --no-deps + pip install -e ./${{ matrix.FRIEND }} --no-deps + + - name: Test + shell: bash -l {0} + run: pytest -v ${{ matrix.FRIEND }} diff --git a/README.md b/README.md index 0dbf41295..b309858ea 100644 --- a/README.md +++ b/README.md @@ -33,26 +33,25 @@ Please refer to [RTD](https://filesystem-spec.readthedocs.io/en/latest/?badge=la ## Develop -fsspec uses [tox](https://tox.readthedocs.io/en/latest/) and -[tox-conda](https://github.com/tox-dev/tox-conda) to manage dev and test -environments. First, install conda with tox and tox-conda in a base environment -(eg. ``conda install -c conda-forge tox tox-conda``). Calls to ``tox`` can then be -used to configure a development environment and run tests. - -First, setup a development conda environment via ``tox -e {env}`` where ``env`` is one of ``{py38,py39,py310}``. -This will install fsspec dependencies, test & dev tools, and install fsspec in develop -mode. You may activate the dev environment under ``.tox/{env}`` via ``conda activate .tox/{env}``. +fsspec uses GitHub Actions for CI. 
Environment files can be found +in the "ci/" directory. Note that the main environment is called "py38", +but it is expected that the version of Python installed be adjustable at +CI runtime. For local use, pick a version suitable for you. ### Testing Tests can be run in the dev environment, if activated, via ``pytest fsspec``. -Alternatively, the full fsspec test suite can also be run via ``tox``, which will -also build the appropriate environment (see above), with the environment specified -by the TOXENV environment variable. - The full fsspec suite requires a system-level docker, docker-compose, and fuse -installation. +installation. If only making changes to one backend implementation, it is +not generally necessary to run all tests locally. + +It is expected that contributors ensure that any change to fsspec does not +cause issues or regressions either for other fsspec-related packages such +as gcsfs and s3fs, or for downstream users of fsspec. The "downstream" CI +run and corresponding environment file run a set of tests from the dask +test suite, and very minimal tests against pandas and zarr from the test_downstream.py +module in this repo. ### Code Formatting @@ -62,7 +61,6 @@ Run ``black fsspec`` from the root of the filesystem_spec repository to auto-format your code. Additionally, many editors have plugins that will apply ``black`` as you edit files. ``black`` is included in the ``tox`` environments. - Optionally, you may wish to setup [pre-commit hooks](https://pre-commit.com) to automatically run ``black`` when you make a git commit. 
Run ``pre-commit install --install-hooks`` from the root of the diff --git a/ci/environment-downstream.yml b/ci/environment-downstream.yml index 923ce8806..20230b75c 100644 --- a/ci/environment-downstream.yml +++ b/ci/environment-downstream.yml @@ -1,7 +1,6 @@ name: test_env channels: - conda-forge - - defaults dependencies: - python=3.9 - dask diff --git a/ci/environment-friends.yml b/ci/environment-friends.yml new file mode 100644 index 000000000..f150f407f --- /dev/null +++ b/ci/environment-friends.yml @@ -0,0 +1,27 @@ +name: test_env +channels: + - conda-forge +dependencies: + - python=3.9 + - pytest + - pytest-asyncio + - pytest-benchmark + - pytest-cov + - pytest-mock + - pytest-vcr + - pip + - ujson + - requests + - decorator + - google-auth + - aiohttp + - google-auth-oauthlib + - flake8 + - black + - google-cloud-core + - google-api-core + - google-api-python-client + - httpretty + - aiobotocore + - "moto>=4" + - flask diff --git a/ci/environment-py38.yml b/ci/environment-py38.yml index 2573be43c..6f9ae68b3 100644 --- a/ci/environment-py38.yml +++ b/ci/environment-py38.yml @@ -1,8 +1,40 @@ name: test_env channels: - conda-forge - - defaults dependencies: - - python=3.8 - - tox - - tox-conda + # - python=3.8 # set by env + - pip + - paramiko + - requests + - zstandard + - python-snappy + - aiohttp + - lz4 + - distributed + - dask + - pyarrow + - panel + - notebook + - pygit2 + - git + - s3fs + - pyftpdlib + - cloudpickle + - pytest + - pytest-asyncio + - pytest-benchmark + - pytest-cov + - pytest-mock + - pytest-vcr + - py + - fusepy + - tomli < 2 + - msgpack-python + - python-libarchive-c + - numpy + - nomkl + - jinja2 + - tqdm + - pip: + - hadoop-test-cluster + - smbprotocol diff --git a/ci/environment-win.yml b/ci/environment-win.yml index 1b72f97fd..e3fe7319f 100644 --- a/ci/environment-win.yml +++ b/ci/environment-win.yml @@ -1,8 +1,8 @@ name: test_env channels: - conda-forge - - defaults dependencies: + - python=3.9 - aiohttp - pip - 
requests diff --git a/fsspec/implementations/reference.py b/fsspec/implementations/reference.py index f1efb7446..2e4d9388c 100644 --- a/fsspec/implementations/reference.py +++ b/fsspec/implementations/reference.py @@ -21,6 +21,16 @@ logger = logging.getLogger("fsspec.reference") +class ReferenceNotReachable(RuntimeError): + def __init__(self, reference, target, *args): + super().__init__(*args) + self.reference = reference + self.target = target + + def __str__(self): + return f'Reference "{self.reference}" failed to fetch target {self.target}' + + def _first(d): return list(d.values())[0] @@ -213,7 +223,10 @@ def loop(self): def _cat_common(self, path, start=None, end=None): path = self._strip_protocol(path) logger.debug(f"cat: {path}") - part = self.references[path] + try: + part = self.references[path] + except KeyError: + raise FileNotFoundError(path) if isinstance(part, str): part = part.encode() if isinstance(part, bytes): @@ -254,7 +267,10 @@ async def _cat_file(self, path, start=None, end=None, **kwargs): if isinstance(part_or_url, bytes): return part_or_url[start:end] protocol, _ = split_protocol(part_or_url) - return await self.fss[protocol]._cat_file(part_or_url, start=start, end=end) + try: + return await self.fss[protocol]._cat_file(part_or_url, start=start, end=end) + except Exception as e: + raise ReferenceNotReachable(path, part_or_url) from e def cat_file(self, path, start=None, end=None, **kwargs): part_or_url, start0, end0 = self._cat_common(path, start=start, end=end) @@ -262,7 +278,10 @@ def cat_file(self, path, start=None, end=None, **kwargs): return part_or_url[start:end] protocol, _ = split_protocol(part_or_url) # TODO: start and end should be passed to cat_file, not sliced - return self.fss[protocol].cat_file(part_or_url, start=start0, end=end0) + try: + return self.fss[protocol].cat_file(part_or_url, start=start0, end=end0) + except Exception as e: + raise ReferenceNotReachable(path, part_or_url) from e def pipe_file(self, path, value, **_): 
"""Temporarily add binary data or reference as a file""" @@ -360,6 +379,16 @@ def cat(self, path, recursive=False, on_error="raise", **kwargs): elif np == u and s >= ns and e <= ne: out[p] = b[s - ns : (e - ne) or None] + for k, v in out.copy().items(): + if isinstance(v, Exception): + ex = out[k] + new_ex = ReferenceNotReachable(k, self.references[k]) + new_ex.__cause__ = ex + if on_error == "raise": + raise new_ex + elif on_error != "omit": + out[k] = new_ex + if len(out) == 1 and isinstance(path, str) and "*" not in path: return _first(out) return out diff --git a/fsspec/implementations/tests/test_cached.py b/fsspec/implementations/tests/test_cached.py index 51c451e8c..2b6d53812 100644 --- a/fsspec/implementations/tests/test_cached.py +++ b/fsspec/implementations/tests/test_cached.py @@ -884,7 +884,7 @@ def test_expiry(): assert detail["time"] - start_time > 0.09 -def test_equality(): +def test_equality(tmpdir): """Test sane behaviour for equality and hashing. Make sure that different CachingFileSystem only test equal to each other @@ -897,9 +897,11 @@ def test_equality(): from fsspec.implementations.local import LocalFileSystem lfs = LocalFileSystem() - cfs1 = CachingFileSystem(fs=lfs, cache_storage="raspberry") - cfs2 = CachingFileSystem(fs=lfs, cache_storage="banana") - cfs3 = CachingFileSystem(fs=lfs, cache_storage="banana") + dir1 = f"{tmpdir}/raspberry" + dir2 = f"{tmpdir}/banana" + cfs1 = CachingFileSystem(fs=lfs, cache_storage=dir1) + cfs2 = CachingFileSystem(fs=lfs, cache_storage=dir2) + cfs3 = CachingFileSystem(fs=lfs, cache_storage=dir2) assert cfs1 == cfs1 assert cfs1 != cfs2 assert cfs1 != cfs3 diff --git a/fsspec/implementations/tests/test_reference.py b/fsspec/implementations/tests/test_reference.py index f0b04dd7a..0c1f947d8 100644 --- a/fsspec/implementations/tests/test_reference.py +++ b/fsspec/implementations/tests/test_reference.py @@ -4,7 +4,7 @@ import fsspec from fsspec.implementations.local import LocalFileSystem -from 
fsspec.implementations.reference import _unmodel_hdf5 +from fsspec.implementations.reference import ReferenceNotReachable, _unmodel_hdf5 from fsspec.tests.conftest import data, realfile, reset_files, server, win # noqa: F401 @@ -404,3 +404,46 @@ def test_cat_file_ranges(m): assert fs.cat_file("d", start=1) == other[4:10][1:] assert fs.cat_file("d", start=-5) == other[4:10][-5:] assert fs.cat_file("d", 1, -3) == other[4:10][1:-3] + + +def test_cat_missing(m): + other = b"other test data" + m.pipe("/b", other) + fs = fsspec.filesystem( + "reference", + fo={ + "c": ["memory://b"], + "d": ["memory://unknown", 4, 6], + }, + ) + with pytest.raises(FileNotFoundError): + fs.cat("notafile") + + with pytest.raises(FileNotFoundError): + fs.cat(["notone", "nottwo"]) + + mapper = fs.get_mapper("") + + with pytest.raises(KeyError): + mapper["notakey"] + + with pytest.raises(KeyError): + mapper.getitems(["notone", "nottwo"]) + + with pytest.raises(ReferenceNotReachable) as ex: + fs.cat("d") + assert ex.value.__cause__ + out = fs.cat("d", on_error="return") + assert isinstance(out, ReferenceNotReachable) + + with pytest.raises(ReferenceNotReachable): + mapper["d"] + + with pytest.raises(ReferenceNotReachable): + mapper.getitems(["c", "d"]) + + out = mapper.getitems(["c", "d"], on_error="return") + assert isinstance(out["d"], ReferenceNotReachable) + + out = mapper.getitems(["c", "d"], on_error="omit") + assert list(out) == ["c"] diff --git a/fsspec/spec.py b/fsspec/spec.py index aeb796391..a49c3d719 100644 --- a/fsspec/spec.py +++ b/fsspec/spec.py @@ -776,7 +776,7 @@ def cat_ranges( try: out.append(self.cat_file(p, s, e)) except Exception as e: - if False: # on_error == "return": + if on_error == "return": out.append(e) else: raise diff --git a/tox.ini b/tox.ini deleted file mode 100644 index edc982633..000000000 --- a/tox.ini +++ /dev/null @@ -1,111 +0,0 @@ -# content of: tox.ini , put in same dir as setup.py -[tox] -envlist = {py38,py39,py310} - -[core] -conda_channels= - 
conda-forge - defaults -conda_deps= - pip - paramiko - requests - zstandard - python-snappy - aiohttp - lz4 - distributed - dask - 'pyarrow >= 1' - panel - notebook - pygit2 - git - s3fs - pyftpdlib - cloudpickle - pytest - pytest-asyncio - pytest-benchmark - pytest-cov - pytest-mock - pytest-vcr - py - fusepy - tomli < 2 - msgpack-python - python-libarchive-c - numpy - nomkl - jinja2 - tqdm -deps= - hadoop-test-cluster==0.1.0 - smbprotocol - -[testenv] -description=Run test suite against target versions. -conda_channels= - {[core]conda_channels} -conda_deps= - {[core]conda_deps} -deps= - {[core]deps} -commands = - pytest --cov=fsspec -v -r s {posargs} -passenv = CIRUN - -[testenv:s3fs] -description=Run s3fs (@master) test suite against fsspec. -extras=s3 -conda_channels= - defaults - conda-forge -conda_deps= - {[core]conda_deps} - httpretty - aiobotocore - "moto>=4" - flask -changedir=.tox/s3fs/tmp -whitelist_externals= - rm - git -setenv= - BOTO_CONFIG=/dev/null - AWS_ACCESS_KEY_ID=foobar_key - AWS_SECRET_ACCESS_KEY=foobar_secret -commands= - rm -rf s3fs - git clone https://github.com/fsspec/s3fs - pytest -vv s3fs/s3fs - -[testenv:gcsfs] -description=Run gcsfs (@master) test suite against fsspec. -extras=gcs -conda_channels= - conda-forge - defaults -conda_deps= - pytest - ujson - requests - decorator - google-auth - aiohttp - google-auth-oauthlib - flake8 - black - google-cloud-core - google-api-core - google-api-python-client -changedir=.tox/gcsfs/tmp -whitelist_externals= - rm - git -setenv= - GOOGLE_APPLICATION_CREDENTIALS=gcsfs/gcsfs/tests/fake-secret.json -commands= - rm -rf gcsfs - git clone https://github.com/fsspec/gcsfs - pytest -vv gcsfs/gcsfs -k 'not fuse'