diff --git a/dvc/remote/gs.py b/dvc/remote/gs.py
index 6f389ec4f6..5f27d32151 100644
--- a/dvc/remote/gs.py
+++ b/dvc/remote/gs.py
@@ -138,16 +138,45 @@ def remove(self, path_info):
 
         blob.delete()
 
-    def _list_paths(self, bucket, prefix):
-        for blob in self.gs.bucket(bucket).list_blobs(prefix=prefix):
+    def _list_paths(self, path_info, max_items=None):
+        for blob in self.gs.bucket(path_info.bucket).list_blobs(
+            prefix=path_info.path, max_results=max_items
+        ):
             yield blob.name
 
     def list_cache_paths(self):
-        return self._list_paths(self.path_info.bucket, self.path_info.path)
+        return self._list_paths(self.path_info)
+
+    def walk_files(self, path_info):
+        for fname in self._list_paths(path_info / ""):
+            # skip nested empty directories
+            if fname.endswith("/"):
+                continue
+            yield path_info.replace(fname)
+
+    def makedirs(self, path_info):
+        self.gs.bucket(path_info.bucket).blob(
+            (path_info / "").path
+        ).upload_from_string("")
+
+    def isdir(self, path_info):
+        dir_path = path_info / ""
+        return bool(list(self._list_paths(dir_path, max_items=1)))
+
+    def isfile(self, path_info):
+        if path_info.path.endswith("/"):
+            return False
+
+        blob = self.gs.bucket(path_info.bucket).blob(path_info.path)
+        return blob.exists()
 
     def exists(self, path_info):
-        paths = set(self._list_paths(path_info.bucket, path_info.path))
-        return any(path_info.path == path for path in paths)
+        """Check if the blob exists. If it does not exist,
+        it could be a part of a directory path.
+
+        eg: if `data/file.txt` exists, check for `data` should return True
+        """
+        return self.isfile(path_info) or self.isdir(path_info)
 
     def _upload(self, from_file, to_info, name=None, no_progress_bar=True):
         bucket = self.gs.bucket(to_info.bucket)
diff --git a/tests/func/test_api.py b/tests/func/test_api.py
index a970d3cd5c..b0e0085e31 100644
--- a/tests/func/test_api.py
+++ b/tests/func/test_api.py
@@ -3,60 +3,12 @@
 
 import pytest
 
-from .test_data_cloud import _should_test_aws
-from .test_data_cloud import _should_test_azure
-from .test_data_cloud import _should_test_gcp
-from .test_data_cloud import _should_test_hdfs
-from .test_data_cloud import _should_test_oss
-from .test_data_cloud import _should_test_ssh
-from .test_data_cloud import get_aws_url
-from .test_data_cloud import get_azure_url
-from .test_data_cloud import get_gcp_url
-from .test_data_cloud import get_hdfs_url
-from .test_data_cloud import get_local_url
-from .test_data_cloud import get_oss_url
-from .test_data_cloud import get_ssh_url
 from dvc import api
 from dvc.exceptions import FileMissingError
 from dvc.main import main
 from dvc.path_info import URLInfo
 from dvc.remote.config import RemoteConfig
-
-
-# NOTE: staticmethod is only needed in Python 2
-class Local:
-    should_test = staticmethod(lambda: True)
-    get_url = staticmethod(get_local_url)
-
-
-class S3:
-    should_test = staticmethod(_should_test_aws)
-    get_url = staticmethod(get_aws_url)
-
-
-class GCP:
-    should_test = staticmethod(_should_test_gcp)
-    get_url = staticmethod(get_gcp_url)
-
-
-class Azure:
-    should_test = staticmethod(_should_test_azure)
-    get_url = staticmethod(get_azure_url)
-
-
-class OSS:
-    should_test = staticmethod(_should_test_oss)
-    get_url = staticmethod(get_oss_url)
-
-
-class SSH:
-    should_test = staticmethod(_should_test_ssh)
-    get_url = staticmethod(get_ssh_url)
-
-
-class HDFS:
-    should_test = staticmethod(_should_test_hdfs)
-    get_url = staticmethod(get_hdfs_url)
+from tests.remotes import Azure, GCP, HDFS, Local, OSS, S3, SSH
 
 
 remote_params = [S3, GCP, Azure, OSS, SSH, HDFS]
diff --git a/tests/func/test_data_cloud.py b/tests/func/test_data_cloud.py
index ad0d33020b..0d56734668 100644
--- a/tests/func/test_data_cloud.py
+++ b/tests/func/test_data_cloud.py
@@ -1,13 +1,8 @@
 import copy
-import getpass
 import logging
 import os
-import platform
 import shutil
 import uuid
-from subprocess import CalledProcessError
-from subprocess import check_output
-from subprocess import Popen
 from unittest import SkipTest
 
 import pytest
@@ -29,7 +24,6 @@
 from dvc.remote.base import STATUS_DELETED
 from dvc.remote.base import STATUS_NEW
 from dvc.remote.base import STATUS_OK
-from dvc.utils import env2bool
 from dvc.utils import file_md5
 from dvc.utils.compat import str
 from dvc.utils.stage import dump_stage_file
@@ -37,208 +31,30 @@
 from tests.basic_env import TestDvc
 from tests.utils import spy
 
-
-TEST_REMOTE = "upstream"
-TEST_SECTION = 'remote "{}"'.format(TEST_REMOTE)
-TEST_CONFIG = {
-    Config.SECTION_CACHE: {},
-    Config.SECTION_CORE: {Config.SECTION_CORE_REMOTE: TEST_REMOTE},
-    TEST_SECTION: {Config.SECTION_REMOTE_URL: ""},
-}
-
-TEST_AWS_REPO_BUCKET = os.environ.get("DVC_TEST_AWS_REPO_BUCKET", "dvc-test")
-TEST_GCP_REPO_BUCKET = os.environ.get("DVC_TEST_GCP_REPO_BUCKET", "dvc-test")
-TEST_OSS_REPO_BUCKET = "dvc-test"
-
-TEST_GCP_CREDS_FILE = os.path.abspath(
-    os.environ.get(
-        "GOOGLE_APPLICATION_CREDENTIALS",
-        os.path.join("scripts", "ci", "gcp-creds.json"),
-    )
-)
-# Ensure that absolute path is used
-os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = TEST_GCP_CREDS_FILE
-
-TEST_GDRIVE_CLIENT_ID = (
-    "719861249063-v4an78j9grdtuuuqg3lnm0sugna6v3lh.apps.googleusercontent.com"
+from tests.remotes import (
+    _should_test_aws,
+    _should_test_azure,
+    _should_test_gcp,
+    _should_test_gdrive,
+    _should_test_hdfs,
+    _should_test_oss,
+    _should_test_ssh,
+    TEST_CONFIG,
+    TEST_SECTION,
+    TEST_GCP_CREDS_FILE,
+    TEST_GDRIVE_CLIENT_ID,
+    TEST_GDRIVE_CLIENT_SECRET,
+    TEST_REMOTE,
+    get_aws_url,
+    get_azure_url,
+    get_gcp_url,
+    get_gdrive_url,
+    get_hdfs_url,
+    get_local_url,
+    get_oss_url,
+    get_ssh_url,
+    get_ssh_url_mocked,
 )
-TEST_GDRIVE_CLIENT_SECRET = "2fy_HyzSwkxkGzEken7hThXb"
-
-
-def _should_test_aws():
-    do_test = env2bool("DVC_TEST_AWS", undefined=None)
-    if do_test is not None:
-        return do_test
-
-    if os.getenv("AWS_ACCESS_KEY_ID") and os.getenv("AWS_SECRET_ACCESS_KEY"):
-        return True
-
-    return False
-
-
-def _should_test_gdrive():
-    if os.getenv(RemoteGDrive.GDRIVE_USER_CREDENTIALS_DATA):
-        return True
-
-    return False
-
-
-def _should_test_gcp():
-    do_test = env2bool("DVC_TEST_GCP", undefined=None)
-    if do_test is not None:
-        return do_test
-
-    if not os.path.exists(TEST_GCP_CREDS_FILE):
-        return False
-
-    try:
-        check_output(
-            [
-                "gcloud",
-                "auth",
-                "activate-service-account",
-                "--key-file",
-                TEST_GCP_CREDS_FILE,
-            ]
-        )
-    except (CalledProcessError, OSError):
-        return False
-    return True
-
-
-def _should_test_azure():
-    do_test = env2bool("DVC_TEST_AZURE", undefined=None)
-    if do_test is not None:
-        return do_test
-
-    return os.getenv("AZURE_STORAGE_CONTAINER_NAME") and os.getenv(
-        "AZURE_STORAGE_CONNECTION_STRING"
-    )
-
-
-def _should_test_oss():
-    do_test = env2bool("DVC_TEST_OSS", undefined=None)
-    if do_test is not None:
-        return do_test
-
-    return (
-        os.getenv("OSS_ENDPOINT")
-        and os.getenv("OSS_ACCESS_KEY_ID")
-        and os.getenv("OSS_ACCESS_KEY_SECRET")
-    )
-
-
-def _should_test_ssh():
-    do_test = env2bool("DVC_TEST_SSH", undefined=None)
-    if do_test is not None:
-        return do_test
-
-    # FIXME: enable on windows
-    if os.name == "nt":
-        return False
-
-    try:
check_output(["ssh", "-o", "BatchMode=yes", "127.0.0.1", "ls"]) - except (CalledProcessError, IOError): - return False - - return True - - -def _should_test_hdfs(): - if platform.system() != "Linux": - return False - - try: - check_output( - ["hadoop", "version"], shell=True, executable=os.getenv("SHELL") - ) - except (CalledProcessError, IOError): - return False - - p = Popen( - "hadoop fs -ls hdfs://127.0.0.1/", - shell=True, - executable=os.getenv("SHELL"), - ) - p.communicate() - if p.returncode != 0: - return False - - return True - - -def get_local_storagepath(): - return TestDvc.mkdtemp() - - -def get_local_url(): - return get_local_storagepath() - - -def get_ssh_url(): - return "ssh://{}@127.0.0.1:22{}".format( - getpass.getuser(), get_local_storagepath() - ) - - -def get_ssh_url_mocked(user, port): - path = get_local_storagepath() - if os.name == "nt": - # NOTE: On Windows get_local_storagepath() will return an ntpath - # that looks something like `C:\some\path`, which is not compatible - # with SFTP paths [1], so we need to convert it to a proper posixpath. - # To do that, we should construct a posixpath that would be relative - # to the server's root. In our case our ssh server is running with - # `c:/` as a root, and our URL format requires absolute paths, so the - # resulting path would look like `/some/path`. - # - # [1]https://tools.ietf.org/html/draft-ietf-secsh-filexfer-13#section-6 - drive, path = os.path.splitdrive(path) - assert drive.lower() == "c:" - path = path.replace("\\", "/") - url = "ssh://{}@127.0.0.1:{}{}".format(user, port, path) - return url - - -def get_hdfs_url(): - return "hdfs://{}@127.0.0.1{}".format( - getpass.getuser(), get_local_storagepath() - ) - - -def get_aws_storagepath(): - return TEST_AWS_REPO_BUCKET + "/" + str(uuid.uuid4()) - - -def get_aws_url(): - return "s3://" + get_aws_storagepath() - - -def get_gdrive_url(): - return "gdrive://root/" + str(uuid.uuid4()) - - -def get_gcp_storagepath(): - return TEST_GCP_REPO_BUCKET + "/" + str(uuid.uuid4()) - - -def get_gcp_url(): - return "gs://" + get_gcp_storagepath() - - -def get_azure_url(): - container_name = os.getenv("AZURE_STORAGE_CONTAINER_NAME") - assert container_name is not None - return "azure://{}/{}".format(container_name, str(uuid.uuid4())) - - -def get_oss_storagepath(): - return "{}/{}".format(TEST_OSS_REPO_BUCKET, (uuid.uuid4())) - - -def get_oss_url(): - return "oss://{}".format(get_oss_storagepath()) class TestDataCloud(TestDvc): diff --git a/tests/func/test_remote.py b/tests/func/test_remote.py index 8e8f00deeb..23f318861b 100644 --- a/tests/func/test_remote.py +++ b/tests/func/test_remote.py @@ -4,14 +4,13 @@ import configobj from mock import patch -from .test_data_cloud import get_local_url from dvc.config import Config from dvc.main import main from dvc.path_info import PathInfo from dvc.remote import RemoteLOCAL from dvc.remote.base import RemoteBASE from tests.basic_env import TestDvc -from tests.func.test_data_cloud import get_local_storagepath +from tests.remotes import get_local_url, get_local_storagepath class TestRemote(TestDvc): diff --git a/tests/func/test_repro.py b/tests/func/test_repro.py index 4e46fdde90..a85995fe9e 100644 --- a/tests/func/test_repro.py +++ b/tests/func/test_repro.py @@ -36,13 +36,15 @@ from dvc.utils.stage import dump_stage_file from dvc.utils.stage import load_stage_file from tests.basic_env import TestDvc -from tests.func.test_data_cloud import _should_test_aws -from tests.func.test_data_cloud import _should_test_gcp -from 
-from tests.func.test_data_cloud import _should_test_hdfs
-from tests.func.test_data_cloud import _should_test_ssh
-from tests.func.test_data_cloud import get_ssh_url
-from tests.func.test_data_cloud import TEST_AWS_REPO_BUCKET
-from tests.func.test_data_cloud import TEST_GCP_REPO_BUCKET
+from tests.remotes import (
+    _should_test_aws,
+    _should_test_gcp,
+    _should_test_hdfs,
+    _should_test_ssh,
+    get_ssh_url,
+    TEST_AWS_REPO_BUCKET,
+    TEST_GCP_REPO_BUCKET,
+)
 from tests.utils.httpd import StaticFileServer, ContentMD5Handler
diff --git a/tests/func/test_s3.py b/tests/func/test_s3.py
index 7a58f9c527..a2cdb26ef3 100644
--- a/tests/func/test_s3.py
+++ b/tests/func/test_s3.py
@@ -5,7 +5,7 @@
 from moto import mock_s3
 
 from dvc.remote.s3 import RemoteS3
-from tests.func.test_data_cloud import get_aws_url
+from tests.remotes import get_aws_url
 
 
 # from https://github.com/spulec/moto/blob/v1.3.5/tests/test_s3/test_s3.py#L40
diff --git a/tests/remotes.py b/tests/remotes.py
new file mode 100644
index 0000000000..679f3f93b0
--- /dev/null
+++ b/tests/remotes.py
@@ -0,0 +1,290 @@
+import os
+import platform
+import uuid
+import getpass
+
+from contextlib import contextmanager
+from subprocess import CalledProcessError, check_output, Popen
+
+from dvc.utils import env2bool
+from dvc.config import Config
+from dvc.remote import RemoteGDrive
+from dvc.remote.gs import RemoteGS
+from dvc.remote.s3 import RemoteS3
+from tests.basic_env import TestDvc
+
+from moto.s3 import mock_s3
+
+
+TEST_REMOTE = "upstream"
+TEST_SECTION = 'remote "{}"'.format(TEST_REMOTE)
+TEST_CONFIG = {
+    Config.SECTION_CACHE: {},
+    Config.SECTION_CORE: {Config.SECTION_CORE_REMOTE: TEST_REMOTE},
+    TEST_SECTION: {Config.SECTION_REMOTE_URL: ""},
+}
+
+TEST_AWS_REPO_BUCKET = os.environ.get("DVC_TEST_AWS_REPO_BUCKET", "dvc-test")
+TEST_GCP_REPO_BUCKET = os.environ.get("DVC_TEST_GCP_REPO_BUCKET", "dvc-test")
+TEST_OSS_REPO_BUCKET = "dvc-test"
+
+TEST_GCP_CREDS_FILE = os.path.abspath(
+    os.environ.get(
+        "GOOGLE_APPLICATION_CREDENTIALS",
+        os.path.join("scripts", "ci", "gcp-creds.json"),
+    )
+)
+# Ensure that absolute path is used
+os.environ["GOOGLE_APPLICATION_CREDENTIALS"] = TEST_GCP_CREDS_FILE
+
+TEST_GDRIVE_CLIENT_ID = (
+    "719861249063-v4an78j9grdtuuuqg3lnm0sugna6v3lh.apps.googleusercontent.com"
+)
+TEST_GDRIVE_CLIENT_SECRET = "2fy_HyzSwkxkGzEken7hThXb"
+
+
+def _should_test_aws():
+    do_test = env2bool("DVC_TEST_AWS", undefined=None)
+    if do_test is not None:
+        return do_test
+
+    if os.getenv("AWS_ACCESS_KEY_ID") and os.getenv("AWS_SECRET_ACCESS_KEY"):
+        return True
+
+    return False
+
+
+def _should_test_gdrive():
+    if os.getenv(RemoteGDrive.GDRIVE_USER_CREDENTIALS_DATA):
+        return True
+
+    return False
+
+
+def _should_test_gcp():
+    do_test = env2bool("DVC_TEST_GCP", undefined=None)
+    if do_test is not None:
+        return do_test
+
+    if not os.path.exists(TEST_GCP_CREDS_FILE):
+        return False
+
+    try:
+        check_output(
+            [
+                "gcloud",
+                "auth",
+                "activate-service-account",
+                "--key-file",
+                TEST_GCP_CREDS_FILE,
+            ]
+        )
+    except (CalledProcessError, OSError):
+        return False
+    return True
+
+
+def _should_test_azure():
+    do_test = env2bool("DVC_TEST_AZURE", undefined=None)
+    if do_test is not None:
+        return do_test
+
+    return os.getenv("AZURE_STORAGE_CONTAINER_NAME") and os.getenv(
+        "AZURE_STORAGE_CONNECTION_STRING"
+    )
+
+
+def _should_test_oss():
+    do_test = env2bool("DVC_TEST_OSS", undefined=None)
+    if do_test is not None:
+        return do_test
+
+    return (
+        os.getenv("OSS_ENDPOINT")
+        and os.getenv("OSS_ACCESS_KEY_ID")
os.getenv("OSS_ACCESS_KEY_SECRET") + ) + + +def _should_test_ssh(): + do_test = env2bool("DVC_TEST_SSH", undefined=None) + if do_test is not None: + return do_test + + # FIXME: enable on windows + if os.name == "nt": + return False + + try: + check_output(["ssh", "-o", "BatchMode=yes", "127.0.0.1", "ls"]) + except (CalledProcessError, IOError): + return False + + return True + + +def _should_test_hdfs(): + if platform.system() != "Linux": + return False + + try: + check_output( + ["hadoop", "version"], shell=True, executable=os.getenv("SHELL") + ) + except (CalledProcessError, IOError): + return False + + p = Popen( + "hadoop fs -ls hdfs://127.0.0.1/", + shell=True, + executable=os.getenv("SHELL"), + ) + p.communicate() + if p.returncode != 0: + return False + + return True + + +def get_local_storagepath(): + return TestDvc.mkdtemp() + + +def get_local_url(): + return get_local_storagepath() + + +def get_ssh_url(): + return "ssh://{}@127.0.0.1:22{}".format( + getpass.getuser(), get_local_storagepath() + ) + + +def get_ssh_url_mocked(user, port): + path = get_local_storagepath() + if os.name == "nt": + # NOTE: On Windows get_local_storagepath() will return an ntpath + # that looks something like `C:\some\path`, which is not compatible + # with SFTP paths [1], so we need to convert it to a proper posixpath. + # To do that, we should construct a posixpath that would be relative + # to the server's root. In our case our ssh server is running with + # `c:/` as a root, and our URL format requires absolute paths, so the + # resulting path would look like `/some/path`. + # + # [1]https://tools.ietf.org/html/draft-ietf-secsh-filexfer-13#section-6 + drive, path = os.path.splitdrive(path) + assert drive.lower() == "c:" + path = path.replace("\\", "/") + url = "ssh://{}@127.0.0.1:{}{}".format(user, port, path) + return url + + +def get_hdfs_url(): + return "hdfs://{}@127.0.0.1{}".format( + getpass.getuser(), get_local_storagepath() + ) + + +def get_aws_storagepath(): + return TEST_AWS_REPO_BUCKET + "/" + str(uuid.uuid4()) + + +def get_aws_url(): + return "s3://" + get_aws_storagepath() + + +def get_gdrive_url(): + return "gdrive://root/" + str(uuid.uuid4()) + + +def get_gcp_storagepath(): + return TEST_GCP_REPO_BUCKET + "/" + str(uuid.uuid4()) + + +def get_gcp_url(): + return "gs://" + get_gcp_storagepath() + + +def get_azure_url(): + container_name = os.getenv("AZURE_STORAGE_CONTAINER_NAME") + assert container_name is not None + return "azure://{}/{}".format(container_name, str(uuid.uuid4())) + + +def get_oss_storagepath(): + return "{}/{}".format(TEST_OSS_REPO_BUCKET, (uuid.uuid4())) + + +def get_oss_url(): + return "oss://{}".format(get_oss_storagepath()) + + +# NOTE: staticmethod is only needed in Python 2 +class Local: + should_test = staticmethod(lambda: True) + get_url = staticmethod(get_local_url) + + +class S3: + should_test = staticmethod(_should_test_aws) + get_url = staticmethod(get_aws_url) + + +class S3Mocked: + should_test = staticmethod(lambda: True) + get_url = staticmethod(get_aws_url) + + @classmethod + @contextmanager + def remote(cls): + with mock_s3(): + remote = RemoteS3(None, {"url": cls.get_url()}) + yield remote + + @staticmethod + def put_objects(remote, objects): + s3 = remote.s3 + bucket = remote.path_info.bucket + s3.create_bucket(Bucket=bucket) + for key, body in objects.items(): + s3.put_object( + Bucket=bucket, Key=(remote.path_info / key).path, Body=body + ) + + +class GCP: + should_test = staticmethod(_should_test_gcp) + get_url = staticmethod(get_gcp_url) + + 
+    @classmethod
+    @contextmanager
+    def remote(cls):
+        remote = RemoteGS(None, {"url": cls.get_url()})
+        yield remote
+
+    @staticmethod
+    def put_objects(remote, objects):
+        client = remote.gs
+        bucket = client.get_bucket(remote.path_info.bucket)
+        for key, body in objects.items():
+            bucket.blob((remote.path_info / key).path).upload_from_string(body)
+
+
+class Azure:
+    should_test = staticmethod(_should_test_azure)
+    get_url = staticmethod(get_azure_url)
+
+
+class OSS:
+    should_test = staticmethod(_should_test_oss)
+    get_url = staticmethod(get_oss_url)
+
+
+class SSH:
+    should_test = staticmethod(_should_test_ssh)
+    get_url = staticmethod(get_ssh_url)
+
+
+class HDFS:
+    should_test = staticmethod(_should_test_hdfs)
+    get_url = staticmethod(get_hdfs_url)
diff --git a/tests/unit/remote/ssh/test_ssh.py b/tests/unit/remote/ssh/test_ssh.py
index 95efa231c6..630436cd45 100644
--- a/tests/unit/remote/ssh/test_ssh.py
+++ b/tests/unit/remote/ssh/test_ssh.py
@@ -9,7 +9,7 @@
 from dvc.remote.ssh import RemoteSSH
 from dvc.system import System
-from tests.func.test_data_cloud import get_ssh_url_mocked
+from tests.remotes import get_ssh_url_mocked
 
 
 class TestRemoteSSH(TestCase):
diff --git a/tests/unit/remote/test_s3.py b/tests/unit/remote/test_remote_dir.py
similarity index 59%
rename from tests/unit/remote/test_s3.py
rename to tests/unit/remote/test_remote_dir.py
index bb0abe594e..583898a60d 100644
--- a/tests/unit/remote/test_s3.py
+++ b/tests/unit/remote/test_remote_dir.py
@@ -1,49 +1,38 @@
 # -*- coding: utf-8 -*-
 import pytest
-from moto import mock_s3
 
 from dvc.remote.s3 import RemoteS3
+from tests.remotes import GCP, S3Mocked
 
 
-@pytest.fixture
-def remote():
-    """Returns a RemoteS3 connected to a bucket with the following structure:
-
-        bucket
-        ├── data
-        │  ├── alice
-        │  ├── alpha
-        │  ├── subdir-file.txt
-        │  └── subdir
-        │     ├── 1
-        │     ├── 2
-        │     └── 3
-        ├── data1.txt
-        ├── empty_dir
-        ├── empty_file
-        └── foo
-    """
-    with mock_s3():
-        remote = RemoteS3(None, {"url": "s3://bucket", "region": "us-east-1"})
-        s3 = remote.s3
-
-        s3.create_bucket(Bucket="bucket")
-        s3.put_object(Bucket="bucket", Key="data1.txt", Body=b"")
-        s3.put_object(Bucket="bucket", Key="empty_dir/")
-        s3.put_object(Bucket="bucket", Key="empty_file", Body=b"")
-        s3.put_object(Bucket="bucket", Key="foo", Body=b"foo")
-        s3.put_object(Bucket="bucket", Key="data/alice", Body=b"alice")
-        s3.put_object(Bucket="bucket", Key="data/alpha", Body=b"alpha")
-        s3.put_object(Bucket="bucket", Key="data/subdir/1", Body=b"1")
-        s3.put_object(Bucket="bucket", Key="data/subdir/2", Body=b"2")
-        s3.put_object(Bucket="bucket", Key="data/subdir/3", Body=b"3")
-        s3.put_object(
-            Bucket="bucket", Key="data/subdir-file.txt", Body=b"subdir"
-        )
+remotes = [GCP, S3Mocked]
+
+FILE_WITH_CONTENTS = {
+    "data1.txt": "",
+    "empty_dir/": "",
+    "empty_file": "",
+    "foo": "foo",
+    "data/alice": "alice",
+    "data/alpha": "alpha",
+    "data/subdir-file.txt": "subdir",
+    "data/subdir/1": "1",
+    "data/subdir/2": "2",
+    "data/subdir/3": "3",
+    "data/subdir/empty_dir/": "",
+    "data/subdir/empty_file": "",
+}
+
+@pytest.fixture
+def remote(request):
+    if not request.param.should_test():
+        raise pytest.skip()
+    with request.param.remote() as remote:
+        request.param.put_objects(remote, FILE_WITH_CONTENTS)
         yield remote
 
 
+@pytest.mark.parametrize("remote", remotes, indirect=True)
 def test_isdir(remote):
     test_cases = [
         (True, "data"),
@@ -60,6 +49,7 @@ def test_isdir(remote):
         assert remote.isdir(remote.path_info / path) == expected
 
 
+@pytest.mark.parametrize("remote", remotes, indirect=True)
 def test_exists(remote):
     test_cases = [
         (True, "data"),
@@ -79,6 +69,7 @@ def test_exists(remote):
         assert remote.exists(remote.path_info / path) == expected
 
 
+@pytest.mark.parametrize("remote", remotes, indirect=True)
 def test_walk_files(remote):
     files = [
         remote.path_info / "data/alice",
@@ -87,14 +78,13 @@ def test_walk_files(remote):
         remote.path_info / "data/subdir/1",
         remote.path_info / "data/subdir/2",
         remote.path_info / "data/subdir/3",
-        remote.path_info / "data1.txt",
-        remote.path_info / "empty_file",
-        remote.path_info / "foo",
+        remote.path_info / "data/subdir/empty_file",
     ]
 
-    assert list(remote.walk_files(remote.path_info)) == files
+    assert list(remote.walk_files(remote.path_info / "data")) == files
 
 
+@pytest.mark.parametrize("remote", [S3Mocked], indirect=True)
 def test_copy_preserve_etag_across_buckets(remote):
     s3 = remote.s3
     s3.create_bucket(Bucket="another")
@@ -106,20 +96,23 @@ def test_copy_preserve_etag_across_buckets(remote):
     remote.copy(from_info, to_info)
 
-    from_etag = RemoteS3.get_etag(s3, "bucket", "foo")
+    from_etag = RemoteS3.get_etag(s3, from_info.bucket, from_info.path)
     to_etag = RemoteS3.get_etag(s3, "another", "foo")
 
     assert from_etag == to_etag
 
 
+@pytest.mark.parametrize("remote", remotes, indirect=True)
 def test_makedirs(remote):
     empty_dir = remote.path_info / "empty_dir" / ""
     remote.remove(empty_dir)
     assert not remote.exists(empty_dir)
 
     remote.makedirs(empty_dir)
     assert remote.exists(empty_dir)
+    assert remote.isdir(empty_dir)
 
 
+@pytest.mark.parametrize("remote", [GCP, S3Mocked], indirect=True)
 def test_isfile(remote):
     test_cases = [
         (False, "empty_dir/"),
@@ -131,8 +124,9 @@ def test_isfile(remote):
         (True, "data/subdir/2"),
         (True, "data/subdir/3"),
         (False, "data/subdir/empty_dir/"),
-        (False, "data/subdir/1/"),
+        (True, "data/subdir/empty_file"),
         (False, "something-that-does-not-exist"),
+        (False, "data/subdir/empty-file/"),
         (False, "empty_dir"),
     ]