Skip to content

Commit 24d3a87

Browse files
authored
Merge pull request #2873 from skshetry/fix-2871
s3: Check for all files in given path to match
2 parents f09517a + 37fcced commit 24d3a87

File tree

2 files changed

+50
-4
lines changed

2 files changed

+50
-4
lines changed

dvc/remote/s3.py

Lines changed: 21 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,7 @@
55
import os
66
import threading
77

8+
from botocore.exceptions import ClientError
89
from funcy import cached_property, wrap_prop
910

1011
from dvc.config import Config
@@ -207,10 +208,26 @@ def _list_paths(self, path_info, max_items=None):
207208
def list_cache_paths(self):
208209
return self._list_paths(self.path_info)
209210

211+
def isfile(self, path_info):
212+
if path_info.path.endswith("/"):
213+
return False
214+
215+
try:
216+
self.s3.head_object(Bucket=path_info.bucket, Key=path_info.path)
217+
except ClientError as exc:
218+
if exc.response["Error"]["Code"] != "404":
219+
raise
220+
return False
221+
222+
return True
223+
210224
def exists(self, path_info):
211-
dir_path = path_info / ""
212-
fname = next(self._list_paths(path_info, max_items=1), "")
213-
return path_info.path == fname or fname.startswith(dir_path.path)
225+
"""Check if the blob exists. If it does not exist,
226+
it could be a part of a directory path.
227+
228+
eg: if `data/file.txt` exists, check for `data` should return True
229+
"""
230+
return self.isfile(path_info) or self.isdir(path_info)
214231

215232
def makedirs(self, path_info):
216233
# We need to support creating empty directories, which means
@@ -279,7 +296,7 @@ def _generate_download_url(self, path_info, expires=3600):
279296
)
280297

281298
def walk_files(self, path_info, max_items=None):
282-
for fname in self._list_paths(path_info, max_items):
299+
for fname in self._list_paths(path_info / "", max_items):
283300
if fname.endswith("/"):
284301
continue
285302

tests/unit/remote/test_s3.py

Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,12 @@ def remote():
1313
├── data
1414
│ ├── alice
1515
│ ├── alpha
16+
│ ├── subdir-file.txt
1617
│ └── subdir
1718
│ ├── 1
1819
│ ├── 2
1920
│ └── 3
21+
├── data1.txt
2022
├── empty_dir
2123
├── empty_file
2224
└── foo
@@ -26,6 +28,7 @@ def remote():
2628
s3 = remote.s3
2729

2830
s3.create_bucket(Bucket="bucket")
31+
s3.put_object(Bucket="bucket", Key="data1.txt", Body=b"")
2932
s3.put_object(Bucket="bucket", Key="empty_dir/")
3033
s3.put_object(Bucket="bucket", Key="empty_file", Body=b"")
3134
s3.put_object(Bucket="bucket", Key="foo", Body=b"foo")
@@ -34,6 +37,9 @@ def remote():
3437
s3.put_object(Bucket="bucket", Key="data/subdir/1", Body=b"1")
3538
s3.put_object(Bucket="bucket", Key="data/subdir/2", Body=b"2")
3639
s3.put_object(Bucket="bucket", Key="data/subdir/3", Body=b"3")
40+
s3.put_object(
41+
Bucket="bucket", Key="data/subdir-file.txt", Body=b"subdir"
42+
)
3743

3844
yield remote
3945

@@ -66,6 +72,7 @@ def test_exists(remote):
6672
(True, "data/subdir/1"),
6773
(False, "data/al"),
6874
(False, "foo/"),
75+
(True, "data1.txt"),
6976
]
7077

7178
for expected, path in test_cases:
@@ -76,9 +83,11 @@ def test_walk_files(remote):
7683
files = [
7784
remote.path_info / "data/alice",
7885
remote.path_info / "data/alpha",
86+
remote.path_info / "data/subdir-file.txt",
7987
remote.path_info / "data/subdir/1",
8088
remote.path_info / "data/subdir/2",
8189
remote.path_info / "data/subdir/3",
90+
remote.path_info / "data1.txt",
8291
remote.path_info / "empty_file",
8392
remote.path_info / "foo",
8493
]
@@ -109,3 +118,23 @@ def test_makedirs(remote):
109118
assert not remote.exists(empty_dir)
110119
remote.makedirs(empty_dir)
111120
assert remote.exists(empty_dir)
121+
122+
123+
def test_isfile(remote):
124+
test_cases = [
125+
(False, "empty_dir/"),
126+
(True, "empty_file"),
127+
(True, "foo"),
128+
(True, "data/alice"),
129+
(True, "data/alpha"),
130+
(True, "data/subdir/1"),
131+
(True, "data/subdir/2"),
132+
(True, "data/subdir/3"),
133+
(False, "data/subdir/empty_dir/"),
134+
(False, "data/subdir/1/"),
135+
(False, "something-that-does-not-exist"),
136+
(False, "empty_dir"),
137+
]
138+
139+
for expected, path in test_cases:
140+
assert remote.isfile(remote.path_info / path) == expected

0 commit comments

Comments
 (0)