Skip to content

Commit 55fbdeb

Browse files
committed
dvc: drop cached external outputs
Fixes iterative#9531 Docs iterative/dvc.org#4574 Kudos @dberenbaum
1 parent 262419e commit 55fbdeb

20 files changed

+63
-305
lines changed

dvc/cachemgr.py

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -32,13 +32,6 @@ def _get_odb(
3232

3333
class CacheManager:
3434
CACHE_DIR = "cache"
35-
CLOUD_SCHEMES = [
36-
Schemes.S3,
37-
Schemes.GS,
38-
Schemes.SSH,
39-
Schemes.HDFS,
40-
Schemes.WEBHDFS,
41-
]
4235
FILES_DIR = "files"
4336

4437
def __init__(self, repo):
@@ -91,16 +84,12 @@ def _init_odb(self, schemes):
9184
)
9285

9386
def __getattr__(self, name):
94-
if name not in self._odb and name in self.CLOUD_SCHEMES:
95-
self._init_odb([name])
96-
9787
try:
9888
return self._odb[name]
9989
except KeyError as exc:
10090
raise AttributeError from exc
10191

10292
def by_scheme(self):
103-
self._init_odb(self.CLOUD_SCHEMES)
10493
yield from self._odb.items()
10594

10695
@property

dvc/commands/add.py

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,6 @@ def validate_args(self) -> None:
2424
invalid_opt = "--glob option"
2525
elif args.no_commit:
2626
invalid_opt = "--no-commit option"
27-
elif args.external:
28-
invalid_opt = "--external option"
2927
else:
3028
message = "{option} can't be used without --to-remote"
3129
if args.remote:
@@ -49,7 +47,6 @@ def run(self):
4947
self.repo.add(
5048
self.args.targets,
5149
no_commit=self.args.no_commit,
52-
external=self.args.external,
5350
glob=self.args.glob,
5451
out=self.args.out,
5552
remote=self.args.remote,
@@ -82,12 +79,6 @@ def add_parser(subparsers, parent_parser):
8279
default=False,
8380
help="Don't put files/directories into cache.",
8481
)
85-
parser.add_argument(
86-
"--external",
87-
action="store_true",
88-
default=False,
89-
help="Allow targets that are outside of the DVC repository.",
90-
)
9182
parser.add_argument(
9283
"--glob",
9384
action="store_true",

dvc/config_schema.py

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -154,13 +154,13 @@ def __call__(self, data):
154154
"machine": Lower,
155155
},
156156
"cache": {
157-
"local": str,
158-
"s3": str,
159-
"gs": str,
160-
"hdfs": str,
161-
"webhdfs": str,
162-
"ssh": str,
163-
"azure": str,
157+
"local": str, # obsoleted
158+
"s3": str, # obsoleted
159+
"gs": str, # obsoleted
160+
"hdfs": str, # obsoleted
161+
"webhdfs": str, # obsoleted
162+
"ssh": str, # obsoleted
163+
"azure": str, # obsoleted
164164
# This is for default local cache
165165
"dir": str,
166166
**LOCAL_COMMON,

dvc/exceptions.py

Lines changed: 0 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -299,21 +299,6 @@ def __init__(self, url):
299299
super().__init__(f"The path '{url}' does not exist")
300300

301301

302-
class RemoteCacheRequiredError(DvcException):
303-
def __init__(self, scheme, fs_path):
304-
super().__init__(
305-
(
306-
"Current operation was unsuccessful because '{}' requires "
307-
"existing cache on '{}' remote. See {} for information on how "
308-
"to set up remote cache."
309-
).format(
310-
fs_path,
311-
scheme,
312-
format_link("https://man.dvc.org/config#cache"),
313-
)
314-
)
315-
316-
317302
class IsADirectoryError(DvcException): # noqa,pylint:disable=redefined-builtin
318303
"""Raised when a file operation is requested on a directory."""
319304

dvc/output.py

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,6 @@
1919
ConfirmRemoveError,
2020
DvcException,
2121
MergeError,
22-
RemoteCacheRequiredError,
2322
)
2423
from dvc.utils.objects import cached_property
2524
from dvc_data.hashfile import check as ocheck
@@ -526,14 +525,9 @@ def use_scm_ignore(self):
526525
def cache(self):
527526
from dvc.cachemgr import LEGACY_HASH_NAMES
528527

529-
if self.is_in_repo:
530-
odb_name = "legacy" if self.hash_name in LEGACY_HASH_NAMES else "repo"
531-
else:
532-
odb_name = self.protocol
533-
odb = getattr(self.repo.cache, odb_name)
534-
if self.use_cache and odb is None:
535-
raise RemoteCacheRequiredError(self.fs.protocol, self.fs_path)
536-
return odb
528+
assert self.is_in_repo
529+
odb_name = "legacy" if self.hash_name in LEGACY_HASH_NAMES else "repo"
530+
return getattr(self.repo.cache, odb_name)
537531

538532
@property
539533
def local_cache(self):

dvc/repo/add.py

Lines changed: 0 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -54,7 +54,6 @@ def find_targets(
5454
def get_or_create_stage(
5555
repo: "Repo",
5656
target: str,
57-
external: bool = False,
5857
out: Optional[str] = None,
5958
to_remote: bool = False,
6059
force: bool = False,
@@ -79,7 +78,6 @@ def get_or_create_stage(
7978
fname=path,
8079
wdir=wdir,
8180
outs=[out],
82-
external=external,
8381
force=force,
8482
)
8583
return StageInfo(stage, output_exists=False)
@@ -199,7 +197,6 @@ def add(
199197
repo: "Repo",
200198
targets: Union["StrOrBytesPath", Iterator["StrOrBytesPath"]],
201199
no_commit: bool = False,
202-
external: bool = False,
203200
glob: bool = False,
204201
out: Optional[str] = None,
205202
remote: Optional[str] = None,
@@ -215,7 +212,6 @@ def add(
215212
target: get_or_create_stage(
216213
repo,
217214
target,
218-
external=external,
219215
out=out,
220216
to_remote=to_remote,
221217
force=force,

dvc/stage/__init__.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -89,7 +89,7 @@ class RawData:
8989
generated_from: Optional[str] = None
9090

9191

92-
def create_stage(cls: Type[_T], repo, path, external=False, **kwargs) -> _T:
92+
def create_stage(cls: Type[_T], repo, path, **kwargs) -> _T:
9393
from dvc.dvcfile import check_dvcfile_path
9494

9595
wdir = os.path.abspath(kwargs.get("wdir", None) or os.curdir)
@@ -101,8 +101,7 @@ def create_stage(cls: Type[_T], repo, path, external=False, **kwargs) -> _T:
101101

102102
stage = loads_from(cls, repo, path, wdir, kwargs)
103103
fill_stage_outputs(stage, **kwargs)
104-
if not external:
105-
check_no_externals(stage)
104+
check_no_externals(stage)
106105
fill_stage_dependencies(
107106
stage, **project(kwargs, ["deps", "erepo", "params", "fs_config"])
108107
)

dvc/stage/cache.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,12 @@ def _can_hash(stage):
4444
return False
4545

4646
for out in stage.outs:
47-
if out.protocol != "local" or not out.def_path or out.persist:
47+
if (
48+
out.protocol != "local"
49+
or not out.def_path
50+
or out.persist
51+
or not out.is_in_repo
52+
):
4853
return False
4954

5055
return True
@@ -112,7 +117,6 @@ def _create_stage(self, cache, wdir=None):
112117
cmd=cache["cmd"],
113118
wdir=wdir,
114119
outs=[out["path"] for out in cache["outs"]],
115-
external=True,
116120
)
117121
StageLoader.fill_from_lock(stage, cache)
118122
return stage
@@ -137,7 +141,7 @@ def _uncached_outs(self, stage, cache):
137141
# NOTE: using copy link to make it look like a git-tracked file
138142
with self._cache_type_copy():
139143
for out in cached_stage.outs:
140-
if out.def_path in outs_no_cache:
144+
if out.def_path in outs_no_cache and out.is_in_repo:
141145
yield out
142146

143147
def save(self, stage):

dvc/stage/utils.py

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -87,17 +87,13 @@ def fill_stage_dependencies(stage, deps=None, erepo=None, params=None, fs_config
8787

8888

8989
def check_no_externals(stage):
90-
from urllib.parse import urlparse
91-
9290
from dvc.utils import format_link
9391

9492
# NOTE: preventing users from accidentally using external outputs. See
9593
# https://github.com/iterative/dvc/issues/1545 for more details.
9694

9795
def _is_external(out):
98-
# NOTE: in case of `remote://` notation, the user clearly knows that
99-
# this is an advanced feature and so we shouldn't error-out.
100-
if out.is_in_repo or urlparse(out.def_path).scheme == "remote":
96+
if out.is_in_repo or not out.use_cache:
10197
return False
10298
return True
10399

dvc/testing/workspace_tests.py

Lines changed: 0 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -153,57 +153,6 @@ def test_import_no_download(self, tmp_dir, dvc, remote_version_aware):
153153
assert dvc.status() == {}
154154

155155

156-
class TestAdd:
157-
@pytest.fixture
158-
def hash_name(self):
159-
pytest.skip()
160-
161-
@pytest.fixture
162-
def hash_value(self):
163-
pytest.skip()
164-
165-
@pytest.fixture
166-
def dir_hash_value(self):
167-
pytest.skip()
168-
169-
def test_add(self, tmp_dir, dvc, workspace, hash_name, hash_value):
170-
from dvc.stage.exceptions import StageExternalOutputsError
171-
172-
workspace.gen("file", "file")
173-
174-
with pytest.raises(StageExternalOutputsError):
175-
dvc.add(workspace.url)
176-
177-
dvc.add("remote://workspace/file")
178-
assert (tmp_dir / "file.dvc").read_text() == (
179-
"outs:\n"
180-
f"- {hash_name}: {hash_value}\n"
181-
" size: 4\n"
182-
" hash: md5\n"
183-
" path: remote://workspace/file\n"
184-
)
185-
assert (workspace / "file").read_text() == "file"
186-
assert (
187-
workspace / "cache" / "files" / "md5" / hash_value[:2] / hash_value[2:]
188-
).read_text() == "file"
189-
190-
assert dvc.status() == {}
191-
192-
# pylint: disable-next=unused-argument
193-
def test_add_dir(self, tmp_dir, dvc, workspace, hash_name, dir_hash_value):
194-
workspace.gen({"dir": {"file": "file", "subdir": {"subfile": "subfile"}}})
195-
196-
dvc.add("remote://workspace/dir")
197-
assert (
198-
workspace
199-
/ "cache"
200-
/ "files"
201-
/ "md5"
202-
/ dir_hash_value[:2]
203-
/ dir_hash_value[2:]
204-
).is_file()
205-
206-
207156
def match_files(fs, entries, expected):
208157
entries_content = {(fs.path.normpath(d["path"]), d["isdir"]) for d in entries}
209158
expected_content = {(fs.path.normpath(d["path"]), d["isdir"]) for d in expected}

tests/func/test_add.py

Lines changed: 6 additions & 68 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222
)
2323
from dvc.stage import Stage
2424
from dvc.stage.exceptions import StageExternalOutputsError, StagePathNotFoundError
25-
from dvc.testing.workspace_tests import TestAdd
2625
from dvc.utils.fs import path_isin
2726
from dvc.utils.serialize import YAMLFileCorruptedError
2827
from dvc_data.hashfile.hash import file_md5
@@ -234,55 +233,6 @@ def test_add_filtered_files_in_dir(
234233
assert stage.outs[0].def_path in expected_def_paths
235234

236235

237-
class TestAddExternal(TestAdd):
238-
@pytest.fixture
239-
def hash_name(self):
240-
return "md5"
241-
242-
@pytest.fixture
243-
def hash_value(self):
244-
return "8c7dd922ad47494fc02c388e12c00eac"
245-
246-
@pytest.fixture
247-
def dir_hash_value(self):
248-
return "b6dcab6ccd17ca0a8bf4a215a37d14cc.dir"
249-
250-
251-
def test_add_external_relpath(tmp_dir, dvc, local_cloud):
252-
(fpath,) = local_cloud.gen("file", "file")
253-
rel = os.path.relpath(fpath)
254-
255-
with pytest.raises(StageExternalOutputsError):
256-
dvc.add(rel)
257-
258-
dvc.add(rel, external=True)
259-
assert (tmp_dir / "file.dvc").read_text() == (
260-
"outs:\n"
261-
"- md5: 8c7dd922ad47494fc02c388e12c00eac\n"
262-
" size: 4\n"
263-
" hash: md5\n"
264-
f" path: {rel}\n"
265-
)
266-
assert fpath.read_text() == "file"
267-
assert dvc.status() == {}
268-
269-
270-
def test_add_local_remote_file(tmp_dir, dvc):
271-
"""
272-
Making sure that 'remote' syntax is handled properly for local outs.
273-
"""
274-
tmp_dir.gen({"foo": "foo", "bar": "bar"})
275-
tmp_dir.add_remote(url=tmp_dir.fs_path, name="myremote")
276-
277-
assert main(["add", "remote://myremote/foo"]) == 0
278-
d = (tmp_dir / "foo.dvc").load_yaml()
279-
assert d["outs"][0]["path"] == "remote://myremote/foo"
280-
281-
assert main(["add", (tmp_dir / "bar").fs_path]) == 0
282-
d = (tmp_dir / "bar.dvc").load_yaml()
283-
assert d["outs"][0]["path"] == "bar"
284-
285-
286236
def test_cmd_add(tmp_dir, dvc):
287237
tmp_dir.gen("foo", "foo")
288238
ret = main(["add", "foo"])
@@ -755,15 +705,9 @@ def test_add_symlink_file(tmp_dir, dvc):
755705
dvc.add(os.path.join("dir", "foo"))
756706

757707

758-
@pytest.mark.parametrize("external", [True, False])
759-
def test_add_symlink_dir(make_tmp_dir, tmp_dir, dvc, external):
760-
if external:
761-
data_dir = make_tmp_dir("data")
762-
data_dir.gen({"foo": "foo"})
763-
target = os.fspath(data_dir)
764-
else:
765-
tmp_dir.gen({"data": {"foo": "foo"}})
766-
target = os.path.join(".", "data")
708+
def test_add_symlink_dir(make_tmp_dir, tmp_dir, dvc):
709+
tmp_dir.gen({"data": {"foo": "foo"}})
710+
target = os.path.join(".", "data")
767711

768712
tmp_dir.gen({"data": {"foo": "foo"}})
769713

@@ -774,15 +718,9 @@ def test_add_symlink_dir(make_tmp_dir, tmp_dir, dvc, external):
774718
dvc.add("dir")
775719

776720

777-
@pytest.mark.parametrize("external", [True, False])
778-
def test_add_file_in_symlink_dir(make_tmp_dir, tmp_dir, dvc, external):
779-
if external:
780-
data_dir = make_tmp_dir("data")
781-
data_dir.gen({"dir": {"foo": "foo"}})
782-
target = os.fspath(data_dir / "dir")
783-
else:
784-
tmp_dir.gen({"data": {"foo": "foo"}})
785-
target = os.path.join(".", "data")
721+
def test_add_file_in_symlink_dir(make_tmp_dir, tmp_dir, dvc):
722+
tmp_dir.gen({"data": {"foo": "foo"}})
723+
target = os.path.join(".", "data")
786724

787725
(tmp_dir / "dir").symlink_to(target)
788726

0 commit comments

Comments
 (0)