Skip to content

Commit fc6a10f

Browse files
authored
stage: resolve stage path before creating the stage (#3630)
Current logic is extremely complex and messy, hiding a lot of bugs inside. There are pretty much only 2 scenarios for stage creation: 1) `dvc add/import/etc` when dvcfile is created beside the output file 2) `dvc run` when dvcfile is created at the place of execution both are transparently affected by `--fname` flag. Since we've removed `--cwd` complexity already, there is no reason to generalize stage path resolution for those two scenarios.
1 parent 63fe289 commit fc6a10f

File tree

6 files changed

+68
-68
lines changed

6 files changed

+68
-68
lines changed

dvc/repo/add.py

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
from ..progress import Tqdm
1313
from ..repo.scm_context import scm_context
1414
from ..stage import Stage
15-
from ..utils import LARGE_DIR_SIZE
15+
from ..utils import LARGE_DIR_SIZE, resolve_paths
1616

1717
logger = logging.getLogger(__name__)
1818

@@ -123,9 +123,8 @@ def _create_stages(repo, targets, fname, pbar=None):
123123
disable=len(targets) < LARGE_DIR_SIZE,
124124
unit="file",
125125
):
126-
stage = Stage.create(
127-
repo, outs=[out], accompany_outs=True, fname=fname
128-
)
126+
path, wdir, out = resolve_paths(repo, out)
127+
stage = Stage.create(repo, fname or path, wdir=wdir, outs=[out])
129128
repo._reset()
130129

131130
if not stage:

dvc/repo/imp_url.py

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1+
import os
2+
13
from . import locked as locked_repo
24
from dvc.repo.scm_context import scm_context
3-
from dvc.utils import resolve_output
5+
from dvc.utils import resolve_output, resolve_paths, relpath
6+
from dvc.utils.fs import path_isin
47

58

69
@locked_repo
@@ -9,15 +12,14 @@ def imp_url(self, url, out=None, fname=None, erepo=None, locked=True):
912
from dvc.stage import Stage
1013

1114
out = resolve_output(url, out)
15+
path, wdir, out = resolve_paths(self, out)
16+
17+
# NOTE: when user is importing something from within his own repository
18+
if os.path.exists(url) and path_isin(os.path.abspath(url), self.root_dir):
19+
url = relpath(url, wdir)
1220

1321
stage = Stage.create(
14-
self,
15-
cmd=None,
16-
deps=[url],
17-
outs=[out],
18-
fname=fname,
19-
erepo=erepo,
20-
accompany_outs=True,
22+
self, fname or path, wdir=wdir, deps=[url], outs=[out], erepo=erepo,
2123
)
2224

2325
if stage is None:

dvc/repo/run.py

Lines changed: 19 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,30 @@
1+
import os
2+
13
from . import locked
24
from .scm_context import scm_context
35

46

57
@locked
68
@scm_context
7-
def run(self, no_exec=False, **kwargs):
9+
def run(self, fname=None, no_exec=False, **kwargs):
810
from dvc.stage import Stage
911

10-
stage = Stage.create(self, **kwargs)
11-
12+
outs = (
13+
kwargs.get("outs", [])
14+
+ kwargs.get("outs_no_cache", [])
15+
+ kwargs.get("metrics", [])
16+
+ kwargs.get("metrics_no_cache", [])
17+
+ kwargs.get("outs_persist", [])
18+
+ kwargs.get("outs_persist_no_cache", [])
19+
)
20+
21+
if outs:
22+
base = os.path.basename(os.path.normpath(outs[0]))
23+
path = base + Stage.STAGE_FILE_SUFFIX
24+
else:
25+
path = Stage.STAGE_FILE
26+
27+
stage = Stage.create(self, fname or path, **kwargs)
1228
if stage is None:
1329
return None
1430

dvc/stage.py

Lines changed: 10 additions & 43 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import threading
88

99
from functools import wraps
10-
from itertools import chain
1110
from funcy import decorator
1211

1312
from voluptuous import Any, Schema, MultipleInvalid
@@ -21,7 +20,6 @@
2120
from dvc.utils import relpath
2221
from dvc.utils.fs import path_isin
2322
from dvc.utils.collections import apply_diff
24-
from dvc.utils.fs import contains_symlink_up_to
2523
from dvc.utils.stage import dump_stage_file
2624
from dvc.utils.stage import parse_stage
2725
from dvc.utils.stage import parse_stage_for_update
@@ -418,23 +416,6 @@ def validate(d, fname=None):
418416
except MultipleInvalid as exc:
419417
raise StageFileFormatError(fname, exc)
420418

421-
@classmethod
422-
def _stage_fname(cls, outs, add):
423-
if not outs:
424-
return cls.STAGE_FILE
425-
426-
out = outs[0]
427-
fname = out.path_info.name + cls.STAGE_FILE_SUFFIX
428-
429-
if (
430-
add
431-
and out.is_in_repo
432-
and not contains_symlink_up_to(out.fspath, out.repo.root_dir)
433-
):
434-
fname = out.path_info.with_name(fname).fspath
435-
436-
return fname
437-
438419
@staticmethod
439420
def _check_stage_path(repo, path, is_wdir=False):
440421
assert repo is not None
@@ -505,12 +486,20 @@ def is_cached(self):
505486
return True
506487

507488
@staticmethod
508-
def create(repo, accompany_outs=False, **kwargs):
489+
def create(repo, path, **kwargs):
509490
wdir = kwargs.get("wdir", None) or os.curdir
510-
fname = kwargs.get("fname", None)
491+
492+
wdir = os.path.abspath(wdir)
493+
path = os.path.abspath(path)
494+
495+
Stage._check_dvc_filename(path)
496+
497+
Stage._check_stage_path(repo, wdir, is_wdir=kwargs.get("wdir"))
498+
Stage._check_stage_path(repo, os.path.dirname(path))
511499

512500
stage = Stage(
513501
repo=repo,
502+
path=path,
514503
wdir=wdir,
515504
cmd=kwargs.get("cmd", None),
516505
locked=kwargs.get("locked", False),
@@ -527,28 +516,6 @@ def create(repo, accompany_outs=False, **kwargs):
527516
stage._check_circular_dependency()
528517
stage._check_duplicated_arguments()
529518

530-
if not fname:
531-
fname = Stage._stage_fname(stage.outs, accompany_outs)
532-
stage._check_dvc_filename(fname)
533-
534-
# Autodetecting wdir for add, we need to create outs first to do that,
535-
# so we start with wdir = . and remap out paths later.
536-
if accompany_outs and kwargs.get("wdir") is None:
537-
wdir = os.path.dirname(fname)
538-
539-
for out in chain(stage.outs, stage.deps):
540-
if out.is_in_repo:
541-
out.def_path = relpath(out.path_info, wdir)
542-
543-
wdir = os.path.abspath(wdir)
544-
path = os.path.abspath(fname)
545-
546-
Stage._check_stage_path(repo, wdir, is_wdir=kwargs.get("wdir"))
547-
Stage._check_stage_path(repo, os.path.dirname(path))
548-
549-
stage.wdir = wdir
550-
stage.path = path
551-
552519
ignore_build_cache = kwargs.get("ignore_build_cache", False)
553520

554521
# NOTE: remove outs before we check build cache

dvc/utils/__init__.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -344,6 +344,32 @@ def resolve_output(inp, out):
344344
return out
345345

346346

347+
def resolve_paths(repo, out):
348+
from ..stage import Stage
349+
from ..path_info import PathInfo
350+
from .fs import contains_symlink_up_to
351+
352+
abspath = os.path.abspath(out)
353+
dirname = os.path.dirname(abspath)
354+
base = os.path.basename(os.path.normpath(out))
355+
356+
# NOTE: `out` might not exist yet, so using `dirname`(aka `wdir`) to check
357+
# if it is a local path.
358+
if (
359+
os.path.exists(dirname) # out might not exist yet, so
360+
and PathInfo(abspath).isin_or_eq(repo.root_dir)
361+
and not contains_symlink_up_to(abspath, repo.root_dir)
362+
):
363+
wdir = dirname
364+
out = base
365+
else:
366+
wdir = os.getcwd()
367+
368+
path = os.path.join(wdir, base + Stage.STAGE_FILE_SUFFIX)
369+
370+
return (path, wdir, out)
371+
372+
347373
def format_link(link):
348374
return "<{blue}{link}{nc}>".format(
349375
blue=colorama.Fore.CYAN, link=link, nc=colorama.Fore.RESET

tests/unit/test_stage.py

Lines changed: 0 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@
77
import pytest
88

99
from dvc.dependency.repo import DependencyREPO
10-
from dvc.path_info import PathInfo
1110
from dvc.stage import Stage
1211
from dvc.stage import StageUpdateError
1312

@@ -61,15 +60,6 @@ def test(self):
6160
self.assertEqual(stage.dumpd()["wdir"], "../..")
6261

6362

64-
@pytest.mark.parametrize("add", [True, False])
65-
def test_stage_fname(add):
66-
out = mock.Mock()
67-
out.is_in_repo = False
68-
out.path_info = PathInfo("path/to/out.txt")
69-
fname = Stage._stage_fname([out], add)
70-
assert fname == "out.txt.dvc"
71-
72-
7363
def test_stage_update(mocker):
7464
dep = DependencyREPO({"url": "example.com"}, None, "dep_path")
7565
mocker.patch.object(dep, "update", return_value=None)

0 commit comments

Comments
 (0)