Skip to content

Commit 4e12e76

Browse files
authored
tests: unit testing for StageLoader for Pipelinefile (#3976)
1 parent 33092e8 commit 4e12e76

File tree

4 files changed

+273 additions, -33 deletions

dvc/stage/loader.py

Lines changed: 17 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,19 @@
11
import logging
2-
import os
32
from collections.abc import Mapping
43
from copy import deepcopy
54
from itertools import chain
65

7-
from funcy import lcat, project
6+
from funcy import get_in, lcat, project
87

98
from dvc import dependency, output
109

11-
from ..dependency import ParamsDependency
12-
from . import fill_stage_dependencies
10+
from . import PipelineStage, Stage, loads_from
1311
from .exceptions import StageNameUnspecified, StageNotFound
12+
from .params import StageParams
13+
from .utils import fill_stage_dependencies, resolve_paths
1414

1515
logger = logging.getLogger(__name__)
1616

17-
DEFAULT_PARAMS_FILE = ParamsDependency.DEFAULT_PARAMS_FILE
18-
19-
20-
def resolve_paths(path, wdir=None):
21-
path = os.path.abspath(path)
22-
wdir = wdir or os.curdir
23-
wdir = os.path.abspath(os.path.join(os.path.dirname(path), wdir))
24-
return path, wdir
25-
2617

2718
class StageLoader(Mapping):
2819
def __init__(self, dvcfile, stages_data, lockfile_data=None):
@@ -31,10 +22,12 @@ def __init__(self, dvcfile, stages_data, lockfile_data=None):
3122
self.lockfile_data = lockfile_data or {}
3223

3324
@staticmethod
34-
def fill_from_lock(stage, lock_data):
25+
def fill_from_lock(stage, lock_data=None):
3526
"""Fill values for params, checksums for outs and deps from lock."""
36-
from .params import StageParams
27+
if not lock_data:
28+
return
3729

30+
assert isinstance(lock_data, dict)
3831
items = chain(
3932
((StageParams.PARAM_DEPS, dep) for dep in stage.deps),
4033
((StageParams.PARAM_OUTS, out) for out in stage.outs),
@@ -45,21 +38,16 @@ def fill_from_lock(stage, lock_data):
4538
for key in [StageParams.PARAM_DEPS, StageParams.PARAM_OUTS]
4639
}
4740
for key, item in items:
48-
if isinstance(item, ParamsDependency):
49-
# load the params with values inside lock dynamically
50-
lock_params = lock_data.get(stage.PARAM_PARAMS, {})
51-
item.fill_values(lock_params.get(item.def_path, {}))
41+
path = item.def_path
42+
if isinstance(item, dependency.ParamsDependency):
43+
item.fill_values(get_in(lock_data, [stage.PARAM_PARAMS, path]))
5244
continue
53-
54-
item.checksum = (
55-
checksums.get(key, {})
56-
.get(item.def_path, {})
57-
.get(item.checksum_type)
58-
)
45+
item.checksum = get_in(checksums, [key, path, item.checksum_type])
5946

6047
@classmethod
61-
def load_stage(cls, dvcfile, name, stage_data, lock_data):
62-
from . import PipelineStage, Stage, loads_from
48+
def load_stage(cls, dvcfile, name, stage_data, lock_data=None):
49+
assert all([name, dvcfile, dvcfile.repo, dvcfile.path])
50+
assert stage_data and isinstance(stage_data, dict)
6351

6452
path, wdir = resolve_paths(
6553
dvcfile.path, stage_data.get(Stage.PARAM_WDIR)
@@ -80,11 +68,9 @@ def load_stage(cls, dvcfile, name, stage_data, lock_data):
8068
)
8169

8270
if lock_data:
83-
stage.cmd_changed = lock_data.get(
84-
Stage.PARAM_CMD
85-
) != stage_data.get(Stage.PARAM_CMD)
86-
cls.fill_from_lock(stage, lock_data)
71+
stage.cmd_changed = lock_data.get(Stage.PARAM_CMD) != stage.cmd
8772

73+
cls.fill_from_lock(stage, lock_data)
8874
return stage
8975

9076
def __getitem__(self, name):
@@ -137,8 +123,6 @@ def __getitem__(self, item):
137123

138124
@classmethod
139125
def load_stage(cls, dvcfile, d, stage_text):
140-
from dvc.stage import Stage, loads_from
141-
142126
path, wdir = resolve_paths(dvcfile.path, d.get(Stage.PARAM_WDIR))
143127
stage = loads_from(Stage, dvcfile.repo, path, wdir, d)
144128
stage._stage_text = stage_text

dvc/stage/utils.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -175,6 +175,13 @@ def resolve_wdir(wdir, path):
175175
return pathlib.PurePath(rel_wdir).as_posix() if rel_wdir != "." else None
176176

177177

178+
def resolve_paths(path, wdir=None):
    """Return the absolute ``(path, wdir)`` pair for a stage file.

    ``wdir`` is joined onto the directory that contains ``path``; a falsy
    ``wdir`` resolves to that directory itself.
    """
    abs_path = os.path.abspath(path)
    base_dir = os.path.dirname(abs_path)
    relative_wdir = wdir if wdir else os.curdir
    return abs_path, os.path.abspath(os.path.join(base_dir, relative_wdir))
183+
184+
178185
def get_dump(stage):
179186
return {
180187
key: value
Lines changed: 229 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,229 @@
1+
import os
2+
from copy import deepcopy
3+
from itertools import chain
4+
5+
import pytest
6+
7+
from dvc.dvcfile import PIPELINE_FILE, Dvcfile
8+
from dvc.serialize import get_params_deps
9+
from dvc.stage import PipelineStage, create_stage
10+
from dvc.stage.loader import StageLoader
11+
12+
13+
@pytest.fixture
def stage_data():
    """Minimal stage definition: one command, one dep and one out."""
    definition = {"cmd": "command"}
    definition["deps"] = ["foo"]
    definition["outs"] = ["bar"]
    return definition
16+
17+
18+
@pytest.fixture
def lock_data():
    """Lock entry with the command plus md5 checksums for foo and bar."""
    dep_entry = {"path": "foo", "md5": "foo_checksum"}
    out_entry = {"path": "bar", "md5": "bar_checksum"}
    return {"cmd": "command", "deps": [dep_entry], "outs": [out_entry]}
25+
26+
27+
def test_fill_from_lock_deps_outs(dvc, lock_data):
    """Checksums from the lock end up in the ``info`` of deps and outs."""
    stage = create_stage(
        PipelineStage, dvc, PIPELINE_FILE, deps=["foo"], outs=["bar"]
    )

    # Nothing is populated before the lock data is applied.
    for entry in chain.from_iterable([stage.deps, stage.outs]):
        assert not entry.checksum
        assert not entry.info

    StageLoader.fill_from_lock(stage, lock_data)

    expected_dep_info = {"md5": "foo_checksum"}
    expected_out_info = {"md5": "bar_checksum"}
    assert stage.deps[0].info == expected_dep_info
    assert stage.outs[0].info == expected_out_info
39+
40+
41+
def test_fill_from_lock_params(dvc, lock_data):
    """Param values recorded in the lock are loaded into the params deps."""
    stage = create_stage(
        PipelineStage,
        dvc,
        PIPELINE_FILE,
        deps=["foo"],
        outs=["bar"],
        params=[
            "lorem",
            "lorem.ipsum",
            {"myparams.yaml": ["ipsum", "foobar"]},
        ],
    )

    default_values = {
        "lorem": "lorem",
        "lorem.ipsum": ["i", "p", "s", "u", "m"],
    }
    # The lock deliberately carries no value for the `foobar` param.
    custom_values = {"ipsum": "ipsum"}
    lock_data["params"] = {
        "params.yaml": default_values,
        "myparams.yaml": custom_values,
    }

    param_deps = get_params_deps(stage)[0]
    assert set(param_deps[0].params) == {"lorem", "lorem.ipsum"}
    assert set(param_deps[1].params) == {"ipsum", "foobar"}
    assert not param_deps[0].info
    assert not param_deps[1].info

    StageLoader.fill_from_lock(stage, lock_data)

    assert param_deps[0].info == lock_data["params"]["params.yaml"]
    assert param_deps[1].info == lock_data["params"]["myparams.yaml"]
73+
74+
75+
def test_fill_from_lock_missing_params_section(dvc, lock_data):
    """A lock without a ``params`` section leaves the params deps unfilled."""
    stage_params = ["lorem", "lorem.ipsum", {"myparams.yaml": ["ipsum"]}]
    stage = create_stage(
        PipelineStage,
        dvc,
        PIPELINE_FILE,
        deps=["foo"],
        outs=["bar"],
        params=stage_params,
    )
    param_deps = get_params_deps(stage)[0]

    StageLoader.fill_from_lock(stage, lock_data)

    assert not param_deps[0].info
    assert not param_deps[1].info
87+
88+
89+
def test_fill_from_lock_missing_checksums(dvc, lock_data):
    """Deps and outs that the lock does not mention get no checksum."""
    stage = create_stage(
        PipelineStage,
        dvc,
        PIPELINE_FILE,
        deps=["foo", "foo1"],
        outs=["bar", "bar1"],
    )

    StageLoader.fill_from_lock(stage, lock_data)

    # Entries present in the lock are filled in...
    assert stage.deps[0].info == {"md5": "foo_checksum"}
    assert stage.outs[0].info == {"md5": "bar_checksum"}
    # ...while `foo1` and `bar1`, which the lock lacks, stay empty.
    assert not stage.deps[1].checksum
    assert not stage.outs[1].checksum
103+
104+
105+
def test_fill_from_lock_use_appropriate_checksum(dvc, lock_data):
    """An s3 dep locked with both md5 and etag ends up with the etag."""
    remote_dep = "s3://dvc-temp/foo"
    stage = create_stage(
        PipelineStage,
        dvc,
        PIPELINE_FILE,
        deps=[remote_dep],
        outs=["bar"],
    )
    locked_dep = {"path": remote_dep, "md5": "high five", "etag": "e-tag"}
    lock_data["deps"] = [locked_dep]

    StageLoader.fill_from_lock(stage, lock_data)

    assert stage.deps[0].checksum == "e-tag"
    assert stage.outs[0].checksum == "bar_checksum"
119+
120+
121+
def test_fill_from_lock_with_missing_sections(dvc, lock_data):
    """A lock lacking ``deps`` or ``outs`` only fills the section it has."""
    stage = create_stage(
        PipelineStage, dvc, PIPELINE_FILE, deps=["foo"], outs=["bar"]
    )

    without_deps = deepcopy(lock_data)
    without_deps.pop("deps")
    StageLoader.fill_from_lock(stage, without_deps)
    assert not stage.deps[0].checksum
    assert stage.outs[0].checksum == "bar_checksum"

    # The same stage is filled again, this time from a lock without outs.
    without_outs = deepcopy(lock_data)
    without_outs.pop("outs")
    StageLoader.fill_from_lock(stage, without_outs)
    assert stage.deps[0].checksum == "foo_checksum"
    assert not stage.outs[0].checksum
136+
137+
138+
def test_fill_from_lock_empty_data(dvc):
    """Filling from ``None`` or an empty dict leaves checksums unset."""
    stage = create_stage(
        PipelineStage, dvc, PIPELINE_FILE, deps=["foo"], outs=["bar"]
    )

    for empty_lock in (None, {}):
        StageLoader.fill_from_lock(stage, empty_lock)
        assert not stage.deps[0].checksum
        assert not stage.outs[0].checksum
146+
147+
148+
def test_load_stage(dvc, stage_data, lock_data):
    """Loading a stage sets its paths, name, command, deps and outs."""
    pipeline_file = Dvcfile(dvc, PIPELINE_FILE)
    stage = StageLoader.load_stage(
        pipeline_file, "stage-1", stage_data, lock_data
    )

    assert stage.wdir == os.path.abspath(os.curdir)
    assert stage.name == "stage-1"
    assert stage.cmd == "command"
    assert stage.path == os.path.abspath(PIPELINE_FILE)

    dep = stage.deps[0]
    assert dep.def_path == "foo"
    assert dep.checksum == "foo_checksum"

    out = stage.outs[0]
    assert out.def_path == "bar"
    assert out.checksum == "bar_checksum"
160+
161+
162+
def test_load_stage_outs_with_flags(dvc, stage_data, lock_data):
    """An out declared with ``cache: False`` is loaded with caching off."""
    stage_data["outs"] = [{"foo": {"cache": False}}]
    pipeline_file = Dvcfile(dvc, PIPELINE_FILE)

    stage = StageLoader.load_stage(
        pipeline_file, "stage-1", stage_data, lock_data
    )

    assert stage.outs[0].use_cache is False
167+
168+
169+
def test_load_stage_no_lock(dvc, stage_data):
    """Without lock data the stage loads but carries no checksums."""
    pipeline_file = Dvcfile(dvc, PIPELINE_FILE)

    stage = StageLoader.load_stage(pipeline_file, "stage-1", stage_data)

    assert stage.deps[0].def_path == "foo"
    assert stage.outs[0].def_path == "bar"
    assert not stage.deps[0].checksum
    assert not stage.outs[0].checksum
175+
176+
177+
def test_load_stage_with_params(dvc, stage_data, lock_data):
    """Params declared on the stage are filled with values from the lock."""
    locked_values = {"lorem": "ipsum"}
    lock_data["params"] = {"params.yaml": locked_values}
    stage_data["params"] = ["lorem"]
    pipeline_file = Dvcfile(dvc, PIPELINE_FILE)

    stage = StageLoader.load_stage(
        pipeline_file, "stage-1", stage_data, lock_data
    )

    params, deps = get_params_deps(stage)
    assert deps[0].def_path == "foo"
    assert stage.outs[0].def_path == "bar"
    assert params[0].def_path == "params.yaml"
    assert params[0].info == {"lorem": "ipsum"}
    assert deps[0].checksum == "foo_checksum"
    assert stage.outs[0].checksum == "bar_checksum"
189+
190+
191+
@pytest.mark.parametrize("typ", ["metrics", "plots"])
def test_load_stage_with_metrics_and_plots(dvc, stage_data, lock_data, typ):
    """Entries under ``metrics``/``plots`` load as outs with lock checksums."""
    # Move the stage's outs under the parametrized section name.
    stage_data[typ] = stage_data.pop("outs")
    pipeline_file = Dvcfile(dvc, PIPELINE_FILE)

    stage = StageLoader.load_stage(
        pipeline_file, "stage-1", stage_data, lock_data
    )

    loaded_out = stage.outs[0]
    assert loaded_out.def_path == "bar"
    assert loaded_out.checksum == "bar_checksum"
199+
200+
201+
def test_load_changed_command(dvc, stage_data, lock_data):
    """``cmd_changed`` is set only when the lock holds a different command."""
    pipeline_file = Dvcfile(dvc, PIPELINE_FILE)

    # No lock data at all: the command is not reported as changed.
    unlocked = StageLoader.load_stage(pipeline_file, "stage-1", stage_data)
    assert not unlocked.cmd_changed
    assert unlocked.cmd == "command"

    # The lock disagrees with the stage definition about the command.
    lock_data["cmd"] = "different-command"
    locked = StageLoader.load_stage(
        pipeline_file, "stage-1", stage_data, lock_data
    )
    assert locked.cmd_changed
    assert locked.cmd == "command"
211+
212+
213+
def test_load_stage_wdir_and_path_correctly(dvc, stage_data, lock_data):
    """A ``wdir`` in the stage data becomes an absolute working directory."""
    stage_data["wdir"] = "dir"
    pipeline_file = Dvcfile(dvc, PIPELINE_FILE)

    stage = StageLoader.load_stage(
        pipeline_file, "stage-1", stage_data, lock_data
    )

    expected_wdir = os.path.abspath("dir")
    expected_path = os.path.abspath(PIPELINE_FILE)
    assert stage.wdir == expected_wdir
    assert stage.path == expected_path
220+
221+
222+
def test_load_stage_mapping(dvc, stage_data, lock_data):
    """``StageLoader`` exposes its stages through the mapping protocol."""
    stage_name = "stage"
    loader = StageLoader(
        Dvcfile(dvc, PIPELINE_FILE),
        {stage_name: stage_data},
        {stage_name: lock_data},
    )

    assert len(loader) == 1
    assert stage_name in loader
    assert "stage1" not in loader
    assert loader.keys() == {stage_name}
    assert isinstance(loader[stage_name], PipelineStage)

tests/unit/stage/test_utils.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import os
2+
3+
from dvc.stage.utils import resolve_paths
4+
5+
6+
def test_resolve_paths():
    """``resolve_paths`` yields an absolute file path and working dir."""
    parent = os.path.join("dir", "subdir")
    file_path = os.path.join(parent, "dvc.yaml")

    # (extra keyword arguments, expected absolute wdir)
    cases = [
        ({"wdir": "dir"}, os.path.abspath(os.path.join(parent, "dir"))),
        ({}, os.path.abspath(parent)),
        ({"wdir": "../../some-dir"}, os.path.abspath("some-dir")),
    ]
    for kwargs, expected_wdir in cases:
        path, wdir = resolve_paths(path=file_path, **kwargs)
        assert path == os.path.abspath(file_path)
        assert wdir == expected_wdir

0 commit comments

Comments
 (0)