Skip to content

Commit afc183f

Browse files
authored
tests serializing of stage to lockfile (#3988)
* tests: serialize stage for lockfile * config: make isort/black compatible * convert class based tests to pytest
1 parent d201fad commit afc183f

File tree

4 files changed

+249
-35
lines changed

4 files changed

+249
-35
lines changed

dvc/stage/serialize.py

Lines changed: 35 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -63,51 +63,56 @@ def _serialize_outs(outputs: List[BaseOutput]):
6363
return outs, metrics, plots
6464

6565

66-
def _serialize_params(params: List[ParamsDependency]):
67-
"""Return two types of values from stage:
68-
69-
`keys` - which is list of params without values, used in a pipeline file
70-
71-
which is in the shape of:
72-
['lr', 'train', {'params2.yaml': ['lr']}]
66+
def _serialize_params_keys(params):
67+
"""
68+
Returns the following format of data:
69+
['lr', 'train', {'params2.yaml': ['lr']}]
7370
74-
`key_vals` - which is list of params with values, used in a lockfile
75-
which is in the shape of:
76-
{'params.yaml': {'lr': '1', 'train': 2}, {'params2.yaml': {'lr': '1'}}
71+
The output is sorted, with keys of params from the default params file
72+
coming first, followed by entries for other files in lexicographic
73+
order. The keys of those custom files are also sorted in the same order.
7774
"""
7875
keys = []
79-
key_vals = OrderedDict()
80-
8176
for param_dep in sort_by_path(params):
8277
dump = param_dep.dumpd()
8378
path, params = dump[PARAM_PATH], dump[PARAM_PARAMS]
84-
if isinstance(params, dict):
85-
k = sorted(params.keys())
86-
if not k:
87-
continue
88-
key_vals[path] = OrderedDict([(key, params[key]) for key in k])
89-
else:
90-
assert isinstance(params, list)
91-
# no params values available here, entry will be skipped for lock
92-
k = sorted(params)
79+
assert isinstance(params, (dict, list))
80+
# when on no_exec, params are not filled and are saved as list
81+
k = sorted(params.keys() if isinstance(params, dict) else params)
82+
if not k:
83+
continue
9384

94-
# params from default file is always kept at the start of the `params:`
9585
if path == DEFAULT_PARAMS_FILE:
9686
keys = k + keys
97-
if key_vals:
98-
key_vals.move_to_end(path, last=False)
9987
else:
100-
# if it's not a default file, change the shape
101-
# to: {path: k}
10288
keys.append({path: k})
103-
return keys, key_vals
89+
return keys
90+
91+
92+
def _serialize_params_values(params: List[ParamsDependency]):
93+
"""Returns output of following format, used for lockfile:
94+
{'params.yaml': {'lr': '1', 'train': 2}, 'params2.yaml': {'lr': '1'}}
95+
96+
The default params file is always kept at the start, followed by others in
97+
alphabetical order. The param values are sorted too (not recursively, though).
98+
"""
99+
key_vals = OrderedDict()
100+
for param_dep in sort_by_path(params):
101+
dump = param_dep.dumpd()
102+
path, params = dump[PARAM_PATH], dump[PARAM_PARAMS]
103+
if isinstance(params, dict):
104+
kv = [(key, params[key]) for key in sorted(params.keys())]
105+
key_vals[path] = OrderedDict(kv)
106+
if path == DEFAULT_PARAMS_FILE:
107+
key_vals.move_to_end(path, last=False)
108+
return key_vals
104109

105110

106111
def to_pipeline_file(stage: "PipelineStage"):
107112
wdir = resolve_wdir(stage.wdir, stage.path)
108113
params, deps = split_params_deps(stage)
109114
deps = sorted([d.def_path for d in deps])
110-
params, _ = _serialize_params(params)
115+
params = _serialize_params_keys(params)
111116

112117
outs, metrics, plots = _serialize_outs(stage.outs)
113118
res = [
@@ -143,10 +148,11 @@ def to_single_stage_lockfile(stage: "Stage") -> dict:
143148
]
144149
for items in [deps, stage.outs]
145150
]
151+
params = _serialize_params_values(params)
146152
if deps:
147153
res[PARAM_DEPS] = deps
148154
if params:
149-
_, res[PARAM_PARAMS] = _serialize_params(params)
155+
res[PARAM_PARAMS] = params
150156
if outs:
151157
res[PARAM_OUTS] = outs
152158

setup.cfg

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,4 +14,6 @@ include_trailing_comma=true
1414
known_first_party=dvc,tests
1515
known_third_party=PyInstaller,RangeHTTPServer,boto3,colorama,configobj,distro,dpath,flaky,flufl,funcy,git,google,grandalf,mock,mockssh,moto,nanotime,networkx,packaging,paramiko,pathspec,pytest,requests,ruamel,setuptools,shortuuid,tqdm,voluptuous,yaml,zc
1616
line_length=79
17+
force_grid_wrap=0
18+
use_parentheses=True
1719
multi_line_output=3
Lines changed: 208 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,208 @@
1+
from collections import OrderedDict
2+
3+
import pytest
4+
from voluptuous import Schema as _Schema
5+
6+
from dvc.dvcfile import PIPELINE_FILE
7+
from dvc.schema import LOCK_FILE_STAGE_SCHEMA, LOCKFILE_SCHEMA
8+
from dvc.stage import PipelineStage, create_stage
9+
from dvc.stage.serialize import DEFAULT_PARAMS_FILE, to_lockfile
10+
from dvc.stage.serialize import (
11+
to_single_stage_lockfile as _to_single_stage_lockfile,
12+
)
13+
from dvc.stage.utils import split_params_deps
14+
15+
kwargs = {"name": "something", "cmd": "command", "path": PIPELINE_FILE}
16+
Schema = _Schema(LOCK_FILE_STAGE_SCHEMA)
17+
18+
19+
def to_single_stage_lockfile(stage):
20+
"""Validate schema on each serialization."""
21+
e = _to_single_stage_lockfile(stage)
22+
assert Schema(e)
23+
return e
24+
25+
26+
def test_lock(dvc):
27+
stage = create_stage(PipelineStage, dvc, **kwargs)
28+
assert to_single_stage_lockfile(stage) == {"cmd": "command"}
29+
30+
31+
def test_lock_deps(dvc):
32+
stage = create_stage(PipelineStage, dvc, deps=["input"], **kwargs)
33+
stage.deps[0].info = {"md5": "md-five"}
34+
assert to_single_stage_lockfile(stage) == OrderedDict(
35+
[
36+
("cmd", "command"),
37+
("deps", [OrderedDict([("path", "input"), ("md5", "md-five")])]),
38+
]
39+
)
40+
41+
42+
def test_lock_deps_order(dvc):
43+
stage = create_stage(
44+
PipelineStage, dvc, deps=["input1", "input0"], **kwargs
45+
)
46+
stage.deps[0].info = {"md5": "md-one1"}
47+
stage.deps[1].info = {"md5": "md-zer0"}
48+
assert to_single_stage_lockfile(stage) == OrderedDict(
49+
[
50+
("cmd", "command"),
51+
(
52+
"deps",
53+
[
54+
OrderedDict([("path", "input0"), ("md5", "md-zer0")]),
55+
OrderedDict([("path", "input1"), ("md5", "md-one1")]),
56+
],
57+
),
58+
]
59+
)
60+
61+
62+
def test_lock_params(dvc):
63+
stage = create_stage(
64+
PipelineStage, dvc, params=["lorem.ipsum", "abc"], **kwargs
65+
)
66+
stage.deps[0].info = {"lorem.ipsum": {"lorem1": 1, "lorem2": 2}, "abc": 3}
67+
assert to_single_stage_lockfile(stage)["params"][
68+
DEFAULT_PARAMS_FILE
69+
] == OrderedDict([("abc", 3), ("lorem.ipsum", {"lorem1": 1, "lorem2": 2})])
70+
71+
72+
def test_lock_params_file_sorted(dvc):
73+
stage = create_stage(
74+
PipelineStage,
75+
dvc,
76+
params=[
77+
"lorem.ipsum",
78+
"abc",
79+
{"myparams.yaml": ["foo", "foobar"]},
80+
{"a-params-file.yaml": ["bar", "barr"]},
81+
],
82+
**kwargs
83+
)
84+
stage.deps[0].info = {"lorem.ipsum": {"lorem1": 1, "lorem2": 2}, "abc": 3}
85+
stage.deps[1].info = {"foo": ["f", "o", "o"], "foobar": "foobar"}
86+
stage.deps[2].info = {"bar": ["b", "a", "r"], "barr": "barr"}
87+
assert to_single_stage_lockfile(stage)["params"] == OrderedDict(
88+
[
89+
(
90+
DEFAULT_PARAMS_FILE,
91+
OrderedDict(
92+
[("abc", 3), ("lorem.ipsum", {"lorem1": 1, "lorem2": 2})]
93+
),
94+
),
95+
(
96+
"a-params-file.yaml",
97+
OrderedDict([("bar", ["b", "a", "r"]), ("barr", "barr")]),
98+
),
99+
(
100+
"myparams.yaml",
101+
OrderedDict([("foo", ["f", "o", "o"]), ("foobar", "foobar")]),
102+
),
103+
]
104+
)
105+
106+
107+
def test_lock_params_no_values_filled(dvc):
108+
stage = create_stage(
109+
PipelineStage, dvc, params=["lorem.ipsum", "abc"], **kwargs
110+
)
111+
assert to_single_stage_lockfile(stage) == {"cmd": "command"}
112+
113+
114+
@pytest.mark.parametrize("typ", ["plots", "metrics", "outs"])
115+
def test_lock_outs(dvc, typ):
116+
stage = create_stage(PipelineStage, dvc, **{typ: ["input"]}, **kwargs)
117+
stage.outs[0].info = {"md5": "md-five"}
118+
assert to_single_stage_lockfile(stage) == OrderedDict(
119+
[
120+
("cmd", "command"),
121+
("outs", [OrderedDict([("path", "input"), ("md5", "md-five")])]),
122+
]
123+
)
124+
125+
126+
@pytest.mark.parametrize("typ", ["plots", "metrics", "outs"])
127+
def test_lock_outs_order(dvc, typ):
128+
stage = create_stage(
129+
PipelineStage, dvc, **{typ: ["input1", "input0"]}, **kwargs
130+
)
131+
stage.outs[0].info = {"md5": "md-one1"}
132+
stage.outs[1].info = {"md5": "md-zer0"}
133+
assert to_single_stage_lockfile(stage) == OrderedDict(
134+
[
135+
("cmd", "command"),
136+
(
137+
"outs",
138+
[
139+
OrderedDict([("path", "input0"), ("md5", "md-zer0")]),
140+
OrderedDict([("path", "input1"), ("md5", "md-one1")]),
141+
],
142+
),
143+
]
144+
)
145+
146+
147+
def test_dump_appropriate_checksums(dvc):
148+
stage = create_stage(
149+
PipelineStage, dvc, deps=["s3://dvc-temp/file"], **kwargs
150+
)
151+
stage.deps[0].info = {"etag": "is-it-etag", "md5": "or-md5?"}
152+
assert to_single_stage_lockfile(stage) == OrderedDict(
153+
[
154+
("cmd", "command"),
155+
(
156+
"deps",
157+
[
158+
OrderedDict(
159+
[
160+
("path", "s3://dvc-temp/file"),
161+
("etag", "is-it-etag"),
162+
]
163+
)
164+
],
165+
),
166+
]
167+
)
168+
169+
170+
def test_order(dvc):
171+
stage = create_stage(
172+
PipelineStage,
173+
dvc,
174+
deps=["input"],
175+
outs=["output"],
176+
params=["foo-param"],
177+
**kwargs
178+
)
179+
params, deps = split_params_deps(stage)
180+
181+
deps[0].info = {"md5": "md-five"}
182+
params[0].info = {"foo-param": "value"}
183+
stage.outs[0].info = {"md5": "md5-output"}
184+
185+
assert to_single_stage_lockfile(stage) == OrderedDict(
186+
[
187+
("cmd", "command"),
188+
("deps", [{"path": "input", "md5": "md-five"}]),
189+
("params", {"params.yaml": {"foo-param": "value"}}),
190+
("outs", [{"path": "output", "md5": "md5-output"}]),
191+
]
192+
)
193+
194+
195+
def test_to_lockfile(dvc):
196+
stage = create_stage(PipelineStage, dvc, deps=["input"], **kwargs)
197+
stage.deps[0].info = {"md5": "md-five"}
198+
entry = to_lockfile(stage)
199+
assert len(entry) == 1
200+
_Schema(LOCKFILE_SCHEMA)(entry)
201+
assert entry == {
202+
"something": OrderedDict(
203+
[
204+
("cmd", "command"),
205+
("deps", [{"path": "input", "md5": "md-five"}]),
206+
]
207+
)
208+
}

tests/unit/stage/test_stage.py

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,6 @@
22
import signal
33
import subprocess
44
import threading
5-
from unittest import TestCase
65

76
import mock
87
import pytest
@@ -50,12 +49,11 @@ def test_meta_ignored():
5049
assert stage.compute_md5() == "e9521a22111493406ea64a88cda63e0b"
5150

5251

53-
class TestPathConversion(TestCase):
54-
def test(self):
55-
stage = Stage(None, "path")
52+
def test_path_conversion(dvc):
53+
stage = Stage(dvc, "path")
5654

57-
stage.wdir = os.path.join("..", "..")
58-
self.assertEqual(stage.dumpd()["wdir"], "../..")
55+
stage.wdir = os.path.join("..", "..")
56+
assert stage.dumpd()["wdir"] == "../.."
5957

6058

6159
def test_stage_update(mocker):

0 commit comments

Comments
 (0)