Skip to content

Commit 52369bd

Browse files
Fix repro duplicating effort while checking steps #3644 (#3645)
* Only reproduce steps once * Add test * Fix linter nasties * Got the order wrong * Check for previous run * Formatting * Do the de-duplication in _reproduce_stages Co-authored-by: Charles Baynham <[email protected]>
1 parent 254cf83 commit 52369bd

File tree

2 files changed

+46
-21
lines changed

2 files changed

+46
-21
lines changed

dvc/repo/reproduce.py

Lines changed: 28 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -95,17 +95,12 @@ def reproduce(
9595
path, name=name, recursive=recursive, graph=active_graph
9696
)
9797

98-
ret = []
99-
for target in targets:
100-
stages = _reproduce_stages(active_graph, target, **kwargs)
101-
ret.extend(stages)
102-
103-
return ret
98+
return _reproduce_stages(active_graph, targets, **kwargs)
10499

105100

106101
def _reproduce_stages(
107102
G,
108-
stage,
103+
stages,
109104
downstream=False,
110105
ignore_build_cache=False,
111106
single_item=False,
@@ -148,23 +143,34 @@ def _reproduce_stages(
148143
import networkx as nx
149144

150145
if single_item:
151-
pipeline = [stage]
152-
elif downstream:
153-
# NOTE (py3 only):
154-
# Python's `deepcopy` defaults to pickle/unpickle the object.
155-
# Stages are complex objects (with references to `repo`, `outs`,
156-
# and `deps`) that cause struggles when you try to serialize them.
157-
# We need to create a copy of the graph itself, and then reverse it,
158-
# instead of using graph.reverse() directly because it calls
159-
# `deepcopy` underneath -- unless copy=False is specified.
160-
pipeline = nx.dfs_preorder_nodes(G.copy().reverse(copy=False), stage)
146+
all_pipelines = stages
161147
else:
162-
pipeline = nx.dfs_postorder_nodes(G, stage)
148+
all_pipelines = []
149+
for stage in stages:
150+
if downstream:
151+
# NOTE (py3 only):
152+
# Python's `deepcopy` defaults to pickle/unpickle the object.
153+
# Stages are complex objects (with references to `repo`,
154+
# `outs`, and `deps`) that cause struggles when you try
155+
# to serialize them. We need to create a copy of the graph
156+
# itself, and then reverse it, instead of using
157+
# graph.reverse() directly because it calls `deepcopy`
158+
# underneath -- unless copy=False is specified.
159+
all_pipelines += nx.dfs_preorder_nodes(
160+
G.copy().reverse(copy=False), stage
161+
)
162+
else:
163+
all_pipelines += nx.dfs_postorder_nodes(G, stage)
164+
165+
pipeline = []
166+
for stage in all_pipelines:
167+
if stage not in pipeline:
168+
pipeline.append(stage)
163169

164170
result = []
165-
for st in pipeline:
171+
for stage in pipeline:
166172
try:
167-
ret = _reproduce_stage(st, **kwargs)
173+
ret = _reproduce_stage(stage, **kwargs)
168174

169175
if len(ret) != 0 and ignore_build_cache:
170176
# NOTE: we are walking our pipeline from the top to the
@@ -176,5 +182,6 @@ def _reproduce_stages(
176182

177183
result.extend(ret)
178184
except Exception as exc:
179-
raise ReproductionError(st.relpath) from exc
185+
raise ReproductionError(stage.relpath) from exc
186+
180187
return result

tests/unit/repo/test_reproduce.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import mock
2+
13
from dvc.repo.reproduce import _get_active_graph
24

35

@@ -23,3 +25,19 @@ def test_get_active_graph(tmp_dir, dvc):
2325
active_graph = _get_active_graph(graph)
2426
assert set(active_graph.nodes) == {bar_stage, baz_stage}
2527
assert not active_graph.edges
28+
29+
30+
@mock.patch("dvc.repo.reproduce._reproduce_stage", return_value=[])
def test_number_reproduces(reproduce_stage_mock, tmp_dir, dvc):
    """Check that reproducing all pipelines handles every stage only once.

    The stages built below form this dependency graph (from deps/outs):

        pre-foo -> foo -> bar -> boop
                      \\-> baz

    ``foo`` and ``pre-foo`` are upstream of several targets, so walking the
    graph once per target without de-duplication would hand them to
    ``_reproduce_stage`` repeatedly.

    ``_reproduce_stage`` is patched to return an empty list, i.e. "nothing
    was reproduced". Note the keyword must be ``return_value``: any other
    keyword (such as ``returns``) is merely set as an attribute on the mock
    and leaves the call returning a ``MagicMock``.
    """
    tmp_dir.dvc_gen({"pre-foo": "pre-foo"})

    dvc.run(deps=["pre-foo"], outs=["foo"], cmd="echo foo > foo")
    dvc.run(deps=["foo"], outs=["bar"], cmd="echo bar > bar")
    dvc.run(deps=["foo"], outs=["baz"], cmd="echo baz > baz")
    dvc.run(deps=["bar"], outs=["boop"], cmd="echo boop > boop")

    # Discard any calls recorded while the stages above were being created,
    # so only the reproduce() call below is counted. reset_mock() keeps the
    # configured return_value by default.
    reproduce_stage_mock.reset_mock()

    dvc.reproduce(all_pipelines=True)

    # Four `run` stages plus, presumably, the stage dvc_gen created for
    # "pre-foo" -- each expected exactly once (TODO confirm the dvc_gen stage).
    assert reproduce_stage_mock.call_count == 5

0 commit comments

Comments
 (0)