Skip to content

Commit 06a1a96

Browse files
Do the de-duplication in _reproduce_stages
1 parent f5d89f6 commit 06a1a96

File tree

1 file changed

+41
-42
lines changed

1 file changed

+41
-42
lines changed

dvc/repo/reproduce.py

Lines changed: 41 additions & 42 deletions
Original file line number | Diff line number | Diff line change
@@ -94,22 +94,12 @@ def reproduce(
9494
path, name=name, recursive=recursive, graph=active_graph
9595
)
9696

97-
ret = []
98-
checked_stages = set()
99-
for target in targets:
100-
stages, these_checked_stages = _reproduce_stages(
101-
active_graph, target, checked_stages, **kwargs
102-
)
103-
ret.extend(stages)
104-
checked_stages.update(these_checked_stages)
105-
106-
return ret
97+
return _reproduce_stages(active_graph, targets, **kwargs)
10798

10899

109100
def _reproduce_stages(
110101
G,
111-
stage,
112-
checked_stages,
102+
stages,
113103
downstream=False,
114104
ignore_build_cache=False,
115105
single_item=False,
@@ -152,36 +142,45 @@ def _reproduce_stages(
152142
import networkx as nx
153143

154144
if single_item:
155-
pipeline = [stage]
156-
elif downstream:
157-
# NOTE (py3 only):
158-
# Python's `deepcopy` defaults to pickle/unpickle the object.
159-
# Stages are complex objects (with references to `repo`, `outs`,
160-
# and `deps`) that cause struggles when you try to serialize them.
161-
# We need to create a copy of the graph itself, and then reverse it,
162-
# instead of using graph.reverse() directly because it calls
163-
# `deepcopy` underneath -- unless copy=False is specified.
164-
pipeline = nx.dfs_preorder_nodes(G.copy().reverse(copy=False), stage)
145+
all_pipelines = stages
165146
else:
166-
pipeline = nx.dfs_postorder_nodes(G, stage)
147+
all_pipelines = []
148+
for stage in stages:
149+
if downstream:
150+
# NOTE (py3 only):
151+
# Python's `deepcopy` defaults to pickle/unpickle the object.
152+
# Stages are complex objects (with references to `repo`,
153+
# `outs`, and `deps`) that cause struggles when you try
154+
# to serialize them. We need to create a copy of the graph
155+
# itself, and then reverse it, instead of using
156+
# graph.reverse() directly because it calls `deepcopy`
157+
# underneath -- unless copy=False is specified.
158+
all_pipelines += nx.dfs_preorder_nodes(
159+
G.copy().reverse(copy=False), stage
160+
)
161+
else:
162+
all_pipelines += nx.dfs_postorder_nodes(G, stage)
163+
164+
pipeline = []
165+
for stage in all_pipelines:
166+
if stage not in pipeline:
167+
pipeline.append(stage)
167168

168169
result = []
169-
these_checked_stages = []
170-
for st in pipeline:
171-
if st not in checked_stages:
172-
try:
173-
ret = _reproduce_stage(st, **kwargs)
174-
these_checked_stages.append(st)
175-
176-
if len(ret) != 0 and ignore_build_cache:
177-
# NOTE: we are walking our pipeline from the top to the
178-
# bottom. If one stage is changed, it will be reproduced,
179-
# which tells us that we should force reproducing all of
180-
# the other stages down below, even if their direct
181-
# dependencies didn't change.
182-
kwargs["force"] = True
183-
184-
result.extend(ret)
185-
except Exception as exc:
186-
raise ReproductionError(st.relpath) from exc
187-
return result, these_checked_stages
170+
for stage in pipeline:
171+
try:
172+
ret = _reproduce_stage(stage, **kwargs)
173+
174+
if len(ret) != 0 and ignore_build_cache:
175+
# NOTE: we are walking our pipeline from the top to the
176+
# bottom. If one stage is changed, it will be reproduced,
177+
# which tells us that we should force reproducing all of
178+
# the other stages down below, even if their direct
179+
# dependencies didn't change.
180+
kwargs["force"] = True
181+
182+
result.extend(ret)
183+
except Exception as exc:
184+
raise ReproductionError(stage.relpath) from exc
185+
186+
return result

0 commit comments

Comments
 (0)