Skip to content

Commit 4fdf7ca

Browse files
authored
repo: use reverse post-order DFS in repro --downstream (#3689)
* tests: ensure repro --downstream preserves dependency order * repo: use reverse post-order DFS in repro --downstream - Fixes #3602 * make test_repro and test_repro_multistage consistent * rebase master, update multistage test
1 parent a7b7767 commit 4fdf7ca

File tree

3 files changed

+77
-36
lines changed

3 files changed

+77
-36
lines changed

dvc/repo/reproduce.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -126,7 +126,8 @@ def _reproduce_stages(
126126
is to derive the evaluation starting from the given stage up to the
127127
ancestors. However, the `networkx.ancestors` returns a set, without
128128
any guarantee of any order, so we are going to reverse the graph and
129-
use a pre-ordered search using the given stage as a starting point.
129+
use a reverse post-ordered search using the given stage as a starting
130+
point.
130131
131132
E A
132133
/ \ / \
@@ -154,9 +155,10 @@ def _reproduce_stages(
154155
# itself, and then reverse it, instead of using
155156
# graph.reverse() directly because it calls `deepcopy`
156157
# underneath -- unless copy=False is specified.
157-
all_pipelines += nx.dfs_preorder_nodes(
158+
nodes = nx.dfs_postorder_nodes(
158159
G.copy().reverse(copy=False), stage
159160
)
161+
all_pipelines += reversed(list(nodes))
160162
else:
161163
all_pipelines += nx.dfs_postorder_nodes(G, stage)
162164

tests/func/test_repro.py

Lines changed: 43 additions & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -1586,40 +1586,49 @@ def _hide_md5(text):
15861586
return re.sub(r"\b[a-f0-9]{32}\b", "<md5>", text)
15871587

15881588

1589-
class TestReproDownstream(TestDvc):
1590-
def test(self):
1591-
# The dependency graph should look like this:
1592-
#
1593-
# E
1594-
# / \
1595-
# D F
1596-
# / \ \
1597-
# B C G
1598-
# \ /
1599-
# A
1600-
#
1601-
assert main(["run", "-o", "A", "echo A>A"]) == 0
1602-
assert main(["run", "-d", "A", "-o", "B", "echo B>B"]) == 0
1603-
assert main(["run", "-d", "A", "-o", "C", "echo C>C"]) == 0
1604-
assert main(["run", "-d", "B", "-d", "C", "-o", "D", "echo D>D"]) == 0
1605-
assert main(["run", "-o", "G", "echo G>G"]) == 0
1606-
assert main(["run", "-d", "G", "-o", "F", "echo F>F"]) == 0
1607-
assert main(["run", "-d", "D", "-d", "F", "-o", "E", "echo E>E"]) == 0
1608-
1609-
# We want the evaluation to move from B to E
1610-
#
1611-
# E
1612-
# /
1613-
# D
1614-
# /
1615-
# B
1616-
#
1617-
evaluation = self.dvc.reproduce("B.dvc", downstream=True, force=True)
1618-
1619-
assert len(evaluation) == 3
1620-
assert evaluation[0].relpath == "B.dvc"
1621-
assert evaluation[1].relpath == "D.dvc"
1622-
assert evaluation[2].relpath == "E.dvc"
1589+
def test_downstream(dvc):
1590+
# The dependency graph should look like this:
1591+
#
1592+
# E
1593+
# / \
1594+
# D F
1595+
# / \ \
1596+
# B C G
1597+
# \ /
1598+
# A
1599+
#
1600+
assert main(["run", "-o", "A", "echo A>A"]) == 0
1601+
assert main(["run", "-d", "A", "-o", "B", "echo B>B"]) == 0
1602+
assert main(["run", "-d", "A", "-o", "C", "echo C>C"]) == 0
1603+
assert main(["run", "-d", "B", "-d", "C", "-o", "D", "echo D>D"]) == 0
1604+
assert main(["run", "-o", "G", "echo G>G"]) == 0
1605+
assert main(["run", "-d", "G", "-o", "F", "echo F>F"]) == 0
1606+
assert main(["run", "-d", "D", "-d", "F", "-o", "E", "echo E>E"]) == 0
1607+
1608+
# We want the evaluation to move from B to E
1609+
#
1610+
# E
1611+
# /
1612+
# D
1613+
# /
1614+
# B
1615+
#
1616+
evaluation = dvc.reproduce("B.dvc", downstream=True, force=True)
1617+
1618+
assert len(evaluation) == 3
1619+
assert evaluation[0].relpath == "B.dvc"
1620+
assert evaluation[1].relpath == "D.dvc"
1621+
assert evaluation[2].relpath == "E.dvc"
1622+
1623+
# B, C should be run (in any order) before D
1624+
# See https://github.com/iterative/dvc/issues/3602
1625+
evaluation = dvc.reproduce("A.dvc", downstream=True, force=True)
1626+
1627+
assert len(evaluation) == 5
1628+
assert evaluation[0].relpath == "A.dvc"
1629+
assert {evaluation[1].relpath, evaluation[2].relpath} == {"B.dvc", "C.dvc"}
1630+
assert evaluation[3].relpath == "D.dvc"
1631+
assert evaluation[4].relpath == "E.dvc"
16231632

16241633

16251634
@pytest.mark.skipif(

tests/func/test_repro_multistage.py

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -259,6 +259,36 @@ def test_downstream(tmp_dir, dvc):
259259
and evaluation[2].relpath == "E.dvc"
260260
)
261261

262+
# B, C should be run (in any order) before D
263+
# See https://github.com/iterative/dvc/issues/3602
264+
evaluation = dvc.reproduce(
265+
PIPELINE_FILE + ":A-gen", downstream=True, force=True
266+
)
267+
268+
assert len(evaluation) == 5
269+
assert (
270+
isinstance(evaluation[0], PipelineStage)
271+
and evaluation[0].relpath == PIPELINE_FILE
272+
and evaluation[0].name == "A-gen"
273+
)
274+
names = set()
275+
for stage in evaluation[1:3]:
276+
if isinstance(stage, PipelineStage):
277+
assert stage.relpath == PIPELINE_FILE
278+
names.add(stage.name)
279+
else:
280+
names.add(stage.relpath)
281+
assert names == {"B-gen", "C.dvc"}
282+
assert (
283+
isinstance(evaluation[3], PipelineStage)
284+
and evaluation[3].relpath == PIPELINE_FILE
285+
and evaluation[3].name == "D-gen"
286+
)
287+
assert (
288+
not isinstance(evaluation[4], PipelineStage)
289+
and evaluation[4].relpath == "E.dvc"
290+
)
291+
262292

263293
def test_repro_when_cmd_changes(tmp_dir, dvc, run_copy):
264294
from dvc.dvcfile import PipelineFile

0 commit comments

Comments
 (0)