Skip to content

Commit f309b79

Browse files
committed
dvc: add support for multistage dvcfile.
Pipeline stage files can now house multiple stages. Checksums are stored in separate lockfiles, so the Dvcfile itself stays clean, human-readable, and editable. The *.dvc files will be generated for output files. The feature is available via a hidden flag on `dvc run`: -n/--name. Fixes #1871. PR: #3584
1 parent 11e56db commit f309b79

23 files changed

+1735
-293
lines changed

dvc/command/pipeline.py

Lines changed: 24 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -8,15 +8,26 @@
88
logger = logging.getLogger(__name__)
99

1010

11+
def _stage_repr(stage):
12+
from dvc.stage import PipelineStage
13+
14+
return (
15+
"{}:{}".format(stage.relpath, stage.name)
16+
if isinstance(stage, PipelineStage)
17+
else stage.relpath
18+
)
19+
20+
1121
class CmdPipelineShow(CmdBase):
1222
def _show(self, target, commands, outs, locked):
1323
import networkx
14-
from dvc.dvcfile import Dvcfile
24+
from dvc import dvcfile
25+
from dvc.utils import parse_target
1526

16-
stage = Dvcfile(self.repo, target).load()
17-
G = self.repo.graph
27+
path, name = parse_target(target)
28+
stage = dvcfile.Dvcfile(self.repo, path).load_one(name)
29+
G = self.repo.pipeline_graph
1830
stages = networkx.dfs_postorder_nodes(G, stage)
19-
2031
if locked:
2132
stages = [s for s in stages if s.locked]
2233

@@ -29,14 +40,16 @@ def _show(self, target, commands, outs, locked):
2940
for out in stage.outs:
3041
logger.info(str(out))
3142
else:
32-
logger.info(stage.path_in_repo)
43+
logger.info(_stage_repr(stage))
3344

34-
def _build_graph(self, target, commands, outs):
45+
def _build_graph(self, target, commands=False, outs=False):
3546
import networkx
36-
from dvc.dvcfile import Dvcfile
47+
from dvc import dvcfile
3748
from dvc.repo.graph import get_pipeline
49+
from dvc.utils import parse_target
3850

39-
target_stage = Dvcfile(self.repo, target).load()
51+
path, name = parse_target(target)
52+
target_stage = dvcfile.Dvcfile(self.repo, path).load_one(name)
4053
G = get_pipeline(self.repo.pipelines, target_stage)
4154

4255
nodes = set()
@@ -49,7 +62,7 @@ def _build_graph(self, target, commands, outs):
4962
for out in stage.outs:
5063
nodes.add(str(out))
5164
else:
52-
nodes.add(stage.relpath)
65+
nodes.add(_stage_repr(stage))
5366

5467
edges = []
5568
for from_stage, to_stage in networkx.edge_dfs(G, target_stage):
@@ -62,7 +75,7 @@ def _build_graph(self, target, commands, outs):
6275
for to_out in to_stage.outs:
6376
edges.append((str(from_out), str(to_out)))
6477
else:
65-
edges.append((from_stage.relpath, to_stage.relpath))
78+
edges.append((_stage_repr(from_stage), _stage_repr(to_stage)))
6679

6780
return list(nodes), edges, networkx.is_tree(G)
6881

@@ -150,7 +163,7 @@ def run(self):
150163
pipelines = self.repo.pipelines
151164
for pipeline in pipelines:
152165
for stage in pipeline:
153-
logger.info(stage.relpath)
166+
logger.info(_stage_repr(stage))
154167
if len(pipeline) != 0:
155168
logger.info("=" * 80)
156169
logger.info("{} pipelines total".format(len(pipelines)))

dvc/command/run.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ def run(self):
5151
outs_persist=self.args.outs_persist,
5252
outs_persist_no_cache=self.args.outs_persist_no_cache,
5353
always_changed=self.args.always_changed,
54+
name=self.args.name,
5455
)
5556
except DvcException:
5657
logger.exception("failed to run command")
@@ -96,6 +97,7 @@ def add_parser(subparsers, parent_parser):
9697
default=[],
9798
help="Declare dependencies for reproducible cmd.",
9899
)
100+
run_parser.add_argument("-n", "--name", help=argparse.SUPPRESS)
99101
run_parser.add_argument(
100102
"-o",
101103
"--outs",

0 commit comments

Comments
 (0)