Skip to content

Commit 8a89b5c

Browse files
committed
implement removing stage
This should allow us to be able to remove a stage from a dvcfile, (if it's a single stage file, it removes it), and make us able to delete the overwrite the stage on run as well. This commit: * changes `run` `--overwrite-dvcfile` flag to `--overwrite` * removes prompt in `run` for overwrite and fails hard * changes `remove`: now it by default removes stage entry and unprotect outs * `remove` has `--outs` to remove outputs instead of unprotecting them
1 parent bb50fb8 commit 8a89b5c

14 files changed

+290
-188
lines changed

dvc/command/remove.py

Lines changed: 4 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,74 +1,37 @@
11
import argparse
22
import logging
33

4-
import dvc.prompt as prompt
54
from dvc.command.base import CmdBase, append_doc_link
65
from dvc.exceptions import DvcException
76

87
logger = logging.getLogger(__name__)
98

109

1110
class CmdRemove(CmdBase):
12-
def _is_dvc_only(self, target):
13-
if not self.args.purge:
14-
return True
15-
16-
if self.args.force:
17-
return False
18-
19-
msg = "Are you sure you want to remove '{}' with its outputs?".format(
20-
target
21-
)
22-
23-
if prompt.confirm(msg):
24-
return False
25-
26-
raise DvcException(
27-
"Cannot purge without a confirmation from the user."
28-
" Use `-f` to force."
29-
)
30-
3111
def run(self):
3212
for target in self.args.targets:
3313
try:
34-
dvc_only = self._is_dvc_only(target)
35-
self.repo.remove(target, dvc_only=dvc_only)
14+
self.repo.remove(target, outs=self.args.outs)
3615
except DvcException:
3716
logger.exception(f"failed to remove '{target}'")
3817
return 1
3918
return 0
4019

4120

4221
def add_parser(subparsers, parent_parser):
43-
REMOVE_HELP = "Remove DVC-tracked files or directories."
22+
REMOVE_HELP = "Remove stage entry and unprotect outputs"
4423
remove_parser = subparsers.add_parser(
4524
"remove",
4625
parents=[parent_parser],
4726
description=append_doc_link(REMOVE_HELP, "remove"),
4827
help=REMOVE_HELP,
4928
formatter_class=argparse.RawDescriptionHelpFormatter,
5029
)
51-
remove_parser_group = remove_parser.add_mutually_exclusive_group()
52-
remove_parser_group.add_argument(
53-
"-o",
54-
"--outs",
55-
action="store_true",
56-
default=True,
57-
help="Only remove DVC-file outputs. (Default)",
58-
)
59-
remove_parser_group.add_argument(
60-
"-p",
61-
"--purge",
62-
action="store_true",
63-
default=False,
64-
help="Remove DVC-file and all its outputs.",
65-
)
6630
remove_parser.add_argument(
67-
"-f",
68-
"--force",
31+
"--outs",
6932
action="store_true",
7033
default=False,
71-
help="Force purge.",
34+
help="Remove outputs as well.",
7235
)
7336
remove_parser.add_argument(
7437
"targets", nargs="+", help="DVC-files to remove."

dvc/command/run.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,7 +45,7 @@ def run(self):
4545
fname=self.args.file,
4646
wdir=self.args.wdir,
4747
no_exec=self.args.no_exec,
48-
overwrite=self.args.overwrite_dvcfile,
48+
overwrite=self.args.overwrite,
4949
run_cache=not self.args.no_run_cache,
5050
no_commit=self.args.no_commit,
5151
outs_persist=self.args.outs_persist,
@@ -177,10 +177,10 @@ def add_parser(subparsers, parent_parser):
177177
help="Only create stage file without actually running it.",
178178
)
179179
run_parser.add_argument(
180-
"--overwrite-dvcfile",
180+
"--overwrite",
181181
action="store_true",
182182
default=False,
183-
help="Overwrite existing DVC-file without asking for confirmation.",
183+
help="Overwrite existing stage",
184184
)
185185
run_parser.add_argument(
186186
"--no-run-cache",

dvc/dvcfile.py

Lines changed: 37 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -150,7 +150,7 @@ def dump(self, stage, **kwargs):
150150
"Saving information to '{file}'.".format(file=relpath(self.path))
151151
)
152152
dump_stage_file(self.path, serialize.to_single_stage_file(stage))
153-
self.repo.scm.track_file(relpath(self.path))
153+
self.repo.scm.track_file(self.relpath)
154154

155155
def remove_with_prompt(self, force=False):
156156
if not self.exists():
@@ -165,6 +165,9 @@ def remove_with_prompt(self, force=False):
165165

166166
self.remove()
167167

168+
def remove_stage(self, stage):
169+
self.remove()
170+
168171

169172
class PipelineFile(FileMixin):
170173
"""Abstraction for pipelines file, .yaml + .lock combined."""
@@ -212,7 +215,7 @@ def _dump_pipeline_file(self, stage):
212215
"Adding stage '%s' to '%s'", stage.name, self.relpath,
213216
)
214217
dump_stage_file(self.path, data)
215-
self.repo.scm.track_file(relpath(self.path))
218+
self.repo.scm.track_file(self.relpath)
216219

217220
@property
218221
def stage(self):
@@ -234,6 +237,22 @@ def remove(self, force=False):
234237
super().remove()
235238
self._lockfile.remove()
236239

240+
def remove_stage(self, stage):
241+
self._lockfile.remove_stage(stage)
242+
if not self.exists():
243+
return
244+
245+
with open(self.path, "r") as f:
246+
d = parse_stage_for_update(f.read(), self.path)
247+
248+
self.validate(d, self.path)
249+
if stage.name not in d.get("stages", {}):
250+
return
251+
252+
logger.debug("Removing '%s' from '%s'", stage.name, self.path)
253+
del d["stages"][stage.name]
254+
dump_stage_file(self.path, d)
255+
237256

238257
class Lockfile(FileMixin):
239258
from dvc.schema import COMPILED_LOCKFILE_SCHEMA as SCHEMA
@@ -271,6 +290,22 @@ def dump(self, stage, **kwargs):
271290
if modified:
272291
self.repo.scm.track_file(self.relpath)
273292

293+
def remove_stage(self, stage):
294+
if not self.exists():
295+
return
296+
297+
with open(self.path) as f:
298+
d = parse_stage_for_update(f.read(), self.path)
299+
self.validate(d, self.path)
300+
301+
if stage.name not in d:
302+
return
303+
304+
logger.debug("Removing '%s' from '%s'", stage.name, self.path)
305+
del d[stage.name]
306+
307+
dump_stage_file(self.path, d)
308+
274309

275310
class Dvcfile:
276311
def __new__(cls, repo, path, **kwargs):

dvc/repo/remove.py

Lines changed: 3 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -7,15 +7,11 @@
77

88

99
@locked
10-
def remove(self, target, dvc_only=False):
11-
from ..dvcfile import Dvcfile, is_valid_filename
12-
10+
def remove(self, target, outs=False):
1311
path, name = parse_target(target)
1412
stages = self.get_stages(path, name)
15-
for stage in stages:
16-
stage.remove_outs(force=True)
1713

18-
if path and is_valid_filename(path) and not dvc_only:
19-
Dvcfile(self, path).remove()
14+
for stage in stages:
15+
stage.remove(remove_outs=outs, force=outs)
2016

2117
return stages

dvc/repo/run.py

Lines changed: 10 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,11 @@
33
from funcy import concat, first
44

55
from dvc.exceptions import InvalidArgumentError
6-
from dvc.stage.exceptions import DuplicateStageName, InvalidStageName
6+
from dvc.stage.exceptions import (
7+
DuplicateStageName,
8+
InvalidStageName,
9+
StageFileAlreadyExistsError,
10+
)
711

812
from ..exceptions import OutputDuplicationError
913
from . import locked
@@ -74,10 +78,12 @@ def run(self, fname=None, no_exec=False, single_stage=False, **kwargs):
7478

7579
dvcfile = Dvcfile(self, stage.path)
7680
if dvcfile.exists():
77-
if stage_name and stage_name in dvcfile.stages:
81+
if kwargs.get("overwrite", True):
82+
dvcfile.remove_stage(stage)
83+
elif stage_cls != PipelineStage:
84+
raise StageFileAlreadyExistsError(dvcfile.relpath)
85+
elif stage_name and stage_name in dvcfile.stages:
7886
raise DuplicateStageName(stage_name, dvcfile)
79-
if stage_cls != PipelineStage:
80-
dvcfile.remove_with_prompt(force=kwargs.get("overwrite", True))
8187

8288
try:
8389
self.check_modified_graph([stage])

dvc/stage/__init__.py

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -280,12 +280,13 @@ def unprotect_outs(self):
280280
out.unprotect()
281281

282282
@rwlocked(write=["outs"])
283-
def remove(self, force=False, remove_outs=True):
283+
def remove(self, force=False, remove_outs=True, purge=True):
284284
if remove_outs:
285285
self.remove_outs(ignore_remove=True, force=force)
286286
else:
287287
self.unprotect_outs()
288-
self.dvcfile.remove()
288+
if purge:
289+
self.dvcfile.remove_stage(self)
289290

290291
@rwlocked(read=["deps"], write=["outs"])
291292
def reproduce(self, interactive=False, **kwargs):

tests/func/test_dvcfile.py

Lines changed: 132 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,14 @@
1+
import textwrap
2+
13
import pytest
24

3-
from dvc.dvcfile import PIPELINE_FILE, Dvcfile
4-
from dvc.stage.exceptions import StageFileDoesNotExistError
5+
from dvc.dvcfile import PIPELINE_FILE, PIPELINE_LOCK, Dvcfile
6+
from dvc.stage.exceptions import (
7+
StageFileDoesNotExistError,
8+
StageFileFormatError,
9+
)
510
from dvc.stage.loader import StageNotFound
11+
from dvc.utils.stage import dump_stage_file
612

713

814
def test_run_load_one_for_multistage(tmp_dir, dvc):
@@ -160,3 +166,127 @@ def test_stage_collection(tmp_dir, dvc):
160166
single_stage=True,
161167
)
162168
assert {s for s in dvc.stages} == {stage1, stage3, stage2}
169+
170+
171+
def test_remove_stage(tmp_dir, dvc, run_copy):
172+
tmp_dir.gen("foo", "foo")
173+
stage = run_copy("foo", "bar", name="copy-foo-bar")
174+
stage2 = run_copy("bar", "foobar", name="copy-bar-foobar")
175+
176+
dvc_file = Dvcfile(dvc, PIPELINE_FILE)
177+
assert dvc_file.exists()
178+
assert {"copy-bar-foobar", "copy-foo-bar"} == set(
179+
dvc_file._load()[0]["stages"].keys()
180+
)
181+
182+
dvc_file.remove_stage(stage)
183+
184+
assert ["copy-bar-foobar"] == list(dvc_file._load()[0]["stages"].keys())
185+
186+
# sanity check
187+
stage2.reload()
188+
189+
# re-check to see if it fails if there's no stage entry
190+
dvc_file.remove_stage(stage)
191+
dvc_file.remove(force=True)
192+
# should not fail when there's no file at all.
193+
dvc_file.remove_stage(stage)
194+
195+
196+
def test_remove_stage_lockfile(tmp_dir, dvc, run_copy):
197+
tmp_dir.gen("foo", "foo")
198+
stage = run_copy("foo", "bar", name="copy-foo-bar")
199+
stage2 = run_copy("bar", "foobar", name="copy-bar-foobar")
200+
201+
dvc_file = Dvcfile(dvc, PIPELINE_FILE)
202+
lock_file = dvc_file._lockfile
203+
assert dvc_file.exists()
204+
assert lock_file.exists()
205+
assert {"copy-bar-foobar", "copy-foo-bar"} == set(lock_file.load().keys())
206+
lock_file.remove_stage(stage)
207+
208+
assert ["copy-bar-foobar"] == list(lock_file.load().keys())
209+
210+
# sanity check
211+
stage2.reload()
212+
213+
# re-check to see if it fails if there's no stage entry
214+
lock_file.remove_stage(stage)
215+
lock_file.remove()
216+
# should not fail when there's no file at all.
217+
lock_file.remove_stage(stage)
218+
219+
220+
def test_remove_stage_dvcfiles(tmp_dir, dvc, run_copy):
221+
tmp_dir.gen("foo", "foo")
222+
stage = run_copy("foo", "bar", single_stage=True)
223+
224+
dvc_file = Dvcfile(dvc, stage.path)
225+
assert dvc_file.exists()
226+
dvc_file.remove_stage(stage)
227+
assert not dvc_file.exists()
228+
229+
# re-check to see if it fails if there's no stage entry
230+
dvc_file.remove_stage(stage)
231+
dvc_file.remove(force=True)
232+
233+
# should not fail when there's no file at all.
234+
dvc_file.remove_stage(stage)
235+
236+
237+
def test_remove_stage_on_lockfile_format_error(tmp_dir, dvc, run_copy):
238+
tmp_dir.gen("foo", "foo")
239+
stage = run_copy("foo", "bar", name="copy-foo-bar")
240+
dvc_file = Dvcfile(dvc, stage.path)
241+
lock_file = dvc_file._lockfile
242+
243+
data = dvc_file._load()[0]
244+
lock_data = lock_file.load()
245+
lock_data["gibberish"] = True
246+
data["gibberish"] = True
247+
dump_stage_file(lock_file.relpath, lock_data)
248+
with pytest.raises(StageFileFormatError):
249+
dvc_file.remove_stage(stage)
250+
251+
lock_file.remove()
252+
dvc_file.dump(stage)
253+
254+
dump_stage_file(dvc_file.relpath, data)
255+
with pytest.raises(StageFileFormatError):
256+
dvc_file.remove_stage(stage)
257+
258+
259+
def test_remove_stage_preserves_comment(tmp_dir, dvc, run_copy):
260+
tmp_dir.gen(
261+
"dvc.yaml",
262+
textwrap.dedent(
263+
"""\
264+
stages:
265+
generate-foo:
266+
cmd: "echo foo > foo"
267+
# This copies 'foo' text to 'foo' file.
268+
outs:
269+
- foo
270+
copy-foo-bar:
271+
cmd: "python copy.py foo bar"
272+
deps:
273+
- foo
274+
outs:
275+
- bar"""
276+
),
277+
)
278+
279+
dvc.reproduce(PIPELINE_FILE)
280+
281+
dvc_file = Dvcfile(dvc, PIPELINE_FILE)
282+
283+
assert dvc_file.exists()
284+
assert (tmp_dir / PIPELINE_LOCK).exists()
285+
assert (tmp_dir / "foo").exists()
286+
assert (tmp_dir / "bar").exists()
287+
288+
dvc_file.remove_stage(dvc_file.stages["copy-foo-bar"])
289+
assert (
290+
"# This copies 'foo' text to 'foo' file."
291+
in (tmp_dir / PIPELINE_FILE).read_text()
292+
)

0 commit comments

Comments
 (0)