Skip to content

Commit 8932e0b

Browse files
authored
dvc: get rid of can_be_skipped logic (#5385)
1 parent 9ad1e0e commit 8932e0b

File tree

7 files changed

+31
-153
lines changed

7 files changed

+31
-153
lines changed

dvc/repo/add.py

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -255,12 +255,9 @@ def _create_stages(
255255
external=external,
256256
)
257257
restore_meta(stage)
258-
if stage.can_be_skipped:
259-
stage = None
260-
else:
261-
Dvcfile(repo, stage.path).remove()
262-
if desc:
263-
stage.outs[0].desc = desc
258+
Dvcfile(repo, stage.path).remove()
259+
if desc:
260+
stage.outs[0].desc = desc
264261

265262
repo._reset() # pylint: disable=protected-access
266263

dvc/repo/imp_url.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -57,8 +57,6 @@ def imp_url(
5757
erepo=erepo,
5858
)
5959
restore_meta(stage)
60-
if stage.can_be_skipped:
61-
return None
6260

6361
if desc:
6462
stage.outs[0].desc = desc

dvc/repo/run.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -22,8 +22,6 @@ def run(
2222
from dvc.stage.utils import validate_state
2323

2424
stage = self.stage.create_from_cli(**kwargs)
25-
if run_cache and stage.can_be_skipped:
26-
return None
2725

2826
validate_state(self, stage, force=force)
2927

dvc/stage/__init__.py

Lines changed: 0 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -32,7 +32,6 @@
3232
fill_stage_dependencies,
3333
fill_stage_outputs,
3434
get_dump,
35-
stage_dump_eq,
3635
)
3736

3837
if TYPE_CHECKING:
@@ -416,51 +415,9 @@ def update(self, rev=None):
416415
raise StageUpdateError(self.relpath)
417416
update_import(self, rev=rev)
418417

419-
@property
420-
def can_be_skipped(self):
421-
if not self.dvcfile.exists():
422-
return False
423-
424-
has_persist_outs = any(out.persist for out in self.outs)
425-
if has_persist_outs:
426-
logger.warning("Build cache is ignored when persisting outputs.")
427-
return False
428-
429-
if self.is_cached and not self.is_callback and not self.always_changed:
430-
logger.info("Stage is cached, skipping")
431-
return True
432-
433-
return False
434-
435418
def reload(self):
436419
return self.dvcfile.stage
437420

438-
@property
439-
def is_cached(self):
440-
"""Checks if this stage has been already ran and stored"""
441-
old = self.reload()
442-
if old.changed_outs():
443-
return False
444-
445-
# NOTE: need to save checksums for deps in order to compare them
446-
# with what is written in the old stage.
447-
self.save_deps()
448-
if not stage_dump_eq(Stage, old.dumpd(), self.dumpd()):
449-
return False
450-
451-
# NOTE: committing to prevent potential data duplication. For example
452-
#
453-
# $ dvc config cache.type hardlink
454-
# $ echo foo > foo
455-
# $ dvc add foo
456-
# $ rm -f foo
457-
# $ echo foo > foo
458-
# $ dvc add foo # should replace foo with a link to cache
459-
#
460-
old.commit()
461-
462-
return True
463-
464421
def dumpd(self):
465422
return get_dump(self)
466423

@@ -729,10 +686,6 @@ def addressing(self):
729686
def reload(self):
730687
return self.dvcfile.stages[self.name]
731688

732-
@property
733-
def is_cached(self):
734-
return self.name in self.dvcfile.stages and super().is_cached
735-
736689
def _status_stage(self, ret):
737690
if self.cmd_changed:
738691
ret.append("changed command")

dvc/stage/utils.py

Lines changed: 0 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,6 @@
11
import os
22
import pathlib
33
from contextlib import suppress
4-
from itertools import product
54
from typing import TYPE_CHECKING, Any, Union
65

76
from funcy import concat, first, lsplit, rpartial, without
@@ -166,32 +165,6 @@ def check_missing_outputs(stage):
166165
raise MissingDataSource(paths)
167166

168167

169-
def stage_dump_eq(stage_cls, old_d, new_d):
170-
# NOTE: need to remove checksums from old dict in order to compare
171-
# it to the new one, since the new one doesn't have checksums yet.
172-
from ..tree.local import LocalTree
173-
from ..tree.s3 import S3Tree
174-
175-
old_d.pop(stage_cls.PARAM_MD5, None)
176-
new_d.pop(stage_cls.PARAM_MD5, None)
177-
outs = old_d.get(stage_cls.PARAM_OUTS, [])
178-
for out in outs:
179-
out.pop(LocalTree.PARAM_CHECKSUM, None)
180-
out.pop(S3Tree.PARAM_CHECKSUM, None)
181-
out.pop(HashInfo.PARAM_SIZE, None)
182-
out.pop(HashInfo.PARAM_NFILES, None)
183-
184-
# outs and deps are lists of dicts. To check equality, we need to make
185-
# them independent of the order, so, we convert them to dicts.
186-
combination = product(
187-
[old_d, new_d], [stage_cls.PARAM_DEPS, stage_cls.PARAM_OUTS]
188-
)
189-
for coll, key in combination:
190-
if coll.get(key):
191-
coll[key] = {item["path"]: item for item in coll[key]}
192-
return old_d == new_d
193-
194-
195168
def compute_md5(stage):
196169
from dvc.output.base import BaseOutput
197170

tests/func/test_run_multistage.py

Lines changed: 6 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -73,13 +73,15 @@ def test_run_multi_stage_repeat(tmp_dir, dvc, run_copy):
7373
}
7474

7575

76-
def test_multi_stage_run_cached(tmp_dir, dvc, run_copy):
76+
def test_multi_stage_run_cached(tmp_dir, dvc, run_copy, mocker):
77+
from dvc.stage.run import subprocess
78+
7779
tmp_dir.dvc_gen("foo", "foo")
7880

7981
run_copy("foo", "foo2", name="copy-foo1-foo2")
80-
stage2 = run_copy("foo", "foo2", name="copy-foo1-foo2")
81-
82-
assert stage2 is None
82+
spy = mocker.spy(subprocess, "Popen")
83+
run_copy("foo", "foo2", name="copy-foo1-foo2")
84+
assert not spy.called
8385

8486

8587
def test_multistage_dump_on_non_cached_outputs(tmp_dir, dvc):

tests/func/test_run_single_stage.py

Lines changed: 22 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -520,32 +520,6 @@ def test(self):
520520

521521
time.sleep(1)
522522

523-
ret = main(
524-
[
525-
"run",
526-
"-d",
527-
self.FOO,
528-
"-d",
529-
self.CODE,
530-
"-o",
531-
"out",
532-
"--file",
533-
"out.dvc",
534-
"--single-stage",
535-
"python",
536-
self.CODE,
537-
self.FOO,
538-
"out",
539-
]
540-
)
541-
self.assertEqual(ret, 0)
542-
543-
# NOTE: check that dvcfile was NOT overwritten
544-
self.assertEqual(stage_mtime, os.path.getmtime("out.dvc"))
545-
stage_mtime = os.path.getmtime("out.dvc")
546-
547-
time.sleep(1)
548-
549523
ret = main(
550524
[
551525
"run",
@@ -660,20 +634,34 @@ def test_fname_changes_path_and_wdir(self):
660634
self.assertEqual(d[Stage.PARAM_WDIR], "..")
661635

662636

663-
def test_rerun_deterministic(tmp_dir, run_copy):
637+
def test_rerun_deterministic(tmp_dir, run_copy, mocker):
638+
from dvc.stage.run import subprocess
639+
664640
tmp_dir.gen("foo", "foo content")
665641

666-
assert run_copy("foo", "out", single_stage=True) is not None
667-
assert run_copy("foo", "out", single_stage=True) is None
642+
spy = mocker.spy(subprocess, "Popen")
668643

644+
run_copy("foo", "out", single_stage=True)
645+
assert spy.called
646+
647+
spy.reset_mock()
648+
run_copy("foo", "out", single_stage=True)
649+
assert not spy.called
650+
651+
652+
def test_rerun_deterministic_ignore_cache(tmp_dir, run_copy, mocker):
653+
from dvc.stage.run import subprocess
669654

670-
def test_rerun_deterministic_ignore_cache(tmp_dir, run_copy):
671655
tmp_dir.gen("foo", "foo content")
672656

673-
assert run_copy("foo", "out", single_stage=True) is not None
674-
assert (
675-
run_copy("foo", "out", run_cache=False, single_stage=True) is not None
676-
)
657+
spy = mocker.spy(subprocess, "Popen")
658+
659+
run_copy("foo", "out", single_stage=True)
660+
assert spy.called
661+
662+
spy.reset_mock()
663+
run_copy("foo", "out", run_cache=False, single_stage=True)
664+
assert spy.called
677665

678666

679667
def test_rerun_callback(dvc):
@@ -936,37 +924,6 @@ def test(self):
936924
mock_checkout.assert_not_called()
937925

938926

939-
class TestPersistentOutput(TestDvc):
940-
def test_ignore_run_cache(self):
941-
warning = "Build cache is ignored when persisting outputs."
942-
943-
with open("immutable", "w") as fobj:
944-
fobj.write("1")
945-
946-
cmd = [
947-
"run",
948-
"--force",
949-
"--single-stage",
950-
"--deps",
951-
"immutable",
952-
"--outs-persist",
953-
"greetings",
954-
"echo hello>>greetings",
955-
]
956-
957-
with self._caplog.at_level(logging.WARNING, logger="dvc"):
958-
assert main(cmd) == 0
959-
assert warning not in self._caplog.text
960-
961-
assert main(cmd) == 0
962-
assert warning in self._caplog.text
963-
964-
# Even if the "immutable" dependency didn't change
965-
# it should run the command again, as it is "ignoring build cache"
966-
with open("greetings") as fobj:
967-
assert "hello\nhello\n" == fobj.read()
968-
969-
970927
def test_bad_stage_fname(tmp_dir, dvc, run_copy):
971928
tmp_dir.dvc_gen("foo", "foo content")
972929

0 commit comments

Comments
 (0)