Skip to content

Commit e94bc88

Browse files
authored
Merge pull request #1759 from pared/1214
[WIP] run: add --outs-persist and --outs-persist-no-cache options
2 parents ac15a42 + 85b26c2 commit e94bc88

File tree

15 files changed

+282
-86
lines changed

15 files changed

+282
-86
lines changed

dvc/command/run.py

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,8 @@ def run(self):
4343
ignore_build_cache=self.args.ignore_build_cache,
4444
remove_outs=self.args.remove_outs,
4545
no_commit=self.args.no_commit,
46+
outs_persist=self.args.outs_persist,
47+
outs_persist_no_cache=self.args.outs_persist_no_cache,
4648
)
4749
except DvcException:
4850
logger.error("failed to run command")
@@ -175,6 +177,20 @@ def add_parser(subparsers, parent_parser):
175177
default=False,
176178
help="Don't put files/directories into cache.",
177179
)
180+
run_parser.add_argument(
181+
"--outs-persist",
182+
action="append",
183+
default=[],
184+
help="Declare output file or directory that will not be "
185+
"removed upon repro.",
186+
)
187+
run_parser.add_argument(
188+
"--outs-persist-no-cache",
189+
action="append",
190+
default=[],
191+
help="Declare output file or directory that will not be "
192+
"removed upon repro (do not put into DVC cache).",
193+
)
178194
run_parser.add_argument(
179195
"command", nargs=argparse.REMAINDER, help="Command to execute."
180196
)

dvc/output/__init__.py

Lines changed: 33 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -48,23 +48,38 @@
4848
schema.Optional(RemoteHDFS.PARAM_CHECKSUM): schema.Or(str, None),
4949
schema.Optional(OutputBase.PARAM_CACHE): bool,
5050
schema.Optional(OutputBase.PARAM_METRIC): OutputBase.METRIC_SCHEMA,
51+
schema.Optional(OutputBase.PARAM_PERSIST): bool,
5152
}
5253

5354

54-
def _get(stage, p, info, cache, metric):
55+
def _get(stage, p, info, cache, metric, persist):
5556
parsed = urlparse(p)
5657
if parsed.scheme == "remote":
5758
name = Config.SECTION_REMOTE_FMT.format(parsed.netloc)
5859
sect = stage.repo.config.config[name]
5960
remote = Remote(stage.repo, sect)
6061
return OUTS_MAP[remote.scheme](
61-
stage, p, info, cache=cache, remote=remote, metric=metric
62+
stage,
63+
p,
64+
info,
65+
cache=cache,
66+
remote=remote,
67+
metric=metric,
68+
persist=persist,
6269
)
6370

6471
for o in OUTS:
6572
if o.supported(p):
6673
return o(stage, p, info, cache=cache, remote=None, metric=metric)
67-
return OutputLOCAL(stage, p, info, cache=cache, remote=None, metric=metric)
74+
return OutputLOCAL(
75+
stage,
76+
p,
77+
info,
78+
cache=cache,
79+
remote=None,
80+
metric=metric,
81+
persist=persist,
82+
)
6883

6984

7085
def loadd_from(stage, d_list):
@@ -73,12 +88,24 @@ def loadd_from(stage, d_list):
7388
p = d.pop(OutputBase.PARAM_PATH)
7489
cache = d.pop(OutputBase.PARAM_CACHE, True)
7590
metric = d.pop(OutputBase.PARAM_METRIC, False)
76-
ret.append(_get(stage, p, info=d, cache=cache, metric=metric))
91+
persist = d.pop(OutputBase.PARAM_PERSIST, False)
92+
ret.append(
93+
_get(stage, p, info=d, cache=cache, metric=metric, persist=persist)
94+
)
7795
return ret
7896

7997

80-
def loads_from(stage, s_list, use_cache=True, metric=False):
98+
def loads_from(stage, s_list, use_cache=True, metric=False, persist=False):
8199
ret = []
82100
for s in s_list:
83-
ret.append(_get(stage, s, info={}, cache=use_cache, metric=metric))
101+
ret.append(
102+
_get(
103+
stage,
104+
s,
105+
info={},
106+
cache=use_cache,
107+
metric=metric,
108+
persist=persist,
109+
)
110+
)
84111
return ret

dvc/output/base.py

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,7 @@ class OutputBase(object):
3636
PARAM_METRIC = "metric"
3737
PARAM_METRIC_TYPE = "type"
3838
PARAM_METRIC_XPATH = "xpath"
39+
PARAM_PERSIST = "persist"
3940

4041
METRIC_SCHEMA = Or(
4142
None,
@@ -50,7 +51,14 @@ class OutputBase(object):
5051
IsNotFileOrDirError = OutputIsNotFileOrDirError
5152

5253
def __init__(
53-
self, stage, path, info=None, remote=None, cache=True, metric=False
54+
self,
55+
stage,
56+
path,
57+
info=None,
58+
remote=None,
59+
cache=True,
60+
metric=False,
61+
persist=False,
5462
):
5563
self.stage = stage
5664
self.repo = stage.repo
@@ -59,6 +67,7 @@ def __init__(
5967
self.remote = remote or self.REMOTE(self.repo, {})
6068
self.use_cache = False if self.IS_DEPENDENCY else cache
6169
self.metric = False if self.IS_DEPENDENCY else metric
70+
self.persist = persist
6271

6372
if (
6473
self.use_cache
@@ -186,6 +195,7 @@ def dumpd(self):
186195
del self.metric[self.PARAM_METRIC_XPATH]
187196

188197
ret[self.PARAM_METRIC] = self.metric
198+
ret[self.PARAM_PERSIST] = self.persist
189199

190200
return ret
191201

@@ -231,3 +241,7 @@ def get_files_number(self):
231241
return 1
232242

233243
return 0
244+
245+
def unprotect(self):
246+
if self.exists:
247+
self.remote.unprotect(self.path_info)

dvc/output/hdfs.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,23 @@ class OutputHDFS(OutputBase):
1111
REMOTE = RemoteHDFS
1212

1313
def __init__(
14-
self, stage, path, info=None, remote=None, cache=True, metric=False
14+
self,
15+
stage,
16+
path,
17+
info=None,
18+
remote=None,
19+
cache=True,
20+
metric=False,
21+
persist=False,
1522
):
1623
super(OutputHDFS, self).__init__(
17-
stage, path, info=info, remote=remote, cache=cache, metric=metric
24+
stage,
25+
path,
26+
info=info,
27+
remote=remote,
28+
cache=cache,
29+
metric=metric,
30+
persist=persist,
1831
)
1932
if remote:
2033
path = posixpath.join(remote.url, urlparse(path).path.lstrip("/"))

dvc/output/local.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -14,10 +14,23 @@ class OutputLOCAL(OutputBase):
1414
REMOTE = RemoteLOCAL
1515

1616
def __init__(
17-
self, stage, path, info=None, remote=None, cache=True, metric=False
17+
self,
18+
stage,
19+
path,
20+
info=None,
21+
remote=None,
22+
cache=True,
23+
metric=False,
24+
persist=False,
1825
):
1926
super(OutputLOCAL, self).__init__(
20-
stage, path, info, remote=remote, cache=cache, metric=metric
27+
stage,
28+
path,
29+
info,
30+
remote=remote,
31+
cache=cache,
32+
metric=metric,
33+
persist=persist,
2134
)
2235
if remote:
2336
p = os.path.join(

dvc/output/s3.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,10 +11,23 @@ class OutputS3(OutputBase):
1111
REMOTE = RemoteS3
1212

1313
def __init__(
14-
self, stage, path, info=None, remote=None, cache=True, metric=False
14+
self,
15+
stage,
16+
path,
17+
info=None,
18+
remote=None,
19+
cache=True,
20+
metric=False,
21+
persist=False,
1522
):
1623
super(OutputS3, self).__init__(
17-
stage, path, info=info, remote=remote, cache=cache, metric=metric
24+
stage,
25+
path,
26+
info=info,
27+
remote=remote,
28+
cache=cache,
29+
metric=metric,
30+
persist=persist,
1831
)
1932
bucket = remote.bucket if remote else urlparse(path).netloc
2033
path = urlparse(path).path.lstrip("/")

dvc/output/ssh.py

Lines changed: 15 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,10 +12,23 @@ class OutputSSH(OutputBase):
1212
REMOTE = RemoteSSH
1313

1414
def __init__(
15-
self, stage, path, info=None, remote=None, cache=True, metric=False
15+
self,
16+
stage,
17+
path,
18+
info=None,
19+
remote=None,
20+
cache=True,
21+
metric=False,
22+
persist=False,
1623
):
1724
super(OutputSSH, self).__init__(
18-
stage, path, info=info, remote=remote, cache=cache, metric=metric
25+
stage,
26+
path,
27+
info=info,
28+
remote=remote,
29+
cache=cache,
30+
metric=metric,
31+
persist=persist,
1932
)
2033
parsed = urlparse(path)
2134
host = remote.host if remote else parsed.hostname

dvc/remote/base.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -362,3 +362,7 @@ def checkout(self, output, force=False, progress_callback=None):
362362
self.do_checkout(
363363
output, force=force, progress_callback=progress_callback
364364
)
365+
366+
@staticmethod
367+
def unprotect(path_info):
368+
pass

dvc/remote/local.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -743,3 +743,44 @@ def _log_missing_caches(self, checksum_info_dict):
743743
"nor on remote. Missing cache files: {}".format(missing_desc)
744744
)
745745
logger.warning(msg)
746+
747+
@staticmethod
748+
def _unprotect_file(path):
749+
if System.is_symlink(path) or System.is_hardlink(path):
750+
logger.debug("Unprotecting '{}'".format(path))
751+
tmp = os.path.join(os.path.dirname(path), "." + str(uuid.uuid4()))
752+
753+
# The operations order is important here - if some application
754+
# would access the file during the process of copyfile then it
755+
# would get only the part of file. So, at first, the file should be
756+
# copied with the temporary name, and then original file should be
757+
# replaced by new.
758+
copyfile(path, tmp)
759+
remove(path)
760+
os.rename(tmp, path)
761+
762+
else:
763+
logger.debug(
764+
"Skipping copying for '{}', since it is not "
765+
"a symlink or a hardlink.".format(path)
766+
)
767+
768+
os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE)
769+
770+
@staticmethod
771+
def _unprotect_dir(path):
772+
for path in walk_files(path):
773+
RemoteLOCAL._unprotect_file(path)
774+
775+
@staticmethod
776+
def unprotect(path_info):
777+
path = path_info["path"]
778+
if not os.path.exists(path):
779+
raise DvcException(
780+
"can't unprotect non-existing data '{}'".format(path)
781+
)
782+
783+
if os.path.isdir(path):
784+
RemoteLOCAL._unprotect_dir(path)
785+
else:
786+
RemoteLOCAL._unprotect_file(path)

dvc/repo/__init__.py

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -101,11 +101,9 @@ def init(root_dir=os.curdir, no_scm=False, force=False):
101101
init(root_dir=root_dir, no_scm=no_scm, force=force)
102102
return Repo(root_dir)
103103

104-
@staticmethod
105-
def unprotect(target):
106-
from dvc.repo.unprotect import unprotect
107-
108-
return unprotect(target)
104+
def unprotect(self, target):
105+
path_info = {"schema": "local", "path": target}
106+
return self.cache.local.unprotect(path_info)
109107

110108
def _ignore(self):
111109
flist = [

dvc/repo/run.py

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@ def run(
2020
ignore_build_cache=False,
2121
remove_outs=False,
2222
no_commit=False,
23+
outs_persist=None,
24+
outs_persist_no_cache=None,
2325
):
2426
from dvc.stage import Stage
2527

@@ -33,6 +35,10 @@ def run(
3335
metrics = []
3436
if metrics_no_cache is None:
3537
metrics_no_cache = []
38+
if outs_persist is None:
39+
outs_persist = []
40+
if outs_persist_no_cache is None:
41+
outs_persist_no_cache = []
3642

3743
with self.state:
3844
stage = Stage.create(
@@ -49,6 +55,8 @@ def run(
4955
overwrite=overwrite,
5056
ignore_build_cache=ignore_build_cache,
5157
remove_outs=remove_outs,
58+
outs_persist=outs_persist,
59+
outs_persist_no_cache=outs_persist_no_cache,
5260
)
5361

5462
if stage is None:

dvc/repo/unprotect.py

Lines changed: 0 additions & 49 deletions
This file was deleted.

0 commit comments

Comments
 (0)