Skip to content

run: add --outs-persist and --outs-persist-no-cache options #1759

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Mar 25, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions dvc/command/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ def run(self):
ignore_build_cache=self.args.ignore_build_cache,
remove_outs=self.args.remove_outs,
no_commit=self.args.no_commit,
outs_persist=self.args.outs_persist,
outs_persist_no_cache=self.args.outs_persist_no_cache,
)
except DvcException:
logger.error("failed to run command")
Expand Down Expand Up @@ -175,6 +177,20 @@ def add_parser(subparsers, parent_parser):
default=False,
help="Don't put files/directories into cache.",
)
run_parser.add_argument(
"--outs-persist",
action="append",
default=[],
help="Declare output file or directory that will not be "
"removed upon repro.",
)
run_parser.add_argument(
"--outs-persist-no-cache",
action="append",
default=[],
help="Declare output file or directory that will not be "
"removed upon repro (do not put into DVC cache).",
)
run_parser.add_argument(
"command", nargs=argparse.REMAINDER, help="Command to execute."
)
Expand Down
39 changes: 33 additions & 6 deletions dvc/output/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,23 +48,38 @@
schema.Optional(RemoteHDFS.PARAM_CHECKSUM): schema.Or(str, None),
schema.Optional(OutputBase.PARAM_CACHE): bool,
schema.Optional(OutputBase.PARAM_METRIC): OutputBase.METRIC_SCHEMA,
schema.Optional(OutputBase.PARAM_PERSIST): bool,
}


def _get(stage, p, info, cache, metric):
def _get(stage, p, info, cache, metric, persist):
parsed = urlparse(p)
if parsed.scheme == "remote":
name = Config.SECTION_REMOTE_FMT.format(parsed.netloc)
sect = stage.repo.config.config[name]
remote = Remote(stage.repo, sect)
return OUTS_MAP[remote.scheme](
stage, p, info, cache=cache, remote=remote, metric=metric
stage,
p,
info,
cache=cache,
remote=remote,
metric=metric,
persist=persist,
)

for o in OUTS:
if o.supported(p):
return o(stage, p, info, cache=cache, remote=None, metric=metric)
return OutputLOCAL(stage, p, info, cache=cache, remote=None, metric=metric)
return OutputLOCAL(
stage,
p,
info,
cache=cache,
remote=None,
metric=metric,
persist=persist,
)


def loadd_from(stage, d_list):
Expand All @@ -73,12 +88,24 @@ def loadd_from(stage, d_list):
p = d.pop(OutputBase.PARAM_PATH)
cache = d.pop(OutputBase.PARAM_CACHE, True)
metric = d.pop(OutputBase.PARAM_METRIC, False)
ret.append(_get(stage, p, info=d, cache=cache, metric=metric))
persist = d.pop(OutputBase.PARAM_PERSIST, False)
ret.append(
_get(stage, p, info=d, cache=cache, metric=metric, persist=persist)
)
return ret


def loads_from(stage, s_list, use_cache=True, metric=False):
def loads_from(stage, s_list, use_cache=True, metric=False, persist=False):
ret = []
for s in s_list:
ret.append(_get(stage, s, info={}, cache=use_cache, metric=metric))
ret.append(
_get(
stage,
s,
info={},
cache=use_cache,
metric=metric,
persist=persist,
)
)
return ret
16 changes: 15 additions & 1 deletion dvc/output/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class OutputBase(object):
PARAM_METRIC = "metric"
PARAM_METRIC_TYPE = "type"
PARAM_METRIC_XPATH = "xpath"
PARAM_PERSIST = "persist"

METRIC_SCHEMA = Or(
None,
Expand All @@ -50,7 +51,14 @@ class OutputBase(object):
IsNotFileOrDirError = OutputIsNotFileOrDirError

def __init__(
self, stage, path, info=None, remote=None, cache=True, metric=False
self,
stage,
path,
info=None,
remote=None,
cache=True,
metric=False,
persist=False,
):
self.stage = stage
self.repo = stage.repo
Expand All @@ -59,6 +67,7 @@ def __init__(
self.remote = remote or self.REMOTE(self.repo, {})
self.use_cache = False if self.IS_DEPENDENCY else cache
self.metric = False if self.IS_DEPENDENCY else metric
self.persist = persist

if (
self.use_cache
Expand Down Expand Up @@ -186,6 +195,7 @@ def dumpd(self):
del self.metric[self.PARAM_METRIC_XPATH]

ret[self.PARAM_METRIC] = self.metric
ret[self.PARAM_PERSIST] = self.persist

return ret

Expand Down Expand Up @@ -231,3 +241,7 @@ def get_files_number(self):
return 1

return 0

def unprotect(self):
if self.exists:
self.remote.unprotect(self.path_info)
17 changes: 15 additions & 2 deletions dvc/output/hdfs.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,23 @@ class OutputHDFS(OutputBase):
REMOTE = RemoteHDFS

def __init__(
self, stage, path, info=None, remote=None, cache=True, metric=False
self,
stage,
path,
info=None,
remote=None,
cache=True,
metric=False,
persist=False,
):
super(OutputHDFS, self).__init__(
stage, path, info=info, remote=remote, cache=cache, metric=metric
stage,
path,
info=info,
remote=remote,
cache=cache,
metric=metric,
persist=persist,
)
if remote:
path = posixpath.join(remote.url, urlparse(path).path.lstrip("/"))
Expand Down
17 changes: 15 additions & 2 deletions dvc/output/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,10 +14,23 @@ class OutputLOCAL(OutputBase):
REMOTE = RemoteLOCAL

def __init__(
self, stage, path, info=None, remote=None, cache=True, metric=False
self,
stage,
path,
info=None,
remote=None,
cache=True,
metric=False,
persist=False,
):
super(OutputLOCAL, self).__init__(
stage, path, info, remote=remote, cache=cache, metric=metric
stage,
path,
info,
remote=remote,
cache=cache,
metric=metric,
persist=persist,
)
if remote:
p = os.path.join(
Expand Down
17 changes: 15 additions & 2 deletions dvc/output/s3.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,10 +11,23 @@ class OutputS3(OutputBase):
REMOTE = RemoteS3

def __init__(
self, stage, path, info=None, remote=None, cache=True, metric=False
self,
stage,
path,
info=None,
remote=None,
cache=True,
metric=False,
persist=False,
):
super(OutputS3, self).__init__(
stage, path, info=info, remote=remote, cache=cache, metric=metric
stage,
path,
info=info,
remote=remote,
cache=cache,
metric=metric,
persist=persist,
)
bucket = remote.bucket if remote else urlparse(path).netloc
path = urlparse(path).path.lstrip("/")
Expand Down
17 changes: 15 additions & 2 deletions dvc/output/ssh.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,23 @@ class OutputSSH(OutputBase):
REMOTE = RemoteSSH

def __init__(
self, stage, path, info=None, remote=None, cache=True, metric=False
self,
stage,
path,
info=None,
remote=None,
cache=True,
metric=False,
persist=False,
):
super(OutputSSH, self).__init__(
stage, path, info=info, remote=remote, cache=cache, metric=metric
stage,
path,
info=info,
remote=remote,
cache=cache,
metric=metric,
persist=persist,
)
parsed = urlparse(path)
host = remote.host if remote else parsed.hostname
Expand Down
4 changes: 4 additions & 0 deletions dvc/remote/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -362,3 +362,7 @@ def checkout(self, output, force=False, progress_callback=None):
self.do_checkout(
output, force=force, progress_callback=progress_callback
)

@staticmethod
def unprotect(path_info):
pass
41 changes: 41 additions & 0 deletions dvc/remote/local.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,3 +743,44 @@ def _log_missing_caches(self, checksum_info_dict):
"nor on remote. Missing cache files: {}".format(missing_desc)
)
logger.warning(msg)

@staticmethod
def _unprotect_file(path):
if System.is_symlink(path) or System.is_hardlink(path):
logger.debug("Unprotecting '{}'".format(path))
tmp = os.path.join(os.path.dirname(path), "." + str(uuid.uuid4()))

# The operations order is important here - if some application
# would access the file during the process of copyfile then it
# would get only the part of file. So, at first, the file should be
# copied with the temporary name, and then original file should be
# replaced by new.
copyfile(path, tmp)
remove(path)
os.rename(tmp, path)

else:
logger.debug(
"Skipping copying for '{}', since it is not "
"a symlink or a hardlink.".format(path)
)

os.chmod(path, os.stat(path).st_mode | stat.S_IWRITE)

@staticmethod
def _unprotect_dir(path):
for path in walk_files(path):
RemoteLOCAL._unprotect_file(path)

@staticmethod
def unprotect(path_info):
path = path_info["path"]
if not os.path.exists(path):
raise DvcException(
"can't unprotect non-existing data '{}'".format(path)
)

if os.path.isdir(path):
RemoteLOCAL._unprotect_dir(path)
else:
RemoteLOCAL._unprotect_file(path)
8 changes: 3 additions & 5 deletions dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,11 +101,9 @@ def init(root_dir=os.curdir, no_scm=False, force=False):
init(root_dir=root_dir, no_scm=no_scm, force=force)
return Repo(root_dir)

@staticmethod
def unprotect(target):
from dvc.repo.unprotect import unprotect

return unprotect(target)
def unprotect(self, target):
path_info = {"schema": "local", "path": target}
return self.cache.local.unprotect(path_info)

def _ignore(self):
flist = [
Expand Down
8 changes: 8 additions & 0 deletions dvc/repo/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ def run(
ignore_build_cache=False,
remove_outs=False,
no_commit=False,
outs_persist=None,
outs_persist_no_cache=None,
):
from dvc.stage import Stage

Expand All @@ -33,6 +35,10 @@ def run(
metrics = []
if metrics_no_cache is None:
metrics_no_cache = []
if outs_persist is None:
outs_persist = []
if outs_persist_no_cache is None:
outs_persist_no_cache = []

with self.state:
stage = Stage.create(
Expand All @@ -49,6 +55,8 @@ def run(
overwrite=overwrite,
ignore_build_cache=ignore_build_cache,
remove_outs=remove_outs,
outs_persist=outs_persist,
outs_persist_no_cache=outs_persist_no_cache,
)

if stage is None:
Expand Down
49 changes: 0 additions & 49 deletions dvc/repo/unprotect.py

This file was deleted.

Loading