Skip to content

gc: added --all-commits #2643

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Oct 23, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion dvc/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,9 @@ def update(self, cache, suffix=""):
for scheme, src in cache._items.items():
dst = self._items[scheme]
for checksum, names in src.items():
dst[checksum].update(names)
if suffix:
dst[checksum].update(n + suffix for n in names)
else:
dst[checksum].update(names)

self.repo.extend(cache.repo)
29 changes: 19 additions & 10 deletions dvc/command/gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,17 +13,19 @@

class CmdGC(CmdBase):
def run(self):
msg = "this will remove all cache except the cache that is used in "
if not self.args.all_branches and not self.args.all_tags:
msg += "the current git branch"
elif self.args.all_branches and not self.args.all_tags:
msg += "all git branches"
elif not self.args.all_branches and self.args.all_tags:
msg += "all git tags"
else:
msg += "all git branches and all git tags"
msg = "This will remove all cache except items used in "

msg += "the working tree"
if self.args.all_commits:
msg += " and all git commits"
elif self.args.all_branches and self.args.all_tags:
msg += " and all git branches and tags"
elif self.args.all_branches:
msg += " and all git branches"
elif self.args.all_tags:
msg += " and all git tags"

if self.args.repos is not None and len(self.args.repos) > 0:
if self.args.repos:
msg += " of the current and the following repos:"

for repo_path in self.args.repos:
Expand All @@ -40,6 +42,7 @@ def run(self):
self.repo.gc(
all_branches=self.args.all_branches,
all_tags=self.args.all_tags,
all_commits=self.args.all_commits,
cloud=self.args.cloud,
remote=self.args.remote,
force=self.args.force,
Expand Down Expand Up @@ -76,6 +79,12 @@ def add_parser(subparsers, parent_parser):
default=False,
help="Keep data files for all git tags.",
)
gc_parser.add_argument(
"--all-commits",
action="store_true",
default=False,
help="Keep data files for all commits.",
)
gc_parser.add_argument(
"-c",
"--cloud",
Expand Down
5 changes: 4 additions & 1 deletion dvc/repo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,7 @@ def used_cache(
all_branches=False,
with_deps=False,
all_tags=False,
all_commits=False,
remote=None,
force=False,
jobs=None,
Expand All @@ -236,7 +237,9 @@ def used_cache(
cache = NamedCache()

for branch in self.brancher(
all_branches=all_branches, all_tags=all_tags
all_branches=all_branches,
all_tags=all_tags,
all_commits=all_commits,
):
if targets:
stages = []
Expand Down
28 changes: 18 additions & 10 deletions dvc/repo/brancher.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,12 @@


def brancher( # noqa: E302
self, branches=None, all_branches=False, tags=None, all_tags=False
self,
branches=None,
all_branches=False,
tags=None,
all_tags=False,
all_commits=False,
):
"""Generator that iterates over specified revisions.

Expand All @@ -19,7 +24,7 @@ def brancher( # noqa: E302
- empty string if there are no branches to iterate over
- "Working Tree" if there are uncommitted changes in the SCM repo
"""
if not any([branches, all_branches, tags, all_tags]):
if not any([branches, all_branches, tags, all_tags, all_commits]):
yield ""
return

Expand All @@ -31,17 +36,20 @@ def brancher( # noqa: E302
self.tree = WorkingTree(self.root_dir)
yield "working tree"

if all_branches:
branches = scm.list_branches()
if all_commits:
revs = scm.list_all_commits()
else:
if all_branches:
branches = scm.list_branches()

if all_tags:
tags = scm.list_tags()
if all_tags:
tags = scm.list_tags()

if branches is not None:
revs.extend(branches)
if branches is not None:
revs.extend(branches)

if tags is not None:
revs.extend(tags)
if tags is not None:
revs.extend(tags)

# NOTE: it might be a good idea to wrap this loop in a try/finally block
# so that we don't leave the tree on some unexpected branch after the
Expand Down
2 changes: 2 additions & 0 deletions dvc/repo/gc.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def gc(
remote=None,
with_deps=False,
all_tags=False,
all_commits=False,
force=False,
jobs=None,
repos=None,
Expand All @@ -47,6 +48,7 @@ def gc(
all_branches=all_branches,
with_deps=with_deps,
all_tags=all_tags,
all_commits=all_commits,
remote=remote,
force=force,
jobs=jobs,
Expand Down
4 changes: 4 additions & 0 deletions dvc/scm/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,10 @@ def list_tags(self): # pylint: disable=no-self-use
"""Returns a list of available tags in the repo."""
return []

def list_all_commits(self):  # pylint: disable=no-self-use
    """Return a list of commits in the repo.

    This implementation always returns a new, empty list.
    """
    no_commits = []
    return no_commits

def install(self):
"""Adds dvc commands to SCM hooks for the repo."""

Expand Down
3 changes: 3 additions & 0 deletions dvc/scm/git/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,9 @@ def list_branches(self):
def list_tags(self):
    """Return the ``name`` of every tag in ``self.repo.tags`` as a list."""
    tag_names = []
    for tag in self.repo.tags:
        tag_names.append(tag.name)
    return tag_names

def list_all_commits(self):
    """Return the ``hexsha`` of each commit from ``repo.iter_commits("--all")``.

    The result is a list, in the order the commits are yielded.
    """
    commits = self.repo.iter_commits("--all")
    return [commit.hexsha for commit in commits]

def _install_hook(self, name, cmd):
command = (
'[ "$3" = "0" ]'
Expand Down
35 changes: 35 additions & 0 deletions tests/func/test_gc.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
from __future__ import unicode_literals
import os
import configobj

from git import Repo

from dvc.utils.compat import pathlib
from dvc.main import main
from dvc.repo import Repo as DvcRepo

Expand Down Expand Up @@ -173,3 +175,36 @@ def test(self):

self.dvc.gc()
self._check_cache(2)


def test_all_commits(git, dvc_repo):
    """Check that ``gc(all_commits=True)`` removes exactly one cache file.

    The same file is added with four different contents: one never
    committed, two committed, and one left in the workspace.
    """
    testfile = pathlib.Path("testfile")
    cache_dir = os.path.join(dvc_repo.root_dir, ".dvc", "cache")

    def track(content):
        # Overwrite the file and add it to dvc without committing to git.
        testfile.write_text(content)
        return dvc_repo.add(str(testfile))

    def track_and_commit(content):
        # Same as track(), then commit the resulting stage files.
        stages = track(content)
        dvc_repo.scm.add([stage.relpath for stage in stages])
        dvc_repo.scm.commit("message")

    track("uncommited")
    track_and_commit("commited")
    track_and_commit("modified")
    track("workspace")

    files_before = _count_files(cache_dir)

    dvc_repo.gc(all_commits=True)

    files_after = _count_files(cache_dir)

    # Only the one uncommitted file should go away
    assert files_after == files_before - 1


def _count_files(path):
return sum(len(files) for _, _, files in os.walk(path))