Skip to content

cache migrate: add utility for migrating local cache files to 3.x #9591

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jun 13, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 38 additions & 1 deletion dvc/cachemgr.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
import os
from typing import Optional, Tuple
from typing import TYPE_CHECKING, Optional, Tuple

from dvc.fs import GitFileSystem, Schemes
from dvc_data.hashfile.db import get_odb
from dvc_data.hashfile.hash import DEFAULT_ALGORITHM

if TYPE_CHECKING:
from dvc.repo import Repo

# Hash names treated as "legacy". NOTE(review): presumably these are the
# hash names used by the 2.x cache layout -- confirm against the usages.
LEGACY_HASH_NAMES = {"md5-dos2unix", "params"}


Expand Down Expand Up @@ -99,3 +102,37 @@ def local_cache_dir(self) -> str:
(i.e. `dvc cache dir`).
"""
return self.legacy.path


def migrate_2_to_3(repo: "Repo", dry: bool = False):
    """Move objects from the legacy 2.x cache into the 3.x cache.

    Legacy 'md5-dos2unix' objects get re-hashed with 'md5' and stored in the
    3.x cache; afterwards the legacy 2.x location is linked to the 3.x one.

    Args:
        repo: Repo whose ``cache.legacy`` is the source object database and
            whose ``cache.local`` is the destination.
        dry: If true, only print how many files would be migrated and return
            without touching any data.
    """
    from dvc.fs.callbacks import TqdmCallback
    from dvc.ui import ui
    from dvc_data.hashfile.db.migrate import migrate, prepare

    legacy_odb = repo.cache.legacy
    local_odb = repo.cache.local

    if not dry:
        # Phase 1: build the migration plan (progress bar while hashing).
        with TqdmCallback(desc="Computing DVC 3.0 hashes", unit="files") as hash_cb:
            plan = prepare(legacy_odb, local_odb, callback=hash_cb)

        # Phase 2: execute the plan and report how many files were handled.
        with TqdmCallback(desc="Migrating to DVC 3.0 cache", unit="files") as move_cb:
            migrated = migrate(plan, callback=move_cb)
        ui.write(f"Migrated {migrated} files to DVC 3.0 cache location.")
        return

    # Dry run: just count the legacy object ids, nothing is modified.
    pending = sum(
        1 for _ in legacy_odb._list_oids()  # pylint: disable=protected-access
    )
    ui.write(
        f"{pending} files will be re-hashed and migrated to the DVC 3.0 cache "
        "location."
    )
31 changes: 29 additions & 2 deletions dvc/commands/cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import os

from dvc.cli import completion
from dvc.cli.command import CmdBase
from dvc.cli.utils import append_doc_link, fix_subparsers
from dvc.commands.config import CmdConfig
from dvc.ui import ui
Expand Down Expand Up @@ -36,6 +37,14 @@ def run(self):
return 0


class CmdCacheMigrate(CmdBase):
    """Command handler registered for the ``cache migrate`` subcommand."""

    def run(self):
        """Run the 2.x -> 3.x cache migration for this repo and return 0.

        The ``--dry`` flag from the parsed arguments is forwarded unchanged.
        """
        # Imported lazily, inside the command, as in the original code.
        from dvc.cachemgr import migrate_2_to_3 as run_migration

        target_repo = self.repo
        dry_run = self.args.dry
        run_migration(target_repo, dry=dry_run)
        return 0


def add_parser(subparsers, parent_parser):
from dvc.commands.config import parent_config_parser

Expand All @@ -54,8 +63,6 @@ def add_parser(subparsers, parent_parser):
help="Use `dvc cache CMD --help` for command-specific help.",
)

fix_subparsers(cache_subparsers)

parent_cache_config_parser = argparse.ArgumentParser(
add_help=False, parents=[parent_config_parser]
)
Expand Down Expand Up @@ -86,3 +93,23 @@ def add_parser(subparsers, parent_parser):
nargs="?",
).complete = completion.DIR
cache_dir_parser.set_defaults(func=CmdCacheDir)

# Register the `migrate` subcommand under `dvc cache`.
CACHE_MIGRATE_HELP = "Migrate cached files to the DVC 3.0 cache location."
cache_migrate_parser = cache_subparsers.add_parser(
    "migrate",
    parents=[parent_parser],
    # Use this subcommand's own help text as its description, so that
    # `dvc cache migrate --help` describes the migration instead of reusing
    # CACHE_HELP (which appears to be the parent `cache` command's text).
    description=append_doc_link(CACHE_MIGRATE_HELP, "cache/migrate"),
    help=CACHE_MIGRATE_HELP,
    formatter_class=argparse.RawDescriptionHelpFormatter,
)
# `--dry` is a boolean flag; it is read by the command as `args.dry`.
cache_migrate_parser.add_argument(
    "--dry",
    help=(
        "Only print actions which would be taken without actually migrating "
        "any data."
    ),
    action="store_true",
)
cache_migrate_parser.set_defaults(func=CmdCacheMigrate)

fix_subparsers(cache_subparsers)
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ dependencies = [
"configobj>=5.0.6",
"distro>=1.3",
"dpath<3,>=2.1.0",
"dvc-data>=1.10.0,<1.11.0",
"dvc-data>=1.11.0,<1.12.0",
"dvc-http>=2.29.0",
"dvc-render>=0.3.1,<1",
"dvc-studio-client>=0.9.2,<1",
Expand Down