Skip to content

Commit 3bc1287

Browse files
committed
cache migrate: add utility for migrating local cache files to 3.x
1 parent bbb7736 commit 3bc1287

File tree

2 files changed

+67
-3
lines changed

2 files changed

+67
-3
lines changed

dvc/cachemgr.py

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,13 @@
11
import os
2-
from typing import Optional, Tuple
2+
from typing import TYPE_CHECKING, Optional, Tuple
33

44
from dvc.fs import GitFileSystem, Schemes
55
from dvc_data.hashfile.db import get_odb
66
from dvc_data.hashfile.hash import DEFAULT_ALGORITHM
77

8+
if TYPE_CHECKING:
9+
from dvc.repo import Repo
10+
811
LEGACY_HASH_NAMES = {"md5-dos2unix", "params"}
912

1013

@@ -99,3 +102,37 @@ def local_cache_dir(self) -> str:
99102
(i.e. `dvc cache dir`).
100103
"""
101104
return self.legacy.path
105+
106+
107+
def migrate_2_to_3(repo: "Repo", dry: bool = False):
    """Move objects from the legacy 2.x cache into the 3.x cache.

    Objects stored under the legacy 'md5-dos2unix' hash are re-hashed with
    'md5' and added to the 3.x cache; afterwards the legacy 2.x location is
    linked to the new 3.x location.

    Args:
        repo: Repo whose ``cache.legacy`` object database is the source and
            whose ``cache.local`` object database is the destination.
        dry: When true, only print how many files would be migrated and
            return without preparing or migrating anything.
    """
    from dvc.fs.callbacks import TqdmCallback
    from dvc.ui import ui
    from dvc_data.hashfile.db.migrate import migrate, prepare

    legacy_odb = repo.cache.legacy
    current_odb = repo.cache.local

    if not dry:
        # First pass: compute the new hashes for everything in the legacy db.
        hashing_bar = TqdmCallback(desc="Computing DVC 3.0 hashes", unit="files")
        with hashing_bar as progress:
            plan = prepare(legacy_odb, current_odb, callback=progress)

        # Second pass: carry out the prepared migration.
        moving_bar = TqdmCallback(desc="Migrating to DVC 3.0 cache", unit="files")
        with moving_bar as progress:
            migrated = migrate(plan, callback=progress)

        ui.write(f"Migrated {migrated} files to DVC 3.0 cache location.")
        return

    # Dry run: report the number of legacy objects without touching them.
    # pylint: disable-next=protected-access
    pending = len(list(legacy_odb._list_oids()))
    ui.write(
        f"{pending} files will be re-hashed and migrated to the DVC 3.0 cache "
        "location."
    )

dvc/commands/cache.py

Lines changed: 29 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
import os
33

44
from dvc.cli import completion
5+
from dvc.cli.command import CmdBase
56
from dvc.cli.utils import append_doc_link, fix_subparsers
67
from dvc.commands.config import CmdConfig
78
from dvc.ui import ui
@@ -36,6 +37,14 @@ def run(self):
3637
return 0
3738

3839

40+
class CmdCacheMigrate(CmdBase):
    """Command object behind the `migrate` subcommand of the cache parser."""

    def run(self):
        """Run the 2.x -> 3.x cache migration and return exit code 0.

        The parsed `--dry` flag is forwarded to `migrate_2_to_3`.
        """
        # Imported here, at call time, rather than at module import.
        from dvc.cachemgr import migrate_2_to_3

        repo = self.repo
        dry_run = self.args.dry
        migrate_2_to_3(repo, dry=dry_run)
        return 0
46+
47+
3948
def add_parser(subparsers, parent_parser):
4049
from dvc.commands.config import parent_config_parser
4150

@@ -54,8 +63,6 @@ def add_parser(subparsers, parent_parser):
5463
help="Use `dvc cache CMD --help` for command-specific help.",
5564
)
5665

57-
fix_subparsers(cache_subparsers)
58-
5966
parent_cache_config_parser = argparse.ArgumentParser(
6067
add_help=False, parents=[parent_config_parser]
6168
)
@@ -86,3 +93,23 @@ def add_parser(subparsers, parent_parser):
8693
nargs="?",
8794
).complete = completion.DIR
8895
cache_dir_parser.set_defaults(func=CmdCacheDir)
96+
97+
CACHE_MIGRATE_HELP = "Migrate cached files to the DVC 3.0 cache location."
98+
cache_migrate_parser = cache_subparsers.add_parser(
99+
"migrate",
100+
parents=[parent_parser],
101+
description=append_doc_link(CACHE_HELP, "cache/migrate"),
102+
help=CACHE_MIGRATE_HELP,
103+
formatter_class=argparse.RawDescriptionHelpFormatter,
104+
)
105+
cache_migrate_parser.add_argument(
106+
"--dry",
107+
help=(
108+
"Only print actions which would be taken without actually migrating "
109+
"any data."
110+
),
111+
action="store_true",
112+
)
113+
cache_migrate_parser.set_defaults(func=CmdCacheMigrate)
114+
115+
fix_subparsers(cache_subparsers)

0 commit comments

Comments
 (0)