Skip to content

Commit 9afdd60

Browse files
committed
introduce repo concept, make plumbing CLI available as dvc-data command
We need to introduce a repo concept so that the plumbing CLI commands are compatible with repositories created by `dvc init`.
1 parent 8dd0a50 commit 9afdd60

File tree

6 files changed

+111
-34
lines changed

6 files changed

+111
-34
lines changed

noxfile.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,7 +28,7 @@ def lint(session: nox.Session) -> None:
2828
args = *(session.posargs or ("--show-diff-on-failure",)), "--all-files"
2929
session.run("pre-commit", "run", *args)
3030
session.run("python", "-m", "mypy")
31-
session.run("python", "-m", "pylint", "src", "cli.py")
31+
session.run("python", "-m", "pylint", "src")
3232

3333

3434
@nox.session

pyproject.toml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,7 @@ warn_no_return = true
6262
warn_redundant_casts = true
6363
warn_unreachable = true
6464
ignore_missing_imports = true
65-
files = ["src", "tests", "cli.py"]
65+
files = ["src", "tests"]
6666

6767
[tool.pylint.message_control]
6868
enable = ["c-extension-no-member", "no-else-return"]

setup.cfg

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,10 @@ install_requires=
3232
nanotime>=0.5.2
3333

3434
[options.extras_require]
35+
cli =
36+
typer>=0.4,<0.5
37+
all =
38+
%(cli)s
3539
tests =
3640
pytest==7.1.2
3741
pytest-sugar==0.9.4
@@ -45,8 +49,12 @@ tests =
4549
universal-pathlib==0.0.19
4650
s3fs[boto3]>=2022.02.0; python_version < '3.11'
4751
dev =
52+
%(all)s
4853
%(tests)s
49-
typer>=0.4,<0.5
54+
55+
[options.entry_points]
56+
console_scripts =
57+
dvc-data = dvc_data.__main__:app
5058

5159
[options.packages.find]
5260
exclude =
@@ -67,4 +75,4 @@ select = B,C,E,F,W,T4,B902,T,P
6775
show_source = true
6876
count = true
6977
per-file-ignores =
70-
cli.py:B008
78+
src/dvc_data/cli.py:B008

src/dvc_data/__main__.py

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
# Entry point for running the package as a module (``__main__``): expose the
# real CLI application when it can be imported, otherwise a stub that explains
# how to install the missing dependencies.
try:
    from .cli import app
except ImportError:  # pragma: no cover

    def app():  # type: ignore[misc]
        """Report that the CLI dependencies are missing.

        Returns:
            int: always 1, which the ``__main__`` guard below passes to
            ``sys.exit`` as the process exit status.
        """
        import sys

        # This is a diagnostic, not program output: write it to stderr so
        # stdout stays clean when the command is used in a pipeline.
        print(
            "dvc-data could not run because the required "
            "dependencies are not installed.\n"
            "Please install it with: pip install 'dvc-data[cli]'",
            file=sys.stderr,
        )
        return 1


if __name__ == "__main__":
    import sys

    sys.exit(app())

cli.py renamed to src/dvc_data/cli.py

Lines changed: 27 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,3 @@
1-
#! /usr/bin/env python3
2-
31
import enum
42
import hashlib
53
import json
@@ -28,6 +26,7 @@
2826
from dvc_data.hashfile.hash_info import HashInfo
2927
from dvc_data.hashfile.state import State
3028
from dvc_data.objects.tree import Tree, merge
29+
from dvc_data.repo import NotARepo, Repo
3130
from dvc_data.stage import stage as _stage
3231
from dvc_data.transfer import transfer as _transfer
3332

@@ -59,9 +58,6 @@
5958
"LinkEnum", {lt: lt for lt in ["reflink", "hardlink", "symlink", "copy"]}
6059
)
6160
SIZE_HELP = "Human readable size, eg: '1kb', '100Mb', '10GB' etc"
62-
ODB_PATH = typer.Option(
63-
".dvc/cache", help="Path to the root of the odb", envvar="ODB_PATH"
64-
)
6561

6662

6763
class Application(typer.Typer):
@@ -138,21 +134,27 @@ def from_shortoid(odb: HashFileDB, oid: str) -> str:
138134
raise typer.Exit(1) from exc
139135

140136

141-
def get_odb(path, **config):
142-
state = State(root_dir=os.getcwd(), tmp_dir=os.path.join(path, "tmp"))
143-
return HashFileDB(LocalFileSystem(), path, state=state, **config)
137+
def get_odb(**config):
    """Open the object database of the repository around the current dir.

    The repository is located with ``Repo.discover()``; its object directory
    is wrapped in a ``HashFileDB`` whose ``State`` is rooted at the
    repository root and uses the repository's tmp directory.

    Args:
        **config: Extra keyword arguments forwarded to ``HashFileDB``.

    Returns:
        The ``HashFileDB`` for the discovered repository.

    Raises:
        typer.Exit: With exit code 1, after echoing the reason to stderr,
            when no repository could be discovered.
    """
    try:
        repo = Repo.discover()
    except NotARepo as exc:
        typer.echo(exc, err=True)
        # Exit with an explicit status code, as the other error path in this
        # file (from_shortoid) does, and keep the original error as the cause.
        raise typer.Exit(1) from exc

    state = State(root_dir=repo.root, tmp_dir=repo.tmp_dir)
    return HashFileDB(repo.fs, repo.object_dir, state=state, **config)
144146

145147

146148
@app.command(help="Oid to path")
147-
def o2p(oid: str = typer.Argument(..., allow_dash=True), db: str = ODB_PATH):
148-
odb = get_odb(db)
149+
def o2p(oid: str = typer.Argument(..., allow_dash=True)):
    # Resolve the (possibly shortened) oid against the repository's object
    # database, then print the path at which that object is stored.
    db = get_odb()
    full_oid = from_shortoid(db, oid)
    typer.echo(db.oid_to_path(full_oid))
151153

152154

153155
@app.command(help="Path to Oid")
154-
def p2o(path: Path = typer.Argument(..., allow_dash=True), db: str = ODB_PATH):
155-
odb = get_odb(db)
156+
def p2o(path: Path = typer.Argument(..., allow_dash=True)):
157+
odb = get_odb()
156158
fs_path = relpath(path)
157159
if fs_path == "-":
158160
fs_path = sys.stdin.read().strip()
@@ -164,10 +166,9 @@ def p2o(path: Path = typer.Argument(..., allow_dash=True), db: str = ODB_PATH):
164166
@app.command(help="Provide content of the objects")
165167
def cat(
166168
oid: str = typer.Argument(..., allow_dash=True),
167-
db: str = ODB_PATH,
168169
check: bool = typer.Option(False, "--check", "-c"),
169170
):
170-
odb = get_odb(db)
171+
odb = get_odb()
171172
oid = from_shortoid(odb, oid)
172173
if check:
173174
try:
@@ -184,11 +185,10 @@ def cat(
184185
@app.command(help="Stage and optionally write object to the database")
185186
def stage(
186187
path: Path = dir_file_type,
187-
db: str = ODB_PATH,
188188
write: bool = typer.Option(False, "--write", "-w"),
189189
shallow: bool = False,
190190
):
191-
odb = get_odb(db)
191+
odb = get_odb()
192192
fs_path = relpath(path)
193193

194194
fs = odb.fs
@@ -210,8 +210,8 @@ def stage(
210210

211211
@app.command("ls", help="List objects in a tree")
212212
@app.command("ls-tree", help="List objects in a tree")
213-
def ls(oid: str = typer.Argument(..., allow_dash=True), db: str = ODB_PATH):
214-
odb = get_odb(db)
213+
def ls(oid: str = typer.Argument(..., allow_dash=True)):
214+
odb = get_odb()
215215
oid = from_shortoid(odb, oid)
216216
try:
217217
tree = Tree.load(odb, HashInfo("md5", oid))
@@ -224,8 +224,8 @@ def ls(oid: str = typer.Argument(..., allow_dash=True), db: str = ODB_PATH):
224224

225225

226226
@app.command(help="Verify objects in the database")
227-
def fsck(db: str = ODB_PATH):
228-
odb = get_odb(db)
227+
def fsck():
228+
odb = get_odb()
229229
ret = 0
230230
for oid in odb.all():
231231
try:
@@ -237,10 +237,8 @@ def fsck(db: str = ODB_PATH):
237237

238238

239239
@app.command(help="Diff two objects in the database")
240-
def diff(
241-
short_oid1, short_oid2: str, db: str = ODB_PATH, unchanged: bool = False
242-
):
243-
odb = get_odb(db)
240+
def diff(short_oid1, short_oid2: str, unchanged: bool = False):
241+
odb = get_odb()
244242
obj1 = odb.get(from_shortoid(odb, short_oid1))
245243
obj2 = odb.get(from_shortoid(odb, short_oid2))
246244
d = _diff(load(odb, obj1.hash_info), load(odb, obj2.hash_info), odb)
@@ -271,8 +269,8 @@ def _prepare_info(entry):
271269

272270

273271
@app.command(help="Merge two trees and optionally write to the database.")
274-
def merge_tree(oid1: str, oid2: str, db: str = ODB_PATH, force: bool = False):
275-
odb = get_odb(db)
272+
def merge_tree(oid1: str, oid2: str, force: bool = False):
273+
odb = get_odb()
276274
oid1 = from_shortoid(odb, oid1)
277275
oid2 = from_shortoid(odb, oid2)
278276
obj1 = load(odb, odb.get(oid1).hash_info)
@@ -300,7 +298,7 @@ def merge_tree(oid1: str, oid2: str, db: str = ODB_PATH, force: bool = False):
300298

301299

302300
@app.command()
303-
def update_tree(oid: str, patch_file: Path = file_type, db: str = ODB_PATH):
301+
def update_tree(oid: str, patch_file: Path = file_type):
304302
"""Update tree contents virtually with a patch file in json format.
305303
306304
Example patch file for reference:
@@ -316,7 +314,7 @@ def update_tree(oid: str, patch_file: Path = file_type, db: str = ODB_PATH):
316314
317315
Example: ./cli.py update-tree f23d4 patch.json
318316
"""
319-
odb = get_odb(db)
317+
odb = get_odb()
320318
oid = from_shortoid(odb, oid)
321319
obj = load(odb, odb.get(oid).hash_info)
322320
assert isinstance(obj, Tree)
@@ -374,9 +372,8 @@ def checkout(
374372
type: List[LinkEnum] = typer.Option( # pylint: disable=redefined-builtin
375373
["copy"]
376374
),
377-
db: str = ODB_PATH,
378375
):
379-
odb = get_odb(db, type=[t.value for t in type])
376+
odb = get_odb(type=[t.value for t in type])
380377
oid = from_shortoid(odb, oid)
381378
obj = load(odb, odb.get(oid).hash_info)
382379
with Tqdm(total=len(obj), desc="Checking out", unit="obj") as pbar:

src/dvc_data/repo.py

Lines changed: 55 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,55 @@
1+
import os
2+
3+
from dvc_objects.fs import LocalFileSystem
4+
from dvc_objects.fs.base import FileSystem
5+
6+
7+
class NotARepo(Exception):
    """Raised when no data repository exists at the requested location."""
9+
10+
11+
# Shared local filesystem instance; Repo falls back to it when no `fs` is given.
localfs = LocalFileSystem()
12+
13+
14+
class Repo:
    """A data repository: a root directory plus its control directory.

    The control directory is ``$DVC_DIR`` when that variable is set to a
    non-empty value, otherwise ``<root>/.dvc``.  Its ``tmp`` and ``cache``
    sub-paths are exposed as the temporary and object directories.
    """

    def __init__(self, root: str = "", fs: FileSystem = None) -> None:
        """Bind to the repository rooted at ``root``.

        Args:
            root: Repository root; an empty value means the filesystem's
                current working directory.
            fs: Filesystem to operate on; defaults to the module-level
                local filesystem.

        Raises:
            NotARepo: If the control directory is not an existing directory.
        """
        filesystem = fs or localfs
        root_dir = root or filesystem.path.getcwd()
        control_dir: str = os.getenv("DVC_DIR") or filesystem.path.join(
            root_dir, ".dvc"
        )
        if not filesystem.isdir(control_dir):
            raise NotARepo(f"{root_dir} is not a data repo.")

        self.fs = filesystem
        self.root = root_dir
        self._control_dir = control_dir
        self._tmp_dir: str = filesystem.path.join(control_dir, "tmp")
        self._object_dir: str = filesystem.path.join(control_dir, "cache")

    @classmethod
    def discover(
        cls,
        start: str = ".",
        fs: FileSystem = None,
    ) -> "Repo":
        """Find the nearest repository at ``start`` or one of its parents.

        Args:
            start: Path where the search begins; made absolute first.
            fs: Filesystem to search on; defaults to the module-level
                local filesystem.

        Returns:
            The first ``Repo`` that can be constructed while walking upwards.

        Raises:
            NotARepo: If no directory on the way up is a repository.
        """
        fs = fs or localfs
        start = fs.path.abspath(start)
        candidate = start
        while True:
            try:
                return cls(candidate, fs)
            except NotARepo:
                # Step to the parent; an empty tail means there is nothing
                # left to strip, so the search is over.
                candidate, tail = fs.path.split(candidate)
                if not tail:
                    break
        raise NotARepo(f"No data repository was found at {start}")

    @property
    def control_dir(self):
        """Path of the repository's control directory."""
        return self._control_dir

    @property
    def tmp_dir(self):
        """Path of the ``tmp`` directory inside the control directory."""
        return self._tmp_dir

    @property
    def object_dir(self):
        """Path of the ``cache`` directory inside the control directory."""
        return self._object_dir

0 commit comments

Comments
 (0)