Skip to content

introduce repo concept, make plumbing CLI available as dvc-data command #82

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Jul 9, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ def lint(session: nox.Session) -> None:
args = *(session.posargs or ("--show-diff-on-failure",)), "--all-files"
session.run("pre-commit", "run", *args)
session.run("python", "-m", "mypy")
session.run("python", "-m", "pylint", "src", "cli.py")
session.run("python", "-m", "pylint", "src")


@nox.session
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ warn_no_return = true
warn_redundant_casts = true
warn_unreachable = true
ignore_missing_imports = true
files = ["src", "tests", "cli.py"]
files = ["src", "tests"]

[tool.pylint.message_control]
enable = ["c-extension-no-member", "no-else-return"]
Expand Down
12 changes: 10 additions & 2 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ install_requires=
nanotime>=0.5.2

[options.extras_require]
cli =
typer>=0.4,<0.5
all =
%(cli)s
tests =
pytest==7.1.2
pytest-sugar==0.9.4
Expand All @@ -45,8 +49,12 @@ tests =
universal-pathlib==0.0.19
s3fs[boto3]>=2022.02.0; python_version < '3.11'
dev =
%(all)s
%(tests)s
typer>=0.4,<0.5

[options.entry_points]
console_scripts =
dvc-data = dvc_data.__main__:app

[options.packages.find]
exclude =
Expand All @@ -67,4 +75,4 @@ select = B,C,E,F,W,T4,B902,T,P
show_source = true
count = true
per-file-ignores =
cli.py:B008
src/dvc_data/cli.py:B008
17 changes: 17 additions & 0 deletions src/dvc_data/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
# Entry point for ``python -m dvc_data`` and for the ``dvc-data`` console
# script. The real CLI lives in ``.cli``; when importing it fails, a stub
# ``app`` is defined instead so the user gets an actionable message rather
# than a traceback.
try:
    from .cli import app
except ImportError:  # pragma: no cover

    def app():  # type: ignore[misc]
        """Report that the CLI dependencies are missing and exit with 1.

        The message is written to stderr so that it is not mixed into
        stdout, which callers may be piping or parsing.

        Raises:
            SystemExit: always, with exit code 1.
        """
        import sys

        print(
            "dvc-data could not run because the required "
            "dependencies are not installed.\n"
            "Please install it with: pip install 'dvc-data[cli]'",
            file=sys.stderr,
        )
        sys.exit(1)


if __name__ == "__main__":
    app()
57 changes: 27 additions & 30 deletions cli.py → src/dvc_data/cli.py
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,5 +1,3 @@
#! /usr/bin/env python3

import enum
import hashlib
import json
Expand Down Expand Up @@ -29,6 +27,7 @@
from dvc_data.hashfile.hash_info import HashInfo
from dvc_data.hashfile.state import State
from dvc_data.objects.tree import Tree, merge
from dvc_data.repo import NotARepo, Repo
from dvc_data.transfer import transfer as _transfer

file_type = typer.Argument(
Expand Down Expand Up @@ -59,9 +58,6 @@
"LinkEnum", {lt: lt for lt in ["reflink", "hardlink", "symlink", "copy"]}
)
SIZE_HELP = "Human readable size, eg: '1kb', '100Mb', '10GB' etc"
ODB_PATH = typer.Option(
".dvc/cache", help="Path to the root of the odb", envvar="ODB_PATH"
)


class Application(typer.Typer):
Expand Down Expand Up @@ -138,21 +134,27 @@ def from_shortoid(odb: HashFileDB, oid: str) -> str:
raise typer.Exit(1) from exc


def get_odb(path, **config):
state = State(root_dir=os.getcwd(), tmp_dir=os.path.join(path, "tmp"))
return HashFileDB(LocalFileSystem(), path, state=state, **config)
def get_odb(**config):
    """Return the object database of the data repo enclosing the cwd.

    The repository is located with ``Repo.discover()``, and the database is
    built on the repo's filesystem and object directory, with a ``State``
    rooted at the repo root and using the repo's tmp directory.

    Args:
        **config: extra keyword arguments forwarded to ``HashFileDB``.

    Raises:
        typer.Abort: if no repository is found; the reason is echoed to
            stderr first.
    """
    try:
        repo = Repo.discover()
    except NotARepo as exc:
        typer.echo(exc, err=True)
        # Chain explicitly so the original cause is kept as ``__cause__``,
        # matching the ``raise ... from exc`` style used elsewhere in this
        # module.
        raise typer.Abort(1) from exc

    state = State(root_dir=repo.root, tmp_dir=repo.tmp_dir)
    return HashFileDB(repo.fs, repo.object_dir, state=state, **config)


@app.command(help="Oid to path")
def o2p(oid: str = typer.Argument(..., allow_dash=True), db: str = ODB_PATH):
odb = get_odb(db)
def o2p(oid: str = typer.Argument(..., allow_dash=True)):
odb = get_odb()
path = odb.oid_to_path(from_shortoid(odb, oid))
typer.echo(path)


@app.command(help="Path to Oid")
def p2o(path: Path = typer.Argument(..., allow_dash=True), db: str = ODB_PATH):
odb = get_odb(db)
def p2o(path: Path = typer.Argument(..., allow_dash=True)):
odb = get_odb()
fs_path = relpath(path)
if fs_path == "-":
fs_path = sys.stdin.read().strip()
Expand All @@ -164,10 +166,9 @@ def p2o(path: Path = typer.Argument(..., allow_dash=True), db: str = ODB_PATH):
@app.command(help="Provide content of the objects")
def cat(
oid: str = typer.Argument(..., allow_dash=True),
db: str = ODB_PATH,
check: bool = typer.Option(False, "--check", "-c"),
):
odb = get_odb(db)
odb = get_odb()
oid = from_shortoid(odb, oid)
if check:
try:
Expand All @@ -184,11 +185,10 @@ def cat(
@app.command(help="Build and optionally write object to the database")
def build(
path: Path = dir_file_type,
db: str = ODB_PATH,
write: bool = typer.Option(False, "--write", "-w"),
shallow: bool = False,
):
odb = get_odb(db)
odb = get_odb()
fs_path = relpath(path)

fs = odb.fs
Expand All @@ -210,8 +210,8 @@ def build(

@app.command("ls", help="List objects in a tree")
@app.command("ls-tree", help="List objects in a tree")
def ls(oid: str = typer.Argument(..., allow_dash=True), db: str = ODB_PATH):
odb = get_odb(db)
def ls(oid: str = typer.Argument(..., allow_dash=True)):
odb = get_odb()
oid = from_shortoid(odb, oid)
try:
tree = Tree.load(odb, HashInfo("md5", oid))
Expand All @@ -224,8 +224,8 @@ def ls(oid: str = typer.Argument(..., allow_dash=True), db: str = ODB_PATH):


@app.command(help="Verify objects in the database")
def fsck(db: str = ODB_PATH):
odb = get_odb(db)
def fsck():
odb = get_odb()
ret = 0
for oid in odb.all():
try:
Expand All @@ -237,10 +237,8 @@ def fsck(db: str = ODB_PATH):


@app.command(help="Diff two objects in the database")
def diff(
short_oid1, short_oid2: str, db: str = ODB_PATH, unchanged: bool = False
):
odb = get_odb(db)
def diff(short_oid1, short_oid2: str, unchanged: bool = False):
odb = get_odb()
obj1 = odb.get(from_shortoid(odb, short_oid1))
obj2 = odb.get(from_shortoid(odb, short_oid2))
d = _diff(load(odb, obj1.hash_info), load(odb, obj2.hash_info), odb)
Expand Down Expand Up @@ -271,8 +269,8 @@ def _prepare_info(entry):


@app.command(help="Merge two trees and optionally write to the database.")
def merge_tree(oid1: str, oid2: str, db: str = ODB_PATH, force: bool = False):
odb = get_odb(db)
def merge_tree(oid1: str, oid2: str, force: bool = False):
odb = get_odb()
oid1 = from_shortoid(odb, oid1)
oid2 = from_shortoid(odb, oid2)
obj1 = load(odb, odb.get(oid1).hash_info)
Expand Down Expand Up @@ -300,7 +298,7 @@ def merge_tree(oid1: str, oid2: str, db: str = ODB_PATH, force: bool = False):


@app.command()
def update_tree(oid: str, patch_file: Path = file_type, db: str = ODB_PATH):
def update_tree(oid: str, patch_file: Path = file_type):
"""Update tree contents virtually with a patch file in json format.

Example patch file for reference:
Expand All @@ -316,7 +314,7 @@ def update_tree(oid: str, patch_file: Path = file_type, db: str = ODB_PATH):

Example: dvc-data update-tree f23d4 patch.json
"""
odb = get_odb(db)
odb = get_odb()
oid = from_shortoid(odb, oid)
obj = load(odb, odb.get(oid).hash_info)
assert isinstance(obj, Tree)
Expand Down Expand Up @@ -374,9 +372,8 @@ def checkout(
type: List[LinkEnum] = typer.Option( # pylint: disable=redefined-builtin
["copy"]
),
db: str = ODB_PATH,
):
odb = get_odb(db, type=[t.value for t in type])
odb = get_odb(type=[t.value for t in type])
oid = from_shortoid(odb, oid)
obj = load(odb, odb.get(oid).hash_info)
with Tqdm(total=len(obj), desc="Checking out", unit="obj") as pbar:
Expand Down
55 changes: 55 additions & 0 deletions src/dvc_data/repo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
import os

from dvc_objects.fs import LocalFileSystem
from dvc_objects.fs.base import FileSystem


class NotARepo(Exception):
    """Raised when a directory is not (or is not inside) a data repo."""


# Module-wide default filesystem, used whenever a caller passes no ``fs``.
localfs = LocalFileSystem()


class Repo:
    """A data repository: a root directory plus its control directory.

    The control directory is taken from the ``DVC_DIR`` environment
    variable when that is set and non-empty, otherwise it is ``<root>/.dvc``.
    The tmp and object directories are the ``tmp`` and ``cache``
    sub-directories of the control directory.
    """

    def __init__(self, root: str = "", fs: FileSystem = None) -> None:
        """Open the repo at ``root`` on ``fs``.

        Args:
            root: repository root; an empty value means the filesystem's
                current working directory.
            fs: filesystem to use; defaults to the module-level ``localfs``.

        Raises:
            NotARepo: if the control directory is not an existing directory.
        """
        filesystem = fs or localfs
        root_dir = root or filesystem.path.getcwd()

        env_dir = os.getenv("DVC_DIR")
        control_dir: str = (
            env_dir if env_dir else filesystem.path.join(root_dir, ".dvc")
        )
        if not filesystem.isdir(control_dir):
            raise NotARepo(f"{root_dir} is not a data repo.")

        self.fs = filesystem
        self.root = root_dir
        self._control_dir = control_dir
        self._tmp_dir: str = filesystem.path.join(control_dir, "tmp")
        self._object_dir: str = filesystem.path.join(control_dir, "cache")

    @classmethod
    def discover(
        cls,
        start: str = ".",
        fs: FileSystem = None,
    ) -> "Repo":
        """Find the nearest repo at ``start`` or one of its ancestors.

        ``start`` is made absolute, then each directory from there upwards
        is tried in turn until one opens successfully.

        Raises:
            NotARepo: if no candidate directory is a repo.
        """
        filesystem = fs or localfs
        origin = filesystem.path.abspath(start)
        candidate = origin
        while True:
            try:
                return cls(candidate, filesystem)
            except NotARepo:
                # Step up one level; an empty tail means there is no
                # further parent left to try.
                candidate, tail = filesystem.path.split(candidate)
                if not tail:
                    break
        raise NotARepo(f"No data repository was found at {origin}")

    @property
    def control_dir(self):
        """Path of the repo's control directory."""
        return self._control_dir

    @property
    def tmp_dir(self):
        """Path of the ``tmp`` directory inside the control directory."""
        return self._tmp_dir

    @property
    def object_dir(self):
        """Path of the ``cache`` directory inside the control directory."""
        return self._object_dir