Skip to content

Restyle get/list/import/api: subrepo support #4265

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
Wants to merge 8 commits (author and source/target branch names are missing from this capture)
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 26 additions & 10 deletions dvc/api.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,7 +2,12 @@
from contextlib import _GeneratorContextManager as GCM
from contextlib import contextmanager

from dvc.exceptions import DvcException, NotDvcRepoError
from dvc.exceptions import (
DvcException,
FileMissingError,
NotDvcRepoError,
PathMissingError,
)
from dvc.external_repo import external_repo
from dvc.repo import Repo

Expand All @@ -26,10 +31,14 @@ def get_url(path, repo=None, rev=None, remote=None):
directory in the remote storage.
"""
with _make_repo(repo, rev=rev) as _repo:
if not isinstance(_repo, Repo):
raise UrlNotDvcRepoError(_repo.url) # pylint: disable=no-member
out = _repo.find_out_by_relpath(path)
remote_obj = _repo.cloud.get_remote(remote)
# pylint: disable=no-member
path = os.path.join(_repo.root_dir, path)
is_erepo = not isinstance(_repo, Repo)
r = _repo.in_repo(path) if is_erepo else _repo
if is_erepo and not r:
raise UrlNotDvcRepoError(_repo.url)
out = r.find_out_by_relpath(path)
remote_obj = r.cloud.get_remote(remote)
return str(remote_obj.tree.hash_to_path_info(out.checksum))


Expand Down Expand Up @@ -74,10 +83,17 @@ def __getattr__(self, name):

def _open(path, repo=None, rev=None, remote=None, mode="r", encoding=None):
with _make_repo(repo, rev=rev) as _repo:
with _repo.open_by_relpath(
path, remote=remote, mode=mode, encoding=encoding
) as fd:
yield fd
is_erepo = not isinstance(_repo, Repo)
try:
with _repo.repo_tree.open_by_relpath(
path, remote=remote, mode=mode, encoding=encoding
) as fd:
yield fd
except FileNotFoundError as exc:
if is_erepo:
# pylint: disable=no-member
raise PathMissingError(path, _repo.url) from exc
raise FileMissingError(path) from exc


def read(path, repo=None, rev=None, remote=None, mode="r", encoding=None):
Expand All @@ -101,5 +117,5 @@ def _make_repo(repo_url=None, rev=None):
return
except NotDvcRepoError:
pass # fallthrough to external_repo
with external_repo(url=repo_url, rev=rev) as repo:
with external_repo(url=repo_url, rev=rev, stream=True) as repo:
yield repo
27 changes: 6 additions & 21 deletions dvc/dependency/repo.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -2,7 +2,6 @@

from voluptuous import Required

from dvc.exceptions import OutputNotFoundError
from dvc.path_info import PathInfo

from .local import LocalDependency
Expand Down Expand Up @@ -42,30 +41,17 @@ def repo_pair(self):
def __str__(self):
return "{} ({})".format(self.def_path, self.def_repo[self.PARAM_URL])

def _make_repo(self, *, locked=True):
def _make_repo(self, *, locked=True, **kwargs):
from dvc.external_repo import external_repo

d = self.def_repo
rev = (d.get("rev_lock") if locked else None) or d.get("rev")
return external_repo(d["url"], rev=rev)
return external_repo(d["url"], rev=rev, **kwargs)

def _get_checksum(self, locked=True):
from dvc.repo.tree import RepoTree

with self._make_repo(locked=locked) as repo:
try:
return repo.find_out_by_relpath(self.def_path).info["md5"]
except OutputNotFoundError:
path = PathInfo(os.path.join(repo.root_dir, self.def_path))

# we want stream but not fetch, so DVC out directories are
# walked, but dir contents is not fetched
tree = RepoTree(repo, stream=True)

# We are polluting our repo cache with some dir listing here
if tree.isdir(path):
return self.repo.cache.local.tree.get_hash(path, tree=tree)
return tree.get_file_hash(path)
with self._make_repo(locked=locked, stream=True) as repo:
path = PathInfo(os.path.join(repo.root_dir, self.def_path))
return repo.get_checksum(path, self.repo.cache.local)

def status(self):
current_checksum = self._get_checksum(locked=True)
Expand All @@ -88,8 +74,7 @@ def download(self, to):
self.def_repo[self.PARAM_REV_LOCK] = repo.get_rev()

cache = self.repo.cache.local
with repo.use_cache(cache):
_, _, cache_infos = repo.fetch_external([self.def_path])
_, _, cache_infos = repo.fetch_external([self.def_path], cache)
cache.checkout(to.path_info, cache_infos[0])

def update(self, rev=None):
Expand Down
Loading