Skip to content

tree: get rid of explicit tree arguments #4310

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 31, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions dvc/cache/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -276,8 +276,6 @@ def save(self, path_info, tree, hash_info, save_link=True, **kwargs):
self.tree.scheme,
)

if not hash_info:
hash_info = self.tree.save_info(path_info, tree=tree, **kwargs)
hash_ = hash_info[self.tree.PARAM_CHECKSUM]
return self._save(path_info, tree, hash_, save_link, **kwargs)

Expand Down
28 changes: 21 additions & 7 deletions dvc/external_repo.py
Original file line number Diff line number Diff line change
Expand Up @@ -90,15 +90,26 @@ def use_cache(self, cache):
"""Use the specified cache in place of default tmpdir cache for
download operations.
"""
if hasattr(self, "cache"):
save_cache = self.cache.local
self.cache.local = cache
has_cache = hasattr(self, "cache")

if has_cache:
save_cache = self.cache.local # pylint: disable=E0203
self.cache.local = cache # pylint: disable=E0203
else:
from collections import namedtuple

mock_cache = namedtuple("MockCache", ["local"])(local=cache)
self.cache = mock_cache # pylint: disable=W0201

Comment on lines +98 to +103
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is temporary, will be no longer needed after #4144 , just need to remove tree= arg from tree methods.

self._local_cache = cache

yield

if hasattr(self, "cache"):
if has_cache:
self.cache.local = save_cache
else:
del self.cache

self._local_cache = None

@cached_property
Expand Down Expand Up @@ -130,14 +141,17 @@ def download_update(result):
for path in paths:
if not self.repo_tree.exists(path):
raise PathMissingError(path, self.url)
save_info = self.local_cache.save(
hash_info = self.repo_tree.save_info(
path, download_callback=download_update
)
self.local_cache.save(
path,
self.repo_tree,
None,
hash_info,
save_link=False,
download_callback=download_update,
)
save_infos.append(save_info)
save_infos.append(hash_info)

return sum(download_results), failed, save_infos

Expand Down
3 changes: 3 additions & 0 deletions dvc/repo/tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,9 @@ class RepoTree(BaseTree): # pylint:disable=abstract-method
Any kwargs will be passed to `DvcTree()`.
"""

scheme = "local"
PARAM_CHECKSUM = "md5"

def __init__(self, repo, **kwargs):
super().__init__(repo, {"url": repo.root_dir})
if hasattr(repo, "dvc_dir"):
Expand Down
44 changes: 18 additions & 26 deletions dvc/tree/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,22 +236,17 @@ def is_dir_hash(cls, hash_):
return False
return hash_.endswith(cls.CHECKSUM_DIR_SUFFIX)

def get_hash(self, path_info, tree=None, **kwargs):
def get_hash(self, path_info, **kwargs):
assert path_info and (
isinstance(path_info, str) or path_info.scheme == self.scheme
)

if not tree:
tree = self

if not tree.exists(path_info):
if not self.exists(path_info):
return None

if tree == self:
# pylint: disable=assignment-from-none
hash_ = self.state.get(path_info)
else:
hash_ = None
# pylint: disable=assignment-from-none
hash_ = self.state.get(path_info)

# If we have dir hash in state db, but dir cache file is lost,
# then we need to recollect the dir via .get_dir_hash() call below,
# see https://github.com/iterative/dvc/issues/2219 for context
Expand All @@ -267,10 +262,10 @@ def get_hash(self, path_info, tree=None, **kwargs):
if hash_:
return hash_

if tree.isdir(path_info):
hash_ = self.get_dir_hash(path_info, tree, **kwargs)
if self.isdir(path_info):
hash_ = self.get_dir_hash(path_info, **kwargs)
else:
hash_ = tree.get_file_hash(path_info)
hash_ = self.get_file_hash(path_info)

if hash_ and self.exists(path_info):
self.state.save(path_info, hash_)
Expand All @@ -280,11 +275,11 @@ def get_hash(self, path_info, tree=None, **kwargs):
def get_file_hash(self, path_info):
raise NotImplementedError

def get_dir_hash(self, path_info, tree, **kwargs):
def get_dir_hash(self, path_info, **kwargs):
if not self.cache:
raise RemoteCacheRequiredError(path_info)

dir_info = self._collect_dir(path_info, tree, **kwargs)
dir_info = self._collect_dir(path_info, **kwargs)
return self._save_dir_info(dir_info, path_info)

def hash_to_path_info(self, hash_):
Expand All @@ -298,29 +293,26 @@ def path_to_hash(self, path):

return "".join(parts)

def save_info(self, path_info, tree=None, **kwargs):
return {
self.PARAM_CHECKSUM: self.get_hash(path_info, tree=tree, **kwargs)
}
def save_info(self, path_info, **kwargs):
return {self.PARAM_CHECKSUM: self.get_hash(path_info, **kwargs)}

@staticmethod
def _calculate_hashes(file_infos, tree):
def _calculate_hashes(self, file_infos):
file_infos = list(file_infos)
with Tqdm(
total=len(file_infos),
unit="md5",
desc="Computing file/dir hashes (only done once)",
) as pbar:
worker = pbar.wrap_fn(tree.get_file_hash)
with ThreadPoolExecutor(max_workers=tree.hash_jobs) as executor:
worker = pbar.wrap_fn(self.get_file_hash)
with ThreadPoolExecutor(max_workers=self.hash_jobs) as executor:
tasks = executor.map(worker, file_infos)
hashes = dict(zip(file_infos, tasks))
return hashes

def _collect_dir(self, path_info, tree, **kwargs):
def _collect_dir(self, path_info, **kwargs):
file_infos = set()

for fname in tree.walk_files(path_info, **kwargs):
for fname in self.walk_files(path_info, **kwargs):
if DvcIgnore.DVCIGNORE_FILE == fname.name:
raise DvcIgnoreInCollectedDirError(fname.parent)

Expand All @@ -329,7 +321,7 @@ def _collect_dir(self, path_info, tree, **kwargs):
hashes = {fi: self.state.get(fi) for fi in file_infos}
not_in_state = {fi for fi, hash_ in hashes.items() if hash_ is None}

new_hashes = self._calculate_hashes(not_in_state, tree)
new_hashes = self._calculate_hashes(not_in_state)
hashes.update(new_hashes)

result = [
Expand Down
5 changes: 4 additions & 1 deletion tests/func/test_tree.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,10 @@ def test_repotree_cache_save(tmp_dir, dvc, scm, erepo_dir, local_cloud):
with erepo_dir.dvc.state:
cache = dvc.cache.local
with cache.tree.state:
cache.save(PathInfo(erepo_dir / "dir"), tree, None)
path_info = PathInfo(erepo_dir / "dir")
hash_info = cache.tree.save_info(path_info)
cache.save(path_info, tree, hash_info)

for hash_ in expected:
assert os.path.exists(cache.tree.hash_to_path_info(hash_))

Expand Down