dvc: remove tree-specific wrappers from cache #4214

Merged · 1 commit · Jul 15, 2020
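
Summary of the change: the cache/remote layer in dvc/remote/base.py previously exposed thin wrappers (path_info, cache, scheme, state, open, is_dir_hash, get_hash, hash_to_path_info) that only delegated to the underlying tree. This PR deletes those wrappers and updates call sites across the codebase to reach through .tree directly. A minimal sketch of the pattern, using simplified stand-in classes rather than DVC's actual ones:

class Tree:
    """Simplified stand-in for a DVC tree (filesystem abstraction)."""

    def __init__(self, root):
        self.root = root

    def hash_to_path_info(self, hash_):
        # DVC returns a PathInfo object; a plain string is enough here.
        return f"{self.root}/{hash_[:2]}/{hash_[2:]}"


class Cache:
    """Simplified stand-in for a cache/remote that wraps a tree."""

    def __init__(self, tree):
        self.tree = tree

    # Before this PR the cache also exposed wrappers such as:
    #     def hash_to_path_info(self, hash_):
    #         return self.tree.hash_to_path_info(hash_)
    # After it, callers reach through `.tree` directly.


cache = Cache(Tree(".dvc/cache"))

# Old call site (wrapper, now removed):  cache.hash_to_path_info(md5)
# New call site (direct tree access):
print(cache.tree.hash_to_path_info("d41d8cd98f00b204e9800998ecf8427e"))
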
2 changes: 1 addition & 1 deletion dvc/data_cloud.py
@@ -92,7 +92,7 @@ def pull(

def _save_pulled_checksums(self, cache):
for checksum in cache.scheme_keys("local"):
cache_file = self.repo.cache.local.hash_to_path_info(checksum)
cache_file = self.repo.cache.local.tree.hash_to_path_info(checksum)
if self.repo.cache.local.tree.exists(cache_file):
# We can safely save here, as existing corrupted files will
# be removed upon status, while files corrupted during
2 changes: 1 addition & 1 deletion dvc/dependency/repo.py
@@ -64,7 +64,7 @@ def _get_checksum(self, locked=True):

# We are polluting our repo cache with some dir listing here
if tree.isdir(path):
return self.repo.cache.local.get_hash(path, tree=tree)
return self.repo.cache.local.tree.get_hash(path, tree=tree)
return tree.get_file_hash(path)

def status(self):
2 changes: 1 addition & 1 deletion dvc/output/base.py
@@ -162,7 +162,7 @@ def supported(cls, url):

@property
def cache_path(self):
return self.cache.hash_to_path_info(self.checksum).url
return self.cache.tree.hash_to_path_info(self.checksum).url

@property
def checksum_type(self):
97 changes: 36 additions & 61 deletions dvc/remote/base.py
@@ -216,39 +216,6 @@ def __init__(self, tree):
self.cache_type_confirmed = False
self._dir_info = {}

@property
def path_info(self):
return self.tree.path_info

@property
def cache(self):
return getattr(self.repo.cache, self.scheme)

@property
def scheme(self):
return self.tree.scheme

@property
def state(self):
return self.tree.state

def open(self, *args, **kwargs):
return self.tree.open(*args, **kwargs)

def is_dir_hash(self, hash_):
return self.tree.is_dir_hash(hash_)

def get_hash(self, path_info, **kwargs):
return self.tree.get_hash(path_info, **kwargs)

# Override to return path as a string instead of PathInfo for clouds
# which support string paths (see local)
def hash_to_path(self, hash_):
return self.hash_to_path_info(hash_)

def hash_to_path_info(self, hash_):
return self.tree.hash_to_path_info(hash_)

def get_dir_cache(self, hash_):
assert hash_

@@ -265,10 +232,10 @@ def get_dir_cache(self, hash_):
return dir_info

def load_dir_cache(self, hash_):
path_info = self.hash_to_path_info(hash_)
path_info = self.tree.hash_to_path_info(hash_)

try:
with self.cache.open(path_info, "r") as fobj:
with self.tree.open(path_info, "r") as fobj:
d = json.load(fobj)
except (ValueError, FileNotFoundError) as exc:
raise DirCacheError(hash_) from exc
@@ -325,7 +292,7 @@ def changed(self, path_info, hash_info):
logger.debug("cache for '%s'('%s') has changed.", path_info, hash_)
return True

actual = self.get_hash(path_info)
actual = self.tree.get_hash(path_info)
if hash_ != actual:
logger.debug(
"hash value '%s' for '%s' has changed (actual '%s').",
@@ -389,7 +356,7 @@ def _do_link(self, from_info, to_info, link_method):
def _save_file(self, path_info, tree, hash_, save_link=True, **kwargs):
assert hash_

cache_info = self.hash_to_path_info(hash_)
cache_info = self.tree.hash_to_path_info(hash_)
if tree == self.tree:
if self.changed_cache(hash_):
self.tree.move(path_info, cache_info, mode=self.CACHE_MODE)
@@ -404,11 +371,11 @@ def _save_file(self, path_info, tree, hash_, save_link=True, **kwargs):
self.link(cache_info, path_info)

if save_link:
self.state.save_link(path_info)
self.tree.state.save_link(path_info)
# we need to update path and cache, since in case of reflink,
# or copy cache type moving original file results in updates on
# next executed command, which causes md5 recalculation
self.state.save(path_info, hash_)
self.tree.state.save(path_info, hash_)
else:
if self.changed_cache(hash_):
with tree.open(path_info, mode="rb") as fobj:
@@ -422,7 +389,7 @@ def _save_file(self, path_info, tree, hash_, save_link=True, **kwargs):
if callback:
callback(1)

self.state.save(cache_info, hash_)
self.tree.state.save(cache_info, hash_)
return {self.tree.PARAM_CHECKSUM: hash_}

def _cache_is_copy(self, path_info):
@@ -459,18 +426,19 @@ def _save_dir(self, path_info, tree, hash_, save_link=True, **kwargs):
)

if save_link:
self.state.save_link(path_info)
self.tree.state.save_link(path_info)
if self.tree.exists(path_info):
self.state.save(path_info, hash_)
self.tree.state.save(path_info, hash_)

cache_info = self.hash_to_path_info(hash_)
self.state.save(cache_info, hash_)
cache_info = self.tree.hash_to_path_info(hash_)
self.tree.state.save(cache_info, hash_)
return {self.tree.PARAM_CHECKSUM: hash_}

def save(self, path_info, tree, hash_info, save_link=True, **kwargs):
if path_info.scheme != self.scheme:
if path_info.scheme != self.tree.scheme:
raise RemoteActionNotImplemented(
f"save {path_info.scheme} -> {self.scheme}", self.scheme,
f"save {path_info.scheme} -> {self.tree.scheme}",
self.tree.scheme,
)

if not hash_info:
@@ -479,13 +447,18 @@ def save(self, path_info, tree, hash_info, save_link=True, **kwargs):
return self._save(path_info, tree, hash_, save_link, **kwargs)

def _save(self, path_info, tree, hash_, save_link=True, **kwargs):
to_info = self.hash_to_path_info(hash_)
to_info = self.tree.hash_to_path_info(hash_)
logger.debug("Saving '%s' to '%s'.", path_info, to_info)

if tree.isdir(path_info):
return self._save_dir(path_info, tree, hash_, save_link, **kwargs)
return self._save_file(path_info, tree, hash_, save_link, **kwargs)

# Override to return path as a string instead of PathInfo for clouds
# which support string paths (see local)
def hash_to_path(self, hash_):
return self.tree.hash_to_path_info(hash_)

def changed_cache_file(self, hash_):
"""Compare the given hash with the (corresponding) actual one.

@@ -506,7 +479,7 @@ def changed_cache_file(self, hash_):
)
return False

actual = self.get_hash(cache_info)
actual = self.tree.get_hash(cache_info)

logger.debug(
"cache '%s' expected '%s' actual '%s'", cache_info, hash_, actual,
@@ -545,14 +518,14 @@ def _changed_dir_cache(self, hash_, path_info=None, filter_info=None):
return False

def changed_cache(self, hash_, path_info=None, filter_info=None):
if self.is_dir_hash(hash_):
if self.tree.is_dir_hash(hash_):
return self._changed_dir_cache(
hash_, path_info=path_info, filter_info=filter_info
)
return self.changed_cache_file(hash_)

def already_cached(self, path_info):
current = self.get_hash(path_info)
current = self.tree.get_hash(path_info)

if not current:
return False
@@ -579,15 +552,15 @@ def _checkout_file(
):
"""The file is changed we need to checkout a new copy"""
added, modified = True, False
cache_info = self.hash_to_path_info(hash_)
cache_info = self.tree.hash_to_path_info(hash_)
if self.tree.exists(path_info):
logger.debug("data '%s' will be replaced.", path_info)
self.safe_remove(path_info, force=force)
added, modified = False, True

self.link(cache_info, path_info)
self.state.save_link(path_info)
self.state.save(path_info, hash_)
self.tree.state.save_link(path_info)
self.tree.state.save(path_info, hash_)
if progress_callback:
progress_callback(str(path_info))

@@ -616,7 +589,7 @@ def _checkout_dir(
for entry in dir_info:
relative_path = entry[self.tree.PARAM_RELPATH]
entry_hash = entry[self.tree.PARAM_CHECKSUM]
entry_cache_info = self.hash_to_path_info(entry_hash)
entry_cache_info = self.tree.hash_to_path_info(entry_hash)
entry_info = path_info / relative_path

if filter_info and not entry_info.isin_or_eq(filter_info):
@@ -627,7 +600,7 @@
modified = True
self.safe_remove(entry_info, force=force)
self.link(entry_cache_info, entry_info)
self.state.save(entry_info, entry_hash)
self.tree.state.save(entry_info, entry_hash)
if progress_callback:
progress_callback(str(entry_info))

@@ -636,8 +609,8 @@
or modified
)

self.state.save_link(path_info)
self.state.save(path_info, hash_)
self.tree.state.save_link(path_info)
self.tree.state.save(path_info, hash_)

# relink is not modified, assume it as nochange
return added, not added and modified and not relink
@@ -663,7 +636,7 @@ def checkout(
relink=False,
filter_info=None,
):
if path_info.scheme not in ["local", self.scheme]:
if path_info.scheme not in ["local", self.tree.scheme]:
raise NotImplementedError

hash_ = hash_info.get(self.tree.PARAM_CHECKSUM)
@@ -696,7 +669,9 @@
if progress_callback:
progress_callback(
str(path_info),
self.get_files_number(self.path_info, hash_, filter_info),
self.get_files_number(
self.tree.path_info, hash_, filter_info
),
)
if failed:
raise CheckoutError([failed])
@@ -717,7 +692,7 @@ def _checkout(
relink=False,
filter_info=None,
):
if not self.is_dir_hash(hash_):
if not self.tree.is_dir_hash(hash_):
return self._checkout_file(
path_info, hash_, force, progress_callback, relink
)
@@ -732,7 +707,7 @@ def get_files_number(self, path_info, hash_, filter_info):
if not hash_:
return 0

if not self.is_dir_hash(hash_):
if not self.tree.is_dir_hash(hash_):
return 1

if not filter_info:
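
One wrapper survives in the dvc/remote/base.py diff: hash_to_path is kept (relocated below _save) because clouds that support plain string paths, such as the local cache, override it to return a str instead of a PathInfo. A hedged sketch of that override, with stand-in class names rather than DVC's actual ones:

class BaseCache:
    """Stand-in for the base cache/remote; not DVC's real class."""

    def __init__(self, tree):
        self.tree = tree

    # Default: hand back whatever the tree produces (a PathInfo in DVC).
    def hash_to_path(self, hash_):
        return self.tree.hash_to_path_info(hash_)


class LocalCache(BaseCache):
    # Local storage works with plain string paths, so the override
    # converts the PathInfo-like value to str.
    def hash_to_path(self, hash_):
        return str(self.tree.hash_to_path_info(hash_))
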
14 changes: 8 additions & 6 deletions dvc/remote/local.py
@@ -97,7 +97,7 @@ def hashes_exist(
def already_cached(self, path_info):
assert path_info.scheme in ["", "local"]

current_md5 = self.get_hash(path_info)
current_md5 = self.tree.get_hash(path_info)

if not current_md5:
return False
@@ -147,7 +147,7 @@ def _status(
logger.debug(
f"Preparing to collect status from {remote.tree.path_info}"
)
md5s = set(named_cache.scheme_keys(self.scheme))
md5s = set(named_cache.scheme_keys(self.tree.scheme))

logger.debug("Collecting information from local cache...")
local_exists = frozenset(
@@ -163,7 +163,7 @@
else:
logger.debug("Collecting information from remote cache...")
remote_exists = set()
dir_md5s = set(named_cache.dir_keys(self.scheme))
dir_md5s = set(named_cache.dir_keys(self.tree.scheme))
if dir_md5s:
remote_exists.update(
self._indexed_dir_hashes(named_cache, remote, dir_md5s)
@@ -188,7 +188,7 @@ def make_names(hash_, names):
dir_status = {}
file_status = {}
dir_contents = {}
for hash_, item in named_cache[self.scheme].items():
for hash_, item in named_cache[self.tree.scheme].items():
if item.children:
dir_status[hash_] = make_names(hash_, item.names)
dir_contents[hash_] = set()
@@ -233,7 +233,9 @@ def _indexed_dir_hashes(self, named_cache, remote, dir_md5s):
# If .dir hash exists on the remote, assume directory contents
# still exists on the remote
for dir_hash in dir_exists:
file_hashes = list(named_cache.child_keys(self.scheme, dir_hash))
file_hashes = list(
named_cache.child_keys(self.tree.scheme, dir_hash)
)
if dir_hash not in remote.index:
logger.debug(
"Indexing new .dir '{}' with '{}' nested files".format(
@@ -263,7 +265,7 @@ def _get_plans(self, download, remote, status_info, status):
status_info.items(), desc="Analysing status", unit="file"
):
if info["status"] == status:
cache.append(self.hash_to_path_info(md5))
cache.append(self.tree.hash_to_path_info(md5))
path_infos.append(remote.tree.hash_to_path_info(md5))
names.append(info["name"])
hashes.append(md5)
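
For context on the _status changes in dvc/remote/local.py: the method boils down to comparing which hashes exist in the local cache against which exist on the remote. A rough, hedged sketch of that comparison with made-up values (the status labels here follow the usual ok/new/deleted/missing convention and may not match DVC's internals exactly):

# Made-up hashes for illustration; the real method collects these sets from
# named_cache, the local cache tree, and the remote index/tree.
md5s = {"aaa1", "bbb2", "ccc3", "ddd4"}
local_exists = frozenset({"aaa1", "bbb2"})
remote_exists = {"bbb2", "ccc3"}

status = {}
for md5 in md5s:
    in_local, in_remote = md5 in local_exists, md5 in remote_exists
    if in_local and in_remote:
        status[md5] = "ok"        # present on both sides
    elif in_local:
        status[md5] = "new"       # only local, would be pushed
    elif in_remote:
        status[md5] = "deleted"   # only remote, would be fetched on pull
    else:
        status[md5] = "missing"   # present on neither side
print(status)
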
2 changes: 1 addition & 1 deletion dvc/repo/diff.py
@@ -37,7 +37,7 @@ def _to_path(output):

def _to_checksum(output):
if on_working_tree:
return self.cache.local.get_hash(output.path_info)
return self.cache.local.tree.get_hash(output.path_info)
return output.checksum

def _exists(output):
2 changes: 1 addition & 1 deletion dvc/repo/tree.py
@@ -96,7 +96,7 @@ def open(

if out.is_dir_checksum:
checksum = self._get_granular_checksum(path, out)
cache_path = out.cache.hash_to_path_info(checksum).url
cache_path = out.cache.tree.hash_to_path_info(checksum).url
else:
cache_path = out.cache_path
return open(cache_path, mode=mode, encoding=encoding)
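
The open() change above resolves a granular checksum to its cache path through the tree before opening the file. A minimal sketch of that flow; open_cached and out are hypothetical, introduced only to mirror the attribute chain used in the diff:

def open_cached(out, checksum, mode="r", encoding=None):
    """Hypothetical helper mirroring the call chain in dvc/repo/tree.py above."""
    # tree.hash_to_path_info() returns a PathInfo; .url yields a usable path.
    cache_path = out.cache.tree.hash_to_path_info(checksum).url
    return open(cache_path, mode=mode, encoding=encoding)
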
4 changes: 2 additions & 2 deletions dvc/tree/base.py
@@ -282,7 +282,7 @@ def get_hash(self, path_info, tree=None, **kwargs):
if (
hash_
and self.is_dir_hash(hash_)
and not tree.exists(self.cache.hash_to_path_info(hash_))
and not tree.exists(self.cache.tree.hash_to_path_info(hash_))
):
hash_ = None

@@ -375,7 +375,7 @@ def _collect_dir(self, path_info, tree, **kwargs):

def _save_dir_info(self, dir_info, path_info):
hash_, tmp_info = self._get_dir_info_hash(dir_info)
new_info = self.cache.hash_to_path_info(hash_)
new_info = self.cache.tree.hash_to_path_info(hash_)
if self.cache.changed_cache_file(hash_):
self.cache.tree.makedirs(new_info.parent)
self.cache.tree.move(
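
For background on the _save_dir_info hunk above: a directory is cached as a JSON manifest of its files, and the manifest itself is hashed and moved into the cache via the cache's tree. A rough, hedged sketch of that idea (simplified; details such as entry ordering, PathInfo handling, and the exact .dir naming are assumptions here):

import hashlib
import json


def save_dir_manifest(dir_info, cache_root):
    """dir_info: list of {"relpath": ..., "md5": ...} entries (illustrative)."""
    payload = json.dumps(dir_info, sort_keys=True).encode("utf-8")
    hash_ = hashlib.md5(payload).hexdigest() + ".dir"
    # Mirrors cache.tree.hash_to_path_info(hash_): <cache_root>/<xx>/<rest>.dir
    path = f"{cache_root}/{hash_[:2]}/{hash_[2:]}"
    return hash_, path


print(save_dir_manifest([{"relpath": "data.csv", "md5": "aaa111"}], ".dvc/cache"))
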