Commit 74f5cc6

output: Don't serialize hash_info or meta in cloud versioning.
Closes #8357
1 parent 1c33024 commit 74f5cc6
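
Note: in effect, a cloud-versioned directory output now serializes only path and files; the directory hash_info is rebuilt from those entries on load (see the __init__ hunk in dvc/output.py below). A rough before/after sketch of the parsed .dvc entry, with illustrative values borrowed from the tests in this commit; exact meta fields vary per output:

# Illustration only (not code from this commit). Values are borrowed from the
# tests below; the real meta fields (size, nfiles, ...) depend on the output.
files = [
    {"size": 3, "md5": "acbd18db4cc2f85cedef654fccc4a4d8", "relpath": "foo"},
    {"size": 3, "md5": "acbd18db4cc2f85cedef654fccc4a4d8", "relpath": "bar"},
]

before = {
    "path": "data",
    "md5": "77e8000f532886eef8ee1feba82e9bad.dir",  # hash_info, now derived from files
    "size": 6,                                      # meta (illustrative)
    "nfiles": 2,                                    # meta (illustrative)
    "files": files,
}

after = {
    "path": "data",
    "files": files,  # same shape test_dumpd_cloud_versioning_dir expects
}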

5 files changed, +160 -22 lines changed


dvc/output.py

Lines changed: 21 additions & 16 deletions
@@ -368,7 +368,11 @@ def __init__(
             name=self.hash_name,
             value=getattr(self.meta, self.hash_name, None),
         )
-        if self.meta.nfiles or self.hash_info and self.hash_info.isdir:
+        if self.files:
+            tree = Tree.from_list(self.files, hash_name=self.hash_name)
+            tree.digest()
+            self.hash_info = tree.hash_info
+        elif self.meta.nfiles or self.hash_info and self.hash_info.isdir:
             self.meta.isdir = True
             if not self.hash_info and self.hash_name != "md5":
                 md5 = getattr(self.meta, "md5", None)
@@ -747,7 +751,22 @@ def _commit_granular_dir(self, filter_info) -> Optional["HashFile"]:
     def dumpd(self, **kwargs):  # noqa: C901
         meta = self.meta.to_dict()
         meta.pop("isdir", None)
-        ret: Dict[str, Any] = {**self.hash_info.to_dict(), **meta}
+        ret: Dict[str, Any] = {}
+        if (
+            (not self.IS_DEPENDENCY or self.stage.is_import)
+            and self.hash_info.isdir
+            and (kwargs.get("with_files") or self.files is not None)
+        ):
+            obj: Optional["HashFile"]
+            if self.obj:
+                obj = self.obj
+            else:
+                obj = self.get_obj()
+            if obj:
+                obj = cast(Tree, obj)
+                ret[self.PARAM_FILES] = obj.as_list(with_meta=True)
+        else:
+            ret = {**self.hash_info.to_dict(), **meta}

         if self.is_in_repo:
             path = self.fs.path.as_posix(
@@ -788,20 +807,6 @@ def dumpd(self, **kwargs):  # noqa: C901
         if not self.can_push:
             ret[self.PARAM_PUSH] = self.can_push

-        if (
-            (not self.IS_DEPENDENCY or self.stage.is_import)
-            and self.hash_info.isdir
-            and (kwargs.get("with_files") or self.files is not None)
-        ):
-            obj: Optional["HashFile"]
-            if self.obj:
-                obj = self.obj
-            else:
-                obj = self.get_obj()
-            if obj:
-                obj = cast(Tree, obj)
-                ret[self.PARAM_FILES] = obj.as_list(with_meta=True)
-
         return ret

     def verify_metric(self):
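
Note: the __init__ hunk above is what makes the dropped fields recoverable: when files is given, hash_info is computed from the entries with Tree.from_list(...).digest() instead of being read from a stored md5. A minimal sketch of that idea; the dvc_data.hashfile.tree import path is an assumption, only the from_list/digest/hash_info calls are taken from the diff:

# Sketch only. The import path below is assumed; the calls mirror the diff above.
from dvc_data.hashfile.tree import Tree  # assumption: Tree lives here in this dvc version

files = [
    {"size": 3, "md5": "acbd18db4cc2f85cedef654fccc4a4d8", "relpath": "foo"},
    {"size": 3, "md5": "acbd18db4cc2f85cedef654fccc4a4d8", "relpath": "bar"},
]

# Build an in-memory tree from the per-file entries and hash it, rather than
# trusting a hash_info/meta pair stored alongside them.
tree = Tree.from_list(files, hash_name="md5")
tree.digest()
print(tree.hash_info)  # HashInfo named "md5" with a "<...>.dir" value (cf. the unit test below)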

dvc/stage/serialize.py

Lines changed: 4 additions & 6 deletions
@@ -158,18 +158,16 @@ def to_single_stage_lockfile(stage: "Stage", **kwargs) -> dict:
     def _dumpd(item):
         meta_d = item.meta.to_dict()
         meta_d.pop("isdir", None)
-        ret = [
-            (item.PARAM_PATH, item.def_path),
-            *item.hash_info.to_dict().items(),
-            *meta_d.items(),
-        ]

         if item.hash_info.isdir and kwargs.get("with_files"):
             if item.obj:
                 obj = item.obj
             else:
                 obj = item.get_obj()
-            ret.append((item.PARAM_FILES, obj.as_list(with_meta=True)))
+            ret = [((item.PARAM_FILES, obj.as_list(with_meta=True)))]
+        else:
+            ret = [*item.hash_info.to_dict().items(), *meta_d.items()]
+        ret.insert(0, (item.PARAM_PATH, item.def_path))

         return OrderedDict(ret)
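
Note: the lockfile serializer follows the same rule: a cloud-versioned directory entry keeps only path plus files, while everything else keeps the old path + hash + meta layout. A self-contained sketch that loosely mirrors the branching in _dumpd above (dumpd_entry and its plain-dict arguments are made up for illustration):

from collections import OrderedDict

# Illustration only: simplified stand-in for _dumpd; the real condition is
# item.hash_info.isdir and kwargs.get("with_files").
def dumpd_entry(def_path, hash_info_d, meta_d, files, with_files):
    if with_files and files is not None:
        # cloud-versioned directory: serialize only the per-file entries
        ret = [("files", files)]
    else:
        # regular output: flatten hash info and meta as before
        ret = [*hash_info_d.items(), *meta_d.items()]
    ret.insert(0, ("path", def_path))
    return OrderedDict(ret)

print(dumpd_entry("dir", {"md5": "abc.dir"}, {"size": 6, "nfiles": 2}, None, False))
# OrderedDict([('path', 'dir'), ('md5', 'abc.dir'), ('size', 6), ('nfiles', 2)])
print(dumpd_entry("dir", {}, {}, [{"relpath": "foo"}], True))
# OrderedDict([('path', 'dir'), ('files', [{'relpath': 'foo'}])])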

tests/func/test_add.py

Lines changed: 58 additions & 0 deletions
@@ -1142,3 +1142,61 @@ def test_add_with_annotations(M, tmp_dir, dvc):
     (stage,) = dvc.add("foo", type="t2")
     assert stage.outs[0].annot == Annotation(**annot)
     assert (tmp_dir / "foo.dvc").parse() == M.dict(outs=[M.dict(**annot)])
+
+
+def test_add_updates_to_cloud_versioning_dir(tmp_dir, dvc):
+    data_dvc = tmp_dir / "data.dvc"
+    data_dvc.dump(
+        {
+            "outs": [
+                {
+                    "path": "data",
+                    "files": [
+                        {
+                            "size": 3,
+                            "version_id": "WYRG4BglP7pD.gEoJP6a4AqOhl.FRA.h",
+                            "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+                            "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+                            "relpath": "bar",
+                        },
+                        {
+                            "size": 3,
+                            "version_id": "0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0",
+                            "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+                            "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+                            "relpath": "foo",
+                        },
+                    ],
+                }
+            ]
+        }
+    )
+
+    data = tmp_dir / "data"
+    data.mkdir()
+    (data / "foo").write_text("foo")
+    (data / "bar").write_text("bar2")
+
+    dvc.add("data")
+
+    assert (tmp_dir / "data.dvc").parse() == {
+        "outs": [
+            {
+                "path": "data",
+                "files": [
+                    {
+                        "size": 4,
+                        "md5": "224e2539f52203eb33728acd228b4432",
+                        "relpath": "bar",
+                    },
+                    {
+                        "size": 3,
+                        "version_id": "0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0",
+                        "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+                        "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+                        "relpath": "foo",
+                    },
+                ],
+            }
+        ]
+    }

tests/unit/output/test_output.py

Lines changed: 52 additions & 0 deletions
@@ -121,3 +121,55 @@ def test_remote_missing_dependency_on_dir_pull(tmp_dir, scm, dvc, mocker):
     )
     with pytest.raises(RemoteMissingDepsError):
         dvc.pull()
+
+
+def test_hash_info_cloud_versioning_dir(mocker):
+    stage = mocker.MagicMock()
+    stage.repo.fs.version_aware = False
+    stage.repo.fs.PARAM_CHECKSUM = "etag"
+    files = [
+        {
+            "size": 3,
+            "version_id": "WYRG4BglP7pD.gEoJP6a4AqOhl.FRA.h",
+            "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "relpath": "bar",
+        },
+        {
+            "size": 3,
+            "version_id": "0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0",
+            "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "relpath": "foo",
+        },
+    ]
+    out = Output(stage, "path", files=files)
+    # hash_info constructed from files
+    assert out.hash_info.name == "md5"
+    assert out.hash_info.value == "77e8000f532886eef8ee1feba82e9bad.dir"
+
+
+def test_dumpd_cloud_versioning_dir(mocker):
+    stage = mocker.MagicMock()
+    stage.repo.fs.version_aware = False
+    stage.repo.fs.PARAM_CHECKSUM = "md5"
+    files = [
+        {
+            "size": 3,
+            "version_id": "WYRG4BglP7pD.gEoJP6a4AqOhl.FRA.h",
+            "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "relpath": "bar",
+        },
+        {
+            "size": 3,
+            "version_id": "0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0",
+            "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "relpath": "foo",
+        },
+    ]
+    out = Output(stage, "path", files=files)
+
+    dumpd = out.dumpd()
+    assert dumpd == {"path": "path", "files": files}

tests/unit/stage/test_serialize_pipeline_lock.py

Lines changed: 25 additions & 0 deletions
@@ -255,3 +255,28 @@ def test_to_lockfile(dvc):
             ]
         )
     }
+
+
+def test_to_single_stage_lockfile_cloud_versioning_dir(dvc):
+    stage = create_stage(PipelineStage, dvc, outs=["dir"], **kwargs)
+    stage.outs[0].hash_info = HashInfo("md5", "md-five.dir")
+    files = [
+        {
+            "size": 3,
+            "version_id": "WYRG4BglP7pD.gEoJP6a4AqOhl.FRA.h",
+            "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "relpath": "bar",
+        },
+        {
+            "size": 3,
+            "version_id": "0vL53tFVY5vVAoJ4HG2jCS1mEcohDPE0",
+            "etag": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "md5": "acbd18db4cc2f85cedef654fccc4a4d8",
+            "relpath": "foo",
+        },
+    ]
+    stage.outs[0].files = files
+    e = _to_single_stage_lockfile(stage, with_files=True)
+    assert Schema(e)
+    assert e["outs"][0] == {"path": "dir", "files": files}
