1
1
"""Manages dvc remotes that user can use with push/pull/status commands."""
2
2
3
3
import logging
4
- from typing import TYPE_CHECKING , Iterable , Optional
4
+ from typing import TYPE_CHECKING , Iterable , Optional , Set , Tuple
5
5
6
6
from dvc .config import NoRemoteError , RemoteConfigError
7
7
from dvc .utils .objects import cached_property
8
8
from dvc_data .hashfile .db import get_index
9
+ from dvc_data .hashfile .transfer import TransferResult
9
10
10
11
if TYPE_CHECKING :
11
12
from dvc .fs import FileSystem
12
13
from dvc_data .hashfile .db import HashFileDB
13
14
from dvc_data .hashfile .hash_info import HashInfo
14
15
from dvc_data .hashfile .status import CompareStatusResult
15
- from dvc_data .hashfile .transfer import TransferResult
16
16
17
17
logger = logging .getLogger (__name__ )
18
18
@@ -50,6 +50,21 @@ def legacy_odb(self) -> "HashFileDB":
50
50
return get_odb (self .fs , path , hash_name = "md5-dos2unix" , ** self .config )
51
51
52
52
53
def _split_legacy_hash_infos(
    hash_infos: Iterable["HashInfo"],
) -> Tuple[Set["HashInfo"], Set["HashInfo"]]:
    """Partition *hash_infos* into ``(legacy, default)`` sets.

    An entry is "legacy" when its hash name is one of the legacy hash
    names (e.g. md5-dos2unix); every other entry goes into the default set.
    """
    # Imported lazily to avoid a module-level import cycle.
    from dvc.cachemgr import LEGACY_HASH_NAMES

    legacy: Set["HashInfo"] = set()
    default: Set["HashInfo"] = set()
    for info in hash_infos:
        bucket = legacy if info.name in LEGACY_HASH_NAMES else default
        bucket.add(info)
    return legacy, default
66
+
67
+
53
68
class DataCloud :
54
69
"""Class that manages dvc remotes.
55
70
@@ -167,14 +182,40 @@ def push(
167
182
By default remote from core.remote config option is used.
168
183
odb: optional ODB to push to. Overrides remote.
169
184
"""
170
- odb = odb or self .get_remote_odb (remote , "push" )
185
+ if odb is not None :
186
+ return self ._push (objs , jobs = jobs , odb = odb )
187
+ legacy_objs , default_objs = _split_legacy_hash_infos (objs )
188
+ result = TransferResult (set (), set ())
189
+ if legacy_objs :
190
+ odb = self .get_remote_odb (remote , "push" , hash_name = "md5-dos2unix" )
191
+ t , f = self ._push (legacy_objs , jobs = jobs , odb = odb )
192
+ result .transferred .update (t )
193
+ result .failed .update (f )
194
+ if default_objs :
195
+ odb = self .get_remote_odb (remote , "push" )
196
+ t , f = self ._push (default_objs , jobs = jobs , odb = odb )
197
+ result .transferred .update (t )
198
+ result .failed .update (f )
199
+ return result
200
+
201
+ def _push (
202
+ self ,
203
+ objs : Iterable ["HashInfo" ],
204
+ * ,
205
+ jobs : Optional [int ] = None ,
206
+ odb : "HashFileDB" ,
207
+ ) -> "TransferResult" :
208
+ if odb .hash_name == "md5-dos2unix" :
209
+ cache = self .repo .cache .legacy
210
+ else :
211
+ cache = self .repo .cache .local
171
212
return self .transfer (
172
- self . repo . cache . local ,
213
+ cache ,
173
214
odb ,
174
215
objs ,
175
216
jobs = jobs ,
176
217
dest_index = get_index (odb ),
177
- cache_odb = self . repo . cache . local ,
218
+ cache_odb = cache ,
178
219
validate_status = self ._log_missing ,
179
220
)
180
221
@@ -194,14 +235,41 @@ def pull(
194
235
By default remote from core.remote config option is used.
195
236
odb: optional ODB to pull from. Overrides remote.
196
237
"""
197
- odb = odb or self .get_remote_odb (remote , "pull" )
238
+ if odb is not None :
239
+ return self ._pull (objs , jobs = jobs , odb = odb )
240
+ legacy_objs , default_objs = _split_legacy_hash_infos (objs )
241
+ result = TransferResult (set (), set ())
242
+ if legacy_objs :
243
+ odb = self .get_remote_odb (remote , "pull" , hash_name = "md5-dos2unix" )
244
+ assert odb .hash_name == "md5-dos2unix"
245
+ t , f = self ._pull (legacy_objs , jobs = jobs , odb = odb )
246
+ result .transferred .update (t )
247
+ result .failed .update (f )
248
+ if default_objs :
249
+ odb = self .get_remote_odb (remote , "pull" )
250
+ t , f = self ._pull (default_objs , jobs = jobs , odb = odb )
251
+ result .transferred .update (t )
252
+ result .failed .update (f )
253
+ return result
254
+
255
+ def _pull (
256
+ self ,
257
+ objs : Iterable ["HashInfo" ],
258
+ * ,
259
+ jobs : Optional [int ] = None ,
260
+ odb : "HashFileDB" ,
261
+ ) -> "TransferResult" :
262
+ if odb .hash_name == "md5-dos2unix" :
263
+ cache = self .repo .cache .legacy
264
+ else :
265
+ cache = self .repo .cache .local
198
266
return self .transfer (
199
267
odb ,
200
- self . repo . cache . local ,
268
+ cache ,
201
269
objs ,
202
270
jobs = jobs ,
203
271
src_index = get_index (odb ),
204
- cache_odb = self . repo . cache . local ,
272
+ cache_odb = cache ,
205
273
verify = odb .verify ,
206
274
validate_status = self ._log_missing ,
207
275
)
@@ -223,17 +291,49 @@ def status(
223
291
is used.
224
292
odb: optional ODB to check status from. Overrides remote.
225
293
"""
294
+ from dvc_data .hashfile .status import CompareStatusResult
295
+
296
+ if odb is not None :
297
+ return self ._status (objs , jobs = jobs , odb = odb )
298
+ result = CompareStatusResult (set (), set (), set (), set ())
299
+ legacy_objs , default_objs = _split_legacy_hash_infos (objs )
300
+ if legacy_objs :
301
+ odb = self .get_remote_odb (remote , "status" , hash_name = "md5-dos2unix" )
302
+ assert odb .hash_name == "md5-dos2unix"
303
+ o , m , n , d = self ._status (legacy_objs , jobs = jobs , odb = odb )
304
+ result .ok .update (o )
305
+ result .missing .update (m )
306
+ result .new .update (n )
307
+ result .deleted .update (d )
308
+ if default_objs :
309
+ odb = self .get_remote_odb (remote , "status" )
310
+ o , m , n , d = self ._status (default_objs , jobs = jobs , odb = odb )
311
+ result .ok .update (o )
312
+ result .missing .update (m )
313
+ result .new .update (n )
314
+ result .deleted .update (d )
315
+ return result
316
+
317
+ def _status (
318
+ self ,
319
+ objs : Iterable ["HashInfo" ],
320
+ * ,
321
+ jobs : Optional [int ] = None ,
322
+ odb : "HashFileDB" ,
323
+ ):
226
324
from dvc_data .hashfile .status import compare_status
227
325
228
- if not odb :
229
- odb = self .get_remote_odb (remote , "status" )
326
+ if odb .hash_name == "md5-dos2unix" :
327
+ cache = self .repo .cache .legacy
328
+ else :
329
+ cache = self .repo .cache .local
230
330
return compare_status (
231
- self . repo . cache . local ,
331
+ cache ,
232
332
odb ,
233
333
objs ,
234
334
jobs = jobs ,
235
335
dest_index = get_index (odb ),
236
- cache_odb = self . repo . cache . local ,
336
+ cache_odb = cache ,
237
337
)
238
338
239
339
def get_url_for (self , remote , checksum ):
0 commit comments