19
19
from dvc .path_info import PathInfo
20
20
from dvc .repo import Repo
21
21
from dvc .repo .tree import RepoTree
22
+ from dvc .scm .base import CloneError
22
23
from dvc .scm .git import Git
23
24
from dvc .tree .local import LocalTree
24
25
from dvc .utils .fs import remove
@@ -31,7 +32,10 @@ def external_repo(url, rev=None, for_write=False):
31
32
logger .debug ("Creating external repo %s@%s" , url , rev )
32
33
path = _cached_clone (url , rev , for_write = for_write )
33
34
if not rev :
34
- rev = "HEAD"
35
+ # Local HEAD points to the tip of whatever branch we first cloned from
36
+ # (which may not be the default branch), use origin/HEAD here to get
37
+ # the tip of the default branch
38
+ rev = "refs/remotes/origin/HEAD"
35
39
try :
36
40
repo = ExternalRepo (path , url , rev , for_write = for_write )
37
41
except NotDvcRepoError :
@@ -59,7 +63,7 @@ def external_repo(url, rev=None, for_write=False):
59
63
60
64
def clean_repos ():
61
65
# Outside code should not see cache while we are removing
62
- paths = list ( CLONES .values ()) + list (CACHE_DIRS .values ())
66
+ paths = [ path for path , _ in CLONES .values ()] + list (CACHE_DIRS .values ())
63
67
CLONES .clear ()
64
68
CACHE_DIRS .clear ()
65
69
@@ -251,10 +255,10 @@ def _cached_clone(url, rev, for_write=False):
251
255
"""
252
256
# even if we have already cloned this repo, we may need to
253
257
# fetch/fast-forward to get specified rev
254
- clone_path = _clone_default_branch (url , rev )
258
+ clone_path , shallow = _clone_default_branch (url , rev , for_write = for_write )
255
259
256
260
if not for_write and (url ) in CLONES :
257
- return CLONES [url ]
261
+ return CLONES [url ][ 0 ]
258
262
259
263
# Copy to a new dir to keep the clone clean
260
264
repo_path = tempfile .mkdtemp ("dvc-erepo" )
@@ -265,36 +269,73 @@ def _cached_clone(url, rev, for_write=False):
265
269
if for_write :
266
270
_git_checkout (repo_path , rev )
267
271
else :
268
- CLONES [url ] = repo_path
272
+ CLONES [url ] = ( repo_path , shallow )
269
273
return repo_path
270
274
271
275
272
276
@wrap_with(threading.Lock())
def _clone_default_branch(url, rev, for_write=False):
    """Return a clean cached clone of ``url``, creating or refreshing it.

    An existing clone is left untouched when ``rev`` is a SHA it already
    contains. Otherwise a shallow clone is converted into a full one, and a
    full clone is updated with ``git pull``. When no clone is cached yet, a
    shallow clone of ``rev`` is attempted first (only for read-only use with
    a non-SHA rev), falling back to a regular clone.

    Returns:
        A ``(clone_path, shallow)`` tuple; the same pair is recorded in
        ``CLONES[url]`` whenever a clone is created or unshallowed.
    """
    cached_path, is_shallow = CLONES.get(url, (None, False))

    repo = None
    try:
        if not cached_path:
            logger.debug("erepo: git clone '%s' to a temporary dir", url)
            cached_path = tempfile.mkdtemp("dvc-clone")

            # A tag or branch name can be fetched on its own, so try the
            # cheaper shallow clone before resorting to a full one.
            if not for_write and rev and not Git.is_sha(rev):
                try:
                    repo = Git.clone(url, cached_path, shallow_branch=rev)
                except CloneError:
                    pass
                else:
                    is_shallow = True
                    logger.debug(
                        "erepo: using shallow clone for branch '%s'", rev
                    )

            if not repo:
                repo = Git.clone(url, cached_path)
                is_shallow = False

            CLONES[url] = (cached_path, is_shallow)
        else:
            repo = Git(cached_path)

            # A SHA we already have cannot change; branches and tags may
            # have moved upstream, so anything else needs a refresh.
            if Git.is_sha(rev) and repo.has_rev(rev):
                pass
            elif is_shallow:
                # A rev is missing from a shallow clone: fall back to a full
                # (unshallowed) clone. Fetching specific SHAs is only
                # available in certain git versions, so once several revs of
                # the same URL are needed it is easier and safer to work
                # with a full clone.
                logger.debug("erepo: unshallowing clone for '%s'", url)
                _unshallow(repo)
                is_shallow = False
                CLONES[url] = (cached_path, is_shallow)
            else:
                logger.debug("erepo: git pull '%s'", url)
                repo.pull()
    finally:
        if repo:
            repo.close()

    return cached_path, is_shallow
326
+
327
+
328
def _unshallow(git):
    """Turn a shallow clone into a full one by pulling with ``unshallow``.

    If HEAD is detached (i.e. we shallow cloned a tag), a local branch
    tracking the remote's default branch is created and checked out first,
    and the pull then runs on that branch.

    Args:
        git: SCM wrapper exposing the underlying repository as ``git.repo``
            and a ``pull(unshallow=...)`` method.
    """
    if git.repo.head.is_detached:
        # If this is a detached head (i.e. we shallow cloned a tag) switch to
        # the default branch
        origin_refs = git.repo.remotes["origin"].refs
        ref = origin_refs["HEAD"].reference
        # ref.name looks like "<remote>/<branch>". Drop only the remote
        # component so that branch names containing "/" (for example
        # "release/1.0") are kept intact instead of being cut down to their
        # last path segment.
        _, _, branch_name = ref.name.partition("/")
        branch = git.repo.create_head(branch_name, ref)
        branch.set_tracking_branch(ref)
        branch.checkout()
    git.pull(unshallow=True)
298
339
299
340
300
341
def _git_checkout (repo_path , rev ):
0 commit comments