Skip to content

Commit 4ccbc95

Browse files
committed
gdrive: add open
Fixes iterative#3408 Related iterative#2865 Fixes iterative#3897
1 parent 516d82e commit 4ccbc95

File tree

4 files changed

+68
-45
lines changed

4 files changed

+68
-45
lines changed

dvc/remote/gdrive.py

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,11 @@
1+
import io
12
import logging
23
import os
34
import posixpath
45
import re
56
import threading
67
from collections import defaultdict
8+
from contextlib import contextmanager
79
from urllib.parse import urlparse
810

911
from funcy import cached_property, retry, wrap_prop, wrap_with
@@ -15,6 +17,7 @@
1517
from dvc.remote.base import BaseRemote
1618
from dvc.scheme import Schemes
1719
from dvc.utils import format_link, tmp_fname
20+
from dvc.utils.stream import IterStream
1821

1922
logger = logging.getLogger(__name__)
2023
FOLDER_MIME_TYPE = "application/vnd.google-apps.folder"
@@ -393,6 +396,23 @@ def _gdrive_download_file(
393396
) as pbar:
394397
gdrive_file.GetContentFile(to_file, callback=pbar.update_to)
395398

399+
@contextmanager
def open(self, path_info, mode="r", encoding=None):
    """Open a file stored on Google Drive as a read-only stream.

    Args:
        path_info: location of the file; resolved to a Drive item id via
            ``self._get_item_id``.
        mode: one of ``"r"``, ``"rt"`` or ``"rb"``. Anything other than
            ``"rb"`` yields a text stream.
        encoding: passed to ``io.TextIOWrapper`` for the text modes;
            unused for ``"rb"``.

    Yields:
        An ``IterStream`` over the file content for ``"rb"``, otherwise an
        ``io.TextIOWrapper`` around it.

    Raises:
        AssertionError: if ``mode`` is not one of the supported read modes.
    """
    assert mode in {"r", "rt", "rb"}

    # A generator function runs none of its body when called, so decorating
    # ``open`` itself with the retry wrapper cannot catch anything: the Drive
    # calls only execute later, when the context manager is entered. Retry
    # the eager part instead.
    # NOTE(review): assumes _gdrive_retry is a plain call-level retry
    # decorator usable on any callable (it is applied the same way to the
    # other _gdrive_* methods) -- confirm.
    @_gdrive_retry
    def _fetch_content_buffer():
        item_id = self._get_item_id(path_info)
        # it does not create a file on the remote
        gdrive_file = self._drive.CreateFile({"id": item_id})
        return gdrive_file.GetContentIOBuffer()

    stream = IterStream(iter(_fetch_content_buffer()))
    if mode != "rb":
        stream = io.TextIOWrapper(stream, encoding=encoding)

    try:
        yield stream
    finally:
        # Release the stream when the ``with`` block exits, including on
        # error; closing the text wrapper also closes the wrapped IterStream.
        stream.close()
415+
396416
@_gdrive_retry
397417
def _gdrive_delete_file(self, item_id):
398418
from pydrive2.files import ApiRequestError

dvc/utils/http.py

Lines changed: 2 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
import io
22
from contextlib import contextmanager
33

4+
from dvc.utils.stream import IterStream
5+
46

57
@contextmanager
68
def open_url(url, mode="r", encoding=None):
@@ -61,47 +63,3 @@ def gen(response):
6163
finally:
6264
# Ensure connection is closed
6365
it.close()
64-
65-
66-
class IterStream(io.RawIOBase):
67-
"""Wraps an iterator yielding bytes as a file object"""
68-
69-
def __init__(self, iterator):
70-
self.iterator = iterator
71-
self.leftover = None
72-
73-
def readable(self):
74-
return True
75-
76-
# Python 3 requires only .readinto() method, it still uses other ones
77-
# under some circumstances and falls back if those are absent. Since
78-
# iterator already constructs byte strings for us, .readinto() is not the
79-
# most optimal, so we provide .read1() too.
80-
81-
def readinto(self, b):
82-
try:
83-
n = len(b) # We're supposed to return at most this much
84-
chunk = self.leftover or next(self.iterator)
85-
output, self.leftover = chunk[:n], chunk[n:]
86-
87-
n_out = len(output)
88-
b[:n_out] = output
89-
return n_out
90-
except StopIteration:
91-
return 0 # indicate EOF
92-
93-
readinto1 = readinto
94-
95-
def read1(self, n=-1):
96-
try:
97-
chunk = self.leftover or next(self.iterator)
98-
except StopIteration:
99-
return b""
100-
101-
# Return an arbitrary number of bytes
102-
if n <= 0:
103-
self.leftover = None
104-
return chunk
105-
106-
output, self.leftover = chunk[:n], chunk[n:]
107-
return output

dvc/utils/stream.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
import io
2+
3+
4+
class IterStream(io.RawIOBase):
    """Wraps an iterator yielding bytes as a file object.

    The stream is read-only. Bytes that did not fit into a caller's request
    are kept in ``self.leftover`` and served first on the next read.
    """

    def __init__(self, iterator):
        """Store ``iterator``, an iterator that yields ``bytes`` chunks."""
        super().__init__()
        self.iterator = iterator
        self.leftover = None

    def readable(self):
        """Report that the stream supports reading."""
        return True

    def _next_chunk(self):
        """Return the pending leftover or the next non-empty chunk.

        Returns ``b""`` only once the iterator is exhausted. Empty chunks
        produced by the iterator are skipped: handing one to the caller
        would look like end-of-file and silently truncate the stream.
        """
        chunk, self.leftover = self.leftover, None
        while not chunk:
            try:
                chunk = next(self.iterator)
            except StopIteration:
                return b""
        return chunk

    # Python 3 requires only the .readinto() method, but it still uses other
    # ones under some circumstances and falls back if those are absent. Since
    # the iterator already constructs byte strings for us, .readinto() is not
    # optimal, so we provide .read1() too.

    def readinto(self, b):
        """Copy up to ``len(b)`` bytes into ``b``; return the count (0 at EOF)."""
        n = len(b)  # We're supposed to return at most this much
        chunk = self._next_chunk()
        output, self.leftover = chunk[:n], chunk[n:]

        n_out = len(output)
        b[:n_out] = output
        return n_out

    readinto1 = readinto

    def read1(self, n=-1):
        """Return up to ``n`` bytes from a single chunk; ``b""`` at EOF.

        With ``n`` of ``None`` or ``n <= 0`` the whole current chunk is
        returned.
        """
        chunk = self._next_chunk()

        # Return an arbitrary number of bytes
        if n is None or n <= 0:
            return chunk

        output, self.leftover = chunk[:n], chunk[n:]
        return output

setup.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def run(self):
8787
# Extra dependencies for remote integrations
8888

8989
gs = ["google-cloud-storage==1.19.0"]
90-
gdrive = ["pydrive2>=1.4.13"]
90+
gdrive = ["pydrive2>=1.4.15"]
9191
s3 = ["boto3>=1.9.201"]
9292
azure = ["azure-storage-blob==2.1.0"]
9393
oss = ["oss2==2.6.1"]

0 commit comments

Comments
 (0)