-
Notifications
You must be signed in to change notification settings - Fork 1.2k
gdrive: add open #3916
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
gdrive: add open #3916
Changes from all commits
0c4e859
78dc1b9
3cfdcc3
2a140c9
405684d
802577f
61c1125
a6f93c0
7285161
5aee39a
ba52fc1
a84311a
8125a65
8ae3016
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,45 @@ | ||
import io | ||
|
||
|
||
class IterStream(io.RawIOBase): | ||
"""Wraps an iterator yielding bytes as a file object""" | ||
|
||
def __init__(self, iterator): | ||
self.iterator = iterator | ||
self.leftover = None | ||
|
||
def readable(self): | ||
return True | ||
|
||
# Python 3 requires only .readinto() method, it still uses other ones | ||
# under some circumstances and falls back if those are absent. Since | ||
# iterator already constructs byte strings for us, .readinto() is not the | ||
# most optimal, so we provide .read1() too. | ||
|
||
def readinto(self, b): | ||
try: | ||
n = len(b) # We're supposed to return at most this much | ||
chunk = self.leftover or next(self.iterator) | ||
output, self.leftover = chunk[:n], chunk[n:] | ||
|
||
n_out = len(output) | ||
b[:n_out] = output | ||
return n_out | ||
except StopIteration: | ||
return 0 # indicate EOF | ||
|
||
readinto1 = readinto | ||
|
||
def read1(self, n=-1): | ||
try: | ||
chunk = self.leftover or next(self.iterator) | ||
except StopIteration: | ||
return b"" | ||
|
||
# Return an arbitrary number or bytes | ||
if n <= 0: | ||
self.leftover = None | ||
return chunk | ||
|
||
output, self.leftover = chunk[:n], chunk[n:] | ||
return output |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,9 +8,19 @@ | |
from dvc.main import main | ||
from dvc.path_info import URLInfo | ||
from dvc.utils.fs import remove | ||
from tests.remotes import GCP, HDFS, OSS, S3, SSH, Azure, Local | ||
|
||
remote_params = [S3, GCP, Azure, OSS, SSH, HDFS] | ||
from tests.remotes import ( | ||
GCP, | ||
HDFS, | ||
OSS, | ||
S3, | ||
SSH, | ||
TEST_REMOTE, | ||
Azure, | ||
GDrive, | ||
Local, | ||
) | ||
|
||
remote_params = [S3, GCP, Azure, GDrive, OSS, SSH, HDFS] | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. we have a lot of tests for functions besides open here - do we expect all of them to pass? we'll need to implement more stuff then There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @shcheklein All the tests should work, there is nothing new required from GdriveRemotes. There might be some issues with Gdrive itself though (creds or something), that differentiates it from other remotes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Even it works out of the box, some things do not make much sense - e.g. what will Re credentials - should be fine (we use an env var to set them, so I would expect external repo to pick it up as well). As I mentioned though, GDrive remote requires some special setup (e.g. it does not create a directory for you). There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @shcheklein is this comment outdated since rebasing on master? Or does There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @casperdcl Could do that, sure. Will require some work to make it do that without first passing it a dvc instance. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. well the only other way I see is to rework the current pytest fixture (currently it hides the There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @casperdcl Both work. Choose the easiest one, we'll be revisiting this later anyway. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. tried a simple change along the same lines as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @casperdcl does it work locally with your creds? |
||
all_remote_params = [Local] + remote_params | ||
|
||
|
||
|
@@ -25,9 +35,48 @@ def run_dvc(*argv): | |
assert main(argv) == 0 | ||
|
||
|
||
def ensure_dir(dvc, url): | ||
if url.startswith("gdrive://"): | ||
GDrive.create_dir(dvc, url) | ||
run_dvc( | ||
"remote", | ||
"modify", | ||
TEST_REMOTE, | ||
"gdrive_service_account_email", | ||
"test", | ||
) | ||
run_dvc( | ||
"remote", | ||
"modify", | ||
TEST_REMOTE, | ||
"gdrive_service_account_p12_file_path", | ||
"test.p12", | ||
) | ||
run_dvc( | ||
"remote", | ||
"modify", | ||
TEST_REMOTE, | ||
"gdrive_use_service_account", | ||
"True", | ||
) | ||
|
||
|
||
def ensure_dir_scm(dvc, url): | ||
if url.startswith("gdrive://"): | ||
GDrive.create_dir(dvc, url) | ||
with dvc.config.edit() as conf: | ||
conf["remote"][TEST_REMOTE].update( | ||
gdrive_service_account_email="test", | ||
gdrive_service_account_p12_file_path="test.p12", | ||
gdrive_use_service_account=True, | ||
) | ||
dvc.scm.add(dvc.config.files["repo"]) | ||
dvc.scm.commit(f"modify '{TEST_REMOTE}' remote") | ||
|
||
|
||
@pytest.mark.parametrize("remote_url", remote_params, indirect=True) | ||
def test_get_url(tmp_dir, dvc, remote_url): | ||
run_dvc("remote", "add", "-d", "upstream", remote_url) | ||
run_dvc("remote", "add", "-d", TEST_REMOTE, remote_url) | ||
tmp_dir.dvc_gen("foo", "foo") | ||
|
||
expected_url = URLInfo(remote_url) / "ac/bd18db4cc2f85cedef654fccc4a4d8" | ||
|
@@ -58,7 +107,8 @@ def test_get_url_requires_dvc(tmp_dir, scm): | |
|
||
@pytest.mark.parametrize("remote_url", all_remote_params, indirect=True) | ||
def test_open(remote_url, tmp_dir, dvc): | ||
run_dvc("remote", "add", "-d", "upstream", remote_url) | ||
run_dvc("remote", "add", "-d", TEST_REMOTE, remote_url) | ||
ensure_dir(dvc, remote_url) | ||
tmp_dir.dvc_gen("foo", "foo-text") | ||
run_dvc("push") | ||
|
||
|
@@ -72,6 +122,7 @@ def test_open(remote_url, tmp_dir, dvc): | |
@pytest.mark.parametrize("remote_url", all_remote_params, indirect=True) | ||
def test_open_external(remote_url, erepo_dir, setup_remote): | ||
setup_remote(erepo_dir.dvc, url=remote_url) | ||
ensure_dir_scm(erepo_dir.dvc, remote_url) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. @efiop I've added
|
||
|
||
with erepo_dir.chdir(): | ||
erepo_dir.dvc_gen("version", "master", commit="add version") | ||
|
@@ -95,7 +146,8 @@ def test_open_external(remote_url, erepo_dir, setup_remote): | |
|
||
@pytest.mark.parametrize("remote_url", all_remote_params, indirect=True) | ||
def test_open_granular(remote_url, tmp_dir, dvc): | ||
run_dvc("remote", "add", "-d", "upstream", remote_url) | ||
run_dvc("remote", "add", "-d", TEST_REMOTE, remote_url) | ||
ensure_dir(dvc, remote_url) | ||
tmp_dir.dvc_gen({"dir": {"foo": "foo-text"}}) | ||
run_dvc("push") | ||
|
||
|
@@ -109,7 +161,8 @@ def test_open_granular(remote_url, tmp_dir, dvc): | |
@pytest.mark.parametrize("remote_url", all_remote_params, indirect=True) | ||
def test_missing(remote_url, tmp_dir, dvc): | ||
tmp_dir.dvc_gen("foo", "foo") | ||
run_dvc("remote", "add", "-d", "upstream", remote_url) | ||
run_dvc("remote", "add", "-d", TEST_REMOTE, remote_url) | ||
ensure_dir(dvc, remote_url) | ||
|
||
# Remove cache to make foo missing | ||
remove(dvc.cache.local.cache_dir) | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
just to double check - do we properly pass
close
down to the PyDrive?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
pydrive2 doesn't expose any
close()
methods anywhere. I think the underlyinghttplib2
closes connections before raising errors; so no special handling needed.There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
it should close if just
return
from the block ... we should check the logic carefully hereThere was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
upstream issue: iterative/PyDrive2#44