From fd5bfb5e79edbc22cf509a94076283d02bbf9dea Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Thu, 29 Mar 2018 15:05:07 -0400 Subject: [PATCH 01/83] basic implementation working --- zarr/storage.py | 154 +++++++++++++++++++++++++++++++++++++ zarr/tests/test_core.py | 27 ++++++- zarr/tests/test_storage.py | 32 +++++++- 3 files changed, 210 insertions(+), 3 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 39a497d08b..a73e0ed032 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1883,3 +1883,157 @@ def __delitem__(self, key): with self._mutex: self._invalidate_keys() self._invalidate_value(key) + + +# utility functions for object stores + + +def _strip_prefix_from_path(path, prefix): + # normalized things will not have any leading or trailing slashes + path_norm = normalize_storage_path(path) + prefix_norm = normalize_storage_path(prefix) + if path_norm.startswith(prefix_norm): + return path_norm[(len(prefix_norm)+1):] + else: + return path + + +def _append_path_to_prefix(path, prefix): + return '/'.join([normalize_storage_path(prefix), + normalize_storage_path(path)]) + + +def atexit_rmgcspath(bucket, path): + from google.cloud import storage + client = storage.Client() + bucket = client.get_bucket(bucket) + bucket.delete_blobs(bucket.list_blobs(prefix=path)) + print('deleted blobs') + + +class GCSStore(MutableMapping): + + def __init__(self, bucket_name, prefix=None, client_kwargs={}): + + self.bucket_name = bucket_name + self.prefix = normalize_storage_path(prefix) + self.client_kwargs = {} + self.initialize_bucket() + + def initialize_bucket(self): + from google.cloud import storage + # run `gcloud auth application-default login` from shell + client = storage.Client(**self.client_kwargs) + self.bucket = client.get_bucket(self.bucket_name) + # need to properly handle excpetions + import google.api_core.exceptions as exceptions + self.exceptions = exceptions + + # needed for pickling + def __getstate__(self): + state = self.__dict__.copy() 
+ del state['bucket'] + del state['exceptions'] + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self.initialize_bucket() + + def __enter__(self): + return self + + def __exit__(self, *args): + pass + + def __getitem__(self, key): + blob_name = _append_path_to_prefix(key, self.prefix) + blob = self.bucket.get_blob(blob_name) + if blob: + return blob.download_as_string() + else: + raise KeyError('Blob %s not found' % blob_name) + + def __setitem__(self, key, value): + blob_name = _append_path_to_prefix(key, self.prefix) + blob = self.bucket.blob(blob_name) + blob.upload_from_string(value) + + def __delitem__(self, key): + blob_name = _append_path_to_prefix(key, self.prefix) + try: + self.bucket.delete_blob(blob_name) + except self.exceptions.NotFound as er: + raise KeyError(er.message) + + def __contains__(self, key): + blob_name = _append_path_to_prefix(key, self.prefix) + return self.bucket.get_blob(blob_name) is not None + + def __eq__(self, other): + return ( + isinstance(other, GCSMap) and + self.bucket_name == other.bucket_name and + self.prefix == other.prefix + ) + + def __iter__(self): + blobs = self.bucket.list_blobs(prefix=self.prefix) + for blob in blobs: + yield _strip_prefix_from_path(blob.name, self.prefix) + + def __len__(self): + iterator = self.bucket.list_blobs(prefix=self.prefix) + return len(list(iterator)) + + def list_gcs_directory_blobs(self, path): + """Return list of all blobs *directly* under a gcs prefix.""" + prefix = normalize_storage_path(path) + '/' + return [blob.name for blob in + self.bucket.list_blobs(prefix=prefix, delimiter='/')] + + # from https://github.com/GoogleCloudPlatform/google-cloud-python/issues/920#issuecomment-326125992 + def list_gcs_subdirectories(self, path): + """Return set of all "subdirectories" from a gcs prefix.""" + prefix = normalize_storage_path(path) + '/' + iterator = self.bucket.list_blobs(prefix=prefix, delimiter='/') + prefixes = set() + for page in iterator.pages: + 
prefixes.update(page.prefixes) + # need to strip trailing slash to be consistent with os.listdir + return [path[:-1] for path in prefixes] + + def list_gcs_directory(self, prefix, strip_prefix=True): + """Return a list of all blobs and subdirectories from a gcs prefix.""" + items = set() + items.update(self.list_gcs_directory_blobs(prefix)) + items.update(self.list_gcs_subdirectories(prefix)) + items = list(items) + if strip_prefix: + items = [_strip_prefix_from_path(path, prefix) for path in items] + return items + + def dir_path(self, path=None): + dir_path = _append_path_to_prefix(path, self.prefix) + return dir_path + + def listdir(self, path=None): + dir_path = self.dir_path(path) + return sorted(self.list_gcs_directory(dir_path, strip_prefix=True)) + + def rename(self, src_path, dst_path): + raise NotImplementedErrror + + def rmdir(self, path=None): + dir_path = self.dir_path(path) + self.bucket.delete_blobs(self.bucket.list_blobs(prefix=dir_path)) + + def getsize(self, path=None): + dir_path = self.dir_path(path) + size = 0 + for blob in self.bucket.list_blobs(prefix=dir_path): + size += blob.size + return size + + def clear(self): + self.rmdir() diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 390f888287..6a02fd3821 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -7,7 +7,7 @@ import pickle import os import warnings - +import uuid import numpy as np from numpy.testing import assert_array_equal, assert_array_almost_equal @@ -16,7 +16,7 @@ from zarr.storage import (DirectoryStore, init_array, init_group, NestedDirectoryStore, DBMStore, LMDBStore, atexit_rmtree, atexit_rmglob, - LRUStoreCache) + LRUStoreCache, GCSStore, atexit_rmgcspath) from zarr.core import Array from zarr.errors import PermissionError from zarr.compat import PY2, text_type, binary_type @@ -1698,3 +1698,26 @@ def create_array(read_only=False, **kwargs): init_array(store, **kwargs) return Array(store, read_only=read_only, cache_metadata=cache_metadata, 
cache_attrs=cache_attrs) + +try: + from google.cloud import storage as gcstorage + # cleanup function + +except ImportError: # pragma: no cover + gcstorage = None + + +@unittest.skipIf(gcstorage is None, 'google-cloud-storage is not installed') +class TestGCSArray(TestArray): + + def create_array(self, read_only=False, **kwargs): + bucket = 'zarr-test' + prefix = uuid.uuid4() + atexit.register(atexit_rmgcspath, bucket, prefix) + store = GCSStore(bucket, prefix) + cache_metadata = kwargs.pop('cache_metadata', True) + cache_attrs = kwargs.pop('cache_attrs', True) + kwargs.setdefault('compressor', Zlib(1)) + init_array(store, **kwargs) + return Array(store, read_only=read_only, cache_metadata=cache_metadata, + cache_attrs=cache_attrs) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index f68f8a6ed6..afa2d333b5 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -8,6 +8,7 @@ import array import shutil import os +import uuid import numpy as np @@ -19,7 +20,8 @@ DirectoryStore, ZipStore, init_group, group_meta_key, getsize, migrate_1to2, TempStore, atexit_rmtree, NestedDirectoryStore, default_compressor, DBMStore, - LMDBStore, atexit_rmglob, LRUStoreCache) + LMDBStore, atexit_rmglob, LRUStoreCache, GCSStore, + atexit_rmgcspath) from zarr.meta import (decode_array_metadata, encode_array_metadata, ZARR_FORMAT, decode_group_metadata, encode_group_metadata) from zarr.compat import PY2 @@ -1235,3 +1237,31 @@ def test_format_compatibility(): else: assert compressor.codec_id == z.compressor.codec_id assert compressor.get_config() == z.compressor.get_config() + + +try: + from google.cloud import storage as gcstorage + # cleanup function + +except ImportError: # pragma: no cover + gcstorage = None + + +@unittest.skipIf(gcstorage is None, 'google-cloud-storage is not installed') +class TestGCSStore(StoreTests, unittest.TestCase): + + def create_store(self): + # would need to be replaced with a dedicated test bucket + bucket = 
'zarr-test' + prefix = uuid.uuid4() + + print('registering') + atexit.register(atexit_rmgcspath, bucket, prefix) + store = GCSStore(bucket, prefix) + return store + + def test_context_manager(self): + with self.create_store() as store: + store['foo'] = b'bar' + store['baz'] = b'qux' + assert 2 == len(store) From a26752c018bc7ab97d3906c7b08f806f109cd83b Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Thu, 29 Mar 2018 16:20:25 -0400 Subject: [PATCH 02/83] docs and cleanup --- zarr/storage.py | 134 ++++++++++++++++++++++--------------- zarr/tests/test_core.py | 3 +- zarr/tests/test_storage.py | 2 - 3 files changed, 80 insertions(+), 59 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index a73e0ed032..8f0c7e8a21 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1908,10 +1908,29 @@ def atexit_rmgcspath(bucket, path): client = storage.Client() bucket = client.get_bucket(bucket) bucket.delete_blobs(bucket.list_blobs(prefix=path)) - print('deleted blobs') class GCSStore(MutableMapping): + """Storage class using a Google Cloud Storage (GCS) + + Parameters + ---------- + bucket_name : string + The name of the GCS bucket + prefix : string, optional + The prefix within the bucket (i.e. subdirectory) + client_kwargs : dict, optional + Extra options passed to ``google.cloud.storage.Client`` when connecting + to GCS + + Notes + ----- + In order to use this store, you must install the Google Cloud Storage + `Python Client Library `_. + You must also provide valid application credentials, either by setting the + ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable or via + `default credentials `_. 
+ """ def __init__(self, bucket_name, prefix=None, client_kwargs={}): @@ -1946,45 +1965,8 @@ def __enter__(self): def __exit__(self, *args): pass - def __getitem__(self, key): - blob_name = _append_path_to_prefix(key, self.prefix) - blob = self.bucket.get_blob(blob_name) - if blob: - return blob.download_as_string() - else: - raise KeyError('Blob %s not found' % blob_name) - - def __setitem__(self, key, value): - blob_name = _append_path_to_prefix(key, self.prefix) - blob = self.bucket.blob(blob_name) - blob.upload_from_string(value) - - def __delitem__(self, key): - blob_name = _append_path_to_prefix(key, self.prefix) - try: - self.bucket.delete_blob(blob_name) - except self.exceptions.NotFound as er: - raise KeyError(er.message) - - def __contains__(self, key): - blob_name = _append_path_to_prefix(key, self.prefix) - return self.bucket.get_blob(blob_name) is not None - - def __eq__(self, other): - return ( - isinstance(other, GCSMap) and - self.bucket_name == other.bucket_name and - self.prefix == other.prefix - ) - - def __iter__(self): - blobs = self.bucket.list_blobs(prefix=self.prefix) - for blob in blobs: - yield _strip_prefix_from_path(blob.name, self.prefix) - - def __len__(self): - iterator = self.bucket.list_blobs(prefix=self.prefix) - return len(list(iterator)) + def full_path(self, path=None): + return _append_path_to_prefix(path, self.prefix) def list_gcs_directory_blobs(self, path): """Return list of all blobs *directly* under a gcs prefix.""" @@ -1992,7 +1974,7 @@ def list_gcs_directory_blobs(self, path): return [blob.name for blob in self.bucket.list_blobs(prefix=prefix, delimiter='/')] - # from https://github.com/GoogleCloudPlatform/google-cloud-python/issues/920#issuecomment-326125992 + # from https://github.com/GoogleCloudPlatform/google-cloud-python/issues/920 def list_gcs_subdirectories(self, path): """Return set of all "subdirectories" from a gcs prefix.""" prefix = normalize_storage_path(path) + '/' @@ -2013,27 +1995,69 @@ def 
list_gcs_directory(self, prefix, strip_prefix=True): items = [_strip_prefix_from_path(path, prefix) for path in items] return items - def dir_path(self, path=None): - dir_path = _append_path_to_prefix(path, self.prefix) - return dir_path - def listdir(self, path=None): - dir_path = self.dir_path(path) + dir_path = self.full_path(path) return sorted(self.list_gcs_directory(dir_path, strip_prefix=True)) - def rename(self, src_path, dst_path): - raise NotImplementedErrror - def rmdir(self, path=None): - dir_path = self.dir_path(path) + # make sure it's a directory + dir_path = normalize_storage_path(self.full_path(path)) + '/' self.bucket.delete_blobs(self.bucket.list_blobs(prefix=dir_path)) def getsize(self, path=None): - dir_path = self.dir_path(path) - size = 0 - for blob in self.bucket.list_blobs(prefix=dir_path): - size += blob.size - return size + # this function should *not* be recursive + # a lot of slash trickery is required to make this work right + full_path = self.full_path(path) + blob = self.bucket.get_blob(full_path) + if blob is not None: + return blob.size + else: + dir_path = normalize_storage_path(full_path) + '/' + blobs = self.bucket.list_blobs(prefix=dir_path, delimiter='/') + size = 0 + for blob in blobs: + size += blob.size + return size def clear(self): self.rmdir() + + def __getitem__(self, key): + blob_name = self.full_path(key) + blob = self.bucket.get_blob(blob_name) + if blob: + return blob.download_as_string() + else: + raise KeyError('Blob %s not found' % blob_name) + + def __setitem__(self, key, value): + blob_name = self.full_path(key) + blob = self.bucket.blob(blob_name) + blob.upload_from_string(value) + + def __delitem__(self, key): + blob_name = self.full_path(key) + try: + self.bucket.delete_blob(blob_name) + except self.exceptions.NotFound as er: + raise KeyError(er.message) + + def __contains__(self, key): + blob_name = self.full_path(key) + return self.bucket.get_blob(blob_name) is not None + + def __eq__(self, other): + 
return ( + isinstance(other, GCSStore) and + self.bucket_name == other.bucket_name and + self.prefix == other.prefix + ) + + def __iter__(self): + blobs = self.bucket.list_blobs(prefix=self.prefix) + for blob in blobs: + yield _strip_prefix_from_path(blob.name, self.prefix) + + def __len__(self): + iterator = self.bucket.list_blobs(prefix=self.prefix) + return len(list(iterator)) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 6a02fd3821..23d64d1f2c 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1699,10 +1699,9 @@ def create_array(read_only=False, **kwargs): return Array(store, read_only=read_only, cache_metadata=cache_metadata, cache_attrs=cache_attrs) + try: from google.cloud import storage as gcstorage - # cleanup function - except ImportError: # pragma: no cover gcstorage = None diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index afa2d333b5..ffc19822a5 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1254,8 +1254,6 @@ def create_store(self): # would need to be replaced with a dedicated test bucket bucket = 'zarr-test' prefix = uuid.uuid4() - - print('registering') atexit.register(atexit_rmgcspath, bucket, prefix) store = GCSStore(bucket, prefix) return store From 23dd8f6f3effccd2903e871adbfcbedf7e1289c7 Mon Sep 17 00:00:00 2001 From: Ryan Abernathey Date: Fri, 30 Mar 2018 10:14:43 -0400 Subject: [PATCH 03/83] fixed client_kwargs bug --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 8f0c7e8a21..ac3e64558e 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1936,7 +1936,7 @@ def __init__(self, bucket_name, prefix=None, client_kwargs={}): self.bucket_name = bucket_name self.prefix = normalize_storage_path(prefix) - self.client_kwargs = {} + self.client_kwargs = client_kwargs self.initialize_bucket() def initialize_bucket(self): From dec75dd6680b04f5f51c35b38685990bdf9ae430 Mon Sep 17 00:00:00 
2001 From: Friedrich Knuth Date: Sun, 15 Apr 2018 13:01:43 -0400 Subject: [PATCH 04/83] Add ABSStore mutable mapping --- zarr/storage.py | 127 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) diff --git a/zarr/storage.py b/zarr/storage.py index ac3e64558e..bbfde16b1e 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2061,3 +2061,130 @@ def __iter__(self): def __len__(self): iterator = self.bucket.list_blobs(prefix=self.prefix) return len(list(iterator)) + +class ABSStore(MutableMapping): + +#import logging +#logger = logging.getLogger(__name__) + + def __init__(self, container_name, prefix, user, token): + + self.user = user + self.token = token + self.container_name = container_name + self.prefix = normalize_storage_path(prefix) + self.initialize_container() + + def initialize_container(self): + from azure import storage + self.client = storage.blob.BlockBlobService(self.user, self.token) + # azure doesn't seem to be a way to initialize a container as google goes with get_bucket(). + # client needs to be used in functions and container name needs to be passed on. + # could get rid of this function and consolidate. + + # needed for pickling + def __getstate__(self): + state = self.__dict__.copy() + del state['container'] + return state + + def __setstate__(self, state): + self.__dict__.update(state) + self.initialize_container() + + + def __getitem__(self, key): + #logger.debug('__getitem__(%s)' % key) # not sure what logger returns. need to test live and adapt. 
+ blob_name = '/'.join([self.prefix, key]) + blob = self.client.get_blob_to_text(self.container_name, blob_name) + if blob: + return blob + else: + raise KeyError('Blob %s not found' % blob_name) + + def __setitem__(self, key, value): + raise NotImplementedError + + def __delitem__(self, key): + raise NotImplementedError + + def __contains__(self, key): + #logger.debug('__contains__(%s)' % key) + blob_name = '/'.join([self.container_name, key]) + return self.client.get_blob_to_text(blob_name) is not None + + def __eq__(self, other): + return ( + isinstance(other, ABSMap) and + self.container_name == other.container_name and + self.prefix == other.prefix + ) + + def keys(self): + raise NotImplementedError + + def __iter__(self): + raise NotImplementedError + + def __len__(self): + raise NotImplementedError + + def __contains__(self, key): + #logger.debug('__contains__(%s)' % key) + blob_name = '/'.join([self.prefix, key]) + return self.client.get_blob_to_text(blob_name) is not None + + def list_abs_directory_blobs(self, prefix): + """Return list of all blobs under a abs prefix.""" + return [blob.name for blob in + self.client.list_blobs(prefix=prefix)] + + def list_abs_subdirectories(self, prefix): + """Return set of all "subdirectories" from a abs prefix.""" + iterator = self.client.list_blobs(prefix=prefix, delimiter='/') + + # here comes a hack. 
azure list_blobs() doesn't seems to have iterator.pages + + return set([blob.name.rsplit('/',1)[:-1][0] for blob in iterator if '/' in blob.name]) + + def list_abs_directory(self, prefix, strip_prefix=True): + """Return a list of all blobs and subdirectories from a gcs prefix.""" + items = set() + items.update(self.list_abs_directory_blobs(prefix)) + items.update(self.list_abs_subdirectories(prefix)) + items = list(items) + if strip_prefix: + items = [_strip_prefix_from_path(path, prefix) for path in items] + return items + + def dir_path(self, path=None): + store_path = normalize_storage_path(path) + # prefix is normalized to not have a trailing slash + dir_path = self.prefix + if store_path: + dir_path = '/'.join(dir_path, store_path) + else: + dir_path += '/' + return dir_path + + def listdir(self, path=None): + #logger.debug('listdir(%s)' % path) + dir_path = self.dir_path(path) + return sorted(self.list_abs_directory(dir_path, strip_prefix=True)) + + def rename(self, src_path, dst_path): + raise NotImplementedErrror + + def rmdir(self, path=None): + raise NotImplementedErrror + + def getsize(self, path=None): + #logger.debug('getsize %s' % path) + dir_path = self.dir_path(path) + size = 0 + for blob in self.client.list_blobs(prefix=dir_path): + size += blob.properties.content_length # from https://stackoverflow.com/questions/47694592/get-container-sizes-in-azure-blob-storage-using-python + return size + + def clear(self): + raise NotImplementedError From 13c207751b28b01e46054d1b1c2dcdd8fbb5aa59 Mon Sep 17 00:00:00 2001 From: Friedrich Knuth Date: Sun, 15 Apr 2018 13:21:25 -0400 Subject: [PATCH 05/83] Fix import syntax --- zarr/storage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index bbfde16b1e..7e6b92afbb 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2076,8 +2076,8 @@ def __init__(self, container_name, prefix, user, token): self.initialize_container() def initialize_container(self): - 
from azure import storage - self.client = storage.blob.BlockBlobService(self.user, self.token) + from azure.storage.blob import BlockBlobService + self.client = BlockBlobService(self.user, self.token) # azure doesn't seem to be a way to initialize a container as google goes with get_bucket(). # client needs to be used in functions and container name needs to be passed on. # could get rid of this function and consolidate. From 86603fae80650a5596e3751bcbb3b380937613cb Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Mon, 16 Apr 2018 09:52:41 -0400 Subject: [PATCH 06/83] Get open_zarr() working --- zarr/storage.py | 38 ++++++++++++-------------------------- 1 file changed, 12 insertions(+), 26 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 7e6b92afbb..7b1cb222e7 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2064,9 +2064,6 @@ def __len__(self): class ABSStore(MutableMapping): -#import logging -#logger = logging.getLogger(__name__) - def __init__(self, container_name, prefix, user, token): self.user = user @@ -2076,6 +2073,7 @@ def __init__(self, container_name, prefix, user, token): self.initialize_container() def initialize_container(self): + from azure.storage.blob import BlockBlobService self.client = BlockBlobService(self.user, self.token) # azure doesn't seem to be a way to initialize a container as google goes with get_bucket(). @@ -2092,13 +2090,11 @@ def __setstate__(self, state): self.__dict__.update(state) self.initialize_container() - def __getitem__(self, key): - #logger.debug('__getitem__(%s)' % key) # not sure what logger returns. need to test live and adapt. 
blob_name = '/'.join([self.prefix, key]) - blob = self.client.get_blob_to_text(self.container_name, blob_name) + blob = self.client.get_blob_to_bytes(self.container_name, blob_name) if blob: - return blob + return blob.content else: raise KeyError('Blob %s not found' % blob_name) @@ -2108,14 +2104,9 @@ def __setitem__(self, key, value): def __delitem__(self, key): raise NotImplementedError - def __contains__(self, key): - #logger.debug('__contains__(%s)' % key) - blob_name = '/'.join([self.container_name, key]) - return self.client.get_blob_to_text(blob_name) is not None - def __eq__(self, other): return ( - isinstance(other, ABSMap) and + isinstance(other, ABSStore) and self.container_name == other.container_name and self.prefix == other.prefix ) @@ -2130,25 +2121,22 @@ def __len__(self): raise NotImplementedError def __contains__(self, key): - #logger.debug('__contains__(%s)' % key) blob_name = '/'.join([self.prefix, key]) - return self.client.get_blob_to_text(blob_name) is not None + try: + return self.client.get_blob_to_text(self.container_name, blob_name) + except: + return None def list_abs_directory_blobs(self, prefix): - """Return list of all blobs under a abs prefix.""" - return [blob.name for blob in - self.client.list_blobs(prefix=prefix)] + """Return list of all blobs under an abs prefix.""" + return [blob.name for blob in self.client.list_blobs(self.container_name)] def list_abs_subdirectories(self, prefix): """Return set of all "subdirectories" from a abs prefix.""" - iterator = self.client.list_blobs(prefix=prefix, delimiter='/') - - # here comes a hack. 
azure list_blobs() doesn't seems to have iterator.pages - - return set([blob.name.rsplit('/',1)[:-1][0] for blob in iterator if '/' in blob.name]) + return list(set([blob.name.rsplit('/', 1)[0] for blob in self.client.list_blobs(self.container_name) if '/' in blob.name])) def list_abs_directory(self, prefix, strip_prefix=True): - """Return a list of all blobs and subdirectories from a gcs prefix.""" + """Return a list of all blobs and subdirectories from an abs prefix.""" items = set() items.update(self.list_abs_directory_blobs(prefix)) items.update(self.list_abs_subdirectories(prefix)) @@ -2168,7 +2156,6 @@ def dir_path(self, path=None): return dir_path def listdir(self, path=None): - #logger.debug('listdir(%s)' % path) dir_path = self.dir_path(path) return sorted(self.list_abs_directory(dir_path, strip_prefix=True)) @@ -2179,7 +2166,6 @@ def rmdir(self, path=None): raise NotImplementedErrror def getsize(self, path=None): - #logger.debug('getsize %s' % path) dir_path = self.dir_path(path) size = 0 for blob in self.client.list_blobs(prefix=dir_path): From 4b999baa4e140871de9c18907e5763062cbba7d8 Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Mon, 16 Apr 2018 13:35:48 -0400 Subject: [PATCH 07/83] Change account variable names --- zarr/storage.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 7b1cb222e7..e95b1629bd 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2064,10 +2064,9 @@ def __len__(self): class ABSStore(MutableMapping): - def __init__(self, container_name, prefix, user, token): - - self.user = user - self.token = token + def __init__(self, container_name, prefix, account_name, account_key): + self.account_name = account_name + self.account_key = account_key self.container_name = container_name self.prefix = normalize_storage_path(prefix) self.initialize_container() @@ -2075,10 +2074,7 @@ def __init__(self, container_name, prefix, user, token): def initialize_container(self): 
from azure.storage.blob import BlockBlobService - self.client = BlockBlobService(self.user, self.token) - # azure doesn't seem to be a way to initialize a container as google goes with get_bucket(). - # client needs to be used in functions and container name needs to be passed on. - # could get rid of this function and consolidate. + self.client = BlockBlobService(self.account_name, self.account_key) # needed for pickling def __getstate__(self): From 677ec1c726cd486bde3cabe1293e6b740c9266f1 Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Mon, 16 Apr 2018 15:11:30 -0400 Subject: [PATCH 08/83] Fix client.exists() logging issue --- zarr/storage.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index e95b1629bd..9867111733 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2072,9 +2072,11 @@ def __init__(self, container_name, prefix, account_name, account_key): self.initialize_container() def initialize_container(self): - from azure.storage.blob import BlockBlobService self.client = BlockBlobService(self.account_name, self.account_key) + # change logging level to deal with https://github.com/Azure/azure-storage-python/issues/437 + import logging + logging.basicConfig(level=logging.CRITICAL) # needed for pickling def __getstate__(self): @@ -2117,11 +2119,12 @@ def __len__(self): raise NotImplementedError def __contains__(self, key): + # this is where the logging error occurs. 
not sure why we are looking for a .zarray below every blob blob_name = '/'.join([self.prefix, key]) - try: - return self.client.get_blob_to_text(self.container_name, blob_name) - except: - return None + if self.client.exists(self.container_name, blob_name): + return True + else: + return False def list_abs_directory_blobs(self, prefix): """Return list of all blobs under an abs prefix.""" From d9be9ba9a334d50d8321580abbd6f5d14360b78d Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Mon, 16 Apr 2018 15:38:32 -0400 Subject: [PATCH 09/83] Minor comment changes --- zarr/storage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 9867111733..15a3412e0f 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2127,11 +2127,11 @@ def __contains__(self, key): return False def list_abs_directory_blobs(self, prefix): - """Return list of all blobs under an abs prefix.""" + """Return list of all blobs from an abs prefix.""" return [blob.name for blob in self.client.list_blobs(self.container_name)] def list_abs_subdirectories(self, prefix): - """Return set of all "subdirectories" from a abs prefix.""" + """Return list of all "subdirectories" from an abs prefix.""" return list(set([blob.name.rsplit('/', 1)[0] for blob in self.client.list_blobs(self.container_name) if '/' in blob.name])) def list_abs_directory(self, prefix, strip_prefix=True): From 13b1ee8e4a666d14587748bef6e4b7d46a3fb27d Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Mon, 16 Apr 2018 15:59:01 -0400 Subject: [PATCH 10/83] Get to_zarr() working --- zarr/storage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 15a3412e0f..f36ec02139 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2097,7 +2097,8 @@ def __getitem__(self, key): raise KeyError('Blob %s not found' % blob_name) def __setitem__(self, key, value): - raise NotImplementedError + blob_name = '/'.join([self.prefix, key]) + 
self.client.create_blob_from_text(self.container_name, blob_name, value) def __delitem__(self, key): raise NotImplementedError From e5564c36252a3b5f475431807483a9289ef6bf00 Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Tue, 17 Apr 2018 05:12:34 -0400 Subject: [PATCH 11/83] Remove state['container'] delete --- zarr/storage.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index f36ec02139..c4741eb7fb 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2075,13 +2075,14 @@ def initialize_container(self): from azure.storage.blob import BlockBlobService self.client = BlockBlobService(self.account_name, self.account_key) # change logging level to deal with https://github.com/Azure/azure-storage-python/issues/437 + # it would be better to set up a logging filter that throws out just the + # error logged when calling exists(). import logging logging.basicConfig(level=logging.CRITICAL) # needed for pickling def __getstate__(self): state = self.__dict__.copy() - del state['container'] return state def __setstate__(self, state): From a85e5595a1648ba0479278d5e629f8758cac2f8a Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Tue, 17 Apr 2018 08:35:49 -0400 Subject: [PATCH 12/83] Implement rmdir --- zarr/storage.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index c4741eb7fb..f4d83bd937 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2089,6 +2089,15 @@ def __setstate__(self, state): self.__dict__.update(state) self.initialize_container() + def __enter__(self): + return self + + def __exit__(self, *args): + pass + + def full_path(self, path=None): + return _append_path_to_prefix(path, self.prefix) + def __getitem__(self, key): blob_name = '/'.join([self.prefix, key]) blob = self.client.get_blob_to_bytes(self.container_name, blob_name) @@ -2164,7 +2173,9 @@ def rename(self, src_path, dst_path): raise NotImplementedErrror def rmdir(self, path=None): - 
raise NotImplementedErrror + dir_path = normalize_storage_path(self.full_path(path)) + '/' + for blob in self.client.list_blobs(self.container_name, dir_path): + self.client.delete_blob(self.container_name, blob.name) def getsize(self, path=None): dir_path = self.dir_path(path) From 272d234caed82c0e72a6d1a8df23c1855989206b Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Thu, 2 Aug 2018 16:57:00 -0400 Subject: [PATCH 13/83] Add docstring for ABSStore --- zarr/storage.py | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/zarr/storage.py b/zarr/storage.py index f4d83bd937..c59d61c50d 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2063,6 +2063,24 @@ def __len__(self): return len(list(iterator)) class ABSStore(MutableMapping): + """Storage class using Azure Blob Storage (ABS) + + Parameters + ---------- + container_name : string + The name of the ABS container to use + prefix : string, optional + The prefix within the container (i.e. subdirectory) + account_name : string + The Azure blob storage account name + account_key : string + The Azure blob storage account acess key + + Notes + ----- + In order to use this store, you must install the Azure Blob Storage + `Python Client Library `_. 
+ """ def __init__(self, container_name, prefix, account_name, account_key): self.account_name = account_name From bb406a0e354d6026430f523d0192e5167c6dc9fb Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Thu, 2 Aug 2018 16:59:20 -0400 Subject: [PATCH 14/83] Remove GCSStore from this branch --- zarr/storage.py | 152 ------------------------------------------------ 1 file changed, 152 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index c59d61c50d..dffc73749f 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1910,158 +1910,6 @@ def atexit_rmgcspath(bucket, path): bucket.delete_blobs(bucket.list_blobs(prefix=path)) -class GCSStore(MutableMapping): - """Storage class using a Google Cloud Storage (GCS) - - Parameters - ---------- - bucket_name : string - The name of the GCS bucket - prefix : string, optional - The prefix within the bucket (i.e. subdirectory) - client_kwargs : dict, optional - Extra options passed to ``google.cloud.storage.Client`` when connecting - to GCS - - Notes - ----- - In order to use this store, you must install the Google Cloud Storage - `Python Client Library `_. - You must also provide valid application credentials, either by setting the - ``GOOGLE_APPLICATION_CREDENTIALS`` environment variable or via - `default credentials `_. 
- """ - - def __init__(self, bucket_name, prefix=None, client_kwargs={}): - - self.bucket_name = bucket_name - self.prefix = normalize_storage_path(prefix) - self.client_kwargs = client_kwargs - self.initialize_bucket() - - def initialize_bucket(self): - from google.cloud import storage - # run `gcloud auth application-default login` from shell - client = storage.Client(**self.client_kwargs) - self.bucket = client.get_bucket(self.bucket_name) - # need to properly handle excpetions - import google.api_core.exceptions as exceptions - self.exceptions = exceptions - - # needed for pickling - def __getstate__(self): - state = self.__dict__.copy() - del state['bucket'] - del state['exceptions'] - return state - - def __setstate__(self, state): - self.__dict__.update(state) - self.initialize_bucket() - - def __enter__(self): - return self - - def __exit__(self, *args): - pass - - def full_path(self, path=None): - return _append_path_to_prefix(path, self.prefix) - - def list_gcs_directory_blobs(self, path): - """Return list of all blobs *directly* under a gcs prefix.""" - prefix = normalize_storage_path(path) + '/' - return [blob.name for blob in - self.bucket.list_blobs(prefix=prefix, delimiter='/')] - - # from https://github.com/GoogleCloudPlatform/google-cloud-python/issues/920 - def list_gcs_subdirectories(self, path): - """Return set of all "subdirectories" from a gcs prefix.""" - prefix = normalize_storage_path(path) + '/' - iterator = self.bucket.list_blobs(prefix=prefix, delimiter='/') - prefixes = set() - for page in iterator.pages: - prefixes.update(page.prefixes) - # need to strip trailing slash to be consistent with os.listdir - return [path[:-1] for path in prefixes] - - def list_gcs_directory(self, prefix, strip_prefix=True): - """Return a list of all blobs and subdirectories from a gcs prefix.""" - items = set() - items.update(self.list_gcs_directory_blobs(prefix)) - items.update(self.list_gcs_subdirectories(prefix)) - items = list(items) - if strip_prefix: 
- items = [_strip_prefix_from_path(path, prefix) for path in items] - return items - - def listdir(self, path=None): - dir_path = self.full_path(path) - return sorted(self.list_gcs_directory(dir_path, strip_prefix=True)) - - def rmdir(self, path=None): - # make sure it's a directory - dir_path = normalize_storage_path(self.full_path(path)) + '/' - self.bucket.delete_blobs(self.bucket.list_blobs(prefix=dir_path)) - - def getsize(self, path=None): - # this function should *not* be recursive - # a lot of slash trickery is required to make this work right - full_path = self.full_path(path) - blob = self.bucket.get_blob(full_path) - if blob is not None: - return blob.size - else: - dir_path = normalize_storage_path(full_path) + '/' - blobs = self.bucket.list_blobs(prefix=dir_path, delimiter='/') - size = 0 - for blob in blobs: - size += blob.size - return size - - def clear(self): - self.rmdir() - - def __getitem__(self, key): - blob_name = self.full_path(key) - blob = self.bucket.get_blob(blob_name) - if blob: - return blob.download_as_string() - else: - raise KeyError('Blob %s not found' % blob_name) - - def __setitem__(self, key, value): - blob_name = self.full_path(key) - blob = self.bucket.blob(blob_name) - blob.upload_from_string(value) - - def __delitem__(self, key): - blob_name = self.full_path(key) - try: - self.bucket.delete_blob(blob_name) - except self.exceptions.NotFound as er: - raise KeyError(er.message) - - def __contains__(self, key): - blob_name = self.full_path(key) - return self.bucket.get_blob(blob_name) is not None - - def __eq__(self, other): - return ( - isinstance(other, GCSStore) and - self.bucket_name == other.bucket_name and - self.prefix == other.prefix - ) - - def __iter__(self): - blobs = self.bucket.list_blobs(prefix=self.prefix) - for blob in blobs: - yield _strip_prefix_from_path(blob.name, self.prefix) - - def __len__(self): - iterator = self.bucket.list_blobs(prefix=self.prefix) - return len(list(iterator)) - class 
ABSStore(MutableMapping): """Storage class using Azure Blob Storage (ABS) From 937d16224db40b291c4f743a5924471890fd9a48 Mon Sep 17 00:00:00 2001 From: Zain Patel <30357972+mzjp2@users.noreply.github.com> Date: Thu, 2 Aug 2018 22:23:21 +0100 Subject: [PATCH 15/83] Fixed missing argument in getsize of ABStore Was missing self.container_name as an argument --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index dffc73749f..68037b6971 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2046,7 +2046,7 @@ def rmdir(self, path=None): def getsize(self, path=None): dir_path = self.dir_path(path) size = 0 - for blob in self.client.list_blobs(prefix=dir_path): + for blob in self.client.list_blobs(self.container_name, prefix=dir_path): size += blob.properties.content_length # from https://stackoverflow.com/questions/47694592/get-container-sizes-in-azure-blob-storage-using-python return size From 74920c43463a577e2d78507a85b4bd0d48d0f3fc Mon Sep 17 00:00:00 2001 From: Zain Patel <30357972+mzjp2@users.noreply.github.com> Date: Thu, 2 Aug 2018 22:23:46 +0100 Subject: [PATCH 16/83] Specified prefix argument in rmdir for abstore --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 68037b6971..e539ce7b65 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2040,7 +2040,7 @@ def rename(self, src_path, dst_path): def rmdir(self, path=None): dir_path = normalize_storage_path(self.full_path(path)) + '/' - for blob in self.client.list_blobs(self.container_name, dir_path): + for blob in self.client.list_blobs(self.container_name, prefix=dir_path): self.client.delete_blob(self.container_name, blob.name) def getsize(self, path=None): From bd1648bfba6127accb9cc1464ac0f2b9f583b6d5 Mon Sep 17 00:00:00 2001 From: Zain Patel <30357972+mzjp2@users.noreply.github.com> Date: Thu, 2 Aug 2018 22:24:44 +0100 Subject: [PATCH 17/83] Fixed join string error in 
dir_path in ABStore Join only accepts one argument, using os.path.join(x,y) formats the string as a valid file path for us. --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index e539ce7b65..dbbcb27dce 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2026,7 +2026,7 @@ def dir_path(self, path=None): # prefix is normalized to not have a trailing slash dir_path = self.prefix if store_path: - dir_path = '/'.join(dir_path, store_path) + dir_path = os.path.join(dir_path, store_path) else: dir_path += '/' return dir_path From 0e71f709a58161c05eb4023096b114d26e5db620 Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Fri, 3 Aug 2018 06:52:23 -0400 Subject: [PATCH 18/83] Remove logging work-around as the issue was fixed in azure-storage 1.3.0 --- zarr/storage.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index dbbcb27dce..2d5ccd90e5 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1940,11 +1940,6 @@ def __init__(self, container_name, prefix, account_name, account_key): def initialize_container(self): from azure.storage.blob import BlockBlobService self.client = BlockBlobService(self.account_name, self.account_key) - # change logging level to deal with https://github.com/Azure/azure-storage-python/issues/437 - # it would be better to set up a logging filter that throws out just the - # error logged when calling exists(). - import logging - logging.basicConfig(level=logging.CRITICAL) # needed for pickling def __getstate__(self): @@ -1996,7 +1991,6 @@ def __len__(self): raise NotImplementedError def __contains__(self, key): - # this is where the logging error occurs. 
not sure why we are looking for a .zarray below every blob blob_name = '/'.join([self.prefix, key]) if self.client.exists(self.container_name, blob_name): return True From de5bb9c95dd10168ad7104056a0c6f46db4646d8 Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Fri, 3 Aug 2018 07:27:19 -0400 Subject: [PATCH 19/83] Clean up docstring --- zarr/storage.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 2d5ccd90e5..0956eb738f 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1911,23 +1911,25 @@ def atexit_rmgcspath(bucket, path): class ABSStore(MutableMapping): - """Storage class using Azure Blob Storage (ABS) + """Storage class using Azure Blob Storage (ABS). Parameters ---------- container_name : string - The name of the ABS container to use - prefix : string, optional - The prefix within the container (i.e. subdirectory) + The name of the ABS container to use. Currently this must exist in the + storage account. + prefix : string + Location of the "directory" to use as the root of the storage hierarchy + within the container. account_name : string - The Azure blob storage account name + The Azure blob storage account name. account_key : string - The Azure blob storage account acess key + The Azure blob storage account acess key. Notes ----- In order to use this store, you must install the Azure Blob Storage - `Python Client Library `_. + `Python Client Library `_ version >= 1.3.0. 
""" def __init__(self, container_name, prefix, account_name, account_key): @@ -2041,7 +2043,7 @@ def getsize(self, path=None): dir_path = self.dir_path(path) size = 0 for blob in self.client.list_blobs(self.container_name, prefix=dir_path): - size += blob.properties.content_length # from https://stackoverflow.com/questions/47694592/get-container-sizes-in-azure-blob-storage-using-python + size += blob.properties.content_length return size def clear(self): From 13a6d3092463d9b43ce7e36653ec333b00511562 Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Fri, 3 Aug 2018 07:30:24 -0400 Subject: [PATCH 20/83] Remove more GCSStore code --- zarr/storage.py | 7 ------- zarr/tests/test_core.py | 26 ++------------------------ zarr/tests/test_storage.py | 30 +----------------------------- 3 files changed, 3 insertions(+), 60 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 0956eb738f..2ab4cf16c1 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1903,13 +1903,6 @@ def _append_path_to_prefix(path, prefix): normalize_storage_path(path)]) -def atexit_rmgcspath(bucket, path): - from google.cloud import storage - client = storage.Client() - bucket = client.get_bucket(bucket) - bucket.delete_blobs(bucket.list_blobs(prefix=path)) - - class ABSStore(MutableMapping): """Storage class using Azure Blob Storage (ABS). 
diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 23d64d1f2c..390f888287 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -7,7 +7,7 @@ import pickle import os import warnings -import uuid + import numpy as np from numpy.testing import assert_array_equal, assert_array_almost_equal @@ -16,7 +16,7 @@ from zarr.storage import (DirectoryStore, init_array, init_group, NestedDirectoryStore, DBMStore, LMDBStore, atexit_rmtree, atexit_rmglob, - LRUStoreCache, GCSStore, atexit_rmgcspath) + LRUStoreCache) from zarr.core import Array from zarr.errors import PermissionError from zarr.compat import PY2, text_type, binary_type @@ -1698,25 +1698,3 @@ def create_array(read_only=False, **kwargs): init_array(store, **kwargs) return Array(store, read_only=read_only, cache_metadata=cache_metadata, cache_attrs=cache_attrs) - - -try: - from google.cloud import storage as gcstorage -except ImportError: # pragma: no cover - gcstorage = None - - -@unittest.skipIf(gcstorage is None, 'google-cloud-storage is not installed') -class TestGCSArray(TestArray): - - def create_array(self, read_only=False, **kwargs): - bucket = 'zarr-test' - prefix = uuid.uuid4() - atexit.register(atexit_rmgcspath, bucket, prefix) - store = GCSStore(bucket, prefix) - cache_metadata = kwargs.pop('cache_metadata', True) - cache_attrs = kwargs.pop('cache_attrs', True) - kwargs.setdefault('compressor', Zlib(1)) - init_array(store, **kwargs) - return Array(store, read_only=read_only, cache_metadata=cache_metadata, - cache_attrs=cache_attrs) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index ffc19822a5..f68f8a6ed6 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -8,7 +8,6 @@ import array import shutil import os -import uuid import numpy as np @@ -20,8 +19,7 @@ DirectoryStore, ZipStore, init_group, group_meta_key, getsize, migrate_1to2, TempStore, atexit_rmtree, NestedDirectoryStore, default_compressor, DBMStore, - LMDBStore, 
atexit_rmglob, LRUStoreCache, GCSStore, - atexit_rmgcspath) + LMDBStore, atexit_rmglob, LRUStoreCache) from zarr.meta import (decode_array_metadata, encode_array_metadata, ZARR_FORMAT, decode_group_metadata, encode_group_metadata) from zarr.compat import PY2 @@ -1237,29 +1235,3 @@ def test_format_compatibility(): else: assert compressor.codec_id == z.compressor.codec_id assert compressor.get_config() == z.compressor.get_config() - - -try: - from google.cloud import storage as gcstorage - # cleanup function - -except ImportError: # pragma: no cover - gcstorage = None - - -@unittest.skipIf(gcstorage is None, 'google-cloud-storage is not installed') -class TestGCSStore(StoreTests, unittest.TestCase): - - def create_store(self): - # would need to be replaced with a dedicated test bucket - bucket = 'zarr-test' - prefix = uuid.uuid4() - atexit.register(atexit_rmgcspath, bucket, prefix) - store = GCSStore(bucket, prefix) - return store - - def test_context_manager(self): - with self.create_store() as store: - store['foo'] = b'bar' - store['baz'] = b'qux' - assert 2 == len(store) From 7b52e3923dfb036d0213e2a8281cd1876d5ec962 Mon Sep 17 00:00:00 2001 From: Tim Crone Date: Mon, 6 Aug 2018 10:52:54 -0400 Subject: [PATCH 21/83] Move utility functions into ABSStore class --- zarr/storage.py | 31 +++++++++++++------------------ 1 file changed, 13 insertions(+), 18 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 2ab4cf16c1..7964e3dd01 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1885,24 +1885,6 @@ def __delitem__(self, key): self._invalidate_value(key) -# utility functions for object stores - - -def _strip_prefix_from_path(path, prefix): - # normalized things will not have any leading or trailing slashes - path_norm = normalize_storage_path(path) - prefix_norm = normalize_storage_path(prefix) - if path_norm.startswith(prefix_norm): - return path_norm[(len(prefix_norm)+1):] - else: - return path - - -def _append_path_to_prefix(path, prefix): - return 
'/'.join([normalize_storage_path(prefix), - normalize_storage_path(path)]) - - class ABSStore(MutableMapping): """Storage class using Azure Blob Storage (ABS). @@ -1951,6 +1933,10 @@ def __enter__(self): def __exit__(self, *args): pass + def _append_path_to_prefix(path, prefix): + return '/'.join([normalize_storage_path(prefix), + normalize_storage_path(path)]) + def full_path(self, path=None): return _append_path_to_prefix(path, self.prefix) @@ -2000,6 +1986,15 @@ def list_abs_subdirectories(self, prefix): """Return list of all "subdirectories" from an abs prefix.""" return list(set([blob.name.rsplit('/', 1)[0] for blob in self.client.list_blobs(self.container_name) if '/' in blob.name])) + def _strip_prefix_from_path(path, prefix): + # normalized things will not have any leading or trailing slashes + path_norm = normalize_storage_path(path) + prefix_norm = normalize_storage_path(prefix) + if path_norm.startswith(prefix_norm): + return path_norm[(len(prefix_norm)+1):] + else: + return path + def list_abs_directory(self, prefix, strip_prefix=True): """Return a list of all blobs and subdirectories from an abs prefix.""" items = set() From 36139cba87a6535d71fab9f3c8cb3c47da52d57e Mon Sep 17 00:00:00 2001 From: shikharsg Date: Mon, 13 Aug 2018 00:57:05 +0530 Subject: [PATCH 22/83] implemented the rest of the mutable mapping functions. 
tests pass with python 3.5 --- zarr/storage.py | 57 +++++++++++++++++++++++--------------- zarr/tests/test_storage.py | 11 +++++++- 2 files changed, 45 insertions(+), 23 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 7964e3dd01..9096414121 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1933,27 +1933,38 @@ def __enter__(self): def __exit__(self, *args): pass + @staticmethod def _append_path_to_prefix(path, prefix): return '/'.join([normalize_storage_path(prefix), normalize_storage_path(path)]) def full_path(self, path=None): - return _append_path_to_prefix(path, self.prefix) + return self._append_path_to_prefix(path, self.prefix) def __getitem__(self, key): + from azure.common import AzureMissingResourceHttpError blob_name = '/'.join([self.prefix, key]) - blob = self.client.get_blob_to_bytes(self.container_name, blob_name) - if blob: + try: + blob = self.client.get_blob_to_bytes(self.container_name, blob_name) return blob.content - else: + except AzureMissingResourceHttpError: raise KeyError('Blob %s not found' % blob_name) def __setitem__(self, key, value): + import io blob_name = '/'.join([self.prefix, key]) - self.client.create_blob_from_text(self.container_name, blob_name, value) + buffer = io.BytesIO() + buffer.write(value) + buffer.seek(0) + self.client.create_blob_from_bytes(self.container_name, blob_name, buffer.read()) def __delitem__(self, key): - raise NotImplementedError + if self.client.exists(self.container_name, '/'.join([self.prefix, key])): + self.client.delete_blob(self.container_name, '/'.join([self.prefix, key])) + elif self.__contains__(key): + self.rmdir(key) + else: + raise KeyError def __eq__(self, other): return ( @@ -1963,13 +1974,14 @@ def __eq__(self, other): ) def keys(self): - raise NotImplementedError + return list(self.__iter__()) def __iter__(self): - raise NotImplementedError + for blob in self.client.list_blobs(self.container_name, self.prefix + '/'): + yield self._strip_prefix_from_path(blob.name, 
self.prefix) def __len__(self): - raise NotImplementedError + return len(self.keys()) def __contains__(self, key): blob_name = '/'.join([self.prefix, key]) @@ -1986,6 +1998,7 @@ def list_abs_subdirectories(self, prefix): """Return list of all "subdirectories" from an abs prefix.""" return list(set([blob.name.rsplit('/', 1)[0] for blob in self.client.list_blobs(self.container_name) if '/' in blob.name])) + @staticmethod def _strip_prefix_from_path(path, prefix): # normalized things will not have any leading or trailing slashes path_norm = normalize_storage_path(path) @@ -2015,24 +2028,24 @@ def dir_path(self, path=None): dir_path += '/' return dir_path - def listdir(self, path=None): - dir_path = self.dir_path(path) - return sorted(self.list_abs_directory(dir_path, strip_prefix=True)) - - def rename(self, src_path, dst_path): - raise NotImplementedErrror + # def listdir(self, path=None): + # dir_path = self.dir_path(path) + # return sorted(self.list_abs_directory(dir_path, strip_prefix=True)) + # + # def rename(self, src_path, dst_path): + # raise NotImplementedErrror def rmdir(self, path=None): dir_path = normalize_storage_path(self.full_path(path)) + '/' for blob in self.client.list_blobs(self.container_name, prefix=dir_path): self.client.delete_blob(self.container_name, blob.name) - def getsize(self, path=None): - dir_path = self.dir_path(path) - size = 0 - for blob in self.client.list_blobs(self.container_name, prefix=dir_path): - size += blob.properties.content_length - return size + # def getsize(self, path=None): + # dir_path = self.dir_path(path) + # size = 0 + # for blob in self.client.list_blobs(self.container_name, prefix=dir_path): + # size += blob.properties.content_length + # return size def clear(self): - raise NotImplementedError + self.rmdir() diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index f68f8a6ed6..515875b297 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -19,7 +19,7 @@ DirectoryStore, 
ZipStore, init_group, group_meta_key, getsize, migrate_1to2, TempStore, atexit_rmtree, NestedDirectoryStore, default_compressor, DBMStore, - LMDBStore, atexit_rmglob, LRUStoreCache) + LMDBStore, atexit_rmglob, LRUStoreCache, ABSStore) from zarr.meta import (decode_array_metadata, encode_array_metadata, ZARR_FORMAT, decode_group_metadata, encode_group_metadata) from zarr.compat import PY2 @@ -1235,3 +1235,12 @@ def test_format_compatibility(): else: assert compressor.codec_id == z.compressor.codec_id assert compressor.get_config() == z.compressor.get_config() + + +class TestABSStore(StoreTests, unittest.TestCase): + + def create_store(self): + from zarr.azureblob import BLOB_ACCOUNT_NAME, BLOB_ACCOUNT_KEY + store = ABSStore('test', 'zarrtesting/', BLOB_ACCOUNT_NAME, BLOB_ACCOUNT_KEY) + store.rmdir() + return store From bda0c3ffe826651cd6e9cff349e4f411da55ec8d Mon Sep 17 00:00:00 2001 From: shikharsg Date: Tue, 14 Aug 2018 13:20:16 +0530 Subject: [PATCH 23/83] using local blob emulator for storage.ABSStore testing --- zarr/storage.py | 11 ++--------- zarr/tests/test_storage.py | 10 ++++++++-- 2 files changed, 10 insertions(+), 11 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 9096414121..80c1ffb5a3 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1907,16 +1907,10 @@ class ABSStore(MutableMapping): `Python Client Library `_ version >= 1.3.0. 
""" - def __init__(self, container_name, prefix, account_name, account_key): - self.account_name = account_name - self.account_key = account_key + def __init__(self, container_name, prefix, blob_client): + self.client = blob_client self.container_name = container_name self.prefix = normalize_storage_path(prefix) - self.initialize_container() - - def initialize_container(self): - from azure.storage.blob import BlockBlobService - self.client = BlockBlobService(self.account_name, self.account_key) # needed for pickling def __getstate__(self): @@ -1925,7 +1919,6 @@ def __getstate__(self): def __setstate__(self, state): self.__dict__.update(state) - self.initialize_container() def __enter__(self): return self diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 515875b297..d6f39ea0bf 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1240,7 +1240,13 @@ def test_format_compatibility(): class TestABSStore(StoreTests, unittest.TestCase): def create_store(self): - from zarr.azureblob import BLOB_ACCOUNT_NAME, BLOB_ACCOUNT_KEY - store = ABSStore('test', 'zarrtesting/', BLOB_ACCOUNT_NAME, BLOB_ACCOUNT_KEY) + from azure.storage.blob import BlockBlobService + blob_emulator_connection_string = 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;'+\ + 'AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;'+\ + 'BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;'+\ + 'TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;'+\ + 'QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;' + blob_client = BlockBlobService(is_emulated=True, connection_string=blob_emulator_connection_string) + store = ABSStore('test', 'zarrtesting/', blob_client) store.rmdir() return store From 447c473d4f12fffb32e441807ebe6113862745ca Mon Sep 17 00:00:00 2001 From: shikharsg Date: Tue, 14 Aug 2018 13:30:01 +0530 Subject: [PATCH 24/83] fixed PY2 array.array error in storage.ABSStore --- zarr/storage.py 
| 3 +++ 1 file changed, 3 insertions(+) diff --git a/zarr/storage.py b/zarr/storage.py index 80c1ffb5a3..3c62c28fa4 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1945,6 +1945,9 @@ def __getitem__(self, key): def __setitem__(self, key, value): import io + import array + if PY2 and isinstance(value, array.array): + value = value.tostring() blob_name = '/'.join([self.prefix, key]) buffer = io.BytesIO() buffer.write(value) From c6858ed75fafc8e2278717a5d1bdb2f0c128b554 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Tue, 14 Aug 2018 18:57:07 +0530 Subject: [PATCH 25/83] create test container if not exists in ABSStore test --- zarr/tests/test_storage.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index d6f39ea0bf..dc0b015b5b 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1247,6 +1247,8 @@ def create_store(self): 'TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;'+\ 'QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;' blob_client = BlockBlobService(is_emulated=True, connection_string=blob_emulator_connection_string) + if not blob_client.exists('test'): + blob_client.create_container('test') store = ABSStore('test', 'zarrtesting/', blob_client) store.rmdir() return store From 8e51b3bbe7c9194eb1765059c4b9e2715d8e97cb Mon Sep 17 00:00:00 2001 From: shikharsg Date: Wed, 15 Aug 2018 00:17:25 +0530 Subject: [PATCH 26/83] added more tests for ABSStore --- zarr/tests/test_core.py | 29 ++++++++++++++++++++++++++++- 1 file changed, 28 insertions(+), 1 deletion(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 390f888287..a42fa64064 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -16,7 +16,7 @@ from zarr.storage import (DirectoryStore, init_array, init_group, NestedDirectoryStore, DBMStore, LMDBStore, atexit_rmtree, atexit_rmglob, - LRUStoreCache) + LRUStoreCache, ABSStore) from zarr.core import Array from zarr.errors import 
PermissionError from zarr.compat import PY2, text_type, binary_type @@ -1211,6 +1211,33 @@ def test_nbytes_stored(self): assert expect_nbytes_stored == z.nbytes_stored +class TestArrayWithABSStore(TestArray): + + @staticmethod + def absstore(): + from azure.storage.blob import BlockBlobService + blob_emulator_connection_string = 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;'+\ + 'AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;'+\ + 'BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;'+\ + 'TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;'+\ + 'QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;' + blob_client = BlockBlobService(is_emulated=True, connection_string=blob_emulator_connection_string) + if not blob_client.exists('test'): + blob_client.create_container('test') + store = ABSStore('test', 'zarrtesting/', blob_client) + store.rmdir() + return store + + def create_array(self, read_only=False, **kwargs): + store = self.absstore() + kwargs.setdefault('compressor', Zlib(1)) + cache_metadata = kwargs.pop('cache_metadata', True) + cache_attrs = kwargs.pop('cache_attrs', True) + init_array(store, **kwargs) + return Array(store, read_only=read_only, cache_metadata=cache_metadata, + cache_attrs=cache_attrs) + + class TestArrayWithNestedDirectoryStore(TestArrayWithDirectoryStore): @staticmethod From ec4e3f1dd7d478cdd164bc3a2ef8c0ff0104625e Mon Sep 17 00:00:00 2001 From: shikharsg Date: Wed, 15 Aug 2018 09:22:24 +0530 Subject: [PATCH 27/83] reverted blob client creation to inside of ABSStore --- zarr/storage.py | 10 ++++++++-- zarr/tests/test_core.py | 3 ++- zarr/tests/test_storage.py | 3 ++- 3 files changed, 12 insertions(+), 4 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 3c62c28fa4..52e43b55b3 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1907,11 +1907,16 @@ class ABSStore(MutableMapping): `Python Client Library `_ version >= 1.3.0. 
""" - def __init__(self, container_name, prefix, blob_client): - self.client = blob_client + def __init__(self, container_name, prefix, account_name, account_key): + self.account_name = account_name + self.account_key = account_key self.container_name = container_name self.prefix = normalize_storage_path(prefix) + def initialize_container(self): + from azure.storage.blob import BlockBlobService + self.client = BlockBlobService(self.account_name, self.account_key) + # needed for pickling def __getstate__(self): state = self.__dict__.copy() @@ -1919,6 +1924,7 @@ def __getstate__(self): def __setstate__(self, state): self.__dict__.update(state) + self.initialize_container() def __enter__(self): return self diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index a42fa64064..988db09667 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1224,7 +1224,8 @@ def absstore(): blob_client = BlockBlobService(is_emulated=True, connection_string=blob_emulator_connection_string) if not blob_client.exists('test'): blob_client.create_container('test') - store = ABSStore('test', 'zarrtesting/', blob_client) + store = ABSStore(container_name='test', prefix='zarrtesting/', account_name='foo', account_key='bar') + store.client = blob_client store.rmdir() return store diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index dc0b015b5b..7d6c9ff3a7 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1249,6 +1249,7 @@ def create_store(self): blob_client = BlockBlobService(is_emulated=True, connection_string=blob_emulator_connection_string) if not blob_client.exists('test'): blob_client.create_container('test') - store = ABSStore('test', 'zarrtesting/', blob_client) + store = ABSStore(container_name='test', prefix='zarrtesting/', account_name='foo', account_key='bar') + store.client = blob_client store.rmdir() return store From bde7b5ece967d44543e2b20e5ffe8759b6ef0f7c Mon Sep 17 00:00:00 2001 From: shikharsg Date: 
Wed, 15 Aug 2018 09:50:19 +0530 Subject: [PATCH 28/83] added group test for ABSStore --- zarr/tests/test_hierarchy.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index f47012cf88..e9c60b5027 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -18,7 +18,7 @@ from zarr.storage import (DictStore, DirectoryStore, ZipStore, init_group, init_array, array_meta_key, group_meta_key, atexit_rmtree, NestedDirectoryStore, DBMStore, LMDBStore, atexit_rmglob, - LRUStoreCache) + LRUStoreCache, ABSStore) from zarr.core import Array from zarr.compat import PY2, text_type from zarr.hierarchy import Group, group, open_group @@ -856,6 +856,25 @@ def create_store(): return store, None +class TestGroupWithABSStore(TestGroup): + + @staticmethod + def create_store(): + from azure.storage.blob import BlockBlobService + blob_emulator_connection_string = 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;' + \ + 'AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;' + \ + 'BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;' + \ + 'TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;' + \ + 'QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;' + blob_client = BlockBlobService(is_emulated=True, connection_string=blob_emulator_connection_string) + if not blob_client.exists('test'): + blob_client.create_container('test') + store = ABSStore(container_name='test', prefix='zarrtesting/', account_name='foo', account_key='bar') + store.client = blob_client + store.rmdir() + return store, None + + class TestGroupWithNestedDirectoryStore(TestGroup): @staticmethod From b86cf5305e9e37a2adb13358122e564e664b1a1c Mon Sep 17 00:00:00 2001 From: shikharsg Date: Wed, 15 Aug 2018 12:35:27 +0530 Subject: [PATCH 29/83] emulator connection string not needed --- zarr/tests/test_core.py | 7 +------ 
zarr/tests/test_hierarchy.py | 7 +------ zarr/tests/test_storage.py | 7 +------ 3 files changed, 3 insertions(+), 18 deletions(-) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 988db09667..9ac33da36a 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1216,12 +1216,7 @@ class TestArrayWithABSStore(TestArray): @staticmethod def absstore(): from azure.storage.blob import BlockBlobService - blob_emulator_connection_string = 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;'+\ - 'AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;'+\ - 'BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;'+\ - 'TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;'+\ - 'QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;' - blob_client = BlockBlobService(is_emulated=True, connection_string=blob_emulator_connection_string) + blob_client = BlockBlobService(is_emulated=True) if not blob_client.exists('test'): blob_client.create_container('test') store = ABSStore(container_name='test', prefix='zarrtesting/', account_name='foo', account_key='bar') diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index e9c60b5027..029075d889 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -861,12 +861,7 @@ class TestGroupWithABSStore(TestGroup): @staticmethod def create_store(): from azure.storage.blob import BlockBlobService - blob_emulator_connection_string = 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;' + \ - 'AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;' + \ - 'BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;' + \ - 'TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;' + \ - 'QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;' - blob_client = BlockBlobService(is_emulated=True, connection_string=blob_emulator_connection_string) + blob_client = 
BlockBlobService(is_emulated=True) if not blob_client.exists('test'): blob_client.create_container('test') store = ABSStore(container_name='test', prefix='zarrtesting/', account_name='foo', account_key='bar') diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 7d6c9ff3a7..191da27bd3 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1241,12 +1241,7 @@ class TestABSStore(StoreTests, unittest.TestCase): def create_store(self): from azure.storage.blob import BlockBlobService - blob_emulator_connection_string = 'DefaultEndpointsProtocol=http;AccountName=devstoreaccount1;'+\ - 'AccountKey=Eby8vdM02xNOcqFlqUwJPLlmEtlCDXJ1OUzFT50uSRZ6IFsuFq2UVErCz4I6tq/K1SZFPTOtr/KBHBeksoGMGw==;'+\ - 'BlobEndpoint=http://127.0.0.1:10000/devstoreaccount1;'+\ - 'TableEndpoint=http://127.0.0.1:10002/devstoreaccount1;'+\ - 'QueueEndpoint=http://127.0.0.1:10001/devstoreaccount1;' - blob_client = BlockBlobService(is_emulated=True, connection_string=blob_emulator_connection_string) + blob_client = BlockBlobService(is_emulated=True) if not blob_client.exists('test'): blob_client.create_container('test') store = ABSStore(container_name='test', prefix='zarrtesting/', account_name='foo', account_key='bar') From f66dadd7421586c5992313c93ef16074497857c1 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Wed, 15 Aug 2018 12:43:28 +0530 Subject: [PATCH 30/83] fixed import statement location and put azure-storage-blob in requirements --- requirements.txt | 1 + zarr/storage.py | 8 ++++---- zarr/tests/test_core.py | 2 +- zarr/tests/test_hierarchy.py | 2 +- zarr/tests/test_storage.py | 2 +- 5 files changed, 8 insertions(+), 7 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8720210cf5..1b6d78b7a2 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,3 +3,4 @@ pytest numpy fasteners numcodecs +azure-storage-blob diff --git a/zarr/storage.py b/zarr/storage.py index 52e43b55b3..8b5f084082 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ 
-28,9 +28,13 @@ from threading import Lock, RLock import glob import warnings +import io +import array import numpy as np +from azure.storage.blob import BlockBlobService +from azure.common import AzureMissingResourceHttpError from zarr.util import (normalize_shape, normalize_chunks, normalize_order, @@ -1914,7 +1918,6 @@ def __init__(self, container_name, prefix, account_name, account_key): self.prefix = normalize_storage_path(prefix) def initialize_container(self): - from azure.storage.blob import BlockBlobService self.client = BlockBlobService(self.account_name, self.account_key) # needed for pickling @@ -1941,7 +1944,6 @@ def full_path(self, path=None): return self._append_path_to_prefix(path, self.prefix) def __getitem__(self, key): - from azure.common import AzureMissingResourceHttpError blob_name = '/'.join([self.prefix, key]) try: blob = self.client.get_blob_to_bytes(self.container_name, blob_name) @@ -1950,8 +1952,6 @@ def __getitem__(self, key): raise KeyError('Blob %s not found' % blob_name) def __setitem__(self, key, value): - import io - import array if PY2 and isinstance(value, array.array): value = value.tostring() blob_name = '/'.join([self.prefix, key]) diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 9ac33da36a..f936dfe1b1 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -12,6 +12,7 @@ import numpy as np from numpy.testing import assert_array_equal, assert_array_almost_equal import pytest +from azure.storage.blob import BlockBlobService from zarr.storage import (DirectoryStore, init_array, init_group, NestedDirectoryStore, @@ -1215,7 +1216,6 @@ class TestArrayWithABSStore(TestArray): @staticmethod def absstore(): - from azure.storage.blob import BlockBlobService blob_client = BlockBlobService(is_emulated=True) if not blob_client.exists('test'): blob_client.create_container('test') diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 029075d889..9f7056ef9d 100644 --- 
a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -13,6 +13,7 @@ import numpy as np from numpy.testing import assert_array_equal import pytest +from azure.storage.blob import BlockBlobService from zarr.storage import (DictStore, DirectoryStore, ZipStore, init_group, init_array, @@ -860,7 +861,6 @@ class TestGroupWithABSStore(TestGroup): @staticmethod def create_store(): - from azure.storage.blob import BlockBlobService blob_client = BlockBlobService(is_emulated=True) if not blob_client.exists('test'): blob_client.create_container('test') diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 191da27bd3..ca1bfadaae 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -13,6 +13,7 @@ import numpy as np from numpy.testing import assert_array_equal, assert_array_almost_equal import pytest +from azure.storage.blob import BlockBlobService from zarr.storage import (init_array, array_meta_key, attrs_key, DictStore, @@ -1240,7 +1241,6 @@ def test_format_compatibility(): class TestABSStore(StoreTests, unittest.TestCase): def create_store(self): - from azure.storage.blob import BlockBlobService blob_client = BlockBlobService(is_emulated=True) if not blob_client.exists('test'): blob_client.create_container('test') From b8f60fed587f75a97bf240b61623ab0af9fb4ed7 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Wed, 15 Aug 2018 16:31:10 +0530 Subject: [PATCH 31/83] fixed pickle tests --- zarr/storage.py | 16 +++++++++------- zarr/tests/test_core.py | 4 ++-- zarr/tests/test_hierarchy.py | 4 ++-- zarr/tests/test_storage.py | 4 ++-- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 8b5f084082..721c6ac8fb 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1911,14 +1911,16 @@ class ABSStore(MutableMapping): `Python Client Library `_ version >= 1.3.0. 
""" - def __init__(self, container_name, prefix, account_name, account_key): + def __init__(self, container, prefix, account_name=None, account_key=None, blob_service_kwargs=None): + self.container_name = container + self.prefix = normalize_storage_path(prefix) self.account_name = account_name self.account_key = account_key - self.container_name = container_name - self.prefix = normalize_storage_path(prefix) - - def initialize_container(self): - self.client = BlockBlobService(self.account_name, self.account_key) + if blob_service_kwargs is not None: + self.blob_service_kwargs = blob_service_kwargs + else: + self.blob_service_kwargs = dict() + self.client = BlockBlobService(self.account_name, self.account_key, **self.blob_service_kwargs) # needed for pickling def __getstate__(self): @@ -1927,7 +1929,7 @@ def __getstate__(self): def __setstate__(self, state): self.__dict__.update(state) - self.initialize_container() + self.client = BlockBlobService(self.account_name, self.account_key, **self.blob_service_kwargs) def __enter__(self): return self diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index f936dfe1b1..89bb9702b7 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1219,8 +1219,8 @@ def absstore(): blob_client = BlockBlobService(is_emulated=True) if not blob_client.exists('test'): blob_client.create_container('test') - store = ABSStore(container_name='test', prefix='zarrtesting/', account_name='foo', account_key='bar') - store.client = blob_client + store = ABSStore(container='test', prefix='zarrtesting/', account_name='foo', account_key='bar', + blob_service_kwargs={'is_emulated':True}) store.rmdir() return store diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 9f7056ef9d..2cbac95bcd 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -864,8 +864,8 @@ def create_store(): blob_client = BlockBlobService(is_emulated=True) if not blob_client.exists('test'): 
blob_client.create_container('test') - store = ABSStore(container_name='test', prefix='zarrtesting/', account_name='foo', account_key='bar') - store.client = blob_client + store = ABSStore(container='test', prefix='zarrtesting/', account_name='foo', account_key='bar', + blob_service_kwargs={'is_emulated': True}) store.rmdir() return store, None diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index ca1bfadaae..7a9aa8d972 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1244,7 +1244,7 @@ def create_store(self): blob_client = BlockBlobService(is_emulated=True) if not blob_client.exists('test'): blob_client.create_container('test') - store = ABSStore(container_name='test', prefix='zarrtesting/', account_name='foo', account_key='bar') - store.client = blob_client + store = ABSStore(container='test', prefix='zarrtesting/', account_name='foo', account_key='bar', + blob_service_kwargs={'is_emulated':True}) store.rmdir() return store From edd5a71022c62bd2723da38feabc216aaa25e614 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Wed, 15 Aug 2018 21:54:16 +0530 Subject: [PATCH 32/83] fixed listdir in ABSStore --- zarr/storage.py | 21 ++++++++++++--------- 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 721c6ac8fb..99fb4eff6e 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1996,11 +1996,15 @@ def __contains__(self, key): def list_abs_directory_blobs(self, prefix): """Return list of all blobs from an abs prefix.""" - return [blob.name for blob in self.client.list_blobs(self.container_name)] + return [blob.name for blob in self.client.list_blobs(self.container_name, prefix=prefix) if '/' not in blob.name[len(prefix):]] def list_abs_subdirectories(self, prefix): """Return list of all "subdirectories" from an abs prefix.""" - return list(set([blob.name.rsplit('/', 1)[0] for blob in self.client.list_blobs(self.container_name) if '/' in blob.name])) + dirs = [] + for blob in 
self.client.list_blobs(self.container_name, prefix=prefix): + if '/' in blob.name[len(prefix):]: + dirs.append(blob.name[:blob.name.find('/', len(prefix))]) + return dirs @staticmethod def _strip_prefix_from_path(path, prefix): @@ -2019,7 +2023,7 @@ def list_abs_directory(self, prefix, strip_prefix=True): items.update(self.list_abs_subdirectories(prefix)) items = list(items) if strip_prefix: - items = [_strip_prefix_from_path(path, prefix) for path in items] + items = [self._strip_prefix_from_path(path, prefix) for path in items] return items def dir_path(self, path=None): @@ -2028,14 +2032,13 @@ def dir_path(self, path=None): dir_path = self.prefix if store_path: dir_path = os.path.join(dir_path, store_path) - else: - dir_path += '/' + dir_path += '/' return dir_path - # def listdir(self, path=None): - # dir_path = self.dir_path(path) - # return sorted(self.list_abs_directory(dir_path, strip_prefix=True)) - # + def listdir(self, path=None): + dir_path = self.dir_path(path) + return sorted(self.list_abs_directory(dir_path, strip_prefix=True)) + # def rename(self, src_path, dst_path): # raise NotImplementedErrror From 3fbe5896f5b65e6aace4db4c18a85fbec31df508 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Thu, 16 Aug 2018 23:28:59 +0530 Subject: [PATCH 33/83] fixed getsize --- zarr/storage.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 99fb4eff6e..51831700e8 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2047,12 +2047,18 @@ def rmdir(self, path=None): for blob in self.client.list_blobs(self.container_name, prefix=dir_path): self.client.delete_blob(self.container_name, blob.name) - # def getsize(self, path=None): - # dir_path = self.dir_path(path) - # size = 0 - # for blob in self.client.list_blobs(self.container_name, prefix=dir_path): - # size += blob.properties.content_length - # return size + def getsize(self, path=None): + store_path = normalize_storage_path(path) + fs_path 
= self.prefix + if store_path: + fs_path = self._append_path_to_prefix(store_path, self.prefix) + if self.client.exists(self.container_name, fs_path): + return self.client.get_blob_properties(self.container_name, fs_path).properties.content_length + else: + size = 0 + for blob_name in self.list_abs_directory_blobs(fs_path + '/'): + size += self.client.get_blob_properties(self.container_name, blob_name).properties.content_length + return size def clear(self): self.rmdir() From 4b8560e7790e4c1a7fdfeb1716a9b3f23d3cc563 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Fri, 17 Aug 2018 00:23:14 +0530 Subject: [PATCH 34/83] Fixed PY2 pickle test. python 2 pickle can't pickle instance methods --- zarr/storage.py | 1 + 1 file changed, 1 insertion(+) diff --git a/zarr/storage.py b/zarr/storage.py index 51831700e8..8168c77dfa 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1925,6 +1925,7 @@ def __init__(self, container, prefix, account_name=None, account_key=None, blob_ # needed for pickling def __getstate__(self): state = self.__dict__.copy() + del state['client'] return state def __setstate__(self, state): From 631051c080c175a33bb282088ed7802bb8b0662d Mon Sep 17 00:00:00 2001 From: shikharsg Date: Tue, 4 Sep 2018 17:06:12 +0530 Subject: [PATCH 35/83] implemented the suggestion from here: https://github.com/zarr-developers/zarr/pull/293#discussion_r214753603 --- zarr/storage.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 8168c77dfa..0131d8a490 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1958,10 +1958,8 @@ def __setitem__(self, key, value): if PY2 and isinstance(value, array.array): value = value.tostring() blob_name = '/'.join([self.prefix, key]) - buffer = io.BytesIO() - buffer.write(value) - buffer.seek(0) - self.client.create_blob_from_bytes(self.container_name, blob_name, buffer.read()) + buffer = io.BytesIO(value) + self.client.create_blob_from_stream(self.container_name, blob_name, 
buffer) def __delitem__(self, key): if self.client.exists(self.container_name, '/'.join([self.prefix, key])): From ea933524d728e51470c5652cad318f4c1b4be89a Mon Sep 17 00:00:00 2001 From: shikharsg Date: Wed, 5 Sep 2018 14:28:33 +0530 Subject: [PATCH 36/83] flake-8 fixes --- zarr/storage.py | 27 ++++++++++++++++++--------- zarr/tests/test_core.py | 4 ++-- zarr/tests/test_hierarchy.py | 4 ++-- zarr/tests/test_storage.py | 4 ++-- 4 files changed, 24 insertions(+), 15 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 0131d8a490..a449e54520 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1908,19 +1908,21 @@ class ABSStore(MutableMapping): Notes ----- In order to use this store, you must install the Azure Blob Storage - `Python Client Library `_ version >= 1.3.0. + https://github.com/Azure/azure-storage-python/tree/master/azure-storage-blob_ version >= 1.3.0. """ - def __init__(self, container, prefix, account_name=None, account_key=None, blob_service_kwargs=None): + def __init__(self, container, prefix, account_name=None, account_key=None, + blob_service_kwargs=None): self.container_name = container self.prefix = normalize_storage_path(prefix) self.account_name = account_name self.account_key = account_key if blob_service_kwargs is not None: - self.blob_service_kwargs = blob_service_kwargs + self.blob_service_kwargs = blob_service_kwargs else: - self.blob_service_kwargs = dict() - self.client = BlockBlobService(self.account_name, self.account_key, **self.blob_service_kwargs) + self.blob_service_kwargs = dict() + self.client = BlockBlobService(self.account_name, self.account_key, + **self.blob_service_kwargs) # needed for pickling def __getstate__(self): @@ -1930,7 +1932,8 @@ def __getstate__(self): def __setstate__(self, state): self.__dict__.update(state) - self.client = BlockBlobService(self.account_name, self.account_key, **self.blob_service_kwargs) + self.client = BlockBlobService(self.account_name, self.account_key, + 
**self.blob_service_kwargs) def __enter__(self): return self @@ -1995,7 +1998,11 @@ def __contains__(self, key): def list_abs_directory_blobs(self, prefix): """Return list of all blobs from an abs prefix.""" - return [blob.name for blob in self.client.list_blobs(self.container_name, prefix=prefix) if '/' not in blob.name[len(prefix):]] + blobs = list() + for blob in self.client.list_blobs(self.container_name, prefix=prefix): + if '/' not in blob.name[len(prefix):]: + blobs.append(blob.name) + return blobs def list_abs_subdirectories(self, prefix): """Return list of all "subdirectories" from an abs prefix.""" @@ -2052,11 +2059,13 @@ def getsize(self, path=None): if store_path: fs_path = self._append_path_to_prefix(store_path, self.prefix) if self.client.exists(self.container_name, fs_path): - return self.client.get_blob_properties(self.container_name, fs_path).properties.content_length + return self.client.get_blob_properties(self.container_name, + fs_path).properties.content_length else: size = 0 for blob_name in self.list_abs_directory_blobs(fs_path + '/'): - size += self.client.get_blob_properties(self.container_name, blob_name).properties.content_length + size += self.client.get_blob_properties(self.container_name, + blob_name).properties.content_length return size def clear(self): diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 89bb9702b7..e4f0a68bef 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1219,8 +1219,8 @@ def absstore(): blob_client = BlockBlobService(is_emulated=True) if not blob_client.exists('test'): blob_client.create_container('test') - store = ABSStore(container='test', prefix='zarrtesting/', account_name='foo', account_key='bar', - blob_service_kwargs={'is_emulated':True}) + store = ABSStore(container='test', prefix='zarrtesting/', account_name='foo', + account_key='bar', blob_service_kwargs={'is_emulated': True}) store.rmdir() return store diff --git a/zarr/tests/test_hierarchy.py 
b/zarr/tests/test_hierarchy.py index 2cbac95bcd..145124f90c 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -864,8 +864,8 @@ def create_store(): blob_client = BlockBlobService(is_emulated=True) if not blob_client.exists('test'): blob_client.create_container('test') - store = ABSStore(container='test', prefix='zarrtesting/', account_name='foo', account_key='bar', - blob_service_kwargs={'is_emulated': True}) + store = ABSStore(container='test', prefix='zarrtesting/', account_name='foo', + account_key='bar', blob_service_kwargs={'is_emulated': True}) store.rmdir() return store, None diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 7a9aa8d972..c76ba39b9a 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1244,7 +1244,7 @@ def create_store(self): blob_client = BlockBlobService(is_emulated=True) if not blob_client.exists('test'): blob_client.create_container('test') - store = ABSStore(container='test', prefix='zarrtesting/', account_name='foo', account_key='bar', - blob_service_kwargs={'is_emulated':True}) + store = ABSStore(container='test', prefix='zarrtesting/', account_name='foo', + account_key='bar', blob_service_kwargs={'is_emulated': True}) store.rmdir() return store From 08fe155078d8956987bb2293cad5212792ee2538 Mon Sep 17 00:00:00 2001 From: shikhar Date: Tue, 27 Nov 2018 13:10:13 +0530 Subject: [PATCH 37/83] added azure-storage-blob --- requirements_test.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements_test.txt b/requirements_test.txt index a668f130cc..1492a673c1 100644 --- a/requirements_test.txt +++ b/requirements_test.txt @@ -9,3 +9,4 @@ pytest-cov s3fs setuptools-scm tox +azure-storage-blob From 957b405aa780697c6758506a395fcc6069048270 Mon Sep 17 00:00:00 2001 From: shikhar Date: Fri, 30 Nov 2018 13:44:12 +0530 Subject: [PATCH 38/83] first attempt at docker build with azurite --- .travis.yml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git 
a/.travis.yml b/.travis.yml index 8a5e1fe521..72494e0080 100644 --- a/.travis.yml +++ b/.travis.yml @@ -11,6 +11,9 @@ addons: packages: - libdb-dev +services: + - docker + matrix: include: - python: 2.7 @@ -20,6 +23,11 @@ matrix: dist: xenial sudo: true +before_install: + - docker pull arafato/azurite + - mkdir ~/blob_emulator + - docker run -e executable=blob -d -t -p 10000:10000 -v ~/blob_emulator:/opt/azurite/folder arafato/azurite + install: - pip install -U pip setuptools wheel tox-travis coveralls From 9c128dbab7d7ff619f81d43e2b287749da0c4272 Mon Sep 17 00:00:00 2001 From: shikhar Date: Fri, 30 Nov 2018 14:48:35 +0530 Subject: [PATCH 39/83] azure storage emulator in appveyor --- appveyor.yml | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/appveyor.yml b/appveyor.yml index 67058550dc..c5326ec5b2 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -26,6 +26,13 @@ environment: PYTHON_VERSION: "3.7" install: + - ps: | + $msiPath = "$($env:USERPROFILE)\MicrosoftAzureStorageEmulator.msi" + (New-Object Net.WebClient).DownloadFile('https://download.microsoft.com/download/B/4/A/B4A8422F-C564-4393-80DA-6865A8C4B32D/MicrosoftAzureStorageEmulator.msi', $msiPath) + cmd /c start /wait msiexec /i $msiPath /quiet + - cmd: | + "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" start + "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" status - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" - "%CMD_IN_ENV% python -m pip install -U pip setuptools wheel" - "%CMD_IN_ENV% python -m pip install -rrequirements_test.txt" @@ -38,3 +45,7 @@ build: off test_script: - "%CMD_IN_ENV% python -m pytest -v --pyargs zarr" + +on_finish: + - cmd: | + "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" stop From 2da245306e27c6256834063f64bb2d464dde9fb1 Mon Sep 17 00:00:00 2001 From: shikhar Date: Fri, 30 Nov 2018 14:50:48 +0530 Subject: [PATCH 40/83] syntax correction --- 
appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index c5326ec5b2..482736e6f0 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -30,7 +30,7 @@ install: $msiPath = "$($env:USERPROFILE)\MicrosoftAzureStorageEmulator.msi" (New-Object Net.WebClient).DownloadFile('https://download.microsoft.com/download/B/4/A/B4A8422F-C564-4393-80DA-6865A8C4B32D/MicrosoftAzureStorageEmulator.msi', $msiPath) cmd /c start /wait msiexec /i $msiPath /quiet - - cmd: | + - cmd: | "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" start "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" status - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" From a09e2c9b8ae5e53efdeab60490c30fbf50c805be Mon Sep 17 00:00:00 2001 From: shikhar Date: Fri, 30 Nov 2018 15:33:35 +0530 Subject: [PATCH 41/83] checking if emulator is preinstalled --- appveyor.yml | 4 ---- 1 file changed, 4 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 482736e6f0..7c15c0756e 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -26,10 +26,6 @@ environment: PYTHON_VERSION: "3.7" install: - - ps: | - $msiPath = "$($env:USERPROFILE)\MicrosoftAzureStorageEmulator.msi" - (New-Object Net.WebClient).DownloadFile('https://download.microsoft.com/download/B/4/A/B4A8422F-C564-4393-80DA-6865A8C4B32D/MicrosoftAzureStorageEmulator.msi', $msiPath) - cmd /c start /wait msiexec /i $msiPath /quiet - cmd: | "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" start "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" status From 5ce6a4c46f407315de1a48441471bf220fde0273 Mon Sep 17 00:00:00 2001 From: shikhar Date: Fri, 30 Nov 2018 15:36:52 +0530 Subject: [PATCH 42/83] syntax fix --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 7c15c0756e..38ee256444 100644 --- a/appveyor.yml +++ 
b/appveyor.yml @@ -26,7 +26,7 @@ environment: PYTHON_VERSION: "3.7" install: - - cmd: | + - cmd: "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" start "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" status - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" From bf8aa371a99b4db5622264b904d98a6132106deb Mon Sep 17 00:00:00 2001 From: shikhar Date: Fri, 30 Nov 2018 15:38:31 +0530 Subject: [PATCH 43/83] syntax fix --- appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 38ee256444..4977cce4ae 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -27,8 +27,8 @@ environment: install: - cmd: - "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" start - "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" status + - "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" start + - "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" status - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" - "%CMD_IN_ENV% python -m pip install -U pip setuptools wheel" - "%CMD_IN_ENV% python -m pip install -rrequirements_test.txt" From 730255c5edeccad1a694d10e859b1fc021b79210 Mon Sep 17 00:00:00 2001 From: shikhar Date: Fri, 30 Nov 2018 15:50:22 +0530 Subject: [PATCH 44/83] syntax fix --- appveyor.yml | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 4977cce4ae..e33b697357 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -26,9 +26,7 @@ environment: PYTHON_VERSION: "3.7" install: - - cmd: - - "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" start - - "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" status + - cmd: "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" 
start - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" - "%CMD_IN_ENV% python -m pip install -U pip setuptools wheel" - "%CMD_IN_ENV% python -m pip install -rrequirements_test.txt" From 8dc2f5d90a96358e4c0e568c5629ea1908814f9a Mon Sep 17 00:00:00 2001 From: shikharsg Date: Thu, 13 Dec 2018 01:10:36 +0530 Subject: [PATCH 45/83] removed wrong syntax --- appveyor.yml | 5 ----- 1 file changed, 5 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index e33b697357..67058550dc 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -26,7 +26,6 @@ environment: PYTHON_VERSION: "3.7" install: - - cmd: "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" start - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" - "%CMD_IN_ENV% python -m pip install -U pip setuptools wheel" - "%CMD_IN_ENV% python -m pip install -rrequirements_test.txt" @@ -39,7 +38,3 @@ build: off test_script: - "%CMD_IN_ENV% python -m pytest -v --pyargs zarr" - -on_finish: - - cmd: | - "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe" stop From 85a5670f01a64767b150a8f0c1cfd505d5852156 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Thu, 13 Dec 2018 01:15:14 +0530 Subject: [PATCH 46/83] storage emulator with docker --- appveyor.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/appveyor.yml b/appveyor.yml index 67058550dc..69bded65eb 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -26,6 +26,7 @@ environment: PYTHON_VERSION: "3.7" install: + - "docker run -d -p 10000:10000 -p 10001:10001 -p 10002:10002 microsoft/azure-storage-emulator" - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" - "%CMD_IN_ENV% python -m pip install -U pip setuptools wheel" - "%CMD_IN_ENV% python -m pip install -rrequirements_test.txt" From a09fb61ea0b206a94e42110c2dade7798302c98b Mon Sep 17 00:00:00 2001 From: shikharsg Date: Thu, 13 Dec 2018 12:47:33 +0530 Subject: [PATCH 47/83] trying different appveyor image --- appveyor.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git 
a/appveyor.yml b/appveyor.yml index 69bded65eb..34a964c218 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -2,6 +2,8 @@ branches: only: - master +image: Visual Studio 2017 + environment: global: From 168ba50c75776e36b228bb216a755c953c625691 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Thu, 13 Dec 2018 13:38:47 +0530 Subject: [PATCH 48/83] flake 8 fixes --- zarr/storage.py | 2 -- zarr/tests/test_storage.py | 1 - 2 files changed, 3 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 0db659aae7..8fe5963072 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -33,7 +33,6 @@ import array -import numpy as np from azure.storage.blob import BlockBlobService from azure.common import AzureMissingResourceHttpError @@ -2144,4 +2143,3 @@ def getsize(self, path): def listdir(self, path): return listdir(self.meta_store, path) - diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index c2ed7790e7..25546fa14e 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1322,4 +1322,3 @@ def test_read_write(self): cs['bar'] = 0 with pytest.raises(PermissionError): cs['spam'] = 'eggs' - From e0de99bd58803d827bf8dcccf72c2784509bf3bd Mon Sep 17 00:00:00 2001 From: shikharsg Date: Fri, 14 Dec 2018 00:01:28 +0530 Subject: [PATCH 49/83] full coverage --- zarr/storage.py | 10 +++------- zarr/tests/test_core.py | 4 ++-- zarr/tests/test_hierarchy.py | 4 ++-- zarr/tests/test_storage.py | 10 ++++++++-- 4 files changed, 15 insertions(+), 13 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 8fe5963072..37b12408b2 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1913,7 +1913,7 @@ def __init__(self, container, prefix, account_name=None, account_key=None, self.account_key = account_key if blob_service_kwargs is not None: self.blob_service_kwargs = blob_service_kwargs - else: + else: # pragma: no cover self.blob_service_kwargs = dict() self.client = BlockBlobService(self.account_name, self.account_key, 
**self.blob_service_kwargs) @@ -1961,8 +1961,6 @@ def __setitem__(self, key, value): def __delitem__(self, key): if self.client.exists(self.container_name, '/'.join([self.prefix, key])): self.client.delete_blob(self.container_name, '/'.join([self.prefix, key])) - elif self.__contains__(key): - self.rmdir(key) else: raise KeyError @@ -2011,10 +2009,8 @@ def _strip_prefix_from_path(path, prefix): # normalized things will not have any leading or trailing slashes path_norm = normalize_storage_path(path) prefix_norm = normalize_storage_path(prefix) - if path_norm.startswith(prefix_norm): - return path_norm[(len(prefix_norm)+1):] - else: - return path + + return path_norm[(len(prefix_norm)+1):] def list_abs_directory(self, prefix, strip_prefix=True): """Return a list of all blobs and subdirectories from an abs prefix.""" diff --git a/zarr/tests/test_core.py b/zarr/tests/test_core.py index 8f43dbc079..cfa1d732f6 100644 --- a/zarr/tests/test_core.py +++ b/zarr/tests/test_core.py @@ -1294,8 +1294,8 @@ class TestArrayWithABSStore(TestArray): @staticmethod def absstore(): blob_client = BlockBlobService(is_emulated=True) - if not blob_client.exists('test'): - blob_client.create_container('test') + blob_client.delete_container('test') + blob_client.create_container('test') store = ABSStore(container='test', prefix='zarrtesting/', account_name='foo', account_key='bar', blob_service_kwargs={'is_emulated': True}) store.rmdir() diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 4038c917d7..bba21d767d 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -870,8 +870,8 @@ class TestGroupWithABSStore(TestGroup): @staticmethod def create_store(): blob_client = BlockBlobService(is_emulated=True) - if not blob_client.exists('test'): - blob_client.create_container('test') + blob_client.delete_container('test') + blob_client.create_container('test') store = ABSStore(container='test', prefix='zarrtesting/', account_name='foo', 
account_key='bar', blob_service_kwargs={'is_emulated': True}) store.rmdir() diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 25546fa14e..e546b336c4 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1270,13 +1270,19 @@ class TestABSStore(StoreTests, unittest.TestCase): def create_store(self): blob_client = BlockBlobService(is_emulated=True) - if not blob_client.exists('test'): - blob_client.create_container('test') + blob_client.delete_container('test') + blob_client.create_container('test') store = ABSStore(container='test', prefix='zarrtesting/', account_name='foo', account_key='bar', blob_service_kwargs={'is_emulated': True}) store.rmdir() return store + def test_context_manager(self): + with self.create_store() as store: + store['foo'] = b'bar' + store['baz'] = b'qux' + assert 2 == len(store) + class TestConsolidatedMetadataStore(unittest.TestCase): From 3efe8025052e8ef7aa92cbf6dc6afcb68a66ec59 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Fri, 14 Dec 2018 12:36:33 +0530 Subject: [PATCH 50/83] verbose logs for pip install to see appveyor error --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 34a964c218..3569bf4c45 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -33,7 +33,7 @@ install: - "%CMD_IN_ENV% python -m pip install -U pip setuptools wheel" - "%CMD_IN_ENV% python -m pip install -rrequirements_test.txt" - "%CMD_IN_ENV% python -m pip install -rrequirements_dev_npy.txt" - - "%CMD_IN_ENV% python -m pip install --no-binary=numcodecs -rrequirements_dev.txt" + - "%CMD_IN_ENV% python -m pip install --no-binary=numcodecs -v -rrequirements_dev.txt" - "%CMD_IN_ENV% python setup.py install" - "%CMD_IN_ENV% python -m pip freeze" From 8f85315160efa41fb792d9a15e2ec9249843d150 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Fri, 14 Dec 2018 13:03:15 +0530 Subject: [PATCH 51/83] trying to run locally installed emulator --- appveyor.yml | 7 ++++++- 1 file 
changed, 6 insertions(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 3569bf4c45..dba8b0c895 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -28,7 +28,6 @@ environment: PYTHON_VERSION: "3.7" install: - - "docker run -d -p 10000:10000 -p 10001:10001 -p 10002:10002 microsoft/azure-storage-emulator" - "SET PATH=%PYTHON%;%PYTHON%\\Scripts;%PATH%" - "%CMD_IN_ENV% python -m pip install -U pip setuptools wheel" - "%CMD_IN_ENV% python -m pip install -rrequirements_test.txt" @@ -39,5 +38,11 @@ install: build: off +before_test: + - "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe start" + test_script: - "%CMD_IN_ENV% python -m pytest -v --pyargs zarr" + +after_test: + - "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe stop" From d1bb9ce41aae17ae7d9577785b1ef54b3e41519b Mon Sep 17 00:00:00 2001 From: shikharsg Date: Fri, 14 Dec 2018 13:08:53 +0530 Subject: [PATCH 52/83] single-double quote yaml fix --- appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index dba8b0c895..549967fe8d 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -39,10 +39,10 @@ install: build: off before_test: - - "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe start" + - "C:\\Program Files (x86)\\Microsoft SDKs\\Azure\\Storage Emulator\\AzureStorageEmulator.exe start" test_script: - "%CMD_IN_ENV% python -m pytest -v --pyargs zarr" after_test: - - "C:\Program Files (x86)\Microsoft SDKs\Azure\Storage Emulator\AzureStorageEmulator.exe stop" + - "C:\\Program Files (x86)\\Microsoft SDKs\\Azure\\Storage Emulator\\AzureStorageEmulator.exe stop" From 735c6610ae43b6184c9c2af5091a7bc351dede3c Mon Sep 17 00:00:00 2001 From: shikharsg Date: Fri, 14 Dec 2018 14:31:50 +0530 Subject: [PATCH 53/83] cmd prefix --- appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index 
549967fe8d..a87a844486 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -39,10 +39,10 @@ install: build: off before_test: - - "C:\\Program Files (x86)\\Microsoft SDKs\\Azure\\Storage Emulator\\AzureStorageEmulator.exe start" + - cmd: C:\\Program Files (x86)\\Microsoft SDKs\\Azure\\Storage Emulator\\AzureStorageEmulator.exe start test_script: - "%CMD_IN_ENV% python -m pytest -v --pyargs zarr" after_test: - - "C:\\Program Files (x86)\\Microsoft SDKs\\Azure\\Storage Emulator\\AzureStorageEmulator.exe stop" + - cmd: C:\\Program Files (x86)\\Microsoft SDKs\\Azure\\Storage Emulator\\AzureStorageEmulator.exe stop From 979a438aa7c2fc5bb390e49ea2713e92d7981df7 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Fri, 14 Dec 2018 14:42:26 +0530 Subject: [PATCH 54/83] double quotes around exe file path --- appveyor.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index a87a844486..f2dde1ecd9 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -39,10 +39,10 @@ install: build: off before_test: - - cmd: C:\\Program Files (x86)\\Microsoft SDKs\\Azure\\Storage Emulator\\AzureStorageEmulator.exe start + - cmd: "C:\\Program Files (x86)\\Microsoft SDKs\\Azure\\Storage Emulator\\AzureStorageEmulator.exe" start test_script: - "%CMD_IN_ENV% python -m pytest -v --pyargs zarr" after_test: - - cmd: C:\\Program Files (x86)\\Microsoft SDKs\\Azure\\Storage Emulator\\AzureStorageEmulator.exe stop + - cmd: "C:\\Program Files (x86)\\Microsoft SDKs\\Azure\\Storage Emulator\\AzureStorageEmulator.exe" stop From 5beace1c598d11b6d78108502ab540abd960670c Mon Sep 17 00:00:00 2001 From: shikharsg Date: Fri, 14 Dec 2018 14:51:11 +0530 Subject: [PATCH 55/83] double quotes within single quotes with environment variable substitution --- appveyor.yml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/appveyor.yml b/appveyor.yml index f2dde1ecd9..471f632812 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -11,6 +11,7 @@ environment: # /E:ON and 
/V:ON options are not enabled in the batch script intepreter # See: http://stackoverflow.com/a/13751649/163740 CMD_IN_ENV: "cmd /E:ON /V:ON /C .\\build.cmd" + EMULATOR_LOC: C:\\Program Files (x86)\\Microsoft SDKs\\Azure\\Storage Emulator\\AzureStorageEmulator.exe matrix: @@ -39,10 +40,10 @@ install: build: off before_test: - - cmd: "C:\\Program Files (x86)\\Microsoft SDKs\\Azure\\Storage Emulator\\AzureStorageEmulator.exe" start + - '"%EMULATOR_LOC%" start' test_script: - "%CMD_IN_ENV% python -m pytest -v --pyargs zarr" after_test: - - cmd: "C:\\Program Files (x86)\\Microsoft SDKs\\Azure\\Storage Emulator\\AzureStorageEmulator.exe" stop + - '"%EMULATOR_LOC%" stop' From 68bda4ec8f16f32ae1258aa81ba4d49fb2651873 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Fri, 14 Dec 2018 16:00:21 +0530 Subject: [PATCH 56/83] trying appveyor build with VS2015 image ; --- appveyor.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 471f632812..36c3cc0547 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -2,7 +2,7 @@ branches: only: - master -image: Visual Studio 2017 +image: Visual Studio 2015 environment: From 77db63739f984d05b4e7ab86d86e87f7e69ea4e5 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Fri, 14 Dec 2018 16:13:21 +0530 Subject: [PATCH 57/83] added comment and removed verbosity option for pip install --- appveyor.yml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/appveyor.yml b/appveyor.yml index 36c3cc0547..d04417d671 100644 --- a/appveyor.yml +++ b/appveyor.yml @@ -2,6 +2,8 @@ branches: only: - master +# the VS C++ compiler path, doesn't seem to exist in the PATH environment variable of +# the Visual Studio 2017 build VM, due to which the pyosreplace package fails to build image: Visual Studio 2015 environment: @@ -33,7 +35,7 @@ install: - "%CMD_IN_ENV% python -m pip install -U pip setuptools wheel" - "%CMD_IN_ENV% python -m pip install -rrequirements_test.txt" - "%CMD_IN_ENV% python -m pip install 
-rrequirements_dev_npy.txt" - - "%CMD_IN_ENV% python -m pip install --no-binary=numcodecs -v -rrequirements_dev.txt" + - "%CMD_IN_ENV% python -m pip install --no-binary=numcodecs -rrequirements_dev.txt" - "%CMD_IN_ENV% python setup.py install" - "%CMD_IN_ENV% python -m pip freeze" From bcdc8393b002730b9a8643956f7582fb4bd178b4 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Fri, 14 Dec 2018 16:25:15 +0530 Subject: [PATCH 58/83] list_abs_directory to list only directory blob using delimiter option in azure blob client --- zarr/storage.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 37b12408b2..89548d474b 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1991,7 +1991,7 @@ def __contains__(self, key): def list_abs_directory_blobs(self, prefix): """Return list of all blobs from an abs prefix.""" blobs = list() - for blob in self.client.list_blobs(self.container_name, prefix=prefix): + for blob in self.client.list_blobs(self.container_name, prefix=prefix, delimiter='/'): if '/' not in blob.name[len(prefix):]: blobs.append(blob.name) return blobs @@ -1999,7 +1999,7 @@ def list_abs_directory_blobs(self, prefix): def list_abs_subdirectories(self, prefix): """Return list of all "subdirectories" from an abs prefix.""" dirs = [] - for blob in self.client.list_blobs(self.container_name, prefix=prefix): + for blob in self.client.list_blobs(self.container_name, prefix=prefix, delimiter='/'): if '/' in blob.name[len(prefix):]: dirs.append(blob.name[:blob.name.find('/', len(prefix))]) return dirs @@ -2035,9 +2035,6 @@ def listdir(self, path=None): dir_path = self.dir_path(path) return sorted(self.list_abs_directory(dir_path, strip_prefix=True)) - # def rename(self, src_path, dst_path): - # raise NotImplementedErrror - def rmdir(self, path=None): dir_path = normalize_storage_path(self.full_path(path)) + '/' for blob in self.client.list_blobs(self.container_name, prefix=dir_path): From 
ac286ce47e6a5b712794081fba9511edd6037f97 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Fri, 14 Dec 2018 20:11:29 +0530 Subject: [PATCH 59/83] fixed ABSStore docs --- zarr/storage.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 89548d474b..797c506c53 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1889,8 +1889,7 @@ class ABSStore(MutableMapping): Parameters ---------- container_name : string - The name of the ABS container to use. Currently this must exist in the - storage account. + The name of the ABS container to use. prefix : string Location of the "directory" to use as the root of the storage hierarchy within the container. @@ -1898,6 +1897,9 @@ class ABSStore(MutableMapping): The Azure blob storage account name. account_key : string The Azure blob storage account acess key. + blob_service_kwargs : dictionary + Extra arguments to be passed into the azure blob client, for e.g. when + using the emulator, pass in blob_service_kwargs={'is_emulated': True} Notes ----- From cdaceb7f4adcc49c1f49e7f743814b6d6bd412f7 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Sun, 16 Dec 2018 17:34:28 +0530 Subject: [PATCH 60/83] fixed windows path listdir error --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 797c506c53..ea9303a730 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -2029,7 +2029,7 @@ def dir_path(self, path=None): # prefix is normalized to not have a trailing slash dir_path = self.prefix if store_path: - dir_path = os.path.join(dir_path, store_path) + dir_path = dir_path + '/' + store_path dir_path += '/' return dir_path From b6b3024dae729d7baceb938c78017da24273fa87 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Sun, 16 Dec 2018 22:03:13 +0530 Subject: [PATCH 61/83] ABSStore refactoring --- zarr/storage.py | 66 +++++++++++++++---------------------------------- 1 file changed, 20 insertions(+), 46 deletions(-) 
diff --git a/zarr/storage.py b/zarr/storage.py index ea9303a730..3eb0385e74 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1942,8 +1942,12 @@ def _append_path_to_prefix(path, prefix): return '/'.join([normalize_storage_path(prefix), normalize_storage_path(path)]) - def full_path(self, path=None): - return self._append_path_to_prefix(path, self.prefix) + @staticmethod + def _strip_prefix_from_path(path, prefix): + # normalized things will not have any leading or trailing slashes + path_norm = normalize_storage_path(path) + prefix_norm = normalize_storage_path(prefix) + return path_norm[(len(prefix_norm)+1):] def __getitem__(self, key): blob_name = '/'.join([self.prefix, key]) @@ -1990,55 +1994,24 @@ def __contains__(self, key): else: return False - def list_abs_directory_blobs(self, prefix): - """Return list of all blobs from an abs prefix.""" - blobs = list() - for blob in self.client.list_blobs(self.container_name, prefix=prefix, delimiter='/'): - if '/' not in blob.name[len(prefix):]: - blobs.append(blob.name) - return blobs - - def list_abs_subdirectories(self, prefix): - """Return list of all "subdirectories" from an abs prefix.""" - dirs = [] - for blob in self.client.list_blobs(self.container_name, prefix=prefix, delimiter='/'): - if '/' in blob.name[len(prefix):]: - dirs.append(blob.name[:blob.name.find('/', len(prefix))]) - return dirs - - @staticmethod - def _strip_prefix_from_path(path, prefix): - # normalized things will not have any leading or trailing slashes - path_norm = normalize_storage_path(path) - prefix_norm = normalize_storage_path(prefix) - - return path_norm[(len(prefix_norm)+1):] - - def list_abs_directory(self, prefix, strip_prefix=True): - """Return a list of all blobs and subdirectories from an abs prefix.""" - items = set() - items.update(self.list_abs_directory_blobs(prefix)) - items.update(self.list_abs_subdirectories(prefix)) - items = list(items) - if strip_prefix: - items = [self._strip_prefix_from_path(path, prefix) for 
path in items] - return items - - def dir_path(self, path=None): + def listdir(self, path=None): store_path = normalize_storage_path(path) # prefix is normalized to not have a trailing slash dir_path = self.prefix if store_path: dir_path = dir_path + '/' + store_path dir_path += '/' - return dir_path - - def listdir(self, path=None): - dir_path = self.dir_path(path) - return sorted(self.list_abs_directory(dir_path, strip_prefix=True)) + items = list() + for blob in self.client.list_blobs(self.container_name, prefix=dir_path, delimiter='/'): + if '/' in blob.name[len(dir_path):]: + items.append(self._strip_prefix_from_path( + blob.name[:blob.name.find('/', len(dir_path))], dir_path)) + else: + items.append(self._strip_prefix_from_path(blob.name, dir_path)) + return items def rmdir(self, path=None): - dir_path = normalize_storage_path(self.full_path(path)) + '/' + dir_path = normalize_storage_path(self._append_path_to_prefix(path, self.prefix)) + '/' for blob in self.client.list_blobs(self.container_name, prefix=dir_path): self.client.delete_blob(self.container_name, blob.name) @@ -2052,9 +2025,10 @@ def getsize(self, path=None): fs_path).properties.content_length else: size = 0 - for blob_name in self.list_abs_directory_blobs(fs_path + '/'): - size += self.client.get_blob_properties(self.container_name, - blob_name).properties.content_length + for blob in self.client.list_blobs(self.container_name, prefix=fs_path + '/', + delimiter='/'): + if '/' not in blob.name[len(fs_path + '/'):]: + size += blob.properties.content_length return size def clear(self): From b6eebc8990302b00675905bdfe24190b008bdd73 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Sun, 23 Dec 2018 15:05:40 +0530 Subject: [PATCH 62/83] moved py2 array.array checking to numcodecs ensure bytes --- zarr/storage.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 3eb0385e74..a32c7f6dbf 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1958,8 
+1958,7 @@ def __getitem__(self, key): raise KeyError('Blob %s not found' % blob_name) def __setitem__(self, key, value): - if PY2 and isinstance(value, array.array): - value = value.tostring() + value = ensure_bytes(value) blob_name = '/'.join([self.prefix, key]) buffer = io.BytesIO(value) self.client.create_blob_from_stream(self.container_name, blob_name, buffer) From 3abe79df41b0a1eca99197c8e3e40013c3e98b67 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Tue, 22 Jan 2019 17:21:26 +0000 Subject: [PATCH 63/83] syntax fix --- zarr/tests/test_hierarchy.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/zarr/tests/test_hierarchy.py b/zarr/tests/test_hierarchy.py index 19fa4bfe2c..37baecf1ae 100644 --- a/zarr/tests/test_hierarchy.py +++ b/zarr/tests/test_hierarchy.py @@ -18,8 +18,6 @@ from zarr.storage import (DictStore, DirectoryStore, ZipStore, init_group, init_array, array_meta_key, group_meta_key, atexit_rmtree, - NestedDirectoryStore, DBMStore, LMDBStore, atexit_rmglob, - LRUStoreCache, ABSStore) NestedDirectoryStore, DBMStore, LMDBStore, SQLiteStore, ABSStore, atexit_rmglob, LRUStoreCache) from zarr.core import Array From 3ad6d9c1fded1bfd1d59f205b5ab9cbcc8e4670f Mon Sep 17 00:00:00 2001 From: shikharsg Date: Tue, 22 Jan 2019 21:35:20 +0000 Subject: [PATCH 64/83] flake8 fix --- zarr/storage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 834bc8ab49..08c7274315 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -32,7 +32,6 @@ import glob import warnings import io -import array from azure.storage.blob import BlockBlobService From ab38119fa63acd3ad5294bafeb7f1984652411f9 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Wed, 23 Jan 2019 10:00:50 +0000 Subject: [PATCH 65/83] fixed ABSStore parameter name container --- zarr/storage.py | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 08c7274315..a0097f1c35 100644 --- a/zarr/storage.py 
+++ b/zarr/storage.py @@ -1889,7 +1889,7 @@ class ABSStore(MutableMapping): Parameters ---------- - container_name : string + container : string The name of the ABS container to use. prefix : string Location of the "directory" to use as the root of the storage hierarchy @@ -1910,7 +1910,7 @@ class ABSStore(MutableMapping): def __init__(self, container, prefix, account_name=None, account_key=None, blob_service_kwargs=None): - self.container_name = container + self.container = container self.prefix = normalize_storage_path(prefix) self.account_name = account_name self.account_key = account_key @@ -1953,7 +1953,7 @@ def _strip_prefix_from_path(path, prefix): def __getitem__(self, key): blob_name = '/'.join([self.prefix, key]) try: - blob = self.client.get_blob_to_bytes(self.container_name, blob_name) + blob = self.client.get_blob_to_bytes(self.container, blob_name) return blob.content except AzureMissingResourceHttpError: raise KeyError('Blob %s not found' % blob_name) @@ -1962,18 +1962,18 @@ def __setitem__(self, key, value): value = ensure_bytes(value) blob_name = '/'.join([self.prefix, key]) buffer = io.BytesIO(value) - self.client.create_blob_from_stream(self.container_name, blob_name, buffer) + self.client.create_blob_from_stream(self.container, blob_name, buffer) def __delitem__(self, key): - if self.client.exists(self.container_name, '/'.join([self.prefix, key])): - self.client.delete_blob(self.container_name, '/'.join([self.prefix, key])) + if self.client.exists(self.container, '/'.join([self.prefix, key])): + self.client.delete_blob(self.container, '/'.join([self.prefix, key])) else: raise KeyError def __eq__(self, other): return ( isinstance(other, ABSStore) and - self.container_name == other.container_name and + self.container == other.container and self.prefix == other.prefix ) @@ -1981,7 +1981,7 @@ def keys(self): return list(self.__iter__()) def __iter__(self): - for blob in self.client.list_blobs(self.container_name, self.prefix + '/'): + for blob in 
self.client.list_blobs(self.container, self.prefix + '/'): yield self._strip_prefix_from_path(blob.name, self.prefix) def __len__(self): @@ -1989,7 +1989,7 @@ def __len__(self): def __contains__(self, key): blob_name = '/'.join([self.prefix, key]) - if self.client.exists(self.container_name, blob_name): + if self.client.exists(self.container, blob_name): return True else: return False @@ -2002,7 +2002,7 @@ def listdir(self, path=None): dir_path = dir_path + '/' + store_path dir_path += '/' items = list() - for blob in self.client.list_blobs(self.container_name, prefix=dir_path, delimiter='/'): + for blob in self.client.list_blobs(self.container, prefix=dir_path, delimiter='/'): if '/' in blob.name[len(dir_path):]: items.append(self._strip_prefix_from_path( blob.name[:blob.name.find('/', len(dir_path))], dir_path)) @@ -2012,20 +2012,20 @@ def listdir(self, path=None): def rmdir(self, path=None): dir_path = normalize_storage_path(self._append_path_to_prefix(path, self.prefix)) + '/' - for blob in self.client.list_blobs(self.container_name, prefix=dir_path): - self.client.delete_blob(self.container_name, blob.name) + for blob in self.client.list_blobs(self.container, prefix=dir_path): + self.client.delete_blob(self.container, blob.name) def getsize(self, path=None): store_path = normalize_storage_path(path) fs_path = self.prefix if store_path: fs_path = self._append_path_to_prefix(store_path, self.prefix) - if self.client.exists(self.container_name, fs_path): - return self.client.get_blob_properties(self.container_name, + if self.client.exists(self.container, fs_path): + return self.client.get_blob_properties(self.container, fs_path).properties.content_length else: size = 0 - for blob in self.client.list_blobs(self.container_name, prefix=fs_path + '/', + for blob in self.client.list_blobs(self.container, prefix=fs_path + '/', delimiter='/'): if '/' not in blob.name[len(fs_path + '/'):]: size += blob.properties.content_length From 
05aab41a7803358df1cbae07a14de7e6a3dc5884 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Wed, 23 Jan 2019 10:56:34 +0000 Subject: [PATCH 66/83] removed context manager from ABSStore --- zarr/storage.py | 6 ------ zarr/tests/test_storage.py | 6 ------ 2 files changed, 12 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index a0097f1c35..699844ed4b 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1932,12 +1932,6 @@ def __setstate__(self, state): self.client = BlockBlobService(self.account_name, self.account_key, **self.blob_service_kwargs) - def __enter__(self): - return self - - def __exit__(self, *args): - pass - @staticmethod def _append_path_to_prefix(path, prefix): return '/'.join([normalize_storage_path(prefix), diff --git a/zarr/tests/test_storage.py b/zarr/tests/test_storage.py index 7ef8aa8151..74115549b3 100644 --- a/zarr/tests/test_storage.py +++ b/zarr/tests/test_storage.py @@ -1336,12 +1336,6 @@ def create_store(self): store.rmdir() return store - def test_context_manager(self): - with self.create_store() as store: - store['foo'] = b'bar' - store['baz'] = b'qux' - assert 2 == len(store) - class TestConsolidatedMetadataStore(unittest.TestCase): From 90b5e3a4d9bfab6c6e74da2c71745187d8b4a6ef Mon Sep 17 00:00:00 2001 From: shikharsg Date: Wed, 23 Jan 2019 11:59:24 +0000 Subject: [PATCH 67/83] ABSStore.__delitem__ now takes only 1 azure storage API call --- zarr/storage.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 699844ed4b..b5cb0bee4f 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1959,10 +1959,10 @@ def __setitem__(self, key, value): self.client.create_blob_from_stream(self.container, blob_name, buffer) def __delitem__(self, key): - if self.client.exists(self.container, '/'.join([self.prefix, key])): + try: self.client.delete_blob(self.container, '/'.join([self.prefix, key])) - else: - raise KeyError + except AzureMissingResourceHttpError: + raise KeyError('Blob 
%s not found' % key) def __eq__(self, other): return ( From 4636d5d650186162042b3f2219e2759b0ddf82eb Mon Sep 17 00:00:00 2001 From: shikharsg Date: Wed, 23 Jan 2019 15:42:12 +0000 Subject: [PATCH 68/83] docs --- docs/api/storage.rst | 2 ++ docs/release.rst | 3 +++ zarr/storage.py | 2 +- 3 files changed, 6 insertions(+), 1 deletion(-) diff --git a/docs/api/storage.rst b/docs/api/storage.rst index 24498b0d79..a53a7ce7ba 100644 --- a/docs/api/storage.rst +++ b/docs/api/storage.rst @@ -31,6 +31,8 @@ Storage (``zarr.storage``) .. automethod:: invalidate_values .. automethod:: invalidate_keys +.. autoclass:: ABSStore + .. autoclass:: ConsolidatedMetadataStore .. autofunction:: init_array diff --git a/docs/release.rst b/docs/release.rst index f0d5a559ab..fceff051d5 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -9,6 +9,9 @@ Release notes Enhancements ~~~~~~~~~~~~ +* New storage backend, backed by Azure Blob Storage, :class:`zarr.storage.ABSStore`. + Chunks are stored as block blobs. + * Add "consolidated" metadata as an experimental feature: use :func:`zarr.convenience.consolidate_metadata` to copy all metadata from the various metadata keys within a dataset hierarchy under a single key, and diff --git a/zarr/storage.py b/zarr/storage.py index b5cb0bee4f..54a042e055 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1897,7 +1897,7 @@ class ABSStore(MutableMapping): account_name : string The Azure blob storage account name. account_key : string - The Azure blob storage account acess key. + The Azure blob storage account access key. blob_service_kwargs : dictionary Extra arguments to be passed into the azure blob client, for e.g. 
when using the emulator, pass in blob_service_kwargs={'is_emulated': True} From 8c3863fb668f0536a312de0268a59c6b05828930 Mon Sep 17 00:00:00 2001 From: Alistair Miles Date: Wed, 23 Jan 2019 15:43:51 +0000 Subject: [PATCH 69/83] Update zarr/storage.py Co-Authored-By: shikharsg --- zarr/storage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 54a042e055..be6657d798 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1904,7 +1904,7 @@ class ABSStore(MutableMapping): Notes ----- - In order to use this store, you must install the Azure Blob Storage + In order to use this store, you must install the Microsoft Azure Storage SDK for Python https://github.com/Azure/azure-storage-python/tree/master/azure-storage-blob_ version >= 1.3.0. """ From b238f0b385c0dee5927b974d70ff421498bd8e39 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Fri, 1 Feb 2019 18:50:09 +0000 Subject: [PATCH 70/83] removed global import of azure storage library --- zarr/storage.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index be6657d798..47926acfa3 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -34,10 +34,6 @@ import io -from azure.storage.blob import BlockBlobService -from azure.common import AzureMissingResourceHttpError - - from zarr.util import (normalize_shape, normalize_chunks, normalize_order, normalize_storage_path, buffer_size, normalize_fill_value, nolock, normalize_dtype) @@ -1910,6 +1906,7 @@ class ABSStore(MutableMapping): def __init__(self, container, prefix, account_name=None, account_key=None, blob_service_kwargs=None): + from azure.storage.blob import BlockBlobService self.container = container self.prefix = normalize_storage_path(prefix) self.account_name = account_name @@ -1928,6 +1925,7 @@ def __getstate__(self): return state def __setstate__(self, state): + from azure.storage.blob import BlockBlobService self.__dict__.update(state) self.client = 
BlockBlobService(self.account_name, self.account_key, **self.blob_service_kwargs) @@ -1945,6 +1943,7 @@ def _strip_prefix_from_path(path, prefix): return path_norm[(len(prefix_norm)+1):] def __getitem__(self, key): + from azure.common import AzureMissingResourceHttpError blob_name = '/'.join([self.prefix, key]) try: blob = self.client.get_blob_to_bytes(self.container, blob_name) @@ -1959,6 +1958,7 @@ def __setitem__(self, key, value): self.client.create_blob_from_stream(self.container, blob_name, buffer) def __delitem__(self, key): + from azure.common import AzureMissingResourceHttpError try: self.client.delete_blob(self.container, '/'.join([self.prefix, key])) except AzureMissingResourceHttpError: From 9770876272f7930674d5572014d5d7436d0e046f Mon Sep 17 00:00:00 2001 From: shikharsg Date: Sat, 2 Feb 2019 13:06:53 +0000 Subject: [PATCH 71/83] added ABSStore to zarr root import --- zarr/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/zarr/__init__.py b/zarr/__init__.py index c9046f6bff..9eb46c3f43 100644 --- a/zarr/__init__.py +++ b/zarr/__init__.py @@ -8,7 +8,7 @@ ones_like, full_like, open_array, open_like, create) from zarr.storage import (DictStore, DirectoryStore, ZipStore, TempStore, NestedDirectoryStore, DBMStore, LMDBStore, SQLiteStore, - LRUStoreCache) + LRUStoreCache, ABSStore) from zarr.hierarchy import group, open_group, Group from zarr.sync import ThreadSynchronizer, ProcessSynchronizer from zarr.codecs import * From 3ed4814ab42b5a4ee7e900dc650ef1895ca25fe3 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Sat, 2 Feb 2019 13:31:55 +0000 Subject: [PATCH 72/83] added ABSStore to tutorial.rst --- docs/tutorial.rst | 13 +++++++++++++ zarr/storage.py | 1 - 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 0fbefc3e2e..008b8aa4ba 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -780,6 +780,19 @@ Here is an example using S3Map to read an array created previously:: >>> 
z[:].tostring() b'Hello from the cloud!' +Zarr now also has a builtin storage backend for Azure Blob Storage. +The class is :class:`zarr.storage.ABSStore` (requires + `azure-storage-blob `_ +to be installed):: + + >>> # when using a storage account, provide account_name and account_key arguments + >>> # to ABSStore + >>> store = zarr.ABSStore(container='test', prefix='zarr-testing', + blob_service_kwargs={'is_emulated': True}) + >>> root = zarr.group(store=store, overwrite=True) + >>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4') + >>> z[:] = 42 + Note that retrieving data from a remote service via the network can be significantly slower than retrieving data from a local file system, and will depend on network latency and bandwidth between the client and server systems. If you are experiencing poor diff --git a/zarr/storage.py b/zarr/storage.py index 47926acfa3..29fe3e7fcf 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1901,7 +1901,6 @@ class ABSStore(MutableMapping): Notes ----- In order to use this store, you must install the Microsoft Azure Storage SDK for Python - https://github.com/Azure/azure-storage-python/tree/master/azure-storage-blob_ version >= 1.3.0. 
""" def __init__(self, container, prefix, account_name=None, account_key=None, From 7b08aba1ff6106e24e68d3e9988811c41bf9f5b2 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Sat, 2 Feb 2019 13:42:34 +0000 Subject: [PATCH 73/83] fixed docs --- docs/tutorial.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 008b8aa4ba..b5c5955782 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -788,7 +788,7 @@ to be installed):: >>> # when using a storage account, provide account_name and account_key arguments >>> # to ABSStore >>> store = zarr.ABSStore(container='test', prefix='zarr-testing', - blob_service_kwargs={'is_emulated': True}) + >>> blob_service_kwargs={'is_emulated': True}) >>> root = zarr.group(store=store, overwrite=True) >>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4') >>> z[:] = 42 From 6fc869de8219753b83150a32446250008fd6da04 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Sat, 2 Feb 2019 13:54:45 +0000 Subject: [PATCH 74/83] trying to fix tutorial.rst --- docs/tutorial.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index b5c5955782..98e351907e 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -787,8 +787,7 @@ to be installed):: >>> # when using a storage account, provide account_name and account_key arguments >>> # to ABSStore - >>> store = zarr.ABSStore(container='test', prefix='zarr-testing', - >>> blob_service_kwargs={'is_emulated': True}) + >>> store = zarr.ABSStore(container='test', prefix='zarr-testing', blob_service_kwargs={'is_emulated': True}) >>> root = zarr.group(store=store, overwrite=True) >>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4') >>> z[:] = 42 From e9a402eb58bb988656e8f3937bfe7d82eeeb10e6 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Sat, 2 Feb 2019 14:04:05 +0000 Subject: [PATCH 75/83] flake8 fix --- zarr/storage.py | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 29fe3e7fcf..7f52a5223e 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1450,7 +1450,7 @@ def flush(self): if self.flag[0] != 'r': with self.write_mutex: if hasattr(self.db, 'sync'): - self.db.sync() + self.db.sync() else: # fall-back, close and re-open, needed for ndbm flag = self.flag From 8aa3a013cceeea44e64c3e0e248014a9350490e3 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Sat, 2 Feb 2019 14:09:57 +0000 Subject: [PATCH 76/83] fixing tutorial.rst --- docs/tutorial.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 98e351907e..09f6df8386 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -785,8 +785,7 @@ The class is :class:`zarr.storage.ABSStore` (requires `azure-storage-blob `_ to be installed):: - >>> # when using a storage account, provide account_name and account_key arguments - >>> # to ABSStore + >>> # when using a storage account, provide account_name and account_key arguments to ABSStore >>> store = zarr.ABSStore(container='test', prefix='zarr-testing', blob_service_kwargs={'is_emulated': True}) >>> root = zarr.group(store=store, overwrite=True) >>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4') From a9940a248af2602d27737c54e4a2ca67dfd199a7 Mon Sep 17 00:00:00 2001 From: shikharsg Date: Mon, 4 Feb 2019 14:29:29 +0000 Subject: [PATCH 77/83] fixed ABSStore in tutorial --- docs/tutorial.rst | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/docs/tutorial.rst b/docs/tutorial.rst index 09f6df8386..502cfa2dd1 100644 --- a/docs/tutorial.rst +++ b/docs/tutorial.rst @@ -785,12 +785,16 @@ The class is :class:`zarr.storage.ABSStore` (requires `azure-storage-blob `_ to be installed):: - >>> # when using a storage account, provide account_name and account_key arguments to ABSStore >>> store = zarr.ABSStore(container='test', prefix='zarr-testing', 
blob_service_kwargs={'is_emulated': True}) >>> root = zarr.group(store=store, overwrite=True) >>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4') >>> z[:] = 42 +When using an actual storage account, provide ``account_name`` and +``account_key`` arguments to :class:`zarr.storage.ABSStore`; the +above client is just testing against the emulator. Please also note +that this is an experimental feature. + Note that retrieving data from a remote service via the network can be significantly slower than retrieving data from a local file system, and will depend on network latency and bandwidth between the client and server systems. If you are experiencing poor From 4d5b6d13374c0c05395bece367b122f4f77862bc Mon Sep 17 00:00:00 2001 From: shikharsg Date: Mon, 4 Feb 2019 14:31:28 +0000 Subject: [PATCH 78/83] docs --- docs/release.rst | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index fceff051d5..dd6f02573b 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -9,8 +9,9 @@ Release notes Enhancements ~~~~~~~~~~~~ -* New storage backend, backed by Azure Blob Storage, :class:`zarr.storage.ABSStore`. - Chunks are stored as block blobs. +* New storage backend, backed by Azure Blob Storage, + class :class:`zarr.storage.ABSStore`. + All data is stored as Block blobs. 
* Add "consolidated" metadata as an experimental feature: use :func:`zarr.convenience.consolidate_metadata` to copy all metadata from the various From 3a4f4d9f29e8d6f6cf86a8f07814282a60ba457f Mon Sep 17 00:00:00 2001 From: shikharsg Date: Tue, 12 Feb 2019 18:56:39 +0000 Subject: [PATCH 79/83] small change to docs --- docs/release.rst | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/docs/release.rst b/docs/release.rst index dd6f02573b..7017f21fb6 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -9,8 +9,7 @@ Release notes Enhancements ~~~~~~~~~~~~ -* New storage backend, backed by Azure Blob Storage, - class :class:`zarr.storage.ABSStore`. +* New storage backend, backed by Azure Blob Storage, class :class:`zarr.storage.ABSStore`. All data is stored as Block blobs. * Add "consolidated" metadata as an experimental feature: use From b51fb789c9d233806912f592f786a5253469f609 Mon Sep 17 00:00:00 2001 From: shikhar Date: Thu, 21 Feb 2019 14:24:16 +0000 Subject: [PATCH 80/83] cleaned create blob code --- zarr/storage.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index d8f8433e15..96c1abb0e1 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1953,8 +1953,7 @@ def __getitem__(self, key): def __setitem__(self, key, value): value = ensure_bytes(value) blob_name = '/'.join([self.prefix, key]) - buffer = io.BytesIO(value) - self.client.create_blob_from_stream(self.container, blob_name, buffer) + self.client.create_blob_from_bytes(self.container, blob_name, value) def __delitem__(self, key): from azure.common import AzureMissingResourceHttpError From 4af5ebe7de017f99d42e5b95a50eaa98b353432d Mon Sep 17 00:00:00 2001 From: shikhar Date: Thu, 21 Feb 2019 14:33:40 +0000 Subject: [PATCH 81/83] flake8 fix --- zarr/storage.py | 1 - 1 file changed, 1 deletion(-) diff --git a/zarr/storage.py b/zarr/storage.py index 96c1abb0e1..9c768a4395 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -31,7 +31,6 
@@ from threading import Lock, RLock import glob import warnings -import io from zarr.util import (normalize_shape, normalize_chunks, normalize_order, From 13a7dc4d13e5e824cc8eea2921cd08dd827e42a2 Mon Sep 17 00:00:00 2001 From: Alistair Miles Date: Fri, 1 Mar 2019 00:44:34 +0000 Subject: [PATCH 82/83] Update docs/release.rst Co-Authored-By: shikharsg --- docs/release.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/release.rst b/docs/release.rst index 3b6a04a871..23cc70c267 100644 --- a/docs/release.rst +++ b/docs/release.rst @@ -10,7 +10,7 @@ Enhancements ~~~~~~~~~~~~ * New storage backend, backed by Azure Blob Storage, class :class:`zarr.storage.ABSStore`. - All data is stored as Block blobs. + All data is stored as block blobs. By :user:`Shikhar Goenka ` and :user:`Tim Crone `, :issue:`345`. * Add "consolidated" metadata as an experimental feature: use :func:`zarr.convenience.consolidate_metadata` to copy all metadata from the various From a062e1875b9a658f748184459721e009402e8bc0 Mon Sep 17 00:00:00 2001 From: Alistair Miles Date: Fri, 1 Mar 2019 00:46:49 +0000 Subject: [PATCH 83/83] Apply suggestions from code review Co-Authored-By: shikharsg --- zarr/storage.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/zarr/storage.py b/zarr/storage.py index 9c768a4395..e6fd98705a 100644 --- a/zarr/storage.py +++ b/zarr/storage.py @@ -1895,11 +1895,11 @@ class ABSStore(MutableMapping): The Azure blob storage account access key. blob_service_kwargs : dictionary Extra arguments to be passed into the azure blob client, for e.g. when - using the emulator, pass in blob_service_kwargs={'is_emulated': True} + using the emulator, pass in blob_service_kwargs={'is_emulated': True}. Notes ----- - In order to use this store, you must install the Microsoft Azure Storage SDK for Python + In order to use this store, you must install the Microsoft Azure Storage SDK for Python. 
""" def __init__(self, container, prefix, account_name=None, account_key=None,