Skip to content

Update ABSStore for compatibility with newer azure.storage.blob. #759

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 26 commits into from
Jun 1, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion docs/contributing.rst
Original file line number Diff line number Diff line change
Expand Up @@ -164,7 +164,11 @@ optional dependencies to be installed), run::
Note that some tests also require storage services to be running
locally. To run the Azure Blob Service storage tests, run an Azure
storage emulator (e.g., azurite) and set the environment variable
``ZARR_TEST_ABS=1``. To run the Mongo DB storage tests, run a Mongo
``ZARR_TEST_ABS=1``. If you're using Docker to run azurite, start the service with::

docker run --rm -p 10000:10000 mcr.microsoft.com/azure-storage/azurite azurite-blob --loose --blobHost 0.0.0.0

To run the Mongo DB storage tests, run a Mongo
server locally and set the environment variable ``ZARR_TEST_MONGO=1``.
To run the Redis storage tests, run a Redis server locally on port
6379 and set the environment variable ``ZARR_TEST_REDIS=1``.
Expand Down
3 changes: 3 additions & 0 deletions docs/release.rst
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,9 @@ Bug fixes

* FSStore: default to normalize_keys=False
By :user:`Josh Moore <joshmoore>`; :issue:`755`.
* ABSStore: compatibility with ``azure.storage.python>=12``
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you for adding this!

By :user:`Tom Augspurger <tomaugspurger>`; :issue:`618`


.. _release_2.8.2:

Expand Down
4 changes: 3 additions & 1 deletion docs/tutorial.rst
Original file line number Diff line number Diff line change
Expand Up @@ -810,7 +810,9 @@ The class is :class:`zarr.storage.ABSStore` (requires
`azure-storage-blob <https://docs.microsoft.com/en-us/azure/storage/blobs/storage-quickstart-blobs-python>`_
to be installed)::

>>> store = zarr.ABSStore(container='test', prefix='zarr-testing', blob_service_kwargs={'is_emulated': True}) # doctest: +SKIP
>>> import azure.storage.blob
>>> container_client = azure.storage.blob.ContainerClient(...) # doctest: +SKIP
>>> store = zarr.ABSStore(client=container_client, prefix='zarr-testing') # doctest: +SKIP
>>> root = zarr.group(store=store, overwrite=True) # doctest: +SKIP
>>> z = root.zeros('foo/bar', shape=(1000, 1000), chunks=(100, 100), dtype='i4') # doctest: +SKIP
>>> z[:] = 42 # doctest: +SKIP
Expand Down
2 changes: 1 addition & 1 deletion requirements_dev_optional.txt
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ ipytree==0.2.1
# optional library requirements for services
# don't let pyup change pinning for azure-storage-blob, need to pin to older
# version to get compatibility with azure storage emulator on appveyor (FIXME)
azure-storage-blob==2.0.1 # pyup: ignore
azure-storage-blob==12.5.0 # pyup: ignore
redis==3.5.3
pymongo==3.11.4
# optional test requirements
Expand Down
151 changes: 87 additions & 64 deletions zarr/storage.py
Original file line number Diff line number Diff line change
Expand Up @@ -2211,50 +2211,84 @@ class ABSStore(MutableMapping):
----------
container : string
The name of the ABS container to use.
.. deprecated::
Use ``client`` instead.
prefix : string
Location of the "directory" to use as the root of the storage hierarchy
within the container.
account_name : string
The Azure blob storage account name.
.. deprecated:: 2.8.3
Use ``client`` instead.
account_key : string
The Azure blob storage account access key.
.. deprecated:: 2.8.3
Use ``client`` instead.
blob_service_kwargs : dictionary
Extra arguments to be passed into the azure blob client, for e.g. when
using the emulator, pass in blob_service_kwargs={'is_emulated': True}.
.. deprecated:: 2.8.3
Use ``client`` instead.
dimension_separator : {'.', '/'}, optional
Separator placed between the dimensions of a chunk.
client : azure.storage.blob.ContainerClient, optional
And ``azure.storage.blob.ContainerClient`` to connect with. See
`here <https://docs.microsoft.com/en-us/python/api/azure-storage-blob/azure.storage.blob.containerclient?view=azure-python>`_ # noqa
for more.

.. versionadded:: 2.8.3

Notes
-----
In order to use this store, you must install the Microsoft Azure Storage SDK for Python.
In order to use this store, you must install the Microsoft Azure Storage SDK for Python,
``azure-storage-blob>=12.5.0``.
"""

def __init__(self, container, prefix='', account_name=None, account_key=None,
blob_service_kwargs=None, dimension_separator=None):
from azure.storage.blob import BlockBlobService
self.container = container
self.prefix = normalize_storage_path(prefix)
self.account_name = account_name
self.account_key = account_key
def __init__(self, container=None, prefix='', account_name=None, account_key=None,
blob_service_kwargs=None, dimension_separator=None,
client=None,
):
self._dimension_separator = dimension_separator
if blob_service_kwargs is not None:
self.blob_service_kwargs = blob_service_kwargs
else: # pragma: no cover
self.blob_service_kwargs = dict()
self.client = BlockBlobService(self.account_name, self.account_key,
**self.blob_service_kwargs)

# needed for pickling
def __getstate__(self):
state = self.__dict__.copy()
del state['client']
return state
self.prefix = normalize_storage_path(prefix)
if client is None:
# deprecated option, try to construct the client for them
msg = (
"Providing 'container', 'account_name', 'account_key', and 'blob_service_kwargs'"
"is deprecated. Provide and instance of 'azure.storage.blob.ContainerClient' "
"'client' instead."
)
warnings.warn(msg, FutureWarning, stacklevel=2)
from azure.storage.blob import ContainerClient
blob_service_kwargs = blob_service_kwargs or {}
client = ContainerClient(
"https://{}.blob.core.windows.net/".format(account_name), container,
credential=account_key, **blob_service_kwargs
)

def __setstate__(self, state):
from azure.storage.blob import BlockBlobService
self.__dict__.update(state)
self.client = BlockBlobService(self.account_name, self.account_key,
**self.blob_service_kwargs)
self.client = client
self._container = container
self._account_name = account_name
self._account_key = account_key

def _warn_deprecated(self, property_):
msg = ("The {} property is deprecated and will be removed in a future "
"version. Get the property from 'ABSStore.client' instead.")
warnings.warn(msg.format(property_), FutureWarning, stacklevel=3)

@property
def container(self):
self._warn_deprecated("container")
return self._container

@property
def account_name(self):
self._warn_deprecated("account_name")
return self._account_name

@property
def account_key(self):
self._warn_deprecated("account_key")
return self._account_key

def _append_path_to_prefix(self, path):
if self.prefix == '':
Expand All @@ -2273,30 +2307,29 @@ def _strip_prefix_from_path(path, prefix):
return path_norm

def __getitem__(self, key):
from azure.common import AzureMissingResourceHttpError
from azure.core.exceptions import ResourceNotFoundError
blob_name = self._append_path_to_prefix(key)
try:
blob = self.client.get_blob_to_bytes(self.container, blob_name)
return blob.content
except AzureMissingResourceHttpError:
return self.client.download_blob(blob_name).readall()
except ResourceNotFoundError:
raise KeyError('Blob %s not found' % blob_name)

def __setitem__(self, key, value):
value = ensure_bytes(value)
blob_name = self._append_path_to_prefix(key)
self.client.create_blob_from_bytes(self.container, blob_name, value)
self.client.upload_blob(blob_name, value, overwrite=True)

def __delitem__(self, key):
from azure.common import AzureMissingResourceHttpError
from azure.core.exceptions import ResourceNotFoundError
try:
self.client.delete_blob(self.container, self._append_path_to_prefix(key))
except AzureMissingResourceHttpError:
self.client.delete_blob(self._append_path_to_prefix(key))
except ResourceNotFoundError:
raise KeyError('Blob %s not found' % key)

def __eq__(self, other):
return (
isinstance(other, ABSStore) and
self.container == other.container and
self.client == other.client and
self.prefix == other.prefix
)

Expand All @@ -2308,63 +2341,53 @@ def __iter__(self):
list_blobs_prefix = self.prefix + '/'
else:
list_blobs_prefix = None
for blob in self.client.list_blobs(self.container, list_blobs_prefix):
for blob in self.client.list_blobs(list_blobs_prefix):
yield self._strip_prefix_from_path(blob.name, self.prefix)

def __len__(self):
return len(self.keys())

def __contains__(self, key):
blob_name = self._append_path_to_prefix(key)
assert len(blob_name) >= 1
if self.client.exists(self.container, blob_name):
return True
else:
return False
return self.client.get_blob_client(blob_name).exists()

def listdir(self, path=None):
from azure.storage.blob import Blob
dir_path = normalize_storage_path(self._append_path_to_prefix(path))
if dir_path:
dir_path += '/'
items = list()
for blob in self.client.list_blobs(self.container, prefix=dir_path, delimiter='/'):
if type(blob) == Blob:
items.append(self._strip_prefix_from_path(blob.name, dir_path))
else:
items.append(self._strip_prefix_from_path(
blob.name[:blob.name.find('/', len(dir_path))], dir_path))
items = [
self._strip_prefix_from_path(blob.name, dir_path)
for blob in self.client.walk_blobs(name_starts_with=dir_path, delimiter='/')
]
return items

def rmdir(self, path=None):
dir_path = normalize_storage_path(self._append_path_to_prefix(path))
if dir_path:
dir_path += '/'
for blob in self.client.list_blobs(self.container, prefix=dir_path):
assert len(blob.name) >= 1
self.client.delete_blob(self.container, blob.name)
for blob in self.client.list_blobs(name_starts_with=dir_path):
self.client.delete_blob(blob)

def getsize(self, path=None):
from azure.storage.blob import Blob
store_path = normalize_storage_path(path)
fs_path = self.prefix
if store_path:
fs_path = self._append_path_to_prefix(store_path)
fs_path = self._append_path_to_prefix(store_path)
if fs_path:
blob_client = self.client.get_blob_client(fs_path)
else:
blob_client = None

if fs_path != "" and self.client.exists(self.container, fs_path):
return self.client.get_blob_properties(
self.container, fs_path
).properties.content_length
if blob_client and blob_client.exists():
return blob_client.get_blob_properties().size
else:
size = 0
if fs_path == '':
fs_path = None
else:
elif not fs_path.endswith('/'):
fs_path += '/'
for blob in self.client.list_blobs(self.container, prefix=fs_path,
delimiter='/'):
if type(blob) == Blob:
size += blob.properties.content_length
for blob in self.client.walk_blobs(name_starts_with=fs_path, delimiter='/'):
blob_client = self.client.get_blob_client(blob)
if blob_client.exists():
size += blob_client.get_blob_properties().size
return size

def clear(self):
Expand Down
16 changes: 9 additions & 7 deletions zarr/tests/test_core.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import atexit
import os
import sys
import pickle
import shutil
import unittest
Expand Down Expand Up @@ -33,7 +34,7 @@
init_group,
)
from zarr.util import buffer_size
from zarr.tests.util import skip_test_env_var, have_fsspec
from zarr.tests.util import abs_container, skip_test_env_var, have_fsspec

# noinspection PyMethodMayBeStatic

Expand Down Expand Up @@ -1627,12 +1628,8 @@ class TestArrayWithABSStore(TestArray):

@staticmethod
def absstore():
asb = pytest.importorskip("azure.storage.blob")
blob_client = asb.BlockBlobService(is_emulated=True)
blob_client.delete_container('test')
blob_client.create_container('test')
store = ABSStore(container='test', account_name='foo', account_key='bar',
blob_service_kwargs={'is_emulated': True})
client = abs_container()
store = ABSStore(client=client)
store.rmdir()
return store

Expand All @@ -1649,6 +1646,11 @@ def create_array(self, read_only=False, **kwargs):
def test_nbytes_stored(self):
return super().test_nbytes_stored()

@pytest.mark.skipif(sys.version_info < (3, 7), reason="attr not serializable in py36")
def test_pickle(self):
# internal attribute on ContainerClient isn't serializable for py36 and earlier
super().test_pickle()


class TestArrayWithNestedDirectoryStore(TestArrayWithDirectoryStore):

Expand Down
16 changes: 9 additions & 7 deletions zarr/tests/test_hierarchy.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import atexit
import os
import sys
import pickle
import shutil
import tempfile
Expand Down Expand Up @@ -27,7 +28,7 @@
array_meta_key, atexit_rmglob, atexit_rmtree,
group_meta_key, init_array, init_group)
from zarr.util import InfoReporter
from zarr.tests.util import skip_test_env_var, have_fsspec
from zarr.tests.util import skip_test_env_var, have_fsspec, abs_container


# noinspection PyStatementEffect
Expand Down Expand Up @@ -951,15 +952,16 @@ class TestGroupWithABSStore(TestGroup):

@staticmethod
def create_store():
asb = pytest.importorskip("azure.storage.blob")
blob_client = asb.BlockBlobService(is_emulated=True)
blob_client.delete_container('test')
blob_client.create_container('test')
store = ABSStore(container='test', account_name='foo', account_key='bar',
blob_service_kwargs={'is_emulated': True})
container_client = abs_container()
store = ABSStore(client=container_client)
store.rmdir()
return store, None

@pytest.mark.skipif(sys.version_info < (3, 7), reason="attr not serializable in py36")
def test_pickle(self):
# internal attribute on ContainerClient isn't serializable for py36 and earlier
super().test_pickle()


class TestGroupWithNestedDirectoryStore(TestGroup):

Expand Down
Loading