From 0b56c4764c3c6223e26ee6249b24b98358aeb664 Mon Sep 17 00:00:00 2001 From: John Kirkham Date: Mon, 4 Dec 2017 13:11:21 -0500 Subject: [PATCH] Add a `digest` method for Zarr Arrays This implements an equivalent method to `hashlib`'s `digest` method for Zarr Arrays. Makes sure to document it as well. --- docs/api/core.rst | 1 + zarr/core.py | 45 +++++++++++++++++++++++++++++++++++++-------- 2 files changed, 38 insertions(+), 8 deletions(-) diff --git a/docs/api/core.rst b/docs/api/core.rst index cf8c4b0f8b..5789fb996b 100644 --- a/docs/api/core.rst +++ b/docs/api/core.rst @@ -14,6 +14,7 @@ The Array class (``zarr.core``) .. automethod:: set_coordinate_selection .. automethod:: get_orthogonal_selection .. automethod:: set_orthogonal_selection + .. automethod:: digest .. automethod:: hexdigest .. automethod:: resize .. automethod:: append diff --git a/zarr/core.py b/zarr/core.py index 4d33049343..656d37dad0 100644 --- a/zarr/core.py +++ b/zarr/core.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- from __future__ import absolute_import, print_function, division +import binascii import operator import itertools import hashlib @@ -88,6 +89,7 @@ class Array(object): set_mask_selection get_coordinate_selection set_coordinate_selection + digest hexdigest resize append @@ -1837,22 +1839,23 @@ def bytestr(n): return items - def hexdigest(self, hashname="sha1"): + def digest(self, hashname="sha1"): """ Compute a checksum for the data. Default uses sha1 for speed. Examples -------- + >>> import binascii >>> import zarr >>> z = zarr.empty(shape=(10000, 10000), chunks=(1000, 1000)) - >>> z.hexdigest() - '041f90bc7a571452af4f850a8ca2c6cddfa8a1ac' + >>> binascii.hexlify(z.digest()) + b'041f90bc7a571452af4f850a8ca2c6cddfa8a1ac' >>> z = zarr.zeros(shape=(10000, 10000), chunks=(1000, 1000)) - >>> z.hexdigest() - '7162d416d26a68063b66ed1f30e0a866e4abed60' + >>> binascii.hexlify(z.digest()) + b'7162d416d26a68063b66ed1f30e0a866e4abed60' >>> z = zarr.zeros(shape=(10000, 10000), dtype="u1", chunks=(1000, 1000)) - >>> z.hexdigest() - 'cb387af37410ae5a3222e893cf3373e4e4f22816' + >>> binascii.hexlify(z.digest()) + b'cb387af37410ae5a3222e893cf3373e4e4f22816' """ h = hashlib.new(hashname) @@ -1864,7 +1867,33 @@ def hexdigest(self, hashname="sha1"): h.update(self.store.get(self.attrs.key, b"")) - checksum = h.hexdigest() + checksum = h.digest() + + return checksum + + def hexdigest(self, hashname="sha1"): + """ + Compute a checksum for the data. Default uses sha1 for speed. + + Examples + -------- + >>> import zarr + >>> z = zarr.empty(shape=(10000, 10000), chunks=(1000, 1000)) + >>> z.hexdigest() + '041f90bc7a571452af4f850a8ca2c6cddfa8a1ac' + >>> z = zarr.zeros(shape=(10000, 10000), chunks=(1000, 1000)) + >>> z.hexdigest() + '7162d416d26a68063b66ed1f30e0a866e4abed60' + >>> z = zarr.zeros(shape=(10000, 10000), dtype="u1", chunks=(1000, 1000)) + >>> z.hexdigest() + 'cb387af37410ae5a3222e893cf3373e4e4f22816' + """ + + checksum = binascii.hexlify(self.digest(hashname=hashname)) + + # This is a bytes object on Python 3 and we want a str. + if type(checksum) is not str: + checksum = checksum.decode('utf8') return checksum