diff --git a/docs/conf.py b/docs/conf.py index dfd1ae07bb..d336740da2 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -104,6 +104,7 @@ def skip_submodules( "spec/v3": "https://zarr-specs.readthedocs.io/en/latest/v3/core/v3.0.html", "license": "https://github.com/zarr-developers/zarr-python/blob/main/LICENSE.txt", "tutorial": "user-guide", + "getting-started": "quickstart", } # The language for content autogenerated by Sphinx. Refer to documentation diff --git a/docs/getting_started.rst b/docs/getting_started.rst deleted file mode 100644 index 5950e2ae44..0000000000 --- a/docs/getting_started.rst +++ /dev/null @@ -1,28 +0,0 @@ -Getting Started -=============== - -Highlights ----------- - -* Create N-dimensional arrays with any NumPy dtype. -* Chunk arrays along any dimension. -* Compress and/or filter chunks using any NumCodecs_ codec. -* Store arrays in memory, on disk, inside a Zip file, on S3, ... -* Read an array concurrently from multiple threads or processes. -* Write to an array concurrently from multiple threads or processes. -* Organize arrays into hierarchies via groups. - -Contributing ------------- - -Feedback and bug reports are very welcome, please get in touch via -the `GitHub issue tracker `_. See -:doc:`contributing` for further information about contributing to Zarr. - -.. toctree:: - :caption: Getting Started - :hidden: - - installation - -.. _NumCodecs: https://numcodecs.readthedocs.io/ diff --git a/docs/index.rst b/docs/index.rst index 5fe5b2a848..5bbd04ec60 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -8,7 +8,8 @@ Zarr-Python :maxdepth: 1 :hidden: - getting_started + quickstart + installation user-guide/index api/index release @@ -34,20 +35,20 @@ Zarr is a file storage format for chunked, compressed, N-dimensional arrays base .. grid-item-card:: :img-top: _static/index_getting_started.svg - Getting Started - ^^^^^^^^^^^^^^^ + Quick Start + ^^^^^^^^^^^ - New to Zarr? Check out the getting started guide. It contains an + New to Zarr? 
Check out the quick start guide. It contains a brief introduction to Zarr's main concepts and links to additional tutorials. +++ - .. button-ref:: getting_started + .. button-ref:: quickstart :expand: :color: dark :click-parent: - To the getting started guide + To the Quick Start .. grid-item-card:: :img-top: _static/index_user_guide.svg diff --git a/docs/quickstart.rst b/docs/quickstart.rst new file mode 100644 index 0000000000..2d0e8ecef8 --- /dev/null +++ b/docs/quickstart.rst @@ -0,0 +1,186 @@ +.. only:: doctest + + >>> import shutil + >>> shutil.rmtree('data', ignore_errors=True) + >>> + >>> import numpy as np + >>> np.random.seed(0) + +Quickstart +========== + +Welcome to the Zarr-Python Quickstart guide! This page will help you get up and running with +the Zarr library in Python to efficiently manage and analyze multi-dimensional arrays. + +Zarr is a powerful library for storage of n-dimensional arrays, supporting chunking, +compression, and various backends, making it a versatile choice for scientific and +large-scale data. + +Installation +------------ + +Zarr requires Python 3.11 or higher. You can install it via `pip`: + +.. code-block:: bash + + pip install zarr + +or `conda`: + +.. code-block:: bash + + conda install --channel conda-forge zarr + +Creating an Array +----------------- + +To get started, you can create a simple Zarr array:: + + >>> import zarr + >>> import numpy as np + >>> + >>> # Create a 2D Zarr array + >>> z = zarr.create_array( + ... store="data/example-1.zarr", + ... shape=(100, 100), + ... chunks=(10, 10), + ... dtype="f4" + ... ) + >>> + >>> # Assign data to the array + >>> z[:, :] = np.random.random((100, 100)) + >>> z.info + Type : Array + Zarr format : 3 + Data type : DataType.float32 + Shape : (100, 100) + Chunk shape : (10, 10) + Order : C + Read-only : False + Store type : LocalStore + Codecs : [{'endian': <Endian.little: 'little'>}, {'level': 0, 'checksum': False}] + No.
bytes : 40000 (39.1K) + +Here, we created a 2D array of shape ``(100, 100)``, chunked into blocks of +``(10, 10)``, and filled it with random floating-point data. This array was +written to a ``LocalStore`` in the ``data/example-1.zarr`` directory. + +Compression and Filters +~~~~~~~~~~~~~~~~~~~~~~~ + +Zarr supports data compression and filters. For example, to use Blosc compression:: + + >>> z = zarr.create_array( + ... "data/example-3.zarr", + ... mode="w", shape=(100, 100), + ... chunks=(10, 10), dtype="f4", + ... compressor=zarr.codecs.BloscCodec(cname="zstd", clevel=3, shuffle=zarr.codecs.BloscShuffle.SHUFFLE) + ... ) + >>> z[:, :] = np.random.random((100, 100)) + >>> + >>> z.info + Type : Array + Zarr format : 3 + Data type : DataType.float32 + Shape : (100, 100) + Chunk shape : (10, 10) + Order : C + Read-only : False + Store type : LocalStore + Codecs : [{'endian': <Endian.little: 'little'>}, {'level': 0, 'checksum': False}] + No. bytes : 40000 (39.1K) + +This compresses the data using the Blosc codec (with the Zstandard compressor, ``zstd``) and byte shuffling enabled for better compression. + +Hierarchical Groups +------------------- + +Zarr allows you to create hierarchical groups, similar to directories:: + + >>> # Create nested groups and add arrays + >>> root = zarr.group("data/example-2.zarr") + >>> foo = root.create_group(name="foo") + >>> bar = root.create_array( + ... name="bar", shape=(100, 10), chunks=(10, 10) + ... ) + >>> spam = foo.create_array(name="spam", shape=(10,), dtype="i4") + >>> + >>> # Assign values + >>> bar[:, :] = np.random.random((100, 10)) + >>> spam[:] = np.arange(10) + >>> + >>> # print the hierarchy + >>> root.tree() + / + └── foo + └── spam (10,) int32 + + +This creates a root group with two members: a subgroup ``foo`` (containing the array ``spam``) and an array ``bar``. + +Persistent Storage +------------------ + +Zarr supports persistent storage to disk or cloud-compatible backends. While examples above +utilized a :class:`zarr.storage.LocalStore`, a number of other storage options are available.
+ +Zarr integrates seamlessly with cloud object storage such as Amazon S3 and Google Cloud Storage +using external libraries like `s3fs <https://s3fs.readthedocs.io>`_ or +`gcsfs <https://gcsfs.readthedocs.io>`_:: + + >>> import s3fs # doctest: +SKIP + >>> + >>> z = zarr.create_array("s3://example-bucket/foo", mode="w", shape=(100, 100), chunks=(10, 10)) # doctest: +SKIP + >>> z[:, :] = np.random.random((100, 100)) # doctest: +SKIP + +A single-file store can also be created using the :class:`zarr.storage.ZipStore`:: + + >>> # Store the array in a ZIP file + >>> store = zarr.storage.ZipStore("data/example-3.zip", mode='w') + >>> + >>> z = zarr.create_array( + ... store=store, + ... mode="w", + ... shape=(100, 100), + ... chunks=(10, 10), + ... dtype="f4" + ... ) + >>> + >>> # write to the array + >>> z[:, :] = np.random.random((100, 100)) + >>> + >>> # the ZipStore must be explicitly closed + >>> store.close() + +To open an existing array from a ZIP file:: + + >>> # Open the ZipStore in read-only mode + >>> store = zarr.storage.ZipStore("data/example-3.zip", read_only=True) + >>> + >>> z = zarr.open_array(store, mode='r') + >>> + >>> # read the data as a NumPy Array + >>> z[:] + array([[0.66734236, 0.15667458, 0.98720884, ..., 0.36229587, 0.67443246, + 0.34315267], + [0.65787303, 0.9544212 , 0.4830079 , ..., 0.33097172, 0.60423803, + 0.45621237], + [0.27632037, 0.9947008 , 0.42434934, ..., 0.94860053, 0.6226942 , + 0.6386924 ], + ..., + [0.12854576, 0.934397 , 0.19524333, ..., 0.11838563, 0.4967675 , + 0.43074256], + [0.82029045, 0.4671437 , 0.8090906 , ..., 0.7814118 , 0.42650765, + 0.95929915], + [0.4335856 , 0.7565437 , 0.7828931 , ..., 0.48119593, 0.66220033, + 0.6652362 ]], shape=(100, 100), dtype=float32) + +Read more about Zarr's storage options in the :ref:`User Guide <user-guide-storage>`. + +Next Steps +---------- + +Now that you're familiar with the basics, explore the following resources: + +- `User Guide <user-guide>`_ +- `API Reference <api>`_