
Commit 94e44ef

Authored by: pp-mo, bouweandela, HGWright, dependabot[bot], schlunma
Lazy netcdf saves (#5191)
* Basic functional lazy saving.
* Simplify function signature which upsets Sphinx.
* Non-lazy saves return nothing.
* Now fixed to enable use with process/distributed scheduling.
* Remove dask.utils.SerializableLock, which I think was a mistake.
* Make DefferedSaveWrapper use _thread_safe_nc.
* Fixes for non-lazy save.
* Avoid saver error when no deferred writes.
* Reorganise locking code, ready for shareable locks.
* Remove optional usage of 'filelock' for lazy saves.
* Document dask-specific locking; implement differently for threads or distributed schedulers.
* Minor fix for unit-tests.
* Pin libnetcdf to avoid problems -- see #5187.
* Minor test fix.
* Move DeferredSaveWrapper into _thread_safe_nc; replicate the NetCDFDataProxy fix; use one lock per Saver; add extra up-scaled test.
* Update lib/iris/fileformats/netcdf/saver.py
  Co-authored-by: Bouwe Andela <[email protected]>
* Update lib/iris/fileformats/netcdf/_dask_locks.py
  Co-authored-by: Bouwe Andela <[email protected]>
* Update lib/iris/fileformats/netcdf/saver.py
  Co-authored-by: Bouwe Andela <[email protected]>
* Small rename + reformat.
* Remove Saver lazy option; all lazy saves are delayed; factor out fillvalue checks and make them delayable.
* Repurposed 'test__FillValueMaskCheckAndStoreTarget' to 'test__data_fillvalue_check', since old class is gone.
* Disable (temporary) saver debug printouts.
* Fix test problems; Saver automatically completes to preserve existing direct usage (which is public API).
* Fix docstring error.
* Fix spurious error in old saver test.
* Fix Saver docstring.
* More robust exit for NetCDFWriteProxy operation.
* Fix doctests by making the Saver example functional.
* Improve docstrings; unify terminology; simplify non-lazy save call.
* Moved netcdf cell-method handling into nc_load_rules.helpers, and various tests into more specific test folders.
* Fix lockfiles and Makefile process.
* Add unit tests for routine _fillvalue_report().
* Remove debug-only code.
* Added tests for what the save function does with the 'compute' keyword.
* Fix mock-specific problems, small tidy.
* Restructure hierarchy of tests.unit.fileformats.netcdf.
* Tidy test docstrings.
* Correct test import.
* Avoid incorrect checking of byte data, and a numpy deprecation warning.
* Alter parameter names to make test reports clearer.
* Test basic behaviour of _lazy_stream_data; make 'Saver._delayed_writes' private.
* Add integration tests, and distributed dependency.
* Docstring fixes.
* Documentation section and whatsnew entry.
* Various fixes to whatsnew, docstrings and docs.
* Minor review changes, fix doctest.
* Arrange tests + results to organise by package-name alone.
* Review changes.
* Review changes.
* Enhance tests + debug.
* Support scheduler type 'single-threaded'; allow retries on delayed-save test.
* Improve test.
* Adding a whatsnew entry for 5224 (#5234)
  * Adding a whatsnew entry explaining 5224
  * Fixing link and format error
* Replacing numpy legacy printing with array2string and remaking results for dependent tests
* adding a whatsnew entry
* configure codecov
* remove results creation commit from blame
* fixing whatsnew entry
* Bump scitools/workflows from 2023.04.1 to 2023.04.2 (#5236)
  Bumps [scitools/workflows](https://github.com/scitools/workflows) from 2023.04.1 to 2023.04.2.
  - [Release notes](https://github.com/scitools/workflows/releases)
  - [Commits](SciTools/workflows@2023.04.1...2023.04.2)
  updated-dependencies:
  - dependency-name: scitools/workflows
    dependency-type: direct:production
  ...
  Signed-off-by: dependabot[bot] <[email protected]>
  Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
* Use real array for data of small netCDF variables. (#5229)
  * Small netCDF variable data is real.
  * Various test fixes.
  * More test fixing.
  * Fix printout in Mesh documentation.
  * Whatsnew + doctests fix.
  * Tweak whatsnew.
* Handle derived coordinates correctly in `concatenate` (#5096)
  * First working prototype of concatenate that handles derived coordinates correctly
  * Added checks for derived coord metadata during concatenation
  * Added tests
  * Fixed defaults
  * Added what's new entry
  * Optimized test coverage
* clarity on whatsnew entry contributors (#5240)
* Modernize and simplify iris.analysis._Groupby (#5015)
  * Modernize and simplify _Groupby
  * Rename variable to improve readability
  * Add a whatsnew entry
  * Add a type hint to _add_shared_coord
  * Add a test for iris.analysis._Groupby.__repr__
  Co-authored-by: Martin Yeo <[email protected]>
* Finalises Lazy Data documentation (#5137)
  * cube and io lazy data notes added
  * [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
  * Added comments within analysis, as well as palette and iterate, and what's new
  * fixed docstrings as requested in @trexfeathers review
  * reverted cube.py for time being
  * fixed flake8 issue
  * Lazy data second batch
  * [pre-commit.ci] auto fixes from pre-commit.com hooks; for more information, see https://pre-commit.ci
  * updated latest what's new
  * I almost hope this wasn't the fix, I'm such a moron
  * addressed review changes
  Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  Co-authored-by: Bill Little <[email protected]>
* Fixes to _discontiguity_in_bounds (attempt 2) (#4975)
* update ci locks location (#5228)
* Updated environment lockfiles (#5211)
  Co-authored-by: Lockfile bot <[email protected]>
* Increase retries.
* Change debug to show which elements failed.
* update cf standard units (#5244)
  * update cf standard units
  * added whatsnew entry
  * Correct pull number
  Co-authored-by: Martin Yeo <[email protected]>
* libnetcdf <4.9 pin (#5242)
  * Pin libnetcdf<4.9 and update lock files.
  * What's New entry.
  * libnetcdf not available on PyPI.
* Fix for Pandas v2.0.
* Fix for Pandas v2.0.
* Avoid possible same-file crossover between tests.
* Ensure all-different testfiles; load all vars lazy.
* Revert changes to testing framework.
* Remove repeated line from requirements/py*.yml (?merge error), and re-fix lockfiles.
* Revert some more debug changes.
* Reorganise test for better code clarity.
* Use public 'Dataset.isopen()' instead of '._isopen'.
* Create output files in unique temporary directories.
* Tests for fileformats.netcdf._dask_locks.
* Fix attribution names.
* Fixed new py311 lockfile.
* Fix typos spotted by codespell.
* Add distributed test dep for python 3.11
* Fix lockfile for python 3.11

Signed-off-by: dependabot[bot] <[email protected]>
Co-authored-by: Bouwe Andela <[email protected]>
Co-authored-by: Henry Wright <[email protected]>
Co-authored-by: Henry Wright <[email protected]>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Manuel Schlund <[email protected]>
Co-authored-by: Bill Little <[email protected]>
Co-authored-by: Bouwe Andela <[email protected]>
Co-authored-by: Martin Yeo <[email protected]>
Co-authored-by: Elias <[email protected]>
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Co-authored-by: stephenworsley <[email protected]>
Co-authored-by: scitools-ci[bot] <107775138+scitools-ci[bot]@users.noreply.github.com>
Co-authored-by: Lockfile bot <[email protected]>
1 parent 949b296 commit 94e44ef

39 files changed: +1700 additions, -331 deletions

docs/src/userguide/real_and_lazy_data.rst

Lines changed: 40 additions & 2 deletions

@@ -6,6 +6,7 @@

 import dask.array as da
 import iris
+from iris.cube import CubeList
 import numpy as np


@@ -227,10 +228,47 @@ coordinates' lazy points and bounds:
 Dask Processing Options
 -----------------------

-Iris uses dask to provide lazy data arrays for both Iris cubes and coordinates,
-and for computing deferred operations on lazy arrays.
+Iris uses `Dask <https://docs.dask.org/en/stable/>`_ to provide lazy data arrays for
+both Iris cubes and coordinates, and for computing deferred operations on lazy arrays.

 Dask provides processing options to control how deferred operations on lazy arrays
 are computed. This is provided via the ``dask.set_options`` interface. See the
 `dask documentation <http://dask.pydata.org/en/latest/scheduler-overview.html>`_
 for more information on setting dask processing options.
+
+
+.. _delayed_netcdf_save:
+
+Delayed NetCDF Saving
+---------------------
+
+When saving data to NetCDF files, it is possible to *delay* writing lazy content to the
+output file, to be performed by `Dask <https://docs.dask.org/en/stable/>`_ later,
+thus enabling parallel save operations.
+
+This works in the following way:
+
+1. an :func:`iris.save` call is made, with a NetCDF file output and the additional
+   keyword ``compute=False``.
+   This is currently *only* available when saving to NetCDF, so it is documented in
+   the Iris NetCDF file format API. See: :func:`iris.fileformats.netcdf.save`.
+
+2. the call creates the output file, but does not fill in variables' data, where
+   the data is a lazy array in the Iris object. Instead, these variables are
+   initially created "empty".
+
+3. the :meth:`~iris.save` call returns a ``result`` which is a
+   :class:`~dask.delayed.Delayed` object.
+
+4. the save can be completed later by calling ``result.compute()``, or by passing it
+   to the :func:`dask.compute` call.
+
+The benefit of this is that costly data transfer operations can be performed in
+parallel with writes to other data files. Also, where array contents are calculated
+from shared lazy input data, these can be computed in parallel efficiently by Dask
+(i.e. without re-fetching), similar to what :meth:`iris.cube.CubeList.realise_data`
+can do.
+
+.. note::
+    This feature does **not** enable parallel writes to the *same* NetCDF output file.
+    That can only be done on certain operating systems, with a specially configured
+    build of the NetCDF C library, and is not supported by Iris at present.
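The four-step protocol described in the new documentation section can be sketched without Iris or Dask at all. The following is a toy, stdlib-only illustration of the ``compute=False`` pattern: `toy_save`, `ToyDelayed`, and the text "file format" are invented stand-ins for :func:`iris.save` and :class:`dask.delayed.Delayed`, not real Iris API.

```python
import os
import tempfile


class ToyDelayed:
    """Toy stand-in for dask.delayed.Delayed: holds work to be run later."""

    def __init__(self, func):
        self._func = func

    def compute(self):
        return self._func()


def toy_save(data, path, compute=True):
    """Sketch of the delayed-save protocol (not the real iris.save)."""
    # Step 2: the output file is created immediately, but the (lazy)
    # data is not written yet -- the variable is left "empty".
    with open(path, "w") as f:
        f.write("header\n")

    def fill_in():
        # The deferred data write, performed later by .compute().
        with open(path, "a") as f:
            f.write(f"data: {data}\n")

    if compute:
        # Non-lazy save: write everything now and return nothing.
        fill_in()
        return None
    # Step 3: return a Delayed-like object instead of writing the data.
    return ToyDelayed(fill_in)


path = os.path.join(tempfile.mkdtemp(), "out.txt")
result = toy_save([1, 2, 3], path, compute=False)  # step 1: compute=False
print(open(path).read())  # only the header so far; data not yet written
result.compute()          # step 4: complete the save later
print(open(path).read())  # now includes the data
```

With real Iris, the same shape applies: ``result = iris.save(cubes, "out.nc", compute=False)`` followed later by ``result.compute()``, or by handing several such results to one :func:`dask.compute` call so their data transfers run in parallel.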

docs/src/whatsnew/latest.rst

Lines changed: 29 additions & 2 deletions

@@ -30,7 +30,33 @@ This document explains the changes made to Iris for this release
 ✨ Features
 ===========

-#. N/A
+#. `@bsherratt`_ added support for plugins - see the corresponding
+   :ref:`documentation page<community_plugins>` for further information.
+   (:pull:`5144`)
+
+#. `@rcomer`_ enabled lazy evaluation of :obj:`~iris.analysis.RMS` calculations
+   with weights. (:pull:`5017`)
+
+#. `@schlunma`_ allowed the usage of cubes, coordinates, cell measures, or
+   ancillary variables as weights for cube aggregations
+   (:meth:`iris.cube.Cube.collapsed`, :meth:`iris.cube.Cube.aggregated_by`, and
+   :meth:`iris.cube.Cube.rolling_window`). This automatically adapts cube units
+   if necessary. (:pull:`5084`)
+
+#. `@lbdreyer`_ and `@trexfeathers`_ (reviewer) added :func:`iris.plot.hist`
+   and :func:`iris.quickplot.hist`. (:pull:`5189`)
+
+#. `@tinyendian`_ edited :func:`~iris.analysis.cartography.rotate_winds` to
+   enable lazy computation of rotated wind vector components (:issue:`4934`,
+   :pull:`4972`)
+
+#. `@ESadek-MO`_ updated to the latest CF Standard Names Table v80
+   (07 February 2023). (:pull:`5244`)
+
+#. `@pp-mo`_ and `@lbdreyer`_ supported delayed saving of lazy data, when writing to
+   the netCDF file format. See: :ref:`delayed netCDF saves <delayed_netcdf_save>`.
+   Also with significant input from `@fnattino`_.
+   (:pull:`5191`)


 🐛 Bugs Fixed

@@ -97,7 +123,8 @@ This document explains the changes made to Iris for this release
 Whatsnew author names (@github name) in alphabetical order. Note that,
 core dev names are automatically included by the common_links.inc:

-
+.. _@fnattino: https://github.com/fnattino
+.. _@tinyendian: https://github.com/tinyendian


 .. comment

lib/iris/fileformats/_nc_load_rules/helpers.py

Lines changed: 206 additions & 4 deletions

@@ -13,6 +13,8 @@
 build routines, and which it does not use.

 """
+import re
+from typing import List
 import warnings

 import cf_units
@@ -28,10 +30,6 @@
 import iris.exceptions
 import iris.fileformats.cf as cf
 import iris.fileformats.netcdf
-from iris.fileformats.netcdf import (
-    UnknownCellMethodWarning,
-    parse_cell_methods,
-)
 from iris.fileformats.netcdf.loader import _get_cf_var_data
 import iris.std_names
 import iris.util
@@ -184,6 +182,210 @@
 CF_VALUE_STD_NAME_PROJ_Y = "projection_y_coordinate"


+################################################################################
+# Handling of cell-methods.
+
+_CM_COMMENT = "comment"
+_CM_EXTRA = "extra"
+_CM_INTERVAL = "interval"
+_CM_METHOD = "method"
+_CM_NAME = "name"
+_CM_PARSE_NAME = re.compile(r"([\w_]+\s*?:\s+)+")
+_CM_PARSE = re.compile(
+    r"""
+    (?P<name>([\w_]+\s*?:\s+)+)
+    (?P<method>[\w_\s]+(?![\w_]*\s*?:))\s*
+    (?:
+        \(\s*
+        (?P<extra>.+)
+        \)\s*
+    )?
+    """,
+    re.VERBOSE,
+)
+
+# Cell methods.
+_CM_KNOWN_METHODS = [
+    "point",
+    "sum",
+    "mean",
+    "maximum",
+    "minimum",
+    "mid_range",
+    "standard_deviation",
+    "variance",
+    "mode",
+    "median",
+]
+
+
+def _split_cell_methods(nc_cell_methods: str) -> List[re.Match]:
+    """
+    Split a CF cell_methods attribute string into a list of zero or more cell
+    methods, each of which is then parsed with a regex to return a list of match
+    objects.
+
+    Args:
+
+    * nc_cell_methods: The value of the cell methods attribute to be split.
+
+    Returns:
+
+    * nc_cell_methods_matches: A list of the re.Match objects associated with
+      each parsed cell method
+
+    Splitting is done based on words followed by colons outside of any brackets.
+    Validation of anything other than being laid out in the expected format is
+    left to the calling function.
+    """
+    # Find name candidates
+    name_start_inds = []
+    for m in _CM_PARSE_NAME.finditer(nc_cell_methods):
+        name_start_inds.append(m.start())
+
+    # Remove those that fall inside brackets
+    bracket_depth = 0
+    for ind, cha in enumerate(nc_cell_methods):
+        if cha == "(":
+            bracket_depth += 1
+        elif cha == ")":
+            bracket_depth -= 1
+            if bracket_depth < 0:
+                msg = (
+                    "Cell methods may be incorrectly parsed due to mismatched "
+                    "brackets"
+                )
+                warnings.warn(msg, UserWarning, stacklevel=2)
+        if bracket_depth > 0 and ind in name_start_inds:
+            name_start_inds.remove(ind)
+
+    # List tuples of indices of starts and ends of the cell methods in the string
+    method_indices = []
+    for ii in range(len(name_start_inds) - 1):
+        method_indices.append((name_start_inds[ii], name_start_inds[ii + 1]))
+    method_indices.append((name_start_inds[-1], len(nc_cell_methods)))
+
+    # Index the string and match against each substring
+    nc_cell_methods_matches = []
+    for start_ind, end_ind in method_indices:
+        nc_cell_method_str = nc_cell_methods[start_ind:end_ind]
+        nc_cell_method_match = _CM_PARSE.match(nc_cell_method_str.strip())
+        if not nc_cell_method_match:
+            msg = (
+                f"Failed to fully parse cell method string: {nc_cell_methods}"
+            )
+            warnings.warn(msg, UserWarning, stacklevel=2)
+            continue
+        nc_cell_methods_matches.append(nc_cell_method_match)
+
+    return nc_cell_methods_matches
+
+
+class UnknownCellMethodWarning(Warning):
+    pass
+
+
+def parse_cell_methods(nc_cell_methods):
+    """
+    Parse a CF cell_methods attribute string into a tuple of zero or
+    more CellMethod instances.
+
+    Args:
+
+    * nc_cell_methods (str):
+        The value of the cell methods attribute to be parsed.
+
+    Returns:
+
+    * cell_methods
+        An iterable of :class:`iris.coords.CellMethod`.
+
+    Multiple coordinates, intervals and comments are supported.
+    If a method has a non-standard name a warning will be issued, but the
+    results are not affected.
+
+    """
+    cell_methods = []
+    if nc_cell_methods is not None:
+        for m in _split_cell_methods(nc_cell_methods):
+            d = m.groupdict()
+            method = d[_CM_METHOD]
+            method = method.strip()
+            # Check validity of method, allowing for multi-part methods
+            # e.g. mean over years.
+            method_words = method.split()
+            if method_words[0].lower() not in _CM_KNOWN_METHODS:
+                msg = "NetCDF variable contains unknown cell method {!r}"
+                warnings.warn(
+                    msg.format("{}".format(method_words[0])),
+                    UnknownCellMethodWarning,
+                )
+            d[_CM_METHOD] = method
+            name = d[_CM_NAME]
+            name = name.replace(" ", "")
+            name = name.rstrip(":")
+            d[_CM_NAME] = tuple([n for n in name.split(":")])
+            interval = []
+            comment = []
+            if d[_CM_EXTRA] is not None:
+                #
+                # tokenise the key words and field colon marker
+                #
+                d[_CM_EXTRA] = d[_CM_EXTRA].replace(
+                    "comment:", "<<comment>><<:>>"
+                )
+                d[_CM_EXTRA] = d[_CM_EXTRA].replace(
+                    "interval:", "<<interval>><<:>>"
+                )
+                d[_CM_EXTRA] = d[_CM_EXTRA].split("<<:>>")
+                if len(d[_CM_EXTRA]) == 1:
+                    comment.extend(d[_CM_EXTRA])
+                else:
+                    next_field_type = comment
+                    for field in d[_CM_EXTRA]:
+                        field_type = next_field_type
+                        index = field.rfind("<<interval>>")
+                        if index == 0:
+                            next_field_type = interval
+                            continue
+                        elif index > 0:
+                            next_field_type = interval
+                        else:
+                            index = field.rfind("<<comment>>")
+                            if index == 0:
+                                next_field_type = comment
+                                continue
+                            elif index > 0:
+                                next_field_type = comment
+                        if index != -1:
+                            field = field[:index]
+                        field_type.append(field.strip())
+            #
+            # cater for a shared interval over multiple axes
+            #
+            if len(interval):
+                if len(d[_CM_NAME]) != len(interval) and len(interval) == 1:
+                    interval = interval * len(d[_CM_NAME])
+            #
+            # cater for a shared comment over multiple axes
+            #
+            if len(comment):
+                if len(d[_CM_NAME]) != len(comment) and len(comment) == 1:
+                    comment = comment * len(d[_CM_NAME])
+            d[_CM_INTERVAL] = tuple(interval)
+            d[_CM_COMMENT] = tuple(comment)
+            cell_method = iris.coords.CellMethod(
+                d[_CM_METHOD],
+                coords=d[_CM_NAME],
+                intervals=d[_CM_INTERVAL],
+                comments=d[_CM_COMMENT],
+            )
+            cell_methods.append(cell_method)
+    return tuple(cell_methods)
+
+
 ################################################################################
 def build_cube_metadata(engine):
     """Add the standard meta data to the cube."""

lib/iris/fileformats/netcdf/__init__.py

Lines changed: 5 additions & 2 deletions

@@ -18,15 +18,18 @@
 # Note: *must* be done before importing from submodules, as they also use this !
 logger = iris.config.get_logger(__name__)

+# Note: these probably shouldn't be public, but for now they are.
+from .._nc_load_rules.helpers import (
+    UnknownCellMethodWarning,
+    parse_cell_methods,
+)
 from .loader import DEBUG, NetCDFDataProxy, load_cubes
 from .saver import (
     CF_CONVENTIONS_VERSION,
     MESH_ELEMENTS,
     SPATIO_TEMPORAL_AXES,
     CFNameCoordMap,
     Saver,
-    UnknownCellMethodWarning,
-    parse_cell_methods,
     save,
 )
