@@ -3,14 +3,17 @@
 from pathlib import Path
 from typing import Callable, Dict, List, Optional, Union

+import numpy as np
 import xarray as xr

 from openeo_pg_parser_networkx.graph import OpenEOProcessGraph
+from openeo_pg_parser_networkx.pg_schema import BoundingBox, TemporalInterval
+from openeo_processes_dask.process_implementations.cubes import load_stac

 from openeo.internal.graph_building import PGNode, as_flat_graph
 from openeo.internal.jupyter import VisualDict, VisualList
 from openeo.local.collections import _get_geotiff_metadata, _get_local_collections, _get_netcdf_zarr_metadata
 from openeo.local.processing import PROCESS_REGISTRY
-from openeo.metadata import CollectionMetadata
+from openeo.metadata import Band, BandDimension, CollectionMetadata, SpatialDimension, TemporalDimension
 from openeo.rest.datacube import DataCube

 _log = logging.getLogger(__name__)

@@ -88,6 +91,156 @@ def load_collection(
             fetch_metadata=fetch_metadata,
         )

+    def datacube_from_process(self, process_id: str, namespace: Optional[str] = None, **kwargs) -> DataCube:
+        """
+        Load a data cube from a (custom) process.
+
+        :param process_id: The process id.
+        :param namespace: optional process namespace
+        :param kwargs: The arguments of the custom process.
+        :return: A :py:class:`DataCube`, without valid metadata, as the client is not aware of this custom process.
+        """
+        graph = PGNode(process_id, namespace=namespace, arguments=kwargs)
+        return DataCube(graph=graph, connection=self)
+
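Since `datacube_from_process` performs no validation, it helps to inspect the graph it builds before executing anything. A minimal sketch, assuming a `LocalConnection` rooted at a hypothetical `./local_data` folder and illustrative process arguments:

```python
from openeo.local import LocalConnection

local_conn = LocalConnection("./local_data")  # hypothetical collections folder

# Build a cube from an arbitrary process; the client does not check the
# process id or its arguments, and cube.metadata stays empty/invalid.
cube = local_conn.datacube_from_process("ndvi", data={"from_parameter": "data"})
print(cube.to_json())  # inspect the generated process graph
```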
+    def load_stac(
+        self,
+        url: str,
+        spatial_extent: Optional[Dict[str, float]] = None,
+        temporal_extent: Optional[List[Union[str, datetime.datetime, datetime.date]]] = None,
+        bands: Optional[List[str]] = None,
+        properties: Optional[dict] = None,
+    ) -> DataCube:
+        """
+        Loads data from a static STAC catalog or a STAC API Collection and returns the data
+        as a processable :py:class:`DataCube`.
+        A batch job result can be loaded by providing a reference to it.
+
+        If supported by the underlying metadata and file format, the data that is added to the
+        data cube can be restricted with the parameters ``spatial_extent``, ``temporal_extent``
+        and ``bands``.
+        If no data is available for the given extents, a ``NoDataAvailable`` error is thrown.
+
+        Remarks:
+
+        * The bands (and all dimensions that specify nominal dimension labels) are expected to be
+          ordered as specified in the metadata if the ``bands`` parameter is set to ``None``.
+        * If no additional parameter is specified, the whole data set is expected to be loaded.
+          Due to the large size of many data sets, this is not recommended and may be optimized
+          by back-ends to only load the data that is actually required after evaluating
+          subsequent processes such as filters.
+          This means that the values should be processed only after the data has been limited
+          to the required extent and, as a consequence, to a manageable size.
+
+        :param url: The URL to a static STAC catalog (STAC Item, STAC Collection, or STAC Catalog)
+            or a specific STAC API Collection that allows filtering items and downloading assets.
+            This includes batch job results, which themselves are compliant with STAC.
+            For external URLs, authentication details such as API keys or tokens may need to be
+            included in the URL.
+
+            Batch job results can be specified in two ways:
+
+            - For batch job results at the same back-end, provide a URL pointing to the
+              corresponding batch job results endpoint. The URL usually ends with
+              ``/jobs/{id}/results``, where ``{id}`` is the corresponding batch job ID.
+            - For external results, a signed URL must be provided. Not all back-ends support
+              signed URLs, which are provided as a link with the link relation ``canonical``
+              in the batch job result metadata.
+        :param spatial_extent:
+            Limits the data to load to the specified bounding box or polygons.
+
+            For raster data, the process loads the pixel into the data cube if the point at the
+            pixel center intersects with the bounding box or any of the polygons (as defined in
+            the Simple Features standard by the OGC).
+
+            For vector data, the process loads the geometry into the data cube if the geometry
+            is fully within the bounding box or any of the polygons (as defined in the Simple
+            Features standard by the OGC).
+            Empty geometries may only be in the data cube if no spatial extent has been provided.
+
+            The GeoJSON can be one of the following feature types:
+
+            * A ``Polygon`` or ``MultiPolygon`` geometry,
+            * a ``Feature`` with a ``Polygon`` or ``MultiPolygon`` geometry, or
+            * a ``FeatureCollection`` containing at least one ``Feature`` with ``Polygon``
+              or ``MultiPolygon`` geometries.
+
+            Set this parameter to ``None`` to set no limit for the spatial extent.
+            Be careful with this when loading large datasets. It is recommended to use this
+            parameter instead of using ``filter_bbox()`` or ``filter_spatial()`` directly after
+            loading unbounded data.
+        :param temporal_extent:
+            Limits the data to load to the specified left-closed temporal interval.
+            Applies to all temporal dimensions.
+            The interval has to be specified as an array with exactly two elements:
+
+            1. The first element is the start of the temporal interval.
+               The specified time instant is **included** in the interval.
+            2. The second element is the end of the temporal interval.
+               The specified time instant is **excluded** from the interval.
+
+            The second element must always be later than the first element.
+            Otherwise, a ``TemporalExtentEmpty`` exception is thrown.
+
+            Also supports open intervals by setting one of the boundaries to ``None``,
+            but never both.
+
+            Set this parameter to ``None`` to set no limit for the temporal extent.
+            Be careful with this when loading large datasets. It is recommended to use this
+            parameter instead of using ``filter_temporal()`` directly after loading unbounded data.
+        :param bands:
+            Only adds the specified bands into the data cube so that bands that don't match the
+            list of band names are not available. Applies to all dimensions of type ``bands``.
+
+            Either the unique band name (metadata field ``name`` in bands) or one of the common
+            band names (metadata field ``common_name`` in bands) can be specified.
+            If the unique band name and the common name conflict, the unique band name has a
+            higher priority.
+
+            The order of the specified array defines the order of the bands in the data cube.
+            If multiple bands match a common name, all matched bands are included in the
+            original order.
+
+            It is recommended to use this parameter instead of using ``filter_bands()`` directly
+            after loading unbounded data.
+        :param properties:
+            Limits the data by metadata properties to include only data in the data cube for
+            which all given conditions return ``True`` (AND operation).
+
+            Specify key-value pairs with the key being the name of the metadata property,
+            which can be retrieved with the openEO Data Discovery for Collections.
+            The value must be a condition (user-defined process) to be evaluated against
+            a STAC API. This parameter is not supported for static STAC.
+
+        .. versionadded:: 0.21.0
+        """
+        arguments = {"url": url}
+        # TODO: more normalization/validation of extent/band parameters and `properties`
+        if spatial_extent:
+            arguments["spatial_extent"] = spatial_extent
+        if temporal_extent:
+            arguments["temporal_extent"] = DataCube._get_temporal_extent(temporal_extent)
+        if bands:
+            arguments["bands"] = bands
+        if properties:
+            arguments["properties"] = properties
+        cube = self.datacube_from_process(process_id="load_stac", **arguments)
+        # Detect the actual metadata from the URL:
+        # run load_stac eagerly (via openeo-processes-dask) to get the datacube metadata.
+        # Only parse the extents that were actually provided, to avoid failing on None.
+        if spatial_extent:
+            arguments["spatial_extent"] = BoundingBox.parse_obj(spatial_extent)
+        if temporal_extent:
+            arguments["temporal_extent"] = TemporalInterval.parse_obj(temporal_extent)
+        xarray_cube = load_stac(**arguments)
+        attrs = xarray_cube.attrs
+        for at in attrs:
+            # Allowed attribute types: str, Number, ndarray, number, list, tuple.
+            # Anything else is stringified so the metadata stays serializable.
+            if not isinstance(attrs[at], (int, float, str, np.ndarray, list, tuple)):
+                attrs[at] = str(attrs[at])
+        metadata = CollectionMetadata(
+            attrs,
+            dimensions=[
+                SpatialDimension(name=xarray_cube.openeo.x_dim, extent=[]),
+                SpatialDimension(name=xarray_cube.openeo.y_dim, extent=[]),
+                TemporalDimension(name=xarray_cube.openeo.temporal_dims[0], extent=[]),
+                BandDimension(
+                    name=xarray_cube.openeo.band_dims[0],
+                    bands=[Band(x) for x in xarray_cube[xarray_cube.openeo.band_dims[0]].values],
+                ),
+            ],
+        )
+        cube.metadata = metadata
+        return cube
+
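Putting the pieces together, here is a usage sketch of the new method. The STAC URL, bounding box, date range, and band names below are placeholders, not values from this changeset:

```python
from openeo.local import LocalConnection

local_conn = LocalConnection("./local_data")  # hypothetical collections folder

cube = local_conn.load_stac(
    url="https://example.com/stac/collections/sentinel-2-l2a",  # placeholder URL
    spatial_extent={"west": 11.0, "east": 11.1, "south": 46.0, "north": 46.1},
    temporal_extent=["2022-06-01", "2022-07-01"],
    bands=["red", "nir"],
)
# Unlike datacube_from_process, load_stac eagerly resolves metadata,
# so band and dimension information is available immediately:
print(cube.metadata.band_names)
```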
     def execute(self, process_graph: Union[dict, str, Path]) -> xr.DataArray:
         """
         Execute the process graph locally and return the result as an xarray.DataArray.
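For context, this local `execute()` path is what eventually materializes such a cube. A rough end-to-end sketch, continuing from the placeholder `cube` above and assuming the temporal dimension is named `t`:

```python
# Append a reducer and compute locally via openeo-processes-dask.
reduced = cube.reduce_dimension(dimension="t", reducer="mean")
result = reduced.execute()  # returns an xarray.DataArray
print(type(result))
```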