Skip to content

Commit febdeca

Browse files
committed
Issue #457 basic metadata handling in VectorCube
1 parent 3c50154 commit febdeca

File tree

5 files changed

+62
-22
lines changed

5 files changed

+62
-22
lines changed

openeo/metadata.py

Lines changed: 14 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
import logging
12
import warnings
23
from collections import namedtuple
34
from typing import List, Union, Tuple, Callable, Any
@@ -6,6 +7,9 @@
67
from openeo.internal.jupyter import render_component
78

89

10+
_log = logging.getLogger(__name__)
11+
12+
913
class MetadataException(Exception):
1014
pass
1115

@@ -191,6 +195,10 @@ class CollectionMetadata:
191195
192196
"""
193197

198+
# TODO: "CollectionMetadata" is also used as "cube metadata" where the link to original collection
199+
# might be lost (if any). Better separation between rich EO raster collection metadata and
200+
# essential cube metadata? E.g.: also think of vector cubes.
201+
194202
def __init__(self, metadata: dict, dimensions: List[Dimension] = None):
195203
# Original collection metadata (actual cube metadata might be altered through processes)
196204
self._orig_metadata = metadata
@@ -317,11 +325,15 @@ def extent(self) -> dict:
317325
def dimension_names(self) -> List[str]:
318326
return list(d.name for d in self._dimensions)
319327

320-
def assert_valid_dimension(self, dimension: str) -> str:
328+
def assert_valid_dimension(self, dimension: str, just_warn: bool = False) -> str:
321329
"""Make sure given dimension name is valid."""
322330
names = self.dimension_names()
323331
if dimension not in names:
324-
raise ValueError("Invalid dimension {d!r}. Should be one of {n}".format(d=dimension, n=names))
332+
msg = f"Invalid dimension {dimension!r}. Should be one of {names}"
333+
if just_warn:
334+
_log.warning(msg)
335+
else:
336+
raise ValueError(msg)
325337
return dimension
326338

327339
def has_band_dimension(self) -> bool:

openeo/rest/connection.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1050,10 +1050,12 @@ def vectorcube_from_paths(
10501050
10511051
.. versionadded:: 0.14.0
10521052
"""
1053+
# TODO #457 deprecate this in favor of `load_url` and standard support for `load_uploaded_files`
10531054
graph = PGNode(
10541055
"load_uploaded_files",
10551056
arguments=dict(paths=paths, format=format, options=options),
10561057
)
1058+
# TODO: load_uploaded_files might also return a raster data cube. Determine this based on format?
10571059
return VectorCube(graph=graph, connection=self)
10581060

10591061
def datacube_from_process(self, process_id: str, namespace: Optional[str] = None, **kwargs) -> DataCube:
@@ -1336,7 +1338,6 @@ def load_geojson(
13361338
13371339
.. versionadded:: 0.22.0
13381340
"""
1339-
13401341
return VectorCube.load_geojson(connection=self, data=data, properties=properties)
13411342

13421343
@openeo_process

openeo/rest/datacube.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -907,7 +907,7 @@ def aggregate_spatial(
907907
),
908908
),
909909
connection=self._connection,
910-
# TODO: metadata?
910+
# TODO: metadata? And correct dimension of created vector cube? #457
911911
)
912912

913913
@openeo_process
@@ -1723,6 +1723,7 @@ def raster_to_vector(self) -> VectorCube:
17231723
:return: a :py:class:`~openeo.rest.vectorcube.VectorCube`
17241724
"""
17251725
pg_node = PGNode(process_id="raster_to_vector", arguments={"data": self})
1726+
# TODO: properly update metadata (e.g. "geometry" dimension) related to #457
17261727
return VectorCube(pg_node, connection=self._connection, metadata=self.metadata)
17271728

17281729
####VIEW methods #######

openeo/rest/vectorcube.py

Lines changed: 29 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
from openeo.internal.documentation import openeo_process
1111
from openeo.internal.graph_building import PGNode
1212
from openeo.internal.warnings import legacy_alias
13-
from openeo.metadata import CollectionMetadata
13+
from openeo.metadata import CollectionMetadata, Dimension
1414
from openeo.rest._datacube import THIS, UDF, _ProcessGraphAbstraction, build_child_callback
1515
from openeo.rest.job import BatchJob
1616
from openeo.rest.mlmodel import MlModel
@@ -32,16 +32,26 @@ class VectorCube(_ProcessGraphAbstraction):
3232

3333
def __init__(self, graph: PGNode, connection: 'Connection', metadata: CollectionMetadata = None):
3434
super().__init__(pgnode=graph, connection=connection)
35-
# TODO: does VectorCube need CollectionMetadata?
36-
self.metadata = metadata
35+
self.metadata = metadata or self._build_metadata()
36+
37+
@classmethod
38+
def _build_metadata(cls, add_properties: bool = False) -> CollectionMetadata:
39+
"""Helper to build a (minimal) `CollectionMetadata` object."""
40+
# Vector cubes have at least a "geometry" dimension
41+
dimensions = [Dimension(name="geometry", type="geometry")]
42+
if add_properties:
43+
dimensions.append(Dimension(name="properties", type="other"))
44+
# TODO: use a more generic metadata container than "collection" metadata
45+
return CollectionMetadata(metadata={}, dimensions=dimensions)
3746

3847
def process(
39-
self,
40-
process_id: str,
41-
arguments: dict = None,
42-
metadata: Optional[CollectionMetadata] = None,
43-
namespace: Optional[str] = None,
44-
**kwargs) -> 'VectorCube':
48+
self,
49+
process_id: str,
50+
arguments: dict = None,
51+
metadata: Optional[CollectionMetadata] = None,
52+
namespace: Optional[str] = None,
53+
**kwargs,
54+
) -> "VectorCube":
4555
"""
4656
Generic helper to create a new VectorCube by applying a process.
4757
@@ -79,7 +89,7 @@ def load_geojson(
7989
.. versionadded:: 0.22.0
8090
"""
8191
# TODO: unify with `DataCube._get_geometry_argument`
82-
# TODO: also support client side fetching of GeoJSON from URL?
92+
# TODO #457 also support client side fetching of GeoJSON from URL?
8393
if isinstance(data, str) and data.strip().startswith("{"):
8494
# Assume JSON dump
8595
geometry = json.loads(data)
@@ -96,10 +106,12 @@ def load_geojson(
96106
geometry = data
97107
else:
98108
raise ValueError(data)
109+
# TODO #457 client side verification of GeoJSON construct: valid type, valid structure, presence of CRS, ...?
99110

100111
pg = PGNode(process_id="load_geojson", data=geometry, properties=properties or [])
101-
# TODO #424 add basic metadata
102-
return cls(graph=pg, connection=connection)
112+
# TODO #457 always a "properties" dimension? https://github.com/Open-EO/openeo-processes/issues/448
113+
metadata = cls._build_metadata(add_properties=True)
114+
return cls(graph=pg, connection=connection, metadata=metadata)
103115

104116
@classmethod
105117
@openeo_process
@@ -121,8 +133,9 @@ def load_url(
121133
.. versionadded:: 0.22.0
122134
"""
123135
pg = PGNode(process_id="load_url", arguments=dict_no_none(url=url, format=format, options=options))
124-
# TODO #424 add basic metadata
125-
return cls(graph=pg, connection=connection)
136+
# TODO #457 always a "properties" dimension? https://github.com/Open-EO/openeo-processes/issues/448
137+
metadata = cls._build_metadata(add_properties=True)
138+
return cls(graph=pg, connection=connection, metadata=metadata)
126139

127140
@openeo_process
128141
def run_udf(
@@ -446,7 +459,8 @@ def apply_dimension(
446459
{
447460
"data": THIS,
448461
"process": process,
449-
"dimension": dimension, # TODO #424: self.metadata.assert_valid_dimension(dimension)
462+
# TODO: drop `just_warn`?
463+
"dimension": self.metadata.assert_valid_dimension(dimension, just_warn=True),
450464
"target_dimension": target_dimension,
451465
"context": context,
452466
}

tests/rest/datacube/test_vectorcube.py

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -255,9 +255,17 @@ def test_load_url(con100, dummy_backend):
255255
}
256256

257257

258-
def test_apply_dimension(con100, dummy_backend):
258+
@pytest.mark.parametrize(
259+
["dimension", "expect_warning"],
260+
[
261+
("geometry", False),
262+
("geometries", True),
263+
("wibbles", True),
264+
],
265+
)
266+
def test_apply_dimension(con100, dummy_backend, dimension, expect_warning, caplog):
259267
vc = con100.load_geojson({"type": "Point", "coordinates": [1, 2]})
260-
result = vc.apply_dimension("sort", dimension="geometries")
268+
result = vc.apply_dimension("sort", dimension=dimension)
261269
result.execute()
262270
assert dummy_backend.get_pg() == {
263271
"loadgeojson1": {
@@ -268,7 +276,7 @@ def test_apply_dimension(con100, dummy_backend):
268276
"process_id": "apply_dimension",
269277
"arguments": {
270278
"data": {"from_node": "loadgeojson1"},
271-
"dimension": "geometries",
279+
"dimension": dimension,
272280
"process": {
273281
"process_graph": {
274282
"sort1": {
@@ -282,3 +290,7 @@ def test_apply_dimension(con100, dummy_backend):
282290
"result": True,
283291
},
284292
}
293+
294+
assert (
295+
f"Invalid dimension {dimension!r}. Should be one of ['geometry', 'properties']" in caplog.messages
296+
) == expect_warning

0 commit comments

Comments
 (0)