Skip to content

Commit 55402b7

Browse files
committed
Issue #197: add basic process-graph-based test of UDF on VectorCube
1 parent d0c34f4 commit 55402b7

File tree

6 files changed

+172
-16
lines changed

6 files changed

+172
-16
lines changed

openeo_driver/ProcessGraphDeserializer.py

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -677,10 +677,10 @@ def apply_neighborhood(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
677677

678678
@process
679679
def apply_dimension(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
680-
data_cube = args.get_required("data", expected_type=DriverDataCube)
680+
data_cube = args.get_required("data", expected_type=(DriverDataCube, DriverVectorCube))
681681
process = args.get_deep("process", "process_graph", expected_type=dict)
682682
dimension = args.get_required(
683-
"dimension", expected_type=str, validator=ProcessArgs.validator_one_of(data_cube.metadata.dimension_names())
683+
"dimension", expected_type=str, validator=ProcessArgs.validator_one_of(data_cube.get_dimension_names())
684684
)
685685
target_dimension = args.get_optional("target_dimension", default=None, expected_type=str)
686686
context = args.get_optional("context", default=None)
@@ -748,7 +748,7 @@ def reduce_dimension(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
748748
data_cube: DriverDataCube = args.get_required("data", expected_type=DriverDataCube)
749749
reduce_pg = args.get_deep("reducer", "process_graph", expected_type=dict)
750750
dimension = args.get_required(
751-
"dimension", expected_type=str, validator=ProcessArgs.validator_one_of(data_cube.metadata.dimension_names())
751+
"dimension", expected_type=str, validator=ProcessArgs.validator_one_of(data_cube.get_dimension_names())
752752
)
753753
context = args.get_optional("context", default=None)
754754
return data_cube.reduce_dimension(reducer=reduce_pg, dimension=dimension, context=context, env=env)
@@ -924,7 +924,7 @@ def aggregate_temporal(args: ProcessArgs, env: EvalEnv) -> DriverDataCube:
924924
dimension = args.get_optional(
925925
"dimension",
926926
default=lambda: data_cube.metadata.temporal_dimension.name,
927-
validator=ProcessArgs.validator_one_of(data_cube.metadata.dimension_names()),
927+
validator=ProcessArgs.validator_one_of(data_cube.get_dimension_names()),
928928
)
929929
context = args.get_optional("context", default=None)
930930

@@ -941,7 +941,7 @@ def aggregate_temporal_period(args: ProcessArgs, env: EvalEnv) -> DriverDataCube
941941
dimension = args.get_optional(
942942
"dimension",
943943
default=lambda: data_cube.metadata.temporal_dimension.name,
944-
validator=ProcessArgs.validator_one_of(data_cube.metadata.dimension_names()),
944+
validator=ProcessArgs.validator_one_of(data_cube.get_dimension_names()),
945945
)
946946
context = args.get_optional("context", default=None)
947947

openeo_driver/datacube.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -61,6 +61,9 @@ def __eq__(self, o: object) -> bool:
6161
return True
6262
return False
6363

64+
def get_dimension_names(self) -> List[str]:
65+
return self.metadata.dimension_names()
66+
6467
def _not_implemented(self):
6568
"""Helper to raise a NotImplemented exception containing method name"""
6669
raise NotImplementedError("DataCube method not implemented: {m!r}".format(m=inspect.stack()[1].function))
@@ -511,6 +514,12 @@ def to_legacy_save_result(self) -> Union["AggregatePolygonResult", "JSONResult"]
511514
f"Unsupported cube configuration {cube.dims} for _write_legacy_aggregate_polygon_result_json"
512515
)
513516

517+
def get_dimension_names(self) -> List[str]:
518+
if self._cube is None:
519+
return [self.DIM_GEOMETRIES]
520+
else:
521+
return list(self._cube.dims)
522+
514523
def get_bounding_box(self) -> Tuple[float, float, float, float]:
515524
# TODO: cache bounding box?
516525
# TODO #114 #141 Open-EO/openeo-geopyspark-driver#239: option to buffer point geometries (if any)

openeo_driver/dummy/dummy_backend.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,10 +181,12 @@ def __init__(self, metadata: CollectionMetadata = None):
181181
self.apply_tiles = Mock(name="apply_tiles", return_value=self)
182182
self.apply_tiles_spatiotemporal = Mock(name="apply_tiles_spatiotemporal", return_value=self)
183183

184-
# Create mock methods for remaining data cube methods that are not yet defined
185-
already_defined = set(DummyDataCube.__dict__.keys()).union(self.__dict__.keys())
184+
# Create mock methods for remaining DriverDataCube methods that are not yet defined directly by DummyDataCube
185+
to_keep = set(DummyDataCube.__dict__.keys()).union(self.__dict__.keys())
186+
to_keep.update(m for m in DriverDataCube.__dict__.keys() if m.startswith("_"))
187+
to_keep.update(["get_dimension_names"])
186188
for name, method in DriverDataCube.__dict__.items():
187-
if not name.startswith('_') and name not in already_defined and callable(method):
189+
if not name in to_keep and callable(method):
188190
setattr(self, name, Mock(name=name, return_value=self))
189191

190192
for name in [n for n, m in DummyDataCube.__dict__.items() if getattr(m, '_mock_side_effect', False)]:

openeo_driver/testing.py

Lines changed: 43 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,7 @@
1010
import re
1111
import urllib.request
1212
from pathlib import Path
13-
from typing import Any, Callable, Dict, Optional, Pattern, Tuple, Union
13+
from typing import Any, Callable, Dict, Optional, Pattern, Tuple, Union, Collection
1414
from unittest import mock
1515

1616
import pytest
@@ -532,9 +532,48 @@ def to_geojson_feature_collection(self) -> dict:
532532
return approxify(result, rel=self.rel, abs=self.abs)
533533

534534

535-
def caplog_with_custom_formatter(
536-
caplog: pytest.LogCaptureFixture, format: Union[str, logging.Formatter]
537-
):
535+
class ApproxGeoJSONByBounds:
536+
"""pytest assert helper to build a matcher to check if a certain GeoJSON construct is within expected bounds"""
537+
538+
def __init__(
539+
self,
540+
*args,
541+
types: Collection[str] = ("Polygon", "MultiPolygon"),
542+
rel: Optional[float] = None,
543+
abs: Optional[float] = None,
544+
):
545+
bounds = args[0] if len(args) == 1 else args
546+
assert isinstance(bounds, (list, tuple)) and len(bounds) == 4
547+
self.expected_bounds = [float(b) for b in bounds]
548+
self.rel = rel
549+
self.abs = abs
550+
self.expected_types = set(types)
551+
self.actual_info = []
552+
553+
def __eq__(self, other):
554+
try:
555+
assert isinstance(other, dict), "Not a dict"
556+
assert "type" in other, "No 'type' field"
557+
assert other["type"] in self.expected_types, f"Wrong type {other['type']!r}"
558+
assert "coordinates" in other, "No 'coordinates' field"
559+
560+
actual_bounds = shapely.geometry.shape(other).bounds
561+
matching = actual_bounds == pytest.approx(self.expected_bounds, rel=self.rel, abs=self.abs)
562+
if not matching:
563+
self.actual_info.append(f"expected bounds {self.expected_bounds} != actual bounds: {actual_bounds}")
564+
return matching
565+
except Exception as e:
566+
self.actual_info.append(str(e))
567+
return False
568+
569+
def __repr__(self):
570+
msg = f"<{type(self).__name__} types={self.expected_types} bounds={self.expected_bounds} rel={self.rel}, abs={self.abs}>"
571+
if self.actual_info:
572+
msg += "\n" + "\n".join(f" # {i}" for i in self.actual_info)
573+
return msg
574+
575+
576+
def caplog_with_custom_formatter(caplog: pytest.LogCaptureFixture, format: Union[str, logging.Formatter]):
538577
"""
539578
Context manager to set a custom formatter on the caplog fixture.
540579

tests/test_testing.py

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
caplog_with_custom_formatter,
2222
ephemeral_fileserver,
2323
preprocess_check_and_replace,
24+
ApproxGeoJSONByBounds,
2425
)
2526

2627

@@ -284,3 +285,38 @@ def test_caplog_with_custom_formatter(caplog, format):
284285
"[WARNING] still not good (root)",
285286
"WARNING root:test_testing.py:XXX hmm bad times",
286287
]
288+
289+
290+
class TestApproxGeoJSONByBounds:
291+
def test_basic(self):
292+
geometry = {"type": "Polygon", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}
293+
assert geometry == ApproxGeoJSONByBounds(1, 1, 3, 4, abs=0.1)
294+
295+
@pytest.mark.parametrize(
296+
["data", "expected_message"],
297+
[
298+
("nope", "# Not a dict"),
299+
({"foo": "bar"}, " # No 'type' field"),
300+
({"type": "Polygommm", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}, " # Wrong type 'Polygommm'"),
301+
({"type": "Polygon"}, " # No 'coordinates' field"),
302+
],
303+
)
304+
def test_invalid_construct(self, data, expected_message):
305+
expected = ApproxGeoJSONByBounds(1, 2, 3, 4)
306+
assert data != expected
307+
assert expected_message in repr(expected)
308+
309+
def test_out_of_bounds(self):
310+
geometry = {"type": "Polygon", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}
311+
expected = ApproxGeoJSONByBounds(11, 22, 33, 44, abs=0.1)
312+
assert geometry != expected
313+
assert "# expected bounds [11.0, 22.0, 33.0, 44.0] != actual bounds: (1.0, 1.0, 3.0, 4.0)" in repr(expected)
314+
315+
def test_types(self):
316+
geometry = {"type": "Polygon", "coordinates": [[[1, 2], [3, 1], [2, 4], [1, 2]]]}
317+
assert geometry == ApproxGeoJSONByBounds(1, 1, 3, 4, types=["Polygon"], abs=0.1)
318+
assert geometry == ApproxGeoJSONByBounds(1, 1, 3, 4, types=["Polygon", "Point"], abs=0.1)
319+
320+
expected = ApproxGeoJSONByBounds(1, 1, 3, 4, types=["MultiPolygon"], abs=0.1)
321+
assert geometry != expected
322+
assert "Wrong type 'Polygon'" in repr(expected)

tests/test_views_execute.py

Lines changed: 74 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
generate_unique_test_process_id,
3434
preprocess_check_and_replace,
3535
preprocess_regex_check_and_replace,
36+
ApproxGeoJSONByBounds,
3637
)
3738
from openeo_driver.util.geometry import as_geojson_feature, as_geojson_feature_collection
3839
from openeo_driver.util.ioformats import IOFORMATS
@@ -1310,7 +1311,7 @@ def test_run_udf_on_vector_read_vector(api100, udf_code):
13101311
"udf": udf_code,
13111312
"runtime": "Python",
13121313
},
1313-
"result": "true",
1314+
"result": True,
13141315
},
13151316
}
13161317
resp = api100.check_result(process_graph)
@@ -1355,8 +1356,8 @@ def test_run_udf_on_vector_get_geometries(api100, udf_code):
13551356
"udf": udf_code,
13561357
"runtime": "Python",
13571358
},
1358-
"result": "true"
1359-
}
1359+
"result": True,
1360+
},
13601361
}
13611362
resp = api100.check_result(process_graph)
13621363
assert resp.json == [
@@ -1401,7 +1402,7 @@ def test_run_udf_on_vector_load_uploaded_files(api100, udf_code):
14011402
"udf": udf_code,
14021403
"runtime": "Python",
14031404
},
1404-
"result": "true",
1405+
"result": True,
14051406
},
14061407
}
14071408
resp = api100.check_result(process_graph)
@@ -3522,3 +3523,72 @@ def test_request_costs_for_failed_request(api, backend_implementation):
35223523
assert env["correlation_id"] == "r-abc123"
35233524

35243525
get_request_costs.assert_called_with(TEST_USER, "r-abc123", False)
3526+
3527+
3528+
class TestVectorCubeRunUDF:
3529+
"""
3530+
Tests about running UDF based manipulations on vector cubes
3531+
3532+
References:
3533+
- https://github.com/Open-EO/openeo-python-driver/issues/197
3534+
- https://github.com/Open-EO/openeo-python-driver/pull/200
3535+
- https://github.com/Open-EO/openeo-geopyspark-driver/issues/437
3536+
"""
3537+
3538+
def test_apply_dimension_run_udf_change_geometry(self, api100):
3539+
udf_code = """
3540+
from openeo.udf import UdfData, FeatureCollection
3541+
def process_geometries(udf_data: UdfData) -> UdfData:
3542+
[feature_collection] = udf_data.get_feature_collection_list()
3543+
gdf = feature_collection.data
3544+
gdf["geometry"] = gdf["geometry"].buffer(distance=1, resolution=2)
3545+
udf_data.set_feature_collection_list([
3546+
FeatureCollection(id="_", data=gdf),
3547+
])
3548+
"""
3549+
udf_code = textwrap.dedent(udf_code)
3550+
process_graph = {
3551+
"get_vector_data": {
3552+
"process_id": "load_uploaded_files",
3553+
"arguments": {"paths": [str(get_path("geojson/FeatureCollection02.json"))], "format": "GeoJSON"},
3554+
},
3555+
"apply_dimension": {
3556+
"process_id": "apply_dimension",
3557+
"arguments": {
3558+
"data": {"from_node": "get_vector_data"},
3559+
"dimension": "properties",
3560+
"process": {
3561+
"process_graph": {
3562+
"runudf1": {
3563+
"process_id": "run_udf",
3564+
"arguments": {
3565+
"data": {"from_node": "get_vector_data"},
3566+
"udf": udf_code,
3567+
"runtime": "Python",
3568+
},
3569+
"result": True,
3570+
}
3571+
},
3572+
},
3573+
},
3574+
"result": True,
3575+
},
3576+
}
3577+
resp = api100.check_result(process_graph)
3578+
assert resp.json == DictSubSet(
3579+
{
3580+
"type": "FeatureCollection",
3581+
"features": [
3582+
{
3583+
"type": "Feature",
3584+
"geometry": ApproxGeoJSONByBounds(0, 0, 4, 4, types=["Polygon"], abs=0.1),
3585+
"properties": {"id": "first", "pop": 1234},
3586+
},
3587+
{
3588+
"type": "Feature",
3589+
"geometry": ApproxGeoJSONByBounds(2, 1, 6, 5, types=["Polygon"], abs=0.1),
3590+
"properties": {"id": "second", "pop": 5678},
3591+
},
3592+
],
3593+
}
3594+
)

0 commit comments

Comments (0)