diff --git a/openeo_driver/ProcessGraphDeserializer.py b/openeo_driver/ProcessGraphDeserializer.py
index 52484ce2..79518cf5 100644
--- a/openeo_driver/ProcessGraphDeserializer.py
+++ b/openeo_driver/ProcessGraphDeserializer.py
@@ -740,32 +740,24 @@ def fit_class_random_forest(args: dict, env: EvalEnv) -> DriverMlModel:
         return DriverMlModel()

     predictors = extract_arg(args, 'predictors')
-    if not isinstance(predictors, AggregatePolygonSpatialResult):
-        # TODO #114 EP-3981 add support for real vector cubes.
+    if not isinstance(predictors, (AggregatePolygonSpatialResult, DriverVectorCube)):
+        # TODO #114 EP-3981 drop AggregatePolygonSpatialResult support.
         raise ProcessParameterInvalidException(
-            parameter="predictors", process="fit_class_random_forest",
-            reason=f"should be non-temporal vector-cube (got `{type(predictors)}`)."
-        )
-    target = extract_arg(args, 'target')
-    if not (
-        isinstance(target, dict)
-        and target.get("type") == "FeatureCollection"
-        and isinstance(target.get("features"), list)
-    ):
-        # TODO #114 EP-3981 vector cube support
-        raise ProcessParameterInvalidException(
-            parameter="target", process="fit_class_random_forest",
-            reason='only GeoJSON FeatureCollection is currently supported.',
+            parameter="predictors",
+            process="fit_class_random_forest",
+            reason=f"should be non-temporal vector-cube, got {type(predictors)}.",
         )
-    if any(
-        # TODO: allow string based target labels too?
-        not isinstance(deep_get(f, "properties", "target", default=None), int)
-        for f in target.get("features", [])
-    ):
+
+    target = extract_arg(args, "target")
+    if isinstance(target, DriverVectorCube):
+        pass
+    elif isinstance(target, dict) and target.get("type") == "FeatureCollection":
+        target = env.backend_implementation.DriverVectorCube.from_geojson(target)
+    else:
         raise ProcessParameterInvalidException(
             parameter="target", process="fit_class_random_forest",
-            reason="Each feature (from target feature collection) should have an integer 'target' property.",
+            reason=f"expected vector-cube like value but got {type(target)}.",
         )

     # TODO: get defaults from process spec?
@@ -1056,7 +1048,7 @@ def aggregate_spatial(args: dict, env: EvalEnv) -> DriverDataCube:
     if isinstance(geoms, DriverVectorCube):
         geoms = geoms
     elif isinstance(geoms, dict):
-        geoms = geojson_to_geometry(geoms)
+        geoms = env.backend_implementation.DriverVectorCube.from_geojson(geoms)
     elif isinstance(geoms, DelayedVector):
         geoms = geoms.path
     else:
@@ -1321,6 +1313,12 @@ def run_udf(args: dict, env: EvalEnv):
     # TODO #114 add support for DriverVectorCube
     if isinstance(data, AggregatePolygonResult):
         pass
+    if isinstance(data, DriverVectorCube):
+        # TODO: this is a temporary stopgap measure, converting to old-style save results to stay backward compatible.
+        #       Better to have proper DriverVectorCube support in run_udf?
+        #       How does that fit in UdfData and UDF function signatures?
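+        #       As a sketch, to_legacy_save_result() maps (dimension names as in
+        #       DriverVectorCube): (geometries, t[, bands]) -> AggregatePolygonResult,
+        #       and (geometries, bands) -> JSONResult.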
+ data = data.to_legacy_save_result() + if isinstance(data, (DelayedVector, dict)): if isinstance(data, dict): data = DelayedVector.from_json_dict(data) @@ -1338,7 +1336,10 @@ def run_udf(args: dict, env: EvalEnv): ) else: raise ProcessParameterInvalidException( - parameter='data', process='run_udf', reason=f"Invalid data type {type(data)!r} expected raster-cube.") + parameter="data", + process="run_udf", + reason=f"Unsupported data type {type(data)}.", + ) _log.info(f"[run_udf] Running UDF {str_truncate(udf, width=256)!r} on {data!r}") result_data = openeo.udf.run_udf_code(udf, data) @@ -1550,7 +1551,7 @@ def to_vector_cube(args: Dict, env: EvalEnv): # TODO: standardization of something like this? https://github.com/Open-EO/openeo-processes/issues/346 data = extract_arg(args, "data", process_id="to_vector_cube") if isinstance(data, dict) and data.get("type") in {"Polygon", "MultiPolygon", "Feature", "FeatureCollection"}: - return DriverVectorCube.from_geojson(data) + return env.backend_implementation.DriverVectorCube.from_geojson(data) # TODO: support more inputs: string with geojson, string with WKT, list of WKT, string with URL to GeoJSON, ... raise FeatureUnsupportedException(f"Converting {type(data)} to vector cube is not supported") diff --git a/openeo_driver/_version.py b/openeo_driver/_version.py index b21c095c..6cb6628d 100644 --- a/openeo_driver/_version.py +++ b/openeo_driver/_version.py @@ -1 +1 @@ -__version__ = '0.20.4a1' +__version__ = "0.21.0a1" diff --git a/openeo_driver/backend.py b/openeo_driver/backend.py index 3af96c65..bdd73300 100644 --- a/openeo_driver/backend.py +++ b/openeo_driver/backend.py @@ -23,7 +23,7 @@ from openeo.capabilities import ComparableVersion from openeo.internal.process_graph_visitor import ProcessGraphVisitor from openeo.util import rfc3339, dict_no_none -from openeo_driver.datacube import DriverDataCube, DriverMlModel +from openeo_driver.datacube import DriverDataCube, DriverMlModel, DriverVectorCube from openeo_driver.datastructs import SarBackscatterArgs from openeo_driver.dry_run import SourceConstraint from openeo_driver.errors import CollectionNotFoundException, ServiceUnsupportedException, FeatureUnsupportedException @@ -570,6 +570,9 @@ class OpenEoBackendImplementation: enable_basic_auth = True enable_oidc_auth = True + # Overridable vector cube implementation + DriverVectorCube = DriverVectorCube + def __init__( self, secondary_services: Optional[SecondaryServices] = None, diff --git a/openeo_driver/datacube.py b/openeo_driver/datacube.py index b4deb080..5638ec14 100644 --- a/openeo_driver/datacube.py +++ b/openeo_driver/datacube.py @@ -177,7 +177,9 @@ def __init__( def with_cube(self, cube: xarray.DataArray, flatten_prefix: str = FLATTEN_PREFIX) -> "DriverVectorCube": """Create new vector cube with same geometries but new cube""" log.info(f"Creating vector cube with new cube {cube.name!r}") - return DriverVectorCube(geometries=self._geometries, cube=cube, flatten_prefix=flatten_prefix) + return type(self)( + geometries=self._geometries, cube=cube, flatten_prefix=flatten_prefix + ) @classmethod def from_fiona(cls, paths: List[str], driver: str, options: dict) -> "DriverVectorCube": @@ -226,6 +228,7 @@ def _as_geopandas_df(self) -> gpd.GeoDataFrame: return df def to_geojson(self): + """Export as GeoJSON FeatureCollection.""" return shapely.geometry.mapping(self._as_geopandas_df()) def to_wkt(self) -> List[str]: @@ -242,6 +245,11 @@ def write_assets( format_info = IOFORMATS.get(format) # TODO: check if format can be used for vector data? 
path = directory / f"vectorcube.{format_info.extension}" + + if format_info.format == "JSON": + # TODO: eliminate this legacy format? + return self._write_legacy_aggregate_polygon_result_json(directory=directory) + self._as_geopandas_df().to_file(path, driver=format_info.fiona_driver) if not format_info.multi_file: @@ -274,6 +282,40 @@ def write_assets( def to_multipolygon(self) -> shapely.geometry.MultiPolygon: return shapely.ops.unary_union(self._geometries.geometry) + def to_legacy_save_result(self) -> Union["AggregatePolygonResult", "JSONResult"]: + """ + Export to legacy AggregatePolygonResult/JSONResult objects. + Provided as temporary adaption layer while migrating to real vector cubes. + """ + # TODO: eliminate these legacy, non-standard format? + from openeo_driver.save_result import AggregatePolygonResult, JSONResult + + cube = self._cube + # TODO: more flexible temporal/band dimension detection? + if cube.dims == (self.DIM_GEOMETRIES, "t"): + # Add single band dimension + cube = cube.expand_dims({"bands": ["band"]}, axis=-1) + if cube.dims == (self.DIM_GEOMETRIES, "t", "bands"): + cube = cube.transpose("t", self.DIM_GEOMETRIES, "bands") + timeseries = { + t.item(): t_slice.values.tolist() + for t, t_slice in zip(cube.coords["t"], cube) + } + return AggregatePolygonResult(timeseries=timeseries, regions=self) + elif cube.dims == (self.DIM_GEOMETRIES, "bands"): + cube = cube.transpose(self.DIM_GEOMETRIES, "bands") + return JSONResult(data=cube.values.tolist()) + raise ValueError( + f"Unsupported cube configuration {cube.dims} for _write_legacy_aggregate_polygon_result_json" + ) + + def _write_legacy_aggregate_polygon_result_json( + self, directory: Path + ) -> Dict[str, StacAsset]: + """Export to legacy AggregatePolygonResult JSON format""" + # TODO: eliminate this legacy, non-standard format? 
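+        # (Delegating to the legacy result classes keeps the "JSON" output of
+        # write_assets() backward compatible during the vector cube migration.)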
+ return self.to_legacy_save_result().write_assets(directory) + def get_bounding_box(self) -> Tuple[float, float, float, float]: return tuple(self._geometries.total_bounds) @@ -293,6 +335,15 @@ def __eq__(self, other): return (isinstance(other, DriverVectorCube) and np.array_equal(self._as_geopandas_df().values, other._as_geopandas_df().values)) + def fit_class_random_forest( + self, + target: "DriverVectorCube", + num_trees: int = 100, + max_variables: Optional[Union[int, str]] = None, + seed: Optional[int] = None, + ) -> "DriverMlModel": + raise NotImplementedError + class DriverMlModel: """Base class for driver-side 'ml-model' data structures""" diff --git a/openeo_driver/dry_run.py b/openeo_driver/dry_run.py index bcd801b7..86b8a96b 100644 --- a/openeo_driver/dry_run.py +++ b/openeo_driver/dry_run.py @@ -350,8 +350,10 @@ def get_source_constraints(self, merge=True) -> List[SourceConstraint]: return source_constraints def get_geometries( - self, operation="aggregate_spatial" - ) -> List[Union[shapely.geometry.base.BaseGeometry, DelayedVector]]: + self, operation="aggregate_spatial" + ) -> List[ + Union[shapely.geometry.base.BaseGeometry, DelayedVector, DriverVectorCube] + ]: """Get geometries (polygons or DelayedVector), as used by aggregate_spatial""" geometries_by_id = {} for leaf in self.get_trace_leaves(): diff --git a/openeo_driver/dummy/dummy_backend.py b/openeo_driver/dummy/dummy_backend.py index efa2de75..a161ba17 100644 --- a/openeo_driver/dummy/dummy_backend.py +++ b/openeo_driver/dummy/dummy_backend.py @@ -227,8 +227,16 @@ def assert_polygon_sequence(geometries: Union[Sequence, BaseMultipartGeometry]) dims += (self.metadata.band_dimension.name,) coords[self.metadata.band_dimension.name] = self.metadata.band_names shape = [len(coords[d]) for d in dims] - data = numpy.arange(numpy.prod(shape)).reshape(shape) - cube = xarray.DataArray(data=data, dims=dims, coords=coords, name="aggregate_spatial") + data = numpy.arange(numpy.prod(shape), dtype="float") + # Start with some more interesting values (e.g. to test NaN/null/None handling) + data[0] = 2.345 + data[1] = float("nan") + cube = xarray.DataArray( + data=data.reshape(shape), + dims=dims, + coords=coords, + name="aggregate_spatial", + ) return geometries.with_cube(cube=cube, flatten_prefix="agg") elif isinstance(geometries, str): geometries = [geometry for geometry in DelayedVector(geometries).geometries] @@ -276,6 +284,25 @@ def fit_class_random_forest( ) +class DummyVectorCube(DriverVectorCube): + def fit_class_random_forest( + self, + target: DriverVectorCube, + num_trees: int = 100, + max_variables: Optional[Union[int, str]] = None, + seed: Optional[int] = None, + ) -> "DriverMlModel": + return DummyMlModel( + process_id="fit_class_random_forest", + # TODO: handle `to_geojson` in `DummyMlModel.write_assets` instead of here? 
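+            #       to_geojson() keeps the captured arguments JSON-serializable,
+            #       so tests can assert on them with plain dict comparisons.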
+ data=self.to_geojson(), + target=target.to_geojson(), + num_trees=num_trees, + max_variables=max_variables, + seed=seed, + ) + + class DummyMlModel(DriverMlModel): def __init__(self, **kwargs): @@ -600,6 +627,9 @@ def delete(self, user_id: str, process_id: str) -> None: class DummyBackendImplementation(OpenEoBackendImplementation): + + DriverVectorCube = DummyVectorCube + def __init__(self, processing: Optional[Processing] = None): super(DummyBackendImplementation, self).__init__( secondary_services=DummySecondaryServices(), diff --git a/openeo_driver/save_result.py b/openeo_driver/save_result.py index 052430e8..3fb0a733 100644 --- a/openeo_driver/save_result.py +++ b/openeo_driver/save_result.py @@ -161,7 +161,7 @@ def __init__(self, data, format: str = "json", options: dict = None): super().__init__(format=format, options=options) self.data = data - def write_assets(self, path:str) -> Dict[str, StacAsset]: + def write_assets(self, path: Union[str, Path]) -> Dict[str, StacAsset]: """ Save generated assets into a directory, return asset metadata. TODO: can an asset also be a full STAC item? In principle, one openEO job can either generate a full STAC collection, or one STAC item with multiple assets... @@ -220,7 +220,7 @@ def get_data(self): # By default, keep original (proprietary) result format return self.data - def write_assets(self, directory: str) -> Dict[str, StacAsset]: + def write_assets(self, directory: Union[str, Path]) -> Dict[str, StacAsset]: """ Save generated assets into a directory, return asset metadata. TODO: can an asset also be a full STAC item? In principle, one openEO job can either generate a full STAC collection, or one STAC item with multiple assets... diff --git a/tests/conftest.py b/tests/conftest.py index f64efd71..7de965c6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -42,7 +42,7 @@ def udp_registry(backend_implementation) -> UserDefinedProcesses: def flask_app(backend_implementation) -> flask.Flask: app = build_app( backend_implementation=backend_implementation, - # error_handling=False + # error_handling=False, ) app.config.from_mapping(TEST_APP_CONFIG) return app diff --git a/tests/data/pg/1.0/no_nested_json_result.json b/tests/data/pg/1.0/no_nested_json_result.json index 71ffe657..2998850f 100644 --- a/tests/data/pg/1.0/no_nested_json_result.json +++ b/tests/data/pg/1.0/no_nested_json_result.json @@ -67,7 +67,7 @@ "result": true, "process_id": "save_result", "arguments": { - "format": "GTIFF", + "format": "GeoJSON", "data": { "from_node": "aggregatespatial1" }, diff --git a/tests/data/pg/1.0/run_udf_on_timeseries.json b/tests/data/pg/1.0/run_udf_on_timeseries.json index 7c4d107f..b25cc3e8 100644 --- a/tests/data/pg/1.0/run_udf_on_timeseries.json +++ b/tests/data/pg/1.0/run_udf_on_timeseries.json @@ -12,6 +12,10 @@ "temporal_extent": [ "2017-11-21", "2017-11-21" + ], + "bands": [ + "B02", + "B03" ] } }, diff --git a/tests/test_dry_run.py b/tests/test_dry_run.py index 2fda0f0f..f5e3e8f3 100644 --- a/tests/test_dry_run.py +++ b/tests/test_dry_run.py @@ -5,9 +5,11 @@ from openeo.rest.datacube import DataCube from openeo_driver.ProcessGraphDeserializer import evaluate, ENV_DRY_RUN_TRACER, _extract_load_parameters, \ ENV_SOURCE_CONSTRAINTS, custom_process_from_process_graph, process_registry_100 +from openeo_driver.datacube import DriverVectorCube from openeo_driver.datastructs import SarBackscatterArgs from openeo_driver.delayed_vector import DelayedVector from openeo_driver.dry_run import DryRunDataTracer, DataSource, DataTrace, ProcessType +from 
openeo_driver.testing import DictSubSet from openeo_driver.utils import EvalEnv from tests.data import get_path, load_json @@ -526,67 +528,148 @@ def test_aggregate_spatial_only(dry_run_env, dry_run_tracer): assert src == ("load_collection", ("S2_FOOBAR", ())) assert constraints == { "spatial_extent": {"west": 0.0, "south": 0.0, "east": 8.0, "north": 5.0, "crs": "EPSG:4326"}, - "aggregate_spatial": {"geometries": shapely.geometry.shape(polygon)}, + "aggregate_spatial": {"geometries": DriverVectorCube.from_geojson(polygon)}, } geometries, = dry_run_tracer.get_geometries() - assert isinstance(geometries, shapely.geometry.Polygon) - assert shapely.geometry.mapping(geometries) == { - "type": "Polygon", - "coordinates": (((0.0, 0.0), (3.0, 5.0), (8.0, 2.0), (0.0, 0.0)),) - } + assert isinstance(geometries, DriverVectorCube) + assert geometries.to_geojson() == DictSubSet( + type="FeatureCollection", + features=[ + DictSubSet( + geometry={ + "type": "Polygon", + "coordinates": (((0.0, 0.0), (3.0, 5.0), (8.0, 2.0), (0.0, 0.0)),), + } + ), + ], + ) def test_aggregate_spatial_apply_dimension(dry_run_env, dry_run_tracer): polygon = {"type": "Polygon", "coordinates": [[(0, 0), (3, 5), (8, 2), (0, 0)]]} - pg = {'loadcollection1': {'process_id': 'load_collection', 'arguments': {'bands': ['B04', 'B08', 'B11', 'SCL'], - 'id': 'S2_FOOBAR', - 'spatial_extent': None, - 'temporal_extent': ['2018-11-01', - '2020-02-01']}}, - 'maskscldilation1': {'process_id': 'mask_scl_dilation', - 'arguments': {'data': {'from_node': 'loadcollection1'}, 'scl_band_name': 'SCL'}}, - 'aggregatetemporalperiod1': {'process_id': 'aggregate_temporal_period', - 'arguments': {'data': {'from_node': 'maskscldilation1'}, 'period': 'month', - 'reducer': {'process_graph': {'mean1': {'process_id': 'mean', - 'arguments': {'data': { - 'from_parameter': 'data'}}, - 'result': True}}}}}, - 'applydimension1': {'process_id': 'apply_dimension', - 'arguments': {'data': {'from_node': 'aggregatetemporalperiod1'}, 'dimension': 't', - 'process': {'process_graph': { - 'arrayinterpolatelinear1': {'process_id': 'array_interpolate_linear', - 'arguments': { - 'data': {'from_parameter': 'data'}}, - 'result': True}}}}}, - 'filtertemporal1': {'process_id': 'filter_temporal', 'arguments': {'data': {'from_node': 'applydimension1'}, - 'extent': ['2019-01-01', '2020-01-01']}}, - 'applydimension2': {'process_id': 'apply_dimension', - 'arguments': {'data': {'from_node': 'filtertemporal1'}, 'dimension': 'bands', 'process': { - 'process_graph': {'arrayelement1': {'process_id': 'array_element', - 'arguments': {'data': {'from_parameter': 'data'}, - 'index': 1}}, - 'arrayelement2': {'process_id': 'array_element', - 'arguments': {'data': {'from_parameter': 'data'}, - 'index': 0}}, - 'normalizeddifference1': {'process_id': 'normalized_difference', - 'arguments': { - 'x': {'from_node': 'arrayelement1'}, - 'y': {'from_node': 'arrayelement2'}}}, - 'arraymodify1': {'process_id': 'array_modify', - 'arguments': {'data': {'from_parameter': 'data'}, - 'index': 0, 'values': { - 'from_node': 'normalizeddifference1'}}, - 'result': True}}}}}, - 'renamelabels1': {'process_id': 'rename_labels', - 'arguments': {'data': {'from_node': 'applydimension2'}, 'dimension': 'bands', - 'target': ['NDVI', 'B04', 'B08']}}, - 'aggregatespatial1': {'process_id': 'aggregate_spatial', - 'arguments': {'data': {'from_node': 'renamelabels1'}, 'geometries': polygon, - 'reducer': {'process_graph': {'mean2': {'process_id': 'mean', - 'arguments': {'data': { - 'from_parameter': 'data'}}, - 'result': True}}}}, - 
'result': True}} + pg = { + "loadcollection1": { + "process_id": "load_collection", + "arguments": { + "bands": ["B04", "B08", "B11", "SCL"], + "id": "S2_FOOBAR", + "spatial_extent": None, + "temporal_extent": ["2018-11-01", "2020-02-01"], + }, + }, + "maskscldilation1": { + "process_id": "mask_scl_dilation", + "arguments": { + "data": {"from_node": "loadcollection1"}, + "scl_band_name": "SCL", + }, + }, + "aggregatetemporalperiod1": { + "process_id": "aggregate_temporal_period", + "arguments": { + "data": {"from_node": "maskscldilation1"}, + "period": "month", + "reducer": { + "process_graph": { + "mean1": { + "process_id": "mean", + "arguments": {"data": {"from_parameter": "data"}}, + "result": True, + } + } + }, + }, + }, + "applydimension1": { + "process_id": "apply_dimension", + "arguments": { + "data": {"from_node": "aggregatetemporalperiod1"}, + "dimension": "t", + "process": { + "process_graph": { + "arrayinterpolatelinear1": { + "process_id": "array_interpolate_linear", + "arguments": {"data": {"from_parameter": "data"}}, + "result": True, + } + } + }, + }, + }, + "filtertemporal1": { + "process_id": "filter_temporal", + "arguments": { + "data": {"from_node": "applydimension1"}, + "extent": ["2019-01-01", "2020-01-01"], + }, + }, + "applydimension2": { + "process_id": "apply_dimension", + "arguments": { + "data": {"from_node": "filtertemporal1"}, + "dimension": "bands", + "process": { + "process_graph": { + "arrayelement1": { + "process_id": "array_element", + "arguments": { + "data": {"from_parameter": "data"}, + "index": 1, + }, + }, + "arrayelement2": { + "process_id": "array_element", + "arguments": { + "data": {"from_parameter": "data"}, + "index": 0, + }, + }, + "normalizeddifference1": { + "process_id": "normalized_difference", + "arguments": { + "x": {"from_node": "arrayelement1"}, + "y": {"from_node": "arrayelement2"}, + }, + }, + "arraymodify1": { + "process_id": "array_modify", + "arguments": { + "data": {"from_parameter": "data"}, + "index": 0, + "values": {"from_node": "normalizeddifference1"}, + }, + "result": True, + }, + } + }, + }, + }, + "renamelabels1": { + "process_id": "rename_labels", + "arguments": { + "data": {"from_node": "applydimension2"}, + "dimension": "bands", + "target": ["NDVI", "B04", "B08"], + }, + }, + "aggregatespatial1": { + "process_id": "aggregate_spatial", + "arguments": { + "data": {"from_node": "renamelabels1"}, + "geometries": polygon, + "reducer": { + "process_graph": { + "mean2": { + "process_id": "mean", + "arguments": {"data": {"from_parameter": "data"}}, + "result": True, + } + } + }, + }, + "result": True, + }, + } cube = evaluate(pg, env=dry_run_env) @@ -599,15 +682,22 @@ def test_aggregate_spatial_apply_dimension(dry_run_env, dry_run_tracer): "process_type": [ProcessType.GLOBAL_TIME], "bands": ["B04", "B08", "B11", "SCL"], "custom_cloud_mask": {"method": "mask_scl_dilation", 'scl_band_name': 'SCL'}, - "aggregate_spatial": {"geometries": shapely.geometry.shape(polygon)}, + "aggregate_spatial": {"geometries": DriverVectorCube.from_geojson(polygon)}, "temporal_extent": ("2018-11-01", "2020-02-01") } geometries, = dry_run_tracer.get_geometries() - assert isinstance(geometries, shapely.geometry.Polygon) - assert shapely.geometry.mapping(geometries) == { - "type": "Polygon", - "coordinates": (((0.0, 0.0), (3.0, 5.0), (8.0, 2.0), (0.0, 0.0)),) - } + assert isinstance(geometries, DriverVectorCube) + assert geometries.to_geojson() == DictSubSet( + type="FeatureCollection", + features=[ + DictSubSet( + geometry={ + "type": "Polygon", + 
"coordinates": (((0.0, 0.0), (3.0, 5.0), (8.0, 2.0), (0.0, 0.0)),), + } + ), + ], + ) def test_aggregate_spatial_and_filter_bbox(dry_run_env, dry_run_tracer): @@ -626,14 +716,21 @@ def test_aggregate_spatial_and_filter_bbox(dry_run_env, dry_run_tracer): assert src == ("load_collection", ("S2_FOOBAR", ())) assert constraints == { "spatial_extent": bbox, - "aggregate_spatial": {"geometries": shapely.geometry.shape(polygon)}, + "aggregate_spatial": {"geometries": DriverVectorCube.from_geojson(polygon)}, } geometries, = dry_run_tracer.get_geometries() - assert isinstance(geometries, shapely.geometry.Polygon) - assert shapely.geometry.mapping(geometries) == { - "type": "Polygon", - "coordinates": (((0.0, 0.0), (3.0, 5.0), (8.0, 2.0), (0.0, 0.0)),) - } + assert isinstance(geometries, DriverVectorCube) + assert geometries.to_geojson() == DictSubSet( + type="FeatureCollection", + features=[ + DictSubSet( + geometry={ + "type": "Polygon", + "coordinates": (((0.0, 0.0), (3.0, 5.0), (8.0, 2.0), (0.0, 0.0)),), + } + ), + ], + ) def test_resample_filter_spatial(dry_run_env, dry_run_tracer): @@ -697,31 +794,34 @@ def test_aggregate_spatial_read_vector(dry_run_env, dry_run_tracer): assert isinstance(geometries, DelayedVector) -def test_aggregate_spatial_get_geometries_feature_collection(dry_run_env, dry_run_tracer): +def test_aggregate_spatial_get_geometries_feature_collection( + dry_run_env, dry_run_tracer +): pg = { "lc": {"process_id": "load_collection", "arguments": {"id": "S2_FOOBAR"}}, - "vector": {"process_id": "get_geometries", "arguments": {"feature_collection": { - "type": "FeatureCollection", - "name": "fields", - "crs": { - "type": "name", - "properties": { - "name": "urn:ogc:def:crs:OGC:1.3:CRS84" - } - }, - "features": [ - { - "type": "Feature", - "geometry": { - "type": "Polygon", - "coordinates": [[(0, 0), (3, 5), (8, 2), (0, 0)]] + "vector": { + "process_id": "get_geometries", + "arguments": { + "feature_collection": { + "type": "FeatureCollection", + "name": "fields", + "crs": { + "type": "name", + "properties": {"name": "urn:ogc:def:crs:OGC:1.3:CRS84"}, }, - "properties": { - "CODE_OBJ": "0000000000000001" - } + "features": [ + { + "type": "Feature", + "geometry": { + "type": "Polygon", + "coordinates": [[(0, 0), (3, 5), (8, 2), (0, 0)]], + }, + "properties": {"CODE_OBJ": "0000000000000001"}, + } + ], } - ] - }}}, + }, + }, "agg": { "process_id": "aggregate_spatial", "arguments": { @@ -744,19 +844,28 @@ def test_aggregate_spatial_get_geometries_feature_collection(dry_run_env, dry_ru assert len(source_constraints) == 1 src, constraints = source_constraints[0] assert src == ("load_collection", ("S2_FOOBAR", ())) - expected_geometry_collection = shapely.geometry.GeometryCollection( - [shapely.geometry.shape({"type": "Polygon", "coordinates": [[(0, 0), (3, 5), (8, 2), (0, 0)]]})] + + expected_geometry_collection = DriverVectorCube.from_geojson( + pg["vector"]["arguments"]["feature_collection"] ) assert constraints == { - "spatial_extent": {'west': 0.0, 'south': 0.0, 'east': 8.0, 'north': 5.0, 'crs': 'EPSG:4326'}, - "aggregate_spatial": {"geometries": expected_geometry_collection} + "spatial_extent": { + "west": 0.0, + "south": 0.0, + "east": 8.0, + "north": 5.0, + "crs": "EPSG:4326", + }, + "aggregate_spatial": {"geometries": expected_geometry_collection}, } - geometries, = dry_run_tracer.get_geometries() - assert isinstance(geometries, shapely.geometry.GeometryCollection) + (geometries,) = dry_run_tracer.get_geometries() + assert isinstance(geometries, DriverVectorCube) 
-@pytest.mark.parametrize(["arguments", "expected"], [ - ( +@pytest.mark.parametrize( + ["arguments", "expected"], + [ + ( {}, SarBackscatterArgs(coefficient="gamma0-terrain", elevation_model=None, mask=False, contributing_area=False, local_incidence_angle=False, ellipsoid_incidence_angle=False, noise_removal=True, diff --git a/tests/test_views_execute.py b/tests/test_views_execute.py index c4c102be..68abfc98 100644 --- a/tests/test_views_execute.py +++ b/tests/test_views_execute.py @@ -537,24 +537,28 @@ def test_execute_mask(api): api.check_result("mask.json") assert dummy_backend.get_collection("S2_FAPAR_CLOUDCOVER").mask.call_count == 1 - expected = { + expected_spatial_extent = { "west": 7.02, "south": 51.2, "east": 7.65, "north": 51.7, - "crs": 'EPSG:4326', + "crs": "EPSG:4326", } - expected_geometry = shapely.geometry.shape({ - "type": "Polygon", - "coordinates": [[[7.02, 51.7], [7.65, 51.7], [7.65, 51.2], [7.04, 51.3], [7.02, 51.7]]] - }) + expected_geometry = DriverVectorCube.from_geojson( + { + "type": "Polygon", + "coordinates": [ + [[7.02, 51.7], [7.65, 51.7], [7.65, 51.2], [7.04, 51.3], [7.02, 51.7]], + ], + } + ) params = dummy_backend.last_load_collection_call('PROBAV_L3_S10_TOC_NDVI_333M_V2') - assert params["spatial_extent"] == expected + assert params["spatial_extent"] == expected_spatial_extent assert params["aggregate_spatial_geometries"] == expected_geometry params = dummy_backend.last_load_collection_call('S2_FAPAR_CLOUDCOVER') - assert params["spatial_extent"] == expected + assert params["spatial_extent"] == expected_spatial_extent def test_execute_mask_optimized_loading(api): @@ -563,24 +567,28 @@ def test_execute_mask_optimized_loading(api): ) assert dummy_backend.get_collection("S2_FAPAR_CLOUDCOVER").mask.call_count == 1 - expected = { + expected_spatial_extent = { "west": 7.02, "south": 51.2, "east": 7.65, "north": 51.7, - "crs": 'EPSG:4326', + "crs": "EPSG:4326", } - expected_geometry = shapely.geometry.shape({ - "type": "Polygon", - "coordinates": [[[7.02, 51.7], [7.65, 51.7], [7.65, 51.2], [7.04, 51.3], [7.02, 51.7]]] - }) + expected_geometry = DriverVectorCube.from_geojson( + { + "type": "Polygon", + "coordinates": [ + [[7.02, 51.7], [7.65, 51.7], [7.65, 51.2], [7.04, 51.3], [7.02, 51.7]], + ], + } + ) params = dummy_backend.last_load_collection_call('S2_FAPAR_CLOUDCOVER') - assert params["spatial_extent"] == expected + assert params["spatial_extent"] == expected_spatial_extent assert isinstance(params.data_mask, DriverDataCube) params = dummy_backend.last_load_collection_call('PROBAV_L3_S10_TOC_NDVI_333M_V2') - assert params["spatial_extent"] == expected + assert params["spatial_extent"] == expected_spatial_extent assert params["aggregate_spatial_geometries"] == expected_geometry @@ -726,17 +734,33 @@ def test_aggregate_spatial(api): "2015-07-06T00:00:00Z": [[2.345]], "2015-08-22T00:00:00Z": [[None]] } - params = dummy_backend.last_load_collection_call('S2_FAPAR_CLOUDCOVER') - assert params["spatial_extent"] == {"west": 7.02, "south": 51.29, "east": 7.65, "north": 51.75, "crs": 'EPSG:4326'} - assert params["aggregate_spatial_geometries"] == shapely.geometry.shape({ - "type": "Polygon", - "coordinates": [[[7.02, 51.75], [7.65, 51.74], [7.65, 51.29], [7.04, 51.31], [7.02, 51.75]]] - }) + params = dummy_backend.last_load_collection_call("S2_FAPAR_CLOUDCOVER") + assert params["spatial_extent"] == { + "west": 7.02, + "south": 51.29, + "east": 7.65, + "north": 51.75, + "crs": "EPSG:4326", + } + assert params["aggregate_spatial_geometries"] == 
DriverVectorCube.from_geojson( + { + "type": "Polygon", + "coordinates": [ + [ + [7.02, 51.75], + [7.65, 51.74], + [7.65, 51.29], + [7.04, 51.31], + [7.02, 51.75], + ] + ], + } + ) def test_execute_aggregate_spatial_spatial_cube(api100): resp = api100.check_result("aggregate_spatial_spatial_cube.json") - assert resp.json == [[100.0, 100.1], [101.0, 101.1]] + assert resp.json == [[2.345, None], [2.0, 3.0]] @pytest.mark.parametrize(["geometries", "expected"], [ @@ -781,37 +805,51 @@ def test_aggregate_spatial_vector_cube_basic(api100, feature_collection_test_pat assert params["spatial_extent"] == {"west": 1, "south": 1, "east": 5, "north": 4, "crs": "EPSG:4326"} assert isinstance(params["aggregate_spatial_geometries"], DriverVectorCube) - assert res.json == DictSubSet({ - "type": "FeatureCollection", - "features": [ - DictSubSet({ - "type": "Feature", - "geometry": {"type": "Polygon", "coordinates": [[[1, 1], [3, 1], [2, 3], [1, 1]]]}, - "properties": { - "id": "first", "pop": 1234, - "agg~2015-07-06T00:00:00Z~B02": 0, - "agg~2015-07-06T00:00:00Z~B03": 1, - "agg~2015-07-06T00:00:00Z~B04": 2, - "agg~2015-08-22T00:00:00Z~B02": 3, - "agg~2015-08-22T00:00:00Z~B03": 4, - "agg~2015-08-22T00:00:00Z~B04": 5, - }, - }), - DictSubSet({ - "type": "Feature", - "geometry": {"type": "Polygon", "coordinates": [[[4, 2], [5, 4], [3, 4], [4, 2]]]}, - "properties": { - "id": "second", "pop": 5678, - "agg~2015-07-06T00:00:00Z~B02": 6, - "agg~2015-07-06T00:00:00Z~B03": 7, - "agg~2015-07-06T00:00:00Z~B04": 8, - "agg~2015-08-22T00:00:00Z~B02": 9, - "agg~2015-08-22T00:00:00Z~B03": 10, - "agg~2015-08-22T00:00:00Z~B04": 11, - }, - }), - ] - }) + assert res.json == DictSubSet( + { + "type": "FeatureCollection", + "features": [ + DictSubSet( + { + "type": "Feature", + "geometry": { + "type": "Polygon", + "coordinates": [[[1, 1], [3, 1], [2, 3], [1, 1]]], + }, + "properties": { + "id": "first", + "pop": 1234, + "agg~2015-07-06T00:00:00Z~B02": 2.345, + "agg~2015-07-06T00:00:00Z~B03": None, + "agg~2015-07-06T00:00:00Z~B04": 2.0, + "agg~2015-08-22T00:00:00Z~B02": 3.0, + "agg~2015-08-22T00:00:00Z~B03": 4.0, + "agg~2015-08-22T00:00:00Z~B04": 5.0, + }, + } + ), + DictSubSet( + { + "type": "Feature", + "geometry": { + "type": "Polygon", + "coordinates": [[[4, 2], [5, 4], [3, 4], [4, 2]]], + }, + "properties": { + "id": "second", + "pop": 5678, + "agg~2015-07-06T00:00:00Z~B02": 6.0, + "agg~2015-07-06T00:00:00Z~B03": 7.0, + "agg~2015-07-06T00:00:00Z~B04": 8.0, + "agg~2015-08-22T00:00:00Z~B02": 9.0, + "agg~2015-08-22T00:00:00Z~B03": 10.0, + "agg~2015-08-22T00:00:00Z~B04": 11.0, + }, + } + ), + ], + } + ) @pytest.mark.parametrize(["info", "preprocess_pg", "aggregate_data", "p1_properties", "p2_properties"], [ @@ -820,9 +858,14 @@ def test_aggregate_spatial_vector_cube_basic(api100, feature_collection_test_pat {}, "lc", { - "id": "first", "pop": 1234, - "agg~2015-07-06T00:00:00Z~B02": 0, "agg~2015-07-06T00:00:00Z~B03": 1, "agg~2015-07-06T00:00:00Z~B04": 2, - "agg~2015-08-22T00:00:00Z~B02": 3, "agg~2015-08-22T00:00:00Z~B03": 4, "agg~2015-08-22T00:00:00Z~B04": 5, + "id": "first", + "pop": 1234, + "agg~2015-07-06T00:00:00Z~B02": 2.345, + "agg~2015-07-06T00:00:00Z~B03": None, + "agg~2015-07-06T00:00:00Z~B04": 2, + "agg~2015-08-22T00:00:00Z~B02": 3, + "agg~2015-08-22T00:00:00Z~B03": 4, + "agg~2015-08-22T00:00:00Z~B04": 5, }, { "id": "second", "pop": 5678, @@ -842,7 +885,13 @@ def test_aggregate_spatial_vector_cube_basic(api100, feature_collection_test_pat }}, }, "r", - {"id": "first", "pop": 1234, "agg~B02": 0, "agg~B03": 1, "agg~B04": 
2}, + { + "id": "first", + "pop": 1234, + "agg~B02": 2.345, + "agg~B03": None, + "agg~B04": 2, + }, {"id": "second", "pop": 5678, "agg~B02": 3, "agg~B03": 4, "agg~B04": 5}, ), ( @@ -857,10 +906,20 @@ def test_aggregate_spatial_vector_cube_basic(api100, feature_collection_test_pat }} }, "r", - {"id": "first", "pop": 1234, "agg~2015-07-06T00:00:00Z": 0, "agg~2015-08-22T00:00:00Z": 1}, - {"id": "second", "pop": 5678, "agg~2015-07-06T00:00:00Z": 2, "agg~2015-08-22T00:00:00Z": 3}, - ), - ( + { + "id": "first", + "pop": 1234, + "agg~2015-07-06T00:00:00Z": 2.345, + "agg~2015-08-22T00:00:00Z": None, + }, + { + "id": "second", + "pop": 5678, + "agg~2015-07-06T00:00:00Z": 2, + "agg~2015-08-22T00:00:00Z": 3, + }, + ), + ( "no-time-nor-bands", { "r1": {"process_id": "reduce_dimension", "arguments": { @@ -879,8 +938,8 @@ def test_aggregate_spatial_vector_cube_basic(api100, feature_collection_test_pat }}, }, "r2", - {"id": "first", "pop": 1234, "agg": 0}, - {"id": "second", "pop": 5678, "agg": 1}, + {"id": "first", "pop": 1234, "agg": 2.345}, + {"id": "second", "pop": 5678, "agg": None}, ), ]) def test_aggregate_spatial_vector_cube_dimensions( @@ -1056,7 +1115,11 @@ def test_run_udf_on_json(api100, udf_code): preprocess=lambda s: s.replace('"PLACEHOLDER_UDF"', repr(udf_code)) ) resp = api100.check_result(process_graph) - assert resp.json == {'len': 2, 'keys': ['2015-07-06T00:00:00Z', '2015-08-22T00:00:00Z'], 'values': [[[2.345]], [[None]]]} + assert resp.json == { + "len": 2, + "keys": ["2015-07-06T00:00:00Z", "2015-08-22T00:00:00Z"], + "values": [[[2.345, None]], [[2.0, 3.0]]], + } @pytest.mark.parametrize("udf_code", [ @@ -2636,17 +2699,84 @@ def test_chunk_polygon(api100): def test_fit_class_random_forest(api100): res = api100.check_result("fit_class_random_forest.json") - assert res.json == DictSubSet({ - "type": "DummyMlModel", - "creation_data": { - "process_id": "fit_class_random_forest", - "data": [[100.0, 100.1, 100.2, 100.3], [101.0, 101.1, 101.2, 101.3]], - "target": DictSubSet({"type": "FeatureCollection"}), - "num_trees": 200, - "max_variables": None, - "seed": None, + + geom1 = { + "type": "Polygon", + "coordinates": [[[3.0, 5.0], [4.0, 5.0], [4.0, 6.0], [3.0, 6.0], [3.0, 5.0]]], + } + geom2 = { + "type": "Polygon", + "coordinates": [[[8.0, 1.0], [9.0, 1.0], [9.0, 2.0], [8.0, 2.0], [8.0, 1.0]]], + } + assert res.json == DictSubSet( + { + "type": "DummyMlModel", + "creation_data": { + "process_id": "fit_class_random_forest", + "data": DictSubSet( + { + "type": "FeatureCollection", + "features": [ + DictSubSet( + { + "type": "Feature", + "id": "0", + "geometry": geom1, + "properties": { + "agg~B02": 2.345, + "agg~B03": None, + "agg~B04": 2.0, + "agg~B08": 3.0, + "target": 0, + }, + } + ), + DictSubSet( + { + "type": "Feature", + "id": "1", + "geometry": geom2, + "properties": { + "agg~B02": 4.0, + "agg~B03": 5.0, + "agg~B04": 6.0, + "agg~B08": 7.0, + "target": 1, + }, + } + ), + ], + } + ), + "target": DictSubSet( + { + "type": "FeatureCollection", + "features": [ + DictSubSet( + { + "type": "Feature", + "id": "0", + "geometry": geom1, + "properties": {"target": 0}, + } + ), + DictSubSet( + { + "type": "Feature", + "id": "1", + "geometry": geom2, + "properties": {"target": 1}, + } + ), + ], + } + ), + "max_variables": None, + "num_trees": 200, + "seed": None, + }, } - }) + ) def test_if_merge_cubes(api100):