Skip to content

Commit a239fe5

Browse files
committed
Issue #457/#104/#672 add automatic load_url support
when providing URL to aggregate_spatial, mask_polygon, ...
1 parent 5a3e6f4 commit a239fe5

File tree

3 files changed

+236
-27
lines changed

3 files changed

+236
-27
lines changed

CHANGELOG.md

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
99

1010
### Added
1111

12+
- Automatically use `load_url` when providing a URL as geometries to `DataCube.aggregate_spatial()`, `DataCube.mask_polygon()`, etc. ([#104](https://github.com/Open-EO/openeo-python-client/issues/104), [#457](https://github.com/Open-EO/openeo-python-client/issues/457))
13+
1214
### Changed
1315

1416
- `MultiBackendJobManager`: costs has been added as a column in tracking databases ([#588](https://github.com/Open-EO/openeo-python-client/issues/588))

openeo/rest/datacube.py

Lines changed: 85 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,9 @@
1212
import datetime
1313
import logging
1414
import pathlib
15+
import re
1516
import typing
17+
import urllib.parse
1618
import warnings
1719
from builtins import staticmethod
1820
from typing import Any, Callable, Dict, Iterable, List, Optional, Sequence, Tuple, Union
@@ -584,7 +586,9 @@ def filter_bbox(
584586
)
585587

586588
@openeo_process
587-
def filter_spatial(self, geometries) -> DataCube:
589+
def filter_spatial(
590+
self, geometries: Union[shapely.geometry.base.BaseGeometry, dict, str, pathlib.Path, Parameter, VectorCube]
591+
) -> DataCube:
588592
"""
589593
Limits the data cube over the spatial dimensions to the specified geometries.
590594
@@ -597,10 +601,24 @@ def filter_spatial(self, geometries) -> DataCube:
597601
More specifically, pixels outside of the bounding box of the given geometry will not be available after filtering.
598602
All pixels inside the bounding box that are not retained will be set to null (no data).
599603
600-
:param geometries: One or more geometries used for filtering, specified as GeoJSON in EPSG:4326.
604+
:param geometries: One or more geometries used for filtering. Can be provided in different ways:
605+
606+
- a shapely geometry
607+
- a GeoJSON-style dictionary,
608+
- a public URL to the geometries in a vector format that is supported by the backend
609+
(also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
610+
e.g. GeoJSON, GeoParquet, etc.
611+
A ``load_url`` process will automatically be added to the process graph.
612+
- a path (that is valid for the back-end) to a GeoJSON file.
613+
- a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
614+
- a :py:class:`~openeo.api.process.Parameter` instance.
615+
601616
:return: A data cube restricted to the specified geometries. The dimensions and dimension properties (name,
602617
type, labels, reference system and resolution) remain unchanged, except that the spatial dimensions have less
603618
(or the same) dimension labels.
619+
620+
.. versionchanged:: 0.36.0
621+
Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.
604622
"""
605623
valid_geojson_types = [
606624
"Point", "MultiPoint", "LineString", "MultiLineString",
@@ -1052,15 +1070,29 @@ def _get_geometry_argument(
10521070
:param crs: value that encodes a coordinate reference system.
10531071
See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument.
10541072
"""
1073+
if isinstance(geometry, Parameter):
1074+
return geometry
1075+
elif isinstance(geometry, _FromNodeMixin):
1076+
return geometry.from_node()
1077+
1078+
if isinstance(geometry, str) and re.match(r"^https?://", geometry, flags=re.I):
1079+
# Geometry provided as URL: load with `load_url` (with best-effort format guess)
1080+
url = urllib.parse.urlparse(geometry)
1081+
suffix = pathlib.Path(url.path.lower()).suffix
1082+
format = {
1083+
".json": "GeoJSON",
1084+
".geojson": "GeoJSON",
1085+
".pq": "Parquet",
1086+
".parquet": "Parquet",
1087+
".geoparquet": "Parquet",
1088+
}.get(suffix, suffix.split(".")[-1])
1089+
return self.connection.load_url(url=geometry, format=format)
1090+
10551091
if isinstance(geometry, (str, pathlib.Path)):
10561092
# Assumption: `geometry` is a path to a (polygon) vector file at the backend.
10571093
# TODO #104: `read_vector` is non-standard process.
10581094
# TODO: If path exists client side: load it client side?
10591095
return PGNode(process_id="read_vector", arguments={"filename": str(geometry)})
1060-
elif isinstance(geometry, Parameter):
1061-
return geometry
1062-
elif isinstance(geometry, _FromNodeMixin):
1063-
return geometry.from_node()
10641096

10651097
if isinstance(geometry, shapely.geometry.base.BaseGeometry):
10661098
geometry = mapping(geometry)
@@ -1107,8 +1139,18 @@ def aggregate_spatial(
11071139
Aggregates statistics for one or more geometries (e.g. zonal statistics for polygons)
11081140
over the spatial dimensions.
11091141
1110-
:param geometries: a shapely geometry, a GeoJSON-style dictionary,
1111-
a public GeoJSON URL, or a path (that is valid for the back-end) to a GeoJSON file.
1142+
:param geometries: The geometries to aggregate in. Can be provided in different ways:
1143+
1144+
- a shapely geometry
1145+
- a GeoJSON-style dictionary,
1146+
- a public URL to the geometries in a vector format that is supported by the backend
1147+
(also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
1148+
e.g. GeoJSON, GeoParquet, etc.
1149+
A ``load_url`` process will automatically be added to the process graph.
1150+
- a path (that is valid for the back-end) to a GeoJSON file.
1151+
- a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
1152+
- a :py:class:`~openeo.api.process.Parameter` instance.
1153+
11121154
:param reducer: the "child callback":
11131155
the name of a single openEO process,
11141156
or a callback function as discussed in :ref:`callbackfunctions`,
@@ -1128,10 +1170,13 @@ def aggregate_spatial(
11281170
By default, longitude-latitude (EPSG:4326) is assumed.
11291171
See :py:func:`openeo.util.normalize_crs` for more details about additional normalization that is applied to this argument.
11301172
1131-
:param context: Additional data to be passed to the reducer process.
1132-
11331173
.. note:: this ``crs`` argument is a non-standard/experimental feature, only supported by specific back-ends.
11341174
See https://github.com/Open-EO/openeo-processes/issues/235 for details.
1175+
1176+
:param context: Additional data to be passed to the reducer process.
1177+
1178+
.. versionchanged:: 0.36.0
1179+
Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.
11351180
"""
11361181
valid_geojson_types = [
11371182
"Point", "MultiPoint", "LineString", "MultiLineString",
@@ -1461,8 +1506,18 @@ def apply_polygon(
14611506
the GeometriesOverlap exception is thrown.
14621507
Each sub data cube is passed individually to the given process.
14631508
1464-
:param geometries: Polygons, provided as a shapely geometry, a GeoJSON-style dictionary,
1465-
a public GeoJSON URL, or a path (that is valid for the back-end) to a GeoJSON file.
1509+
:param geometries: The geometries (polygons) to apply the process to. Can be provided in different ways:
1510+
1511+
- a shapely geometry
1512+
- a GeoJSON-style dictionary,
1513+
- a public URL to the geometries in a vector format that is supported by the backend
1514+
(also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
1515+
e.g. GeoJSON, GeoParquet, etc.
1516+
A ``load_url`` process will automatically be added to the process graph.
1517+
- a path (that is valid for the back-end) to a GeoJSON file.
1518+
- a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
1519+
- a :py:class:`~openeo.api.process.Parameter` instance.
1520+
14661521
:param process: "child callback" function, see :ref:`callbackfunctions`
14671522
:param mask_value: The value used for pixels outside the polygon.
14681523
:param context: Additional data to be passed to the process.
@@ -1473,6 +1528,9 @@ def apply_polygon(
14731528
Argument ``polygons`` was renamed to ``geometries``.
14741529
While deprecated, the old name ``polygons`` is still supported
14751530
as keyword argument for backwards compatibility.
1531+
1532+
.. versionchanged:: 0.36.0
1533+
Support passing a URL as ``geometries`` argument, which will be loaded with the ``load_url`` process.
14761534
"""
14771535
# TODO drop support for legacy `polygons` argument:
14781536
# remove `kwargs`, remove default `None` value for `geometries` and `process`
@@ -1957,14 +2015,27 @@ def mask_polygon(
19572015
The pixel values are replaced with the value specified for `replacement`,
19582016
which defaults to `no data`.
19592017
1960-
:param mask: The geometry to mask with: a shapely geometry, a GeoJSON-style dictionary,
1961-
a public GeoJSON URL, or a path (that is valid for the back-end) to a GeoJSON file.
2018+
:param mask: The geometry to mask with. Can be provided in different ways:
2019+
2020+
- a shapely geometry
2021+
- a GeoJSON-style dictionary,
2022+
- a public URL to the geometries in a vector format that is supported by the backend
2023+
(also see :py:func:`Connection.list_file_formats() <openeo.rest.connection.Connection.list_file_formats>`),
2024+
e.g. GeoJSON, GeoParquet, etc.
2025+
A ``load_url`` process will automatically be added to the process graph.
2026+
- a path (that is valid for the back-end) to a GeoJSON file.
2027+
- a :py:class:`~openeo.rest.vectorcube.VectorCube` instance.
2028+
- a :py:class:`~openeo.api.process.Parameter` instance.
2029+
19622030
:param srs: The spatial reference system of the provided polygon.
19632031
By default longitude-latitude (EPSG:4326) is assumed.
19642032
19652033
.. note:: this ``srs`` argument is a non-standard/experimental feature, only supported by specific back-ends.
19662034
See https://github.com/Open-EO/openeo-processes/issues/235 for details.
19672035
:param replacement: the value to replace the masked pixels with
2036+
2037+
.. versionchanged:: 0.36.0
2038+
Support passing a URL as ``mask`` argument, which will be loaded with the ``load_url`` process.
19682039
"""
19692040
valid_geojson_types = ["Polygon", "MultiPolygon", "GeometryCollection", "Feature", "FeatureCollection"]
19702041
mask = self._get_geometry_argument(mask, valid_geojson_types=valid_geojson_types, crs=srs)

tests/rest/datacube/test_datacube100.py

Lines changed: 149 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -347,21 +347,47 @@ def test_filter_bbox_args_and_kwargs_conflict(con100: Connection, args, kwargs,
347347
con100.load_collection("S2").filter_bbox(*args, **kwargs)
348348

349349

350-
def test_filter_spatial(con100: Connection, recwarn):
351-
img = con100.load_collection("S2")
350+
def test_filter_spatial(con100: Connection):
351+
cube = con100.load_collection("S2")
352352
polygon = shapely.geometry.box(0, 0, 1, 1)
353-
masked = img.filter_spatial(geometries=polygon)
354-
assert sorted(masked.flat_graph().keys()) == ["filterspatial1", "loadcollection1"]
355-
assert masked.flat_graph()["filterspatial1"] == {
356-
"process_id": "filter_spatial",
357-
"arguments": {
358-
"data": {"from_node": "loadcollection1"},
359-
"geometries": {
360-
"type": "Polygon",
361-
"coordinates": (((1.0, 0.0), (1.0, 1.0), (0.0, 1.0), (0.0, 0.0), (1.0, 0.0)),),
362-
}
353+
masked = cube.filter_spatial(geometries=polygon)
354+
assert get_download_graph(masked, drop_save_result=True, drop_load_collection=True) == {
355+
"filterspatial1": {
356+
"process_id": "filter_spatial",
357+
"arguments": {
358+
"data": {"from_node": "loadcollection1"},
359+
"geometries": {
360+
"type": "Polygon",
361+
"coordinates": [[[1.0, 0.0], [1.0, 1.0], [0.0, 1.0], [0.0, 0.0], [1.0, 0.0]]],
362+
},
363+
},
364+
}
365+
}
366+
367+
368+
@pytest.mark.parametrize(
369+
["url", "expected_format"],
370+
[
371+
("https://example.com/geometry.json", "GeoJSON"),
372+
("https://example.com/geometry.geojson", "GeoJSON"),
373+
("https://example.com/geometry.GeoJSON", "GeoJSON"),
374+
("https://example.com/geometry.pq", "Parquet"),
375+
("https://example.com/geometry.parquet", "Parquet"),
376+
("https://example.com/geometry.GeoParquet", "Parquet"),
377+
],
378+
)
379+
def test_filter_spatial_geometry_url(con100: Connection, url, expected_format):
380+
cube = con100.load_collection("S2")
381+
masked = cube.filter_spatial(geometries=url)
382+
assert get_download_graph(masked, drop_save_result=True, drop_load_collection=True) == {
383+
"loadurl1": {
384+
"process_id": "load_url",
385+
"arguments": {"url": url, "format": expected_format},
386+
},
387+
"filterspatial1": {
388+
"process_id": "filter_spatial",
389+
"arguments": {"data": {"from_node": "loadcollection1"}, "geometries": {"from_node": "loadurl1"}},
363390
},
364-
"result": True
365391
}
366392

367393

@@ -595,6 +621,44 @@ def test_aggregate_spatial_geometry_from_node(con100: Connection, get_geometries
595621
}
596622

597623

624+
@pytest.mark.parametrize(
625+
["url", "expected_format"],
626+
[
627+
("https://example.com/geometry.json", "GeoJSON"),
628+
("https://example.com/geometry.geojson", "GeoJSON"),
629+
("https://example.com/geometry.GeoJSON", "GeoJSON"),
630+
("https://example.com/geometry.pq", "Parquet"),
631+
("https://example.com/geometry.parquet", "Parquet"),
632+
("https://example.com/geometry.GeoParquet", "Parquet"),
633+
],
634+
)
635+
def test_aggregate_spatial_geometry_url(con100: Connection, url, expected_format):
636+
cube = con100.load_collection("S2")
637+
result = cube.aggregate_spatial(geometries=url, reducer="mean")
638+
assert get_download_graph(result, drop_save_result=True, drop_load_collection=True) == {
639+
"loadurl1": {
640+
"process_id": "load_url",
641+
"arguments": {"url": url, "format": expected_format},
642+
},
643+
"aggregatespatial1": {
644+
"process_id": "aggregate_spatial",
645+
"arguments": {
646+
"data": {"from_node": "loadcollection1"},
647+
"geometries": {"from_node": "loadurl1"},
648+
"reducer": {
649+
"process_graph": {
650+
"mean1": {
651+
"process_id": "mean",
652+
"arguments": {"data": {"from_parameter": "data"}},
653+
"result": True,
654+
}
655+
}
656+
},
657+
},
658+
},
659+
}
660+
661+
598662
def test_aggregate_spatial_window(con100: Connection):
599663
img = con100.load_collection("S2")
600664
size = [5, 3]
@@ -810,6 +874,35 @@ def test_mask_polygon_from_node(con100: Connection, get_geometries):
810874
}
811875

812876

877+
@pytest.mark.parametrize(
878+
["url", "expected_format"],
879+
[
880+
("https://example.com/geometry.json", "GeoJSON"),
881+
("https://example.com/geometry.geojson", "GeoJSON"),
882+
("https://example.com/geometry.GeoJSON", "GeoJSON"),
883+
("https://example.com/geometry.pq", "Parquet"),
884+
("https://example.com/geometry.parquet", "Parquet"),
885+
("https://example.com/geometry.GeoParquet", "Parquet"),
886+
],
887+
)
888+
def test_mask_polygon_geometry_url(con100: Connection, url, expected_format):
889+
cube = con100.load_collection("S2")
890+
masked = cube.mask_polygon(mask=url)
891+
assert get_download_graph(masked, drop_save_result=True, drop_load_collection=True) == {
892+
"loadurl1": {
893+
"process_id": "load_url",
894+
"arguments": {"url": url, "format": expected_format},
895+
},
896+
"maskpolygon1": {
897+
"process_id": "mask_polygon",
898+
"arguments": {
899+
"data": {"from_node": "loadcollection1"},
900+
"mask": {"from_node": "loadurl1"},
901+
},
902+
},
903+
}
904+
905+
813906
def test_mask_raster(con100: Connection):
814907
img = con100.load_collection("S2")
815908
mask = con100.load_collection("MASK")
@@ -1768,6 +1861,49 @@ def test_apply_polygon_context(con100: Connection, geometries_argument, geometri
17681861
}
17691862

17701863

1864+
@pytest.mark.parametrize(
1865+
["url", "expected_format"],
1866+
[
1867+
("https://example.com/geometry.json", "GeoJSON"),
1868+
("https://example.com/geometry.geojson", "GeoJSON"),
1869+
("https://example.com/geometry.GeoJSON", "GeoJSON"),
1870+
("https://example.com/geometry.pq", "Parquet"),
1871+
("https://example.com/geometry.parquet", "Parquet"),
1872+
("https://example.com/geometry.GeoParquet", "Parquet"),
1873+
],
1874+
)
1875+
def test_apply_polygon_geometry_url(con100: Connection, url, expected_format):
1876+
cube = con100.load_collection("S2")
1877+
process = UDF(code="myfancycode", runtime="Python")
1878+
result = cube.apply_polygon(geometries=url, process=process)
1879+
assert get_download_graph(result, drop_save_result=True, drop_load_collection=True) == {
1880+
"loadurl1": {
1881+
"process_id": "load_url",
1882+
"arguments": {"url": url, "format": expected_format},
1883+
},
1884+
"applypolygon1": {
1885+
"process_id": "apply_polygon",
1886+
"arguments": {
1887+
"data": {"from_node": "loadcollection1"},
1888+
"geometries": {"from_node": "loadurl1"},
1889+
"process": {
1890+
"process_graph": {
1891+
"runudf1": {
1892+
"process_id": "run_udf",
1893+
"arguments": {
1894+
"data": {"from_parameter": "data"},
1895+
"runtime": "Python",
1896+
"udf": "myfancycode",
1897+
},
1898+
"result": True,
1899+
}
1900+
}
1901+
},
1902+
},
1903+
},
1904+
}
1905+
1906+
17711907
def test_metadata_load_collection_100(con100, requests_mock):
17721908
requests_mock.get(API_URL + "/collections/SENTINEL2", json={
17731909
"cube:dimensions": {

0 commit comments

Comments
 (0)