7
7
import io
8
8
9
9
import geopandas as gpd
10
- import numpy as np
10
+ import numpy
11
11
import pyproj
12
12
import shapely .geometry
13
13
import shapely .geometry .base
17
17
import requests
18
18
19
19
from openeo .metadata import CollectionMetadata
20
- from openeo .util import ensure_dir
20
+ from openeo .util import ensure_dir , str_truncate
21
+ import openeo .udf
21
22
from openeo_driver .datastructs import SarBackscatterArgs , ResolutionMergeArgs , StacAsset
22
23
from openeo_driver .errors import FeatureUnsupportedException , InternalException
23
24
from openeo_driver .util .geometry import GeometryBufferer , validate_geojson_coordinates
24
25
from openeo_driver .util .ioformats import IOFORMATS
26
+ from openeo_driver .util .pgparsing import SingleRunUDFProcessGraph
25
27
from openeo_driver .util .utm import area_in_square_meters
26
28
from openeo_driver .utils import EvalEnv
27
29
@@ -214,13 +216,15 @@ class DriverVectorCube:
214
216
These components are "joined" on the GeoPandas dataframe's index and DataArray first dimension
215
217
"""
216
218
DIM_GEOMETRIES = "geometries"
217
- FLATTEN_PREFIX = "vc"
219
+ DIM_BANDS = "bands"
220
+ DIM_PROPERTIES = "properties"
221
+ COLUMN_SELECTION_ALL = "all"
222
+ COLUMN_SELECTION_NUMERICAL = "numerical"
218
223
219
224
def __init__ (
220
225
self ,
221
226
geometries : gpd .GeoDataFrame ,
222
227
cube : Optional [xarray .DataArray ] = None ,
223
- flatten_prefix : str = FLATTEN_PREFIX ,
224
228
):
225
229
"""
226
230
@@ -234,18 +238,77 @@ def __init__(
234
238
log .error (f"First cube dim should be { self .DIM_GEOMETRIES !r} but got dims { cube .dims !r} " )
235
239
raise VectorCubeError ("Cube's first dimension is invalid." )
236
240
if not geometries .index .equals (cube .indexes [cube .dims [0 ]]):
237
- log .error (f"Invalid VectorCube components { geometries .index !r } != { cube .indexes [cube .dims [0 ]]!r } " )
241
+ log .error (f"Invalid VectorCube components { geometries .index = } != { cube .indexes [cube .dims [0 ]]= } " )
238
242
raise VectorCubeError ("Incompatible vector cube components" )
239
243
self ._geometries : gpd .GeoDataFrame = geometries
240
244
self ._cube = cube
241
- self ._flatten_prefix = flatten_prefix
242
245
243
def with_cube(self, cube: xarray.DataArray) -> "DriverVectorCube":
    """Build a new DriverVectorCube that keeps this cube's geometries but carries the given data cube."""
    log.info(f"Creating vector cube with new cube {cube.name!r}")
    constructor = type(self)
    return constructor(geometries=self._geometries, cube=cube)
250
+
251
@classmethod
def from_geodataframe(
    cls,
    data: gpd.GeoDataFrame,
    *,
    columns_for_cube: Union[List[str], str] = COLUMN_SELECTION_NUMERICAL,
    dimension_name: str = DIM_PROPERTIES,
) -> "DriverVectorCube":
    """
    Build a DriverVectorCube from a GeoPandas data frame:
    the frame's geometries become the vector cube geometries,
    and (a selection of) the other columns become cube values
    along a "bands"-like dimension.

    :param data: geopandas data frame
    :param columns_for_cube: which data frame columns to use as cube values.
        One of:
        - "numerical": automatically pick numerical columns
        - "all": use all columns as cube values
        - list of column names
    :param dimension_name: name of the "bands" dimension
    :return: vector cube
    """
    non_geometry_columns = [c for c in data.columns if c != "geometry"]

    if columns_for_cube is None:
        # TODO #114: what should default selection be?
        columns_for_cube = cls.COLUMN_SELECTION_NUMERICAL

    # Resolve the column selection specifier to a concrete list of column names.
    if columns_for_cube == cls.COLUMN_SELECTION_NUMERICAL:
        selected = [c for c in non_geometry_columns if numpy.issubdtype(data[c].dtype, numpy.number)]
    elif columns_for_cube == cls.COLUMN_SELECTION_ALL:
        selected = non_geometry_columns
    elif isinstance(columns_for_cube, list):
        # TODO #114 limit to subset with available columns (and automatically fill in missing columns with nodata)?
        selected = columns_for_cube
    else:
        raise ValueError(columns_for_cube)
    assert isinstance(selected, list)

    if not selected:
        # Nothing selected for the cube: geometry-only vector cube.
        return cls(geometries=data)

    cube_df = data[selected]
    # TODO: remove the selected columns from the geopandas data frame?
    #       Enabling that triggers failure of some existing tests that use `aggregate_spatial`
    #       to "enrich" a vector cube with pre-existing properties
    #       Also see https://github.com/Open-EO/openeo-api/issues/504
    # geometries_df = data.drop(columns=selected)
    geometries_df = data

    # TODO: leverage pandas `to_xarray` and xarray `to_array` instead of this manual building?
    cube: xarray.DataArray = xarray.DataArray(
        data=cube_df.values,
        dims=[cls.DIM_GEOMETRIES, dimension_name],
        coords={
            cls.DIM_GEOMETRIES: data.geometry.index.to_list(),
            dimension_name: cube_df.columns,
        },
    )
    return cls(geometries=geometries_df, cube=cube)
249
312
250
313
@classmethod
251
314
def from_fiona (
@@ -258,15 +321,21 @@ def from_fiona(
258
321
if len (paths ) != 1 :
259
322
# TODO #114 EP-3981: support multiple paths
260
323
raise FeatureUnsupportedException (message = "Loading a vector cube from multiple files is not supported" )
324
+ columns_for_cube = (options or {}).get ("columns_for_cube" , cls .COLUMN_SELECTION_NUMERICAL )
261
325
# TODO #114 EP-3981: lazy loading like/with DelayedVector
262
326
# note for GeoJSON: will consider Feature.id as well as Feature.properties.id
263
327
if "parquet" == driver :
264
- return cls .from_parquet (paths = paths )
328
+ return cls .from_parquet (paths = paths , columns_for_cube = columns_for_cube )
265
329
else :
266
- return cls (geometries = gpd .read_file (paths [0 ], driver = driver ))
330
+ gdf = gpd .read_file (paths [0 ], driver = driver )
331
+ return cls .from_geodataframe (gdf , columns_for_cube = columns_for_cube )
267
332
268
333
@classmethod
269
- def from_parquet (cls , paths : List [Union [str , Path ]]):
334
+ def from_parquet (
335
+ cls ,
336
+ paths : List [Union [str , Path ]],
337
+ columns_for_cube : Union [List [str ], str ] = COLUMN_SELECTION_NUMERICAL ,
338
+ ):
270
339
if len (paths ) != 1 :
271
340
# TODO #114 EP-3981: support multiple paths
272
341
raise FeatureUnsupportedException (
@@ -284,10 +353,14 @@ def from_parquet(cls, paths: List[Union[str, Path]]):
284
353
if "OGC:CRS84" in str (df .crs ) or "WGS 84 (CRS84)" in str (df .crs ):
285
354
# workaround for not being able to decode ogc:crs84
286
355
df .crs = CRS .from_epsg (4326 )
287
- return cls ( geometries = df )
356
+ return cls . from_geodataframe ( df , columns_for_cube = columns_for_cube )
288
357
289
358
@classmethod
290
- def from_geojson (cls , geojson : dict ) -> "DriverVectorCube" :
359
+ def from_geojson (
360
+ cls ,
361
+ geojson : dict ,
362
+ columns_for_cube : Union [List [str ], str ] = COLUMN_SELECTION_NUMERICAL ,
363
+ ) -> "DriverVectorCube" :
291
364
"""Construct vector cube from GeoJson dict structure"""
292
365
validate_geojson_coordinates (geojson )
293
366
# TODO support more geojson types?
@@ -305,7 +378,8 @@ def from_geojson(cls, geojson: dict) -> "DriverVectorCube":
305
378
raise FeatureUnsupportedException (
306
379
f"Can not construct DriverVectorCube from { geojson .get ('type' , type (geojson ))!r} "
307
380
)
308
- return cls (geometries = gpd .GeoDataFrame .from_features (features ))
381
+ gdf = gpd .GeoDataFrame .from_features (features )
382
+ return cls .from_geodataframe (gdf , columns_for_cube = columns_for_cube )
309
383
310
384
@classmethod
311
385
def from_geometry (
@@ -320,7 +394,9 @@ def from_geometry(
320
394
geometry = [geometry ]
321
395
return cls (geometries = gpd .GeoDataFrame (geometry = geometry ))
322
396
323
- def _as_geopandas_df (self ) -> gpd .GeoDataFrame :
397
+ def _as_geopandas_df (
398
+ self , flatten_prefix : Optional [str ] = None , flatten_name_joiner : str = "~"
399
+ ) -> gpd .GeoDataFrame :
324
400
"""Join geometries and cube as a geopandas dataframe"""
325
401
# TODO: avoid copy?
326
402
df = self ._geometries .copy (deep = True )
@@ -331,18 +407,20 @@ def _as_geopandas_df(self) -> gpd.GeoDataFrame:
331
407
if self ._cube .dims [1 :]:
332
408
stacked = self ._cube .stack (prop = self ._cube .dims [1 :])
333
409
log .info (f"Flattened cube component of vector cube to { stacked .shape [1 ]} properties" )
410
+ name_prefix = [flatten_prefix ] if flatten_prefix else []
334
411
for p in stacked .indexes ["prop" ]:
335
- name = "~" .join (str (x ) for x in [ self . _flatten_prefix ] + list (p ))
412
+ name = flatten_name_joiner .join (str (x ) for x in name_prefix + list (p ))
336
413
# TODO: avoid column collisions?
337
414
df [name ] = stacked .sel (prop = p )
338
415
else :
339
- df [self ._flatten_prefix ] = self ._cube
416
+ # TODO: better fallback column/property name in this case?
417
+ df [flatten_prefix or "_vc" ] = self ._cube
340
418
341
419
return df
342
420
343
def to_geojson(self, flatten_prefix: Optional[str] = None) -> dict:
    """Export as GeoJSON FeatureCollection."""
    flattened = self._as_geopandas_df(flatten_prefix=flatten_prefix)
    return shapely.geometry.mapping(flattened)
346
424
347
425
def to_wkt (self ) -> List [str ]:
348
426
wkts = [str (g ) for g in self ._geometries .geometry ]
@@ -366,7 +444,8 @@ def write_assets(
366
444
)
367
445
return self .to_legacy_save_result ().write_assets (directory )
368
446
369
- self ._as_geopandas_df ().to_file (path , driver = format_info .fiona_driver )
447
+ gdf = self ._as_geopandas_df (flatten_prefix = options .get ("flatten_prefix" ))
448
+ gdf .to_file (path , driver = format_info .fiona_driver )
370
449
371
450
if not format_info .multi_file :
372
451
# single file format
@@ -461,6 +540,9 @@ def geometry_count(self) -> int:
461
540
def get_geometries(self) -> Sequence[shapely.geometry.base.BaseGeometry]:
    """Return the geometries (the geometry column of the underlying GeoDataFrame)."""
    return self._geometries.geometry
463
542
543
def get_cube(self) -> Optional[xarray.DataArray]:
    """Return the data cube component (None for a geometry-only vector cube)."""
    return self._cube
545
+
464
546
def get_ids(self) -> Optional[Sequence]:
    """Return the "id" column of the geometries data frame, or None when there is no such column."""
    return self._geometries.get("id")
466
548
@@ -471,8 +553,9 @@ def get_xarray_cube_basics(self) -> Tuple[tuple, dict]:
471
553
return dims , coords
472
554
473
555
def __eq__(self, other):
    """Vector cubes compare equal when their flattened geopandas representations match element-wise."""
    if not isinstance(other, DriverVectorCube):
        return False
    return numpy.array_equal(self._as_geopandas_df().values, other._as_geopandas_df().values)
476
559
477
560
def fit_class_random_forest (
478
561
self ,
@@ -504,6 +587,49 @@ def buffer_points(self, distance: float = 10) -> "DriverVectorCube":
504
587
]
505
588
)
506
589
590
def apply_dimension(
    self,
    process: dict,
    *,
    dimension: str,
    target_dimension: Optional[str] = None,
    context: Optional[dict] = None,
    env: EvalEnv,
) -> "DriverVectorCube":
    """
    Apply a process along the given dimension of this vector cube.

    Only process graphs consisting of a single `run_udf` node are supported:
    the vector cube (flattened to a GeoDataFrame) is handed to the UDF in one go,
    and the UDF must return exactly one feature collection, which becomes the
    geometry component of the resulting vector cube.

    :param process: openEO process graph, parsed via SingleRunUDFProcessGraph
    :param dimension: dimension to apply along (only the "bands"/"properties" dimension is handled)
    :param target_dimension: not supported here (must be None) — TODO confirm intended semantics
    :param context: optional user context forwarded to the UDF
    :param env: evaluation environment providing the UDF runner backend
    :raises FeatureUnsupportedException: for any case other than the single-run_udf path below
    :raises ValueError: when the UDF returns something other than a single-feature-collection UdfData
    """
    # Detect the "process graph is just one run_udf node" special case.
    single_run_udf = SingleRunUDFProcessGraph.parse_or_none(process)

    if single_run_udf:
        # Process with single "run_udf" node
        # TODO: check provided dimension with actual dimension of the cube
        if dimension in (self.DIM_BANDS, self.DIM_PROPERTIES) and target_dimension is None:
            log.warning(
                f"Using experimental feature: DriverVectorCube.apply_dimension along dim {dimension} and empty cube"
            )
            # TODO: this is non-standard special case: vector cube with only geometries, but no "cube" data
            gdf = self._as_geopandas_df()
            feature_collection = openeo.udf.FeatureCollection(id="_", data=gdf)
            udf_data = openeo.udf.UdfData(
                proj={"EPSG": self._geometries.crs.to_epsg()},
                feature_collection_list=[feature_collection],
                user_context=context,
            )
            log.info(f"[run_udf] Running UDF {str_truncate(single_run_udf.udf, width=256)!r} on {udf_data!r}")
            result_data = env.backend_implementation.processing.run_udf(udf=single_run_udf.udf, data=udf_data)
            log.info(f"[run_udf] UDF resulted in {result_data!r}")

            # Enforce the UDF contract: UdfData wrapping exactly one feature collection.
            if not isinstance(result_data, openeo.udf.UdfData):
                raise ValueError(f"UDF should return UdfData, but got {type(result_data)}")
            result_features = result_data.get_feature_collection_list()
            if not (result_features and len(result_features) == 1):
                raise ValueError(
                    f"UDF should return single feature collection but got {result_features and len(result_features)}"
                )
            return DriverVectorCube(geometries=result_features[0].data)

    # Anything else (other process graphs, other dimensions, explicit target_dimension) is unsupported.
    raise FeatureUnsupportedException(
        message=f"DriverVectorCube.apply_dimension with {dimension=} and {bool(single_run_udf)=}"
    )
507
633
508
634
class DriverMlModel :
509
635
"""Base class for driver-side 'ml-model' data structures"""
0 commit comments