diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 7b6f4c55..50647ec2 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -112,10 +112,10 @@ def shapely_to_cf(geometries: xr.DataArray | Sequence, grid_mapping: str | None } if types.issubset({"Point", "MultiPoint"}): ds = points_to_cf(geometries) - elif types.issubset({"Polygon", "MultiPolygon"}) or types.issubset( - {"LineString", "MultiLineString"} - ): - raise NotImplementedError("Only point geometries conversion is implemented.") + elif types.issubset({"LineString", "MultiLineString"}): + ds = lines_to_cf(geometries) + elif types.issubset({"Polygon", "MultiPolygon"}): + raise NotImplementedError("Polygon geometry conversion is not implemented.") else: raise ValueError( f"Mixed geometry types are not supported in CF-compliant datasets. Got {types}" @@ -165,8 +165,10 @@ def cf_to_shapely(ds: xr.Dataset): geom_type = ds.geometry_container.attrs["geometry_type"] if geom_type == "point": geometries = cf_to_points(ds) - elif geom_type in ["line", "polygon"]: - raise NotImplementedError("Only point geometries conversion is implemented.") + elif geom_type == "line": + geometries = cf_to_lines(ds) + elif geom_type == "polygon": + raise NotImplementedError("Polygon geometry conversion is not implemented.") else: raise ValueError( f"Valid CF geometry types are 'point', 'line' and 'polygon'. Got {geom_type}" @@ -295,3 +297,144 @@ def cf_to_points(ds: xr.Dataset): j += n return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords) + + +def lines_to_cf(lines: xr.DataArray | Sequence): + """Convert an iterable of lines (shapely.geometry.[Multi]Line) into a CF-compliant geometry dataset. + + Parameters + ---------- + lines : sequence of shapely.geometry.Line or MultiLine + The sequence of [multi]lines to translate to a CF dataset. + + Returns + ------- + xr.Dataset + A Dataset with variables 'x', 'y', 'crd_x', 'crd_y', 'node_count' and 'geometry_container' + and optionally 'part_node_count'. + """ + from shapely import to_ragged_array + + if isinstance(lines, xr.DataArray): + dim = lines.dims[0] + coord = lines[dim] if dim in lines.coords else None + lines_ = lines.values + else: + dim = "index" + coord = None + lines_ = np.array(lines) + + _, arr, offsets = to_ragged_array(lines_) + x = arr[:, 0] + y = arr[:, 1] + + node_count = np.diff(offsets[0]) + if len(offsets) == 1: + indices = offsets[0] + part_node_count = node_count + else: + indices = np.take(offsets[0], offsets[1]) + part_node_count = np.diff(indices) + + geom_coords = arr.take(indices[:-1], 0) + crdX = geom_coords[:, 0] + crdY = geom_coords[:, 1] + + ds = xr.Dataset( + data_vars={ + "node_count": xr.DataArray(node_count, dims=("segment",)), + "part_node_count": xr.DataArray(part_node_count, dims=(dim,)), + "geometry_container": xr.DataArray( + attrs={ + "geometry_type": "line", + "node_count": "node_count", + "part_node_count": "part_node_count", + "node_coordinates": "x y", + "coordinates": "crd_x crd_y", + } + ), + }, + coords={ + "x": xr.DataArray(x, dims=("node",), attrs={"axis": "X"}), + "y": xr.DataArray(y, dims=("node",), attrs={"axis": "Y"}), + "crd_x": xr.DataArray(crdX, dims=(dim,), attrs={"nodes": "x"}), + "crd_y": xr.DataArray(crdY, dims=(dim,), attrs={"nodes": "y"}), + }, + ) + + if coord is not None: + ds = ds.assign_coords({dim: coord}) + + # Special case when we have no MultiLines + if len(ds.part_node_count) == len(ds.node_count): + ds = ds.drop_vars("part_node_count") + del ds.geometry_container.attrs["part_node_count"] + return ds + + +def cf_to_lines(ds: xr.Dataset): + """Convert line geometries stored in a CF-compliant way to shapely lines stored in a single variable. + + Parameters + ---------- + ds : xr.Dataset + A dataset with CF-compliant line geometries. + Must have a "geometry_container" variable with at least a 'node_coordinates' attribute. + Must also have the two 1D variables listed by this attribute. + + Returns + ------- + geometry : xr.DataArray + A 1D array of shapely.geometry.[Multi]Line objects. + It has the same dimension as the ``part_node_count`` or the coordinates variables, or + ``'features'`` if those were not present in ``ds``. + """ + from shapely import GeometryType, from_ragged_array + + # Shorthand for convenience + geo = ds.geometry_container.attrs + + # The features dimension name, defaults to the one of 'part_node_count' + # or the dimension of the coordinates, if present. + feat_dim = None + if "coordinates" in geo: + xcoord_name, _ = geo["coordinates"].split(" ") + (feat_dim,) = ds[xcoord_name].dims + + x_name, y_name = geo["node_coordinates"].split(" ") + xy = np.stack([ds[x_name].values, ds[y_name].values], axis=-1) + + node_count_name = geo.get("node_count") + part_node_count_name = geo.get("part_node_count") + if node_count_name is None: + raise ValueError("'node_count' must be provided for line geometries") + else: + node_count = ds[node_count_name] + + offset1 = np.insert(np.cumsum(node_count.values), 0, 0) + lines = from_ragged_array(GeometryType.LINESTRING, xy, offsets=(offset1,)) + + if part_node_count_name is None: + # No part_node_count means there are no multilines + # And if we had no coordinates, then the dimension defaults to "index" + feat_dim = feat_dim or "index" + part_node_count = xr.DataArray(node_count.values, dims=(feat_dim,)) + if feat_dim in ds.coords: + part_node_count = part_node_count.assign_coords({feat_dim: ds[feat_dim]}) + + geoms = lines + else: + part_node_count = ds[part_node_count_name] + + # get index of offset1 values that are edges for part_node_count + offset2 = np.nonzero( + np.isin(offset1, np.insert(np.cumsum(part_node_count), 0, 0)) + )[0] + multilines = from_ragged_array( + GeometryType.MULTILINESTRING, xy, offsets=(offset1, offset2) + ) + + # get items from lines or multilines depending on number of segments + geoms = np.where(np.diff(offset2) == 1, lines[offset2[:-1]], multilines) + + return xr.DataArray(geoms, dims=part_node_count.dims, coords=part_node_count.coords) diff --git a/cf_xarray/tests/test_geometry.py b/cf_xarray/tests/test_geometry.py index 79106c69..bcb521b4 100644 --- a/cf_xarray/tests/test_geometry.py +++ b/cf_xarray/tests/test_geometry.py @@ -49,6 +49,83 @@ def geometry_ds(): return cf_ds, shp_ds +@pytest.fixture +def geometry_line_ds(): + from shapely.geometry import LineString, MultiLineString + + # empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries. + geoms = np.empty(3, dtype=object) + geoms[:] = [ + MultiLineString([[[0, 0], [1, 2]], [[4, 4], [5, 6]]]), + LineString([[0, 0], [1, 0], [1, 1]]), + LineString([[1.0, 1.0], [2.0, 2.0], [1.7, 9.5]]), + ] + + ds = xr.Dataset() + shp_da = xr.DataArray(geoms, dims=("index",), name="geometry") + + cf_ds = ds.assign( + x=xr.DataArray( + [0, 1, 4, 5, 0, 1, 1, 1.0, 2.0, 1.7], dims=("node",), attrs={"axis": "X"} + ), + y=xr.DataArray( + [0, 2, 4, 6, 0, 0, 1, 1.0, 2.0, 9.5], dims=("node",), attrs={"axis": "Y"} + ), + part_node_count=xr.DataArray([4, 3, 3], dims=("index",)), + node_count=xr.DataArray([2, 2, 3, 3], dims=("segment",)), + crd_x=xr.DataArray([0.0, 0.0, 1.0], dims=("index",), attrs={"nodes": "x"}), + crd_y=xr.DataArray([0.0, 0.0, 1.0], dims=("index",), attrs={"nodes": "y"}), + geometry_container=xr.DataArray( + attrs={ + "geometry_type": "line", + "node_count": "node_count", + "part_node_count": "part_node_count", + "node_coordinates": "x y", + "coordinates": "crd_x crd_y", + } + ), + ) + + cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"]) + + return cf_ds, shp_da + + +@pytest.fixture +def geometry_line_without_multilines_ds(): + from shapely.geometry import LineString + + # empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries. + geoms = np.empty(2, dtype=object) + geoms[:] = [ + LineString([[0, 0], [1, 0], [1, 1]]), + LineString([[1.0, 1.0], [2.0, 2.0], [1.7, 9.5]]), + ] + + ds = xr.Dataset() + shp_da = xr.DataArray(geoms, dims=("index",), name="geometry") + + cf_ds = ds.assign( + x=xr.DataArray([0, 1, 1, 1.0, 2.0, 1.7], dims=("node",), attrs={"axis": "X"}), + y=xr.DataArray([0, 0, 1, 1.0, 2.0, 9.5], dims=("node",), attrs={"axis": "Y"}), + node_count=xr.DataArray([3, 3], dims=("segment",)), + crd_x=xr.DataArray([0.0, 1.0], dims=("index",), attrs={"nodes": "x"}), + crd_y=xr.DataArray([0.0, 1.0], dims=("index",), attrs={"nodes": "y"}), + geometry_container=xr.DataArray( + attrs={ + "geometry_type": "line", + "node_count": "node_count", + "node_coordinates": "x y", + "coordinates": "crd_x crd_y", + } + ), + ) + + cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"]) + + return cf_ds, shp_da + + @requires_shapely def test_shapely_to_cf(geometry_ds): from shapely.geometry import Point @@ -87,12 +164,45 @@ def test_shapely_to_cf(geometry_ds): assert set(out.dims) == {"features", "node"} +@requires_shapely +def test_shapely_to_cf_for_lines_as_da(geometry_line_ds): + expected, in_da = geometry_line_ds + + actual = cfxr.shapely_to_cf(in_da) + xr.testing.assert_identical(actual, expected) + + in_da = in_da.assign_coords(index=["a", "b", "c"]) + actual = cfxr.shapely_to_cf(in_da) + xr.testing.assert_identical(actual, expected.assign_coords(index=["a", "b", "c"])) + + +@requires_shapely +def test_shapely_to_cf_for_lines_as_sequence(geometry_line_ds): + expected, in_da = geometry_line_ds + actual = cfxr.shapely_to_cf(in_da.values) + xr.testing.assert_identical(actual, expected) + + +@requires_shapely +def test_shapely_to_cf_for_lines_without_multilines( + geometry_line_without_multilines_ds, +): + expected, in_da = geometry_line_without_multilines_ds + actual = cfxr.shapely_to_cf(in_da) + xr.testing.assert_identical(actual, expected) + + @requires_shapely def test_shapely_to_cf_errors(): - from shapely.geometry import LineString, Point + from shapely.geometry import Point, Polygon - geoms = [LineString([[1, 2], [2, 3]]), LineString([[2, 3, 4], [4, 3, 2]])] - with pytest.raises(NotImplementedError, match="Only point geometries conversion"): + geoms = [ + Polygon([[1, 1], [1, 3], [3, 3], [1, 1]]), + Polygon([[1, 1, 4], [1, 3, 4], [3, 3, 3], [1, 1, 4]]), + ] + with pytest.raises( + NotImplementedError, match="Polygon geometry conversion is not implemented" + ): cfxr.shapely_to_cf(geoms) geoms.append(Point(1, 2)) @@ -122,16 +232,47 @@ def test_cf_to_shapely(geometry_ds): @requires_shapely -def test_cf_to_shapely_errors(geometry_ds): - in_ds, expected = geometry_ds - in_ds.geometry_container.attrs["geometry_type"] = "line" - with pytest.raises(NotImplementedError, match="Only point geometries conversion"): +def test_cf_to_shapely_for_lines(geometry_line_ds): + in_ds, expected = geometry_line_ds + + actual = cfxr.cf_to_shapely(in_ds) + assert actual.dims == ("index",) + xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected) + + +@requires_shapely +def test_cf_to_shapely_for_lines_without_multilines( + geometry_line_without_multilines_ds, +): + in_ds, expected = geometry_line_without_multilines_ds + actual = cfxr.cf_to_shapely(in_ds) + assert actual.dims == ("index",) + xr.testing.assert_identical(actual, expected) + + in_ds = in_ds.assign_coords(index=["b", "c"]) + actual = cfxr.cf_to_shapely(in_ds) + assert actual.dims == ("index",) + xr.testing.assert_identical( + actual.drop_vars(["crd_x", "crd_y"]), expected.assign_coords(index=["b", "c"]) + ) + + +@requires_shapely +def test_cf_to_shapely_errors(geometry_ds, geometry_line_ds): + in_ds, _ = geometry_ds + in_ds.geometry_container.attrs["geometry_type"] = "polygon" + with pytest.raises(NotImplementedError, match="Polygon geometry"): cfxr.cf_to_shapely(in_ds) in_ds.geometry_container.attrs["geometry_type"] = "punkt" with pytest.raises(ValueError, match="Valid CF geometry types are "): cfxr.cf_to_shapely(in_ds) + in_ds, _ = geometry_line_ds + del in_ds.geometry_container.attrs["node_count"] + with pytest.raises(ValueError, match="'node_count' must be provided"): + cfxr.cf_to_shapely(in_ds) + @requires_shapely def test_reshape_unique_geometries(geometry_ds): diff --git a/pyproject.toml b/pyproject.toml index 54485025..c072f2a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -132,6 +132,7 @@ module=[ "pint", "matplotlib", "pytest", + "shapely", "shapely.geometry", "xarray.core.pycompat", ]