From 923d72e321b5afbafff61b099e6a87a45f8fc60f Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Sun, 16 Jul 2023 15:30:27 -0400 Subject: [PATCH 01/12] Add conversion between cf and shapely for lines --- cf_xarray/geometry.py | 67 +++++++++++++++++++++++++++++++- cf_xarray/tests/test_geometry.py | 53 +++++++++++++++++++++++++ 2 files changed, 119 insertions(+), 1 deletion(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 148a8d1b..314a876e 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -164,7 +164,7 @@ def cf_to_shapely(ds: xr.Dataset): if geom_type == "point": geometries = cf_to_points(ds) elif geom_type in ["line", "polygon"]: - raise NotImplementedError("Only point geometries conversion is implemented.") + geometries = cf_to_lines(ds) else: raise ValueError( f"Valid CF geometry types are 'point', 'line' and 'polygon'. Got {geom_type}" @@ -293,3 +293,68 @@ def cf_to_points(ds: xr.Dataset): j += n return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords) + + +def cf_to_lines(ds: xr.Dataset): + """Convert line geometries stored in a CF-compliant way to shapely lines stored in a single variable. + + Parameters + ---------- + ds : xr.Dataset + A dataset with CF-compliant line geometries. + Must have a "geometry_container" variable with at least a 'node_coordinates' attribute. + Must also have the two 1D variables listed by this attribute. + + Returns + ------- + geometry : xr.DataArray + A 1D array of shapely.geometry.[Multi]Point objects. + It has the same dimension as the ``node_count`` or the coordinates variables, or + ``'features'`` if those were not present in ``ds``. + """ + from shapely.geometry import LineString, MultiLineString + + # Shorthand for convenience + geo = ds.geometry_container.attrs + + # The features dimension name, defaults to the one of 'node_count' or the dimension of the coordinates, if present. + feat_dim = None + if "coordinates" in geo and feat_dim is None: + xcoord_name, _ = geo["coordinates"].split(" ") + (feat_dim,) = ds[xcoord_name].dims + + x_name, y_name = geo["node_coordinates"].split(" ") + xy = np.stack([ds[x_name].values, ds[y_name].values], axis=-1) + + node_count_name = geo.get("node_count") + part_node_count_name = geo.get("part_node_count") + if node_count_name is None: + raise ValueError("'node_count' must be provided for line geometries") + else: + node_count = ds[node_count_name] + + if part_node_count_name is None: + # No part_node_count means there is only one line + # And if we had no coordinates, then the dimension defaults to "features" + feat_dim = feat_dim or "features" + part_node_count = xr.DataArray([1] * xy.shape[0], dims=(feat_dim,)) + if feat_dim in ds.coords: + part_node_count = part_node_count.assign_coords({feat_dim: ds[feat_dim]}) + else: + part_node_count = ds[part_node_count_name] + + j = 0 # The index of the first node. + geoms = np.empty(part_node_count.shape, dtype=object) + node_count_cumsum = np.insert(np.cumsum(node_count), 0, 0) + + # i is the feature index, n its number of nodes + for i, n in enumerate(part_node_count.values): + subset = node_count_cumsum[(j <= node_count_cumsum) & (node_count_cumsum <= n)] + indexes = np.stack([subset[:-1], np.roll(subset, 2)[:-1]]).transpose() + if indexes.shape == (0, 2): + geoms[i] = LineString(xy[j : j + n, :]) + else: + geoms[i] = MultiLineString([xy[ii[0]: ii[1], :] for ii in indexes]) + j += n + + return xr.DataArray(geoms, dims=part_node_count.dims, coords=part_node_count.coords) \ No newline at end of file diff --git a/cf_xarray/tests/test_geometry.py b/cf_xarray/tests/test_geometry.py index 79106c69..4e4df43c 100644 --- a/cf_xarray/tests/test_geometry.py +++ b/cf_xarray/tests/test_geometry.py @@ -49,6 +49,49 @@ def geometry_ds(): return cf_ds, shp_ds +@pytest.fixture +def geometry_line_ds(): + from shapely.geometry import MultiLineString, LineString + + # empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries. + geoms = np.empty(3, dtype=object) + geoms[:] = [ + MultiLineString([[[0, 0], [1, 2]], [[4, 4], [5, 6]]]), + LineString([[0, 0], [1, 0], [1, 1]]), + LineString([[1.0, 1.0], [2.0, 2.0], [1.7, 9.5]]), + ] + + ds = xr.Dataset( + { + "data": xr.DataArray(range(len(geoms)), dims=("index",)), + "time": xr.DataArray([0, 1, 2], dims=("index",)), + } + ) + shp_ds = ds.assign(geometry=xr.DataArray(geoms, dims=("index",))) + + cf_ds = ds.assign( + x=xr.DataArray([0, 1, 4, 5, 0, 1, 1, 1.0, 2.0, 1.7], dims=("node",), attrs={"axis": "X"}), + y=xr.DataArray([0, 2, 4, 6, 0, 0, 1, 1.0, 2.0, 9.5], dims=("node",), attrs={"axis": "Y"}), + part_node_count=xr.DataArray([4, 3, 3], dims=("index",)), + node_count=xr.DataArray([2, 2, 3, 3], dims=("counter",)), + crd_x=xr.DataArray([1.0, 3.0, 4.0], dims=("index",), attrs={"nodes": "x"}), + crd_y=xr.DataArray([2.0, 4.0, 5.0], dims=("index",), attrs={"nodes": "y"}), + geometry_container=xr.DataArray( + attrs={ + "geometry_type": "line", + "node_count": "node_count", + "part_node_count": "part_node_count", + "node_coordinates": "x y", + "coordinates": "crd_x crd_y", + } + ), + ) + + cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"]) + + return cf_ds, shp_ds + + @requires_shapely def test_shapely_to_cf(geometry_ds): from shapely.geometry import Point @@ -121,6 +164,16 @@ def test_cf_to_shapely(geometry_ds): assert out.dims == ("index",) +@requires_shapely +def test_cf_to_shapely_for_line(geometry_line_ds): + in_ds, expected = geometry_line_ds + + actual = cfxr.cf_to_shapely(in_ds) + assert actual.dims == ("index",) + + xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected.geometry) + + @requires_shapely def test_cf_to_shapely_errors(geometry_ds): in_ds, expected = geometry_ds From 1fcdf22313e908a9fa38deda6712cd124c6b617f Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 17 Jul 2023 08:47:12 -0400 Subject: [PATCH 02/12] Make polygons raise --- cf_xarray/geometry.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 314a876e..95807328 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -163,8 +163,10 @@ def cf_to_shapely(ds: xr.Dataset): geom_type = ds.geometry_container.attrs["geometry_type"] if geom_type == "point": geometries = cf_to_points(ds) - elif geom_type in ["line", "polygon"]: + elif geom_type == "line": geometries = cf_to_lines(ds) + elif geom_type == "polygon": + raise NotImplementedError("Polygon geometry conversion is not implemented.") else: raise ValueError( f"Valid CF geometry types are 'point', 'line' and 'polygon'. Got {geom_type}" @@ -317,7 +319,8 @@ def cf_to_lines(ds: xr.Dataset): # Shorthand for convenience geo = ds.geometry_container.attrs - # The features dimension name, defaults to the one of 'node_count' or the dimension of the coordinates, if present. + # The features dimension name, defaults to the one of 'part_node_count' + # or the dimension of the coordinates, if present. feat_dim = None if "coordinates" in geo and feat_dim is None: xcoord_name, _ = geo["coordinates"].split(" ") @@ -357,4 +360,4 @@ def cf_to_lines(ds: xr.Dataset): geoms[i] = MultiLineString([xy[ii[0]: ii[1], :] for ii in indexes]) j += n - return xr.DataArray(geoms, dims=part_node_count.dims, coords=part_node_count.coords) \ No newline at end of file + return xr.DataArray(geoms, dims=part_node_count.dims, coords=part_node_count.coords) From 06488ba25bd193a6f5530036c8d966c7c4a1e5e1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 17 Jul 2023 12:49:28 +0000 Subject: [PATCH 03/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cf_xarray/geometry.py | 2 +- cf_xarray/tests/test_geometry.py | 10 +++++++--- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 95807328..8314c38c 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -357,7 +357,7 @@ def cf_to_lines(ds: xr.Dataset): if indexes.shape == (0, 2): geoms[i] = LineString(xy[j : j + n, :]) else: - geoms[i] = MultiLineString([xy[ii[0]: ii[1], :] for ii in indexes]) + geoms[i] = MultiLineString([xy[ii[0] : ii[1], :] for ii in indexes]) j += n return xr.DataArray(geoms, dims=part_node_count.dims, coords=part_node_count.coords) diff --git a/cf_xarray/tests/test_geometry.py b/cf_xarray/tests/test_geometry.py index 4e4df43c..dac90e52 100644 --- a/cf_xarray/tests/test_geometry.py +++ b/cf_xarray/tests/test_geometry.py @@ -51,7 +51,7 @@ def geometry_ds(): @pytest.fixture def geometry_line_ds(): - from shapely.geometry import MultiLineString, LineString + from shapely.geometry import LineString, MultiLineString # empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries. geoms = np.empty(3, dtype=object) @@ -70,8 +70,12 @@ def geometry_line_ds(): shp_ds = ds.assign(geometry=xr.DataArray(geoms, dims=("index",))) cf_ds = ds.assign( - x=xr.DataArray([0, 1, 4, 5, 0, 1, 1, 1.0, 2.0, 1.7], dims=("node",), attrs={"axis": "X"}), - y=xr.DataArray([0, 2, 4, 6, 0, 0, 1, 1.0, 2.0, 9.5], dims=("node",), attrs={"axis": "Y"}), + x=xr.DataArray( + [0, 1, 4, 5, 0, 1, 1, 1.0, 2.0, 1.7], dims=("node",), attrs={"axis": "X"} + ), + y=xr.DataArray( + [0, 2, 4, 6, 0, 0, 1, 1.0, 2.0, 9.5], dims=("node",), attrs={"axis": "Y"} + ), part_node_count=xr.DataArray([4, 3, 3], dims=("index",)), node_count=xr.DataArray([2, 2, 3, 3], dims=("counter",)), crd_x=xr.DataArray([1.0, 3.0, 4.0], dims=("index",), attrs={"nodes": "x"}), From 1346464431299c3ab7239478bcb460408a11b9a5 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Fri, 21 Jul 2023 14:42:00 -0400 Subject: [PATCH 04/12] Switch to using `linestrings` and `multilinestrings` --- cf_xarray/geometry.py | 38 +++++++++++++++++++++----------- cf_xarray/tests/test_geometry.py | 6 ++--- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 8314c38c..3481a83e 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -314,7 +314,7 @@ def cf_to_lines(ds: xr.Dataset): It has the same dimension as the ``node_count`` or the coordinates variables, or ``'features'`` if those were not present in ``ds``. """ - from shapely.geometry import LineString, MultiLineString + from shapely import linestrings, multilinestrings # Shorthand for convenience geo = ds.geometry_container.attrs @@ -346,18 +346,30 @@ def cf_to_lines(ds: xr.Dataset): else: part_node_count = ds[part_node_count_name] - j = 0 # The index of the first node. - geoms = np.empty(part_node_count.shape, dtype=object) - node_count_cumsum = np.insert(np.cumsum(node_count), 0, 0) + # create all the line segments + line_indices = np.repeat(np.arange(len(node_count)), node_count) + lines = linestrings(xy, indices=line_indices) + + # contruct an array where every row corresponds to a geometry object + # and each value indicates whether the line at that index should + # be included in that object. + maxes = np.cumsum(part_node_count) + mins = np.insert(maxes[:-1], 0, 0) + dummy = np.broadcast_to(np.cumsum(node_count), (len(part_node_count), len(node_count))) + is_multi = np.where( + (dummy <= np.expand_dims(maxes, axis=1)) & (dummy > np.expand_dims(mins, axis=1)), + np.ones_like(dummy), + np.zeros_like(dummy) + ) - # i is the feature index, n its number of nodes - for i, n in enumerate(part_node_count.values): - subset = node_count_cumsum[(j <= node_count_cumsum) & (node_count_cumsum <= n)] - indexes = np.stack([subset[:-1], np.roll(subset, 2)[:-1]]).transpose() - if indexes.shape == (0, 2): - geoms[i] = LineString(xy[j : j + n, :]) - else: - geoms[i] = MultiLineString([xy[ii[0] : ii[1], :] for ii in indexes]) - j += n + # create multilinestrings for each set of lines. Note that we create multiline strings + # even for cases where there is only one line. We will pick which ones we need later on + multiline_indices = np.where(is_multi == 1)[0] + multilines = multilinestrings(lines, indices=multiline_indices) + + # construct the final geometry by pulling items from the lines or multilines based on + # the number of lines that are mapped to each object. + _, index, counts = np.unique(multiline_indices, return_counts=True, return_index=True) + geoms = np.where(counts == 1, np.array(lines[index]), multilines) return xr.DataArray(geoms, dims=part_node_count.dims, coords=part_node_count.coords) diff --git a/cf_xarray/tests/test_geometry.py b/cf_xarray/tests/test_geometry.py index dac90e52..4c29fab8 100644 --- a/cf_xarray/tests/test_geometry.py +++ b/cf_xarray/tests/test_geometry.py @@ -180,9 +180,9 @@ def test_cf_to_shapely_for_line(geometry_line_ds): @requires_shapely def test_cf_to_shapely_errors(geometry_ds): - in_ds, expected = geometry_ds - in_ds.geometry_container.attrs["geometry_type"] = "line" - with pytest.raises(NotImplementedError, match="Only point geometries conversion"): + in_ds, _ = geometry_ds + in_ds.geometry_container.attrs["geometry_type"] = "polygon" + with pytest.raises(NotImplementedError, match="Polygon geometry"): cfxr.cf_to_shapely(in_ds) in_ds.geometry_container.attrs["geometry_type"] = "punkt" From 3e21f12a2e2852cdf1e8af9ca41ed35c850c7ea2 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 3 Nov 2023 21:39:31 +0000 Subject: [PATCH 05/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cf_xarray/geometry.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 3481a83e..6ce2c341 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -355,11 +355,14 @@ def cf_to_lines(ds: xr.Dataset): # be included in that object. maxes = np.cumsum(part_node_count) mins = np.insert(maxes[:-1], 0, 0) - dummy = np.broadcast_to(np.cumsum(node_count), (len(part_node_count), len(node_count))) + dummy = np.broadcast_to( + np.cumsum(node_count), (len(part_node_count), len(node_count)) + ) is_multi = np.where( - (dummy <= np.expand_dims(maxes, axis=1)) & (dummy > np.expand_dims(mins, axis=1)), + (dummy <= np.expand_dims(maxes, axis=1)) + & (dummy > np.expand_dims(mins, axis=1)), np.ones_like(dummy), - np.zeros_like(dummy) + np.zeros_like(dummy), ) # create multilinestrings for each set of lines. Note that we create multiline strings @@ -369,7 +372,9 @@ def cf_to_lines(ds: xr.Dataset): # construct the final geometry by pulling items from the lines or multilines based on # the number of lines that are mapped to each object. - _, index, counts = np.unique(multiline_indices, return_counts=True, return_index=True) + _, index, counts = np.unique( + multiline_indices, return_counts=True, return_index=True + ) geoms = np.where(counts == 1, np.array(lines[index]), multilines) return xr.DataArray(geoms, dims=part_node_count.dims, coords=part_node_count.coords) From 0b2e075fd513905877ed191fbf632877528cc0a3 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 6 Nov 2023 14:53:12 -0500 Subject: [PATCH 06/12] Add shapely to CF for lines --- cf_xarray/geometry.py | 89 +++++++++++++++++++++++++++++--- cf_xarray/tests/test_geometry.py | 34 ++++++------ 2 files changed, 100 insertions(+), 23 deletions(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 6ce2c341..2d40b5a0 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -111,10 +111,10 @@ def shapely_to_cf(geometries: xr.DataArray | Sequence, grid_mapping: str | None } if types.issubset({"Point", "MultiPoint"}): ds = points_to_cf(geometries) - elif types.issubset({"Polygon", "MultiPolygon"}) or types.issubset( - {"LineString", "MultiLineString"} - ): - raise NotImplementedError("Only point geometries conversion is implemented.") + elif types.issubset({"LineString", "MultiLineString"}): + ds = lines_to_cf(geometries) + elif types.issubset({"Polygon", "MultiPolygon"}): + raise NotImplementedError("Polygon geometry conversion is not implemented.") else: raise ValueError( f"Mixed geometry types are not supported in CF-compliant datasets. Got {types}" @@ -297,6 +297,79 @@ def cf_to_points(ds: xr.Dataset): return xr.DataArray(geoms, dims=node_count.dims, coords=node_count.coords) +def lines_to_cf(lines: xr.DataArray | Sequence): + """Convert an iterable of lines (shapely.geometry.[Multi]Line) into a CF-compliant geometry dataset. + + Parameters + ---------- + lines : sequence of shapely.geometry.Line or MultiLine + The sequence of [multi]lines to translate to a CF dataset. + + Returns + ------- + xr.Dataset + A Dataset with variables 'x', 'y', 'crd_x', 'crd_y', 'node_count' and 'geometry_container' + and optionally 'part_node_count'. + """ + from shapely import to_ragged_array + + if isinstance(lines, xr.DataArray): + dim = lines.dims[0] + coord = lines[dim] if dim in lines.coords else None + lines_ = lines.values + else: + dim = "index" + coord = None + lines_ = np.array(lines) + + _, arr, offsets = to_ragged_array(lines_) + x = arr[:, 0] + y = arr[:, 1] + + node_count = np.diff(offsets[0]) + if len(offsets) == 1: + indices = offsets[0] + part_node_count = node_count + else: + indices = np.take(offsets[0], offsets[1]) + part_node_count = np.diff(indices) + + geom_coords = arr.take(indices[:-1], 0) + crdX = geom_coords[:, 0] + crdY = geom_coords[:, 1] + + ds = xr.Dataset( + data_vars={ + "node_count": xr.DataArray(node_count, dims=("segment",)), + "part_node_count": xr.DataArray(part_node_count, dims=(dim,)), + "geometry_container": xr.DataArray( + attrs={ + "geometry_type": "line", + "node_count": "node_count", + "part_node_count": "part_node_count", + "node_coordinates": "x y", + "coordinates": "crd_x crd_y", + } + ), + }, + coords={ + "x": xr.DataArray(x, dims=("node",), attrs={"axis": "X"}), + "y": xr.DataArray(y, dims=("node",), attrs={"axis": "Y"}), + "crd_x": xr.DataArray(crdX, dims=(dim,), attrs={"nodes": "x"}), + "crd_y": xr.DataArray(crdY, dims=(dim,), attrs={"nodes": "y"}), + }, + ) + + if coord is not None: + ds = ds.assign_coords({dim: coord}) + + # Special case when we have no MultiLines + if len(ds.part_node_count) == len(ds.node_count): + ds = ds.drop_vars("part_node_count") + del ds.geometry_container.attrs["part_node_count"] + return ds + + def cf_to_lines(ds: xr.Dataset): """Convert line geometries stored in a CF-compliant way to shapely lines stored in a single variable. @@ -310,8 +383,8 @@ def cf_to_lines(ds: xr.Dataset): Returns ------- geometry : xr.DataArray - A 1D array of shapely.geometry.[Multi]Point objects. - It has the same dimension as the ``node_count`` or the coordinates variables, or + A 1D array of shapely.geometry.[Multi]Line objects. + It has the same dimension as the ``part_node_count`` or the coordinates variables, or ``'features'`` if those were not present in ``ds``. """ from shapely import linestrings, multilinestrings @@ -350,10 +423,10 @@ def cf_to_lines(ds: xr.Dataset): line_indices = np.repeat(np.arange(len(node_count)), node_count) lines = linestrings(xy, indices=line_indices) - # contruct an array where every row corresponds to a geometry object + # construct an array where every row corresponds to a geometry object # and each value indicates whether the line at that index should # be included in that object. - maxes = np.cumsum(part_node_count) + maxes = np.cumsum(part_node_count.values) mins = np.insert(maxes[:-1], 0, 0) dummy = np.broadcast_to( np.cumsum(node_count), (len(part_node_count), len(node_count)) diff --git a/cf_xarray/tests/test_geometry.py b/cf_xarray/tests/test_geometry.py index 4c29fab8..2ce1e10c 100644 --- a/cf_xarray/tests/test_geometry.py +++ b/cf_xarray/tests/test_geometry.py @@ -61,13 +61,8 @@ def geometry_line_ds(): LineString([[1.0, 1.0], [2.0, 2.0], [1.7, 9.5]]), ] - ds = xr.Dataset( - { - "data": xr.DataArray(range(len(geoms)), dims=("index",)), - "time": xr.DataArray([0, 1, 2], dims=("index",)), - } - ) - shp_ds = ds.assign(geometry=xr.DataArray(geoms, dims=("index",))) + ds = xr.Dataset() + shp_da = xr.DataArray(geoms, dims=("index",), name="geometry") cf_ds = ds.assign( x=xr.DataArray( @@ -77,9 +72,9 @@ def geometry_line_ds(): [0, 2, 4, 6, 0, 0, 1, 1.0, 2.0, 9.5], dims=("node",), attrs={"axis": "Y"} ), part_node_count=xr.DataArray([4, 3, 3], dims=("index",)), - node_count=xr.DataArray([2, 2, 3, 3], dims=("counter",)), - crd_x=xr.DataArray([1.0, 3.0, 4.0], dims=("index",), attrs={"nodes": "x"}), - crd_y=xr.DataArray([2.0, 4.0, 5.0], dims=("index",), attrs={"nodes": "y"}), + node_count=xr.DataArray([2, 2, 3, 3], dims=("segment",)), + crd_x=xr.DataArray([0.0, 0.0, 1.0], dims=("index",), attrs={"nodes": "x"}), + crd_y=xr.DataArray([0.0, 0.0, 1.0], dims=("index",), attrs={"nodes": "y"}), geometry_container=xr.DataArray( attrs={ "geometry_type": "line", @@ -93,7 +88,7 @@ def geometry_line_ds(): cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"]) - return cf_ds, shp_ds + return cf_ds, shp_da @requires_shapely @@ -136,10 +131,10 @@ def test_shapely_to_cf(geometry_ds): @requires_shapely def test_shapely_to_cf_errors(): - from shapely.geometry import LineString, Point + from shapely.geometry import Polygon, Point - geoms = [LineString([[1, 2], [2, 3]]), LineString([[2, 3, 4], [4, 3, 2]])] - with pytest.raises(NotImplementedError, match="Only point geometries conversion"): + geoms = [Polygon([[1, 1], [1, 3], [3, 3], [1, 1]]), Polygon([[1, 1, 4], [1, 3, 4], [3, 3, 3], [1, 1, 4]])] + with pytest.raises(NotImplementedError, match="Polygon geometry conversion is not implemented"): cfxr.shapely_to_cf(geoms) geoms.append(Point(1, 2)) @@ -175,7 +170,16 @@ def test_cf_to_shapely_for_line(geometry_line_ds): actual = cfxr.cf_to_shapely(in_ds) assert actual.dims == ("index",) - xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected.geometry) + xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected) + + +@requires_shapely +def test_shapely_to_cf_for_line(geometry_line_ds): + expected, in_da = geometry_line_ds + + actual = cfxr.shapely_to_cf(in_da) + + xr.testing.assert_identical(actual, expected) @requires_shapely From dd93e372446744c8ec47ac6e982955e9e8ae0c55 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 6 Nov 2023 19:54:43 +0000 Subject: [PATCH 07/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cf_xarray/geometry.py | 2 +- cf_xarray/tests/test_geometry.py | 11 ++++++++--- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 2d40b5a0..f8ba4686 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -322,7 +322,7 @@ def lines_to_cf(lines: xr.DataArray | Sequence): coord = None lines_ = np.array(lines) - _, arr, offsets = to_ragged_array(lines_) + _, arr, offsets = to_ragged_array(lines_) x = arr[:, 0] y = arr[:, 1] diff --git a/cf_xarray/tests/test_geometry.py b/cf_xarray/tests/test_geometry.py index 2ce1e10c..0f3d199b 100644 --- a/cf_xarray/tests/test_geometry.py +++ b/cf_xarray/tests/test_geometry.py @@ -131,10 +131,15 @@ def test_shapely_to_cf(geometry_ds): @requires_shapely def test_shapely_to_cf_errors(): - from shapely.geometry import Polygon, Point + from shapely.geometry import Point, Polygon - geoms = [Polygon([[1, 1], [1, 3], [3, 3], [1, 1]]), Polygon([[1, 1, 4], [1, 3, 4], [3, 3, 3], [1, 1, 4]])] - with pytest.raises(NotImplementedError, match="Polygon geometry conversion is not implemented"): + geoms = [ + Polygon([[1, 1], [1, 3], [3, 3], [1, 1]]), + Polygon([[1, 1, 4], [1, 3, 4], [3, 3, 3], [1, 1, 4]]), + ] + with pytest.raises( + NotImplementedError, match="Polygon geometry conversion is not implemented" + ): cfxr.shapely_to_cf(geoms) geoms.append(Point(1, 2)) From 1a474df885750e7a99036a2a523f4d9bb7592c5b Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Mon, 6 Nov 2023 15:54:44 -0500 Subject: [PATCH 08/12] Use `from_ragged_array` --- cf_xarray/geometry.py | 49 +++++++++++++------------------------------ 1 file changed, 15 insertions(+), 34 deletions(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index f8ba4686..c2a5a899 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -387,7 +387,7 @@ def cf_to_lines(ds: xr.Dataset): It has the same dimension as the ``part_node_count`` or the coordinates variables, or ``'features'`` if those were not present in ``ds``. """ - from shapely import linestrings, multilinestrings + from shapely import from_ragged_array, GeometryType # Shorthand for convenience geo = ds.geometry_container.attrs @@ -409,45 +409,26 @@ def cf_to_lines(ds: xr.Dataset): else: node_count = ds[node_count_name] + offset1 = np.insert(np.cumsum(node_count.values), 0, 0) + lines = from_ragged_array(GeometryType.LINESTRING, xy, offsets=(offset1,)) + if part_node_count_name is None: - # No part_node_count means there is only one line - # And if we had no coordinates, then the dimension defaults to "features" - feat_dim = feat_dim or "features" + # No part_node_count means there are no multilines + # And if we had no coordinates, then the dimension defaults to "index" + feat_dim = feat_dim or "index" part_node_count = xr.DataArray([1] * xy.shape[0], dims=(feat_dim,)) if feat_dim in ds.coords: part_node_count = part_node_count.assign_coords({feat_dim: ds[feat_dim]}) + + geoms = lines else: part_node_count = ds[part_node_count_name] - # create all the line segments - line_indices = np.repeat(np.arange(len(node_count)), node_count) - lines = linestrings(xy, indices=line_indices) - - # construct an array where every row corresponds to a geometry object - # and each value indicates whether the line at that index should - # be included in that object. - maxes = np.cumsum(part_node_count.values) - mins = np.insert(maxes[:-1], 0, 0) - dummy = np.broadcast_to( - np.cumsum(node_count), (len(part_node_count), len(node_count)) - ) - is_multi = np.where( - (dummy <= np.expand_dims(maxes, axis=1)) - & (dummy > np.expand_dims(mins, axis=1)), - np.ones_like(dummy), - np.zeros_like(dummy), - ) - - # create multilinestrings for each set of lines. Note that we create multiline strings - # even for cases where there is only one line. We will pick which ones we need later on - multiline_indices = np.where(is_multi == 1)[0] - multilines = multilinestrings(lines, indices=multiline_indices) - - # construct the final geometry by pulling items from the lines or multilines based on - # the number of lines that are mapped to each object. - _, index, counts = np.unique( - multiline_indices, return_counts=True, return_index=True - ) - geoms = np.where(counts == 1, np.array(lines[index]), multilines) + # get index of offset1 values that are edges for part_node_count + offset2 = np.nonzero(np.isin(offset1, np.insert(np.cumsum(part_node_count), 0, 0)))[0] + multilines = from_ragged_array(GeometryType.MULTILINESTRING, xy, offsets=(offset1, offset2)) + + # get items from lines or multilines depending on number of segments + geoms = np.where(np.diff(offset2) == 1, lines[offset2[:-1]], multilines) return xr.DataArray(geoms, dims=part_node_count.dims, coords=part_node_count.coords) From 94fe4b039dc9caef946e29c396c6a598f34a8dee Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 6 Nov 2023 20:55:11 +0000 Subject: [PATCH 09/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cf_xarray/geometry.py | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index c2a5a899..1a17d55c 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -387,7 +387,7 @@ def cf_to_lines(ds: xr.Dataset): It has the same dimension as the ``part_node_count`` or the coordinates variables, or ``'features'`` if those were not present in ``ds``. """ - from shapely import from_ragged_array, GeometryType + from shapely import GeometryType, from_ragged_array # Shorthand for convenience geo = ds.geometry_container.attrs @@ -419,15 +419,19 @@ def cf_to_lines(ds: xr.Dataset): part_node_count = xr.DataArray([1] * xy.shape[0], dims=(feat_dim,)) if feat_dim in ds.coords: part_node_count = part_node_count.assign_coords({feat_dim: ds[feat_dim]}) - + geoms = lines else: part_node_count = ds[part_node_count_name] # get index of offset1 values that are edges for part_node_count - offset2 = np.nonzero(np.isin(offset1, np.insert(np.cumsum(part_node_count), 0, 0)))[0] - multilines = from_ragged_array(GeometryType.MULTILINESTRING, xy, offsets=(offset1, offset2)) - + offset2 = np.nonzero( + np.isin(offset1, np.insert(np.cumsum(part_node_count), 0, 0)) + )[0] + multilines = from_ragged_array( + GeometryType.MULTILINESTRING, xy, offsets=(offset1, offset2) + ) + # get items from lines or multilines depending on number of segments geoms = np.where(np.diff(offset2) == 1, lines[offset2[:-1]], multilines) From 6b228e18257d0345d1b9f65e986a0b09d697a53a Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Thu, 30 Nov 2023 09:46:12 -0500 Subject: [PATCH 10/12] Fix mypy --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 54485025..c072f2a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -132,6 +132,7 @@ module=[ "pint", "matplotlib", "pytest", + "shapely", "shapely.geometry", "xarray.core.pycompat", ] From 61fd246605c5933fad6c9971568978d8a51de1e1 Mon Sep 17 00:00:00 2001 From: Julia Signell Date: Fri, 1 Dec 2023 12:47:50 -0500 Subject: [PATCH 11/12] Full test coverage for patch --- cf_xarray/geometry.py | 4 +- cf_xarray/tests/test_geometry.py | 90 ++++++++++++++++++++++++++++---- 2 files changed, 83 insertions(+), 11 deletions(-) diff --git a/cf_xarray/geometry.py b/cf_xarray/geometry.py index 38b88815..50647ec2 100644 --- a/cf_xarray/geometry.py +++ b/cf_xarray/geometry.py @@ -397,7 +397,7 @@ def cf_to_lines(ds: xr.Dataset): # The features dimension name, defaults to the one of 'part_node_count' # or the dimension of the coordinates, if present. feat_dim = None - if "coordinates" in geo and feat_dim is None: + if "coordinates" in geo: xcoord_name, _ = geo["coordinates"].split(" ") (feat_dim,) = ds[xcoord_name].dims @@ -418,7 +418,7 @@ def cf_to_lines(ds: xr.Dataset): # No part_node_count means there are no multilines # And if we had no coordinates, then the dimension defaults to "index" feat_dim = feat_dim or "index" - part_node_count = xr.DataArray([1] * xy.shape[0], dims=(feat_dim,)) + part_node_count = xr.DataArray(node_count.values, dims=(feat_dim,)) if feat_dim in ds.coords: part_node_count = part_node_count.assign_coords({feat_dim: ds[feat_dim]}) diff --git a/cf_xarray/tests/test_geometry.py b/cf_xarray/tests/test_geometry.py index 0f3d199b..c9140df2 100644 --- a/cf_xarray/tests/test_geometry.py +++ b/cf_xarray/tests/test_geometry.py @@ -48,7 +48,6 @@ def geometry_ds(): return cf_ds, shp_ds - @pytest.fixture def geometry_line_ds(): from shapely.geometry import LineString, MultiLineString @@ -90,6 +89,45 @@ def geometry_line_ds(): return cf_ds, shp_da +@pytest.fixture +def geometry_line_without_multilines_ds(): + from shapely.geometry import LineString + + # empty/fill workaround to avoid numpy deprecation(warning) due to the array interface of shapely geometries. + geoms = np.empty(2, dtype=object) + geoms[:] = [ + LineString([[0, 0], [1, 0], [1, 1]]), + LineString([[1.0, 1.0], [2.0, 2.0], [1.7, 9.5]]), + ] + + ds = xr.Dataset() + shp_da = xr.DataArray(geoms, dims=("index",), name="geometry") + + cf_ds = ds.assign( + x=xr.DataArray( + [0, 1, 1, 1.0, 2.0, 1.7], dims=("node",), attrs={"axis": "X"} + ), + y=xr.DataArray( + [0, 0, 1, 1.0, 2.0, 9.5], dims=("node",), attrs={"axis": "Y"} + ), + node_count=xr.DataArray([3, 3], dims=("segment",)), + crd_x=xr.DataArray([0.0, 1.0], dims=("index",), attrs={"nodes": "x"}), + crd_y=xr.DataArray([0.0, 1.0], dims=("index",), attrs={"nodes": "y"}), + geometry_container=xr.DataArray( + attrs={ + "geometry_type": "line", + "node_count": "node_count", + "node_coordinates": "x y", + "coordinates": "crd_x crd_y", + } + ), + ) + + cf_ds = cf_ds.set_coords(["x", "y", "crd_x", "crd_y"]) + + return cf_ds, shp_da + + @requires_shapely def test_shapely_to_cf(geometry_ds): @@ -129,6 +167,32 @@ def test_shapely_to_cf(geometry_ds): assert set(out.dims) == {"features", "node"} +@requires_shapely +def test_shapely_to_cf_for_lines_as_da(geometry_line_ds): + expected, in_da = geometry_line_ds + + actual = cfxr.shapely_to_cf(in_da) + xr.testing.assert_identical(actual, expected) + + in_da = in_da.assign_coords(index=["a", "b", "c"]) + actual = cfxr.shapely_to_cf(in_da) + xr.testing.assert_identical(actual, expected.assign_coords(index=["a", "b", "c"])) + + +@requires_shapely +def test_shapely_to_cf_for_lines_as_sequence(geometry_line_ds): + expected, in_da = geometry_line_ds + actual = cfxr.shapely_to_cf(in_da.values) + xr.testing.assert_identical(actual, expected) + + +@requires_shapely +def test_shapely_to_cf_for_lines_without_multilines(geometry_line_without_multilines_ds): + expected, in_da = geometry_line_without_multilines_ds + actual = cfxr.shapely_to_cf(in_da) + xr.testing.assert_identical(actual, expected) + + @requires_shapely def test_shapely_to_cf_errors(): from shapely.geometry import Point, Polygon @@ -169,26 +233,29 @@ def test_cf_to_shapely(geometry_ds): @requires_shapely -def test_cf_to_shapely_for_line(geometry_line_ds): +def test_cf_to_shapely_for_lines(geometry_line_ds): in_ds, expected = geometry_line_ds actual = cfxr.cf_to_shapely(in_ds) assert actual.dims == ("index",) - xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected) @requires_shapely -def test_shapely_to_cf_for_line(geometry_line_ds): - expected, in_da = geometry_line_ds - - actual = cfxr.shapely_to_cf(in_da) - +def test_cf_to_shapely_for_lines_without_multilines(geometry_line_without_multilines_ds): + in_ds, expected = geometry_line_without_multilines_ds + actual = cfxr.cf_to_shapely(in_ds) + assert actual.dims == ("index",) xr.testing.assert_identical(actual, expected) + in_ds = in_ds.assign_coords(index=["b", "c"]) + actual = cfxr.cf_to_shapely(in_ds) + assert actual.dims == ("index",) + xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected.assign_coords(index=["b", "c"])) + @requires_shapely -def test_cf_to_shapely_errors(geometry_ds): +def test_cf_to_shapely_errors(geometry_ds, geometry_line_ds): in_ds, _ = geometry_ds in_ds.geometry_container.attrs["geometry_type"] = "polygon" with pytest.raises(NotImplementedError, match="Polygon geometry"): @@ -198,6 +265,11 @@ def test_cf_to_shapely_errors(geometry_ds): with pytest.raises(ValueError, match="Valid CF geometry types are "): cfxr.cf_to_shapely(in_ds) + in_ds, _ = geometry_line_ds + del in_ds.geometry_container.attrs["node_count"] + with pytest.raises(ValueError, match="'node_count' must be provided"): + cfxr.cf_to_shapely(in_ds) + @requires_shapely def test_reshape_unique_geometries(geometry_ds): From f9f00cd6456142deaf7d389967399fb5be70367d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 1 Dec 2023 17:48:17 +0000 Subject: [PATCH 12/12] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- cf_xarray/tests/test_geometry.py | 23 +++++++++++++---------- 1 file changed, 13 insertions(+), 10 deletions(-) diff --git a/cf_xarray/tests/test_geometry.py b/cf_xarray/tests/test_geometry.py index c9140df2..bcb521b4 100644 --- a/cf_xarray/tests/test_geometry.py +++ b/cf_xarray/tests/test_geometry.py @@ -48,6 +48,7 @@ def geometry_ds(): return cf_ds, shp_ds + @pytest.fixture def geometry_line_ds(): from shapely.geometry import LineString, MultiLineString @@ -89,6 +90,7 @@ def geometry_line_ds(): return cf_ds, shp_da + @pytest.fixture def geometry_line_without_multilines_ds(): from shapely.geometry import LineString @@ -104,12 +106,8 @@ def geometry_line_without_multilines_ds(): shp_da = xr.DataArray(geoms, dims=("index",), name="geometry") cf_ds = ds.assign( - x=xr.DataArray( - [0, 1, 1, 1.0, 2.0, 1.7], dims=("node",), attrs={"axis": "X"} - ), - y=xr.DataArray( - [0, 0, 1, 1.0, 2.0, 9.5], dims=("node",), attrs={"axis": "Y"} - ), + x=xr.DataArray([0, 1, 1, 1.0, 2.0, 1.7], dims=("node",), attrs={"axis": "X"}), + y=xr.DataArray([0, 0, 1, 1.0, 2.0, 9.5], dims=("node",), attrs={"axis": "Y"}), node_count=xr.DataArray([3, 3], dims=("segment",)), crd_x=xr.DataArray([0.0, 1.0], dims=("index",), attrs={"nodes": "x"}), crd_y=xr.DataArray([0.0, 1.0], dims=("index",), attrs={"nodes": "y"}), @@ -128,7 +126,6 @@ def geometry_line_without_multilines_ds(): return cf_ds, shp_da - @requires_shapely def test_shapely_to_cf(geometry_ds): from shapely.geometry import Point @@ -187,7 +184,9 @@ def test_shapely_to_cf_for_lines_as_sequence(geometry_line_ds): @requires_shapely -def test_shapely_to_cf_for_lines_without_multilines(geometry_line_without_multilines_ds): +def test_shapely_to_cf_for_lines_without_multilines( + geometry_line_without_multilines_ds, +): expected, in_da = geometry_line_without_multilines_ds actual = cfxr.shapely_to_cf(in_da) xr.testing.assert_identical(actual, expected) @@ -242,7 +241,9 @@ def test_cf_to_shapely_for_lines(geometry_line_ds): @requires_shapely -def test_cf_to_shapely_for_lines_without_multilines(geometry_line_without_multilines_ds): +def test_cf_to_shapely_for_lines_without_multilines( + geometry_line_without_multilines_ds, +): in_ds, expected = geometry_line_without_multilines_ds actual = cfxr.cf_to_shapely(in_ds) assert actual.dims == ("index",) @@ -251,7 +252,9 @@ def test_cf_to_shapely_for_lines_without_multilines(geometry_line_without_multil in_ds = in_ds.assign_coords(index=["b", "c"]) actual = cfxr.cf_to_shapely(in_ds) assert actual.dims == ("index",) - xr.testing.assert_identical(actual.drop_vars(["crd_x", "crd_y"]), expected.assign_coords(index=["b", "c"])) + xr.testing.assert_identical( + actual.drop_vars(["crd_x", "crd_y"]), expected.assign_coords(index=["b", "c"]) + ) @requires_shapely