diff --git a/bigframes/bigquery/__init__.py b/bigframes/bigquery/__init__.py index a35e898c2d..c04350275d 100644 --- a/bigframes/bigquery/__init__.py +++ b/bigframes/bigquery/__init__.py @@ -27,7 +27,7 @@ unix_millis, unix_seconds, ) -from bigframes.bigquery._operations.geo import st_area, st_difference +from bigframes.bigquery._operations.geo import st_area, st_difference, st_intersection from bigframes.bigquery._operations.json import ( json_extract, json_extract_array, @@ -49,6 +49,7 @@ # geo ops "st_area", "st_difference", + "st_intersection", # json ops "json_set", "json_extract", diff --git a/bigframes/bigquery/_operations/geo.py b/bigframes/bigquery/_operations/geo.py index 6501c84b6f..f2d8b7b577 100644 --- a/bigframes/bigquery/_operations/geo.py +++ b/bigframes/bigquery/_operations/geo.py @@ -211,3 +211,95 @@ def st_difference( in other. """ return series._apply_binary_op(other, ops.geo_st_difference_op) + + +def st_intersection( + series: bigframes.series.Series, other: bigframes.series.Series +) -> bigframes.series.Series: + """ + Returns a `GEOGRAPHY` that represents the point set intersection of the two + input `GEOGRAPHYs`. Thus, every point in the intersection appears in both + `geography_1` and `geography_2`. + + .. note:: + BigQuery's Geography functions, like `st_intersection`, interpret the geometry + data type as a point set on the Earth's surface. A point set is a set + of points, lines, and polygons on the WGS84 reference spheroid, with + geodesic edges. See: https://cloud.google.com/bigquery/docs/geospatial-data + + **Examples:** + + >>> import bigframes as bpd + >>> import bigframes.bigquery as bbq + >>> import bigframes.geopandas + >>> from shapely.geometry import Polygon, LineString, Point + >>> bpd.options.display.progress_bar = None + + We can check two GeoSeries against each other, row by row. + + >>> s1 = bigframes.geopandas.GeoSeries( + ... [ + ... Polygon([(0, 0), (2, 2), (0, 2)]), + ... Polygon([(0, 0), (2, 2), (0, 2)]), + ... 
LineString([(0, 0), (2, 2)]), + ... LineString([(2, 0), (0, 2)]), + ... Point(0, 1), + ... ], + ... ) + >>> s2 = bigframes.geopandas.GeoSeries( + ... [ + ... Polygon([(0, 0), (1, 1), (0, 1)]), + ... LineString([(1, 0), (1, 3)]), + ... LineString([(2, 0), (0, 2)]), + ... Point(1, 1), + ... Point(0, 1), + ... ], + ... index=range(1, 6), + ... ) + + >>> s1 + 0 POLYGON ((0 0, 2 2, 0 2, 0 0)) + 1 POLYGON ((0 0, 2 2, 0 2, 0 0)) + 2 LINESTRING (0 0, 2 2) + 3 LINESTRING (2 0, 0 2) + 4 POINT (0 1) + dtype: geometry + + >>> s2 + 1 POLYGON ((0 0, 1 1, 0 1, 0 0)) + 2 LINESTRING (1 0, 1 3) + 3 LINESTRING (2 0, 0 2) + 4 POINT (1 1) + 5 POINT (0 1) + dtype: geometry + + >>> bbq.st_intersection(s1, s2) + 0 None + 1 POLYGON ((0 0, 0.99954 1, 0 1, 0 0)) + 2 POINT (1 1.00046) + 3 LINESTRING (2 0, 0 2) + 4 GEOMETRYCOLLECTION EMPTY + 5 None + dtype: geometry + + We can also do intersection of each geometry and a single shapely geometry: + + >>> bbq.st_intersection(s1, bigframes.geopandas.GeoSeries([Polygon([(0, 0), (1, 1), (0, 1)])])) + 0 POLYGON ((0 0, 0.99954 1, 0 1, 0 0)) + 1 None + 2 None + 3 None + 4 None + dtype: geometry + + Args: + other (GeoSeries or geometric object): + The Geoseries (elementwise) or geometric object to find the + intersection with. + + Returns: + bigframes.geopandas.GeoSeries: + The Geoseries (elementwise) of the intersection of points in + each aligned geometry with other. 
+ """ + return series._apply_binary_op(other, ops.geo_st_intersection_op) diff --git a/bigframes/core/compile/scalar_op_compiler.py b/bigframes/core/compile/scalar_op_compiler.py index ce0cd6c37a..0296762447 100644 --- a/bigframes/core/compile/scalar_op_compiler.py +++ b/bigframes/core/compile/scalar_op_compiler.py @@ -1037,6 +1037,13 @@ def geo_st_geogpoint_op_impl(x: ibis_types.Value, y: ibis_types.Value): ) +@scalar_op_compiler.register_binary_op(ops.geo_st_intersection_op, pass_op=False) +def geo_st_intersection_op_impl(x: ibis_types.Value, y: ibis_types.Value): + return typing.cast(ibis_types.GeoSpatialValue, x).intersection( + typing.cast(ibis_types.GeoSpatialValue, y) + ) + + @scalar_op_compiler.register_unary_op(ops.geo_x_op) def geo_x_op_impl(x: ibis_types.Value): return typing.cast(ibis_types.GeoSpatialValue, x).x() diff --git a/bigframes/geopandas/geoseries.py b/bigframes/geopandas/geoseries.py index 6c9cb77a08..c93a02deb8 100644 --- a/bigframes/geopandas/geoseries.py +++ b/bigframes/geopandas/geoseries.py @@ -96,3 +96,6 @@ def to_wkt(self: GeoSeries) -> bigframes.series.Series: def difference(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series: # type: ignore return self._apply_binary_op(other, ops.geo_st_difference_op) + + def intersection(self: GeoSeries, other: GeoSeries) -> bigframes.series.Series: # type: ignore + return self._apply_binary_op(other, ops.geo_st_intersection_op) diff --git a/bigframes/operations/__init__.py b/bigframes/operations/__init__.py index 2b4c9ca892..3e0ebd5089 100644 --- a/bigframes/operations/__init__.py +++ b/bigframes/operations/__init__.py @@ -93,6 +93,7 @@ geo_st_difference_op, geo_st_geogfromtext_op, geo_st_geogpoint_op, + geo_st_intersection_op, geo_x_op, geo_y_op, ) @@ -371,6 +372,7 @@ "geo_st_astext_op", "geo_st_geogfromtext_op", "geo_st_geogpoint_op", + "geo_st_intersection_op", "geo_x_op", "geo_y_op", # Numpy ops mapping diff --git a/bigframes/operations/geo_ops.py b/bigframes/operations/geo_ops.py 
index 3cf248bddb..6f988c2585 100644 --- a/bigframes/operations/geo_ops.py +++ b/bigframes/operations/geo_ops.py @@ -65,3 +65,7 @@ dtypes.is_geo_like, dtypes.FLOAT_DTYPE, description="geo-like" ), ) + +geo_st_intersection_op = base_ops.create_binary_op( + name="geo_st_intersection", type_signature=op_typing.BinaryGeo() +) diff --git a/notebooks/geo/geoseries.ipynb b/notebooks/geo/geoseries.ipynb index e7566f3fa6..953fc8f45f 100644 --- a/notebooks/geo/geoseries.ipynb +++ b/notebooks/geo/geoseries.ipynb @@ -56,10 +56,10 @@ "name": "stderr", "output_type": "stream", "text": [ - "/usr/local/google/home/arwas/src1/python-bigquery-dataframes/bigframes/session/_io/bigquery/read_gbq_table.py:280: DefaultIndexWarning: \u001b[93mTable 'bigquery-public-data.geo_us_boundaries.counties' is clustered\n", + "/usr/local/google/home/arwas/src1/python-bigquery-dataframes/bigframes/session/_io/bigquery/read_gbq_table.py:280: DefaultIndexWarning: Table 'bigquery-public-data.geo_us_boundaries.counties' is clustered\n", "and/or partitioned, but BigQuery DataFrames was not able to find a\n", "suitable index. 
To avoid this warning, set at least one of:\n", - "`index_col` or `filters`.\u001b[0m\n", + "`index_col` or `filters`.\n", " warnings.warn(msg, category=bfe.DefaultIndexWarning)\n" ] } @@ -106,11 +106,11 @@ { "data": { "text/plain": [ - "217 POINT (-86.80185 38.70532)\n", - "16 POINT (-83.47042 30.44723)\n", - "40 POINT (-94.33925 38.25722)\n", - "139 POINT (-78.88532 38.50758)\n", - "400 POINT (-95.6191 41.0337)\n", + "18 POINT (-83.91172 42.60253)\n", + "86 POINT (-90.13369 43.00102)\n", + "177 POINT (-117.23219 48.54382)\n", + "208 POINT (-84.50352 36.43523)\n", + "300 POINT (-91.85079 43.29299)\n", "Name: int_point_geom, dtype: geometry" ] }, @@ -139,11 +139,11 @@ { "data": { "text/plain": [ - "0 POINT (-86.80185 38.70532)\n", - "1 POINT (-83.47042 30.44723)\n", - "2 POINT (-94.33925 38.25722)\n", - "3 POINT (-78.88532 38.50758)\n", - "4 POINT (-95.6191 41.0337)\n", + "0 POINT (-83.91172 42.60253)\n", + "1 POINT (-90.13369 43.00102)\n", + "2 POINT (-117.23219 48.54382)\n", + "3 POINT (-84.50352 36.43523)\n", + "4 POINT (-91.85079 43.29299)\n", "dtype: geometry" ] }, @@ -188,11 +188,11 @@ { "data": { "text/plain": [ - "0 -86.801847\n", - "1 -83.470416\n", - "2 -94.339246\n", - "3 -78.885321\n", - "4 -95.619101\n", + "0 -83.911718\n", + "1 -90.133691\n", + "2 -117.232191\n", + "3 -84.50352\n", + "4 -91.850788\n", "dtype: Float64" ] }, @@ -220,11 +220,11 @@ { "data": { "text/plain": [ - "0 38.705322\n", - "1 30.447232\n", - "2 38.257217\n", - "3 38.507585\n", - "4 41.033703\n", + "0 42.602532\n", + "1 43.001021\n", + "2 48.543825\n", + "3 36.435234\n", + "4 43.292989\n", "dtype: Float64" ] }, @@ -370,11 +370,11 @@ { "data": { "text/plain": [ - "214 POLYGON ((-79.36704 34.96248, -79.36696 34.962...\n", - "161 POLYGON ((-89.08844 33.53252, -89.08843 33.532...\n", - "57 POLYGON ((-110.75069 35.50001, -110.75069 35.4...\n", - "46 POLYGON ((-94.6865 39.04405, -94.68764 39.0440...\n", - "260 POLYGON ((-100.53965 34.99391, -100.53966 34.9...\n", + "304 POLYGON 
((-88.69875 38.56219, -88.69876 38.562...\n", + "288 POLYGON ((-100.55792 46.24588, -100.5579 46.24...\n", + "42 POLYGON ((-98.09779 30.49744, -98.0978 30.4971...\n", + "775 POLYGON ((-90.33573 41.67043, -90.33592 41.669...\n", + "83 POLYGON ((-85.98402 35.6552, -85.98402 35.6551...\n", "Name: county_geom, dtype: geometry" ] }, @@ -403,11 +403,11 @@ { "data": { "text/plain": [ - "0 POLYGON ((-79.36704 34.96248, -79.36696 34.962...\n", - "1 POLYGON ((-89.08844 33.53252, -89.08843 33.532...\n", - "2 POLYGON ((-110.75069 35.50001, -110.75069 35.4...\n", - "3 POLYGON ((-94.6865 39.04405, -94.68764 39.0440...\n", - "4 POLYGON ((-100.53965 34.99391, -100.53966 34.9...\n", + "0 POLYGON ((-88.69875 38.56219, -88.69876 38.562...\n", + "1 POLYGON ((-100.55792 46.24588, -100.5579 46.24...\n", + "2 POLYGON ((-98.09779 30.49744, -98.0978 30.4971...\n", + "3 POLYGON ((-90.33573 41.67043, -90.33592 41.669...\n", + "4 POLYGON ((-85.98402 35.6552, -85.98402 35.6551...\n", "dtype: geometry" ] }, @@ -445,14 +445,14 @@ "outputs": [ { "ename": "NotImplementedError", - "evalue": "GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey. You are currently running BigFrames version 1.40.0.", + "evalue": "GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey. 
You are currently running BigFrames version 1.41.0.", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNotImplementedError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[13], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m \u001b[43mfive_geom\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43marea\u001b[49m\n", "File \u001b[0;32m~/src1/python-bigquery-dataframes/bigframes/geopandas/geoseries.py:67\u001b[0m, in \u001b[0;36mGeoSeries.area\u001b[0;34m(self, crs)\u001b[0m\n\u001b[1;32m 48\u001b[0m \u001b[38;5;129m@property\u001b[39m\n\u001b[1;32m 49\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m\u001b[38;5;250m \u001b[39m\u001b[38;5;21marea\u001b[39m(\u001b[38;5;28mself\u001b[39m, crs\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mNone\u001b[39;00m) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m bigframes\u001b[38;5;241m.\u001b[39mseries\u001b[38;5;241m.\u001b[39mSeries: \u001b[38;5;66;03m# type: ignore\u001b[39;00m\n\u001b[1;32m 50\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"Returns a Series containing the area of each geometry in the GeoSeries\u001b[39;00m\n\u001b[1;32m 51\u001b[0m \u001b[38;5;124;03m expressed in the units of the CRS.\u001b[39;00m\n\u001b[1;32m 52\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 65\u001b[0m \u001b[38;5;124;03m GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead.\u001b[39;00m\n\u001b[1;32m 66\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m---> 67\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mNotImplementedError\u001b[39;00m(\n\u001b[1;32m 68\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mGeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mconstants\u001b[38;5;241m.\u001b[39mFEEDBACK_LINK\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 69\u001b[0m )\n", - "\u001b[0;31mNotImplementedError\u001b[0m: GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey. You are currently running BigFrames version 1.40.0." + "\u001b[0;31mNotImplementedError\u001b[0m: GeoSeries.area is not supported. Use bigframes.bigquery.st_area(series), instead. Share your usecase with the BigQuery DataFrames team at the https://bit.ly/bigframes-feedback survey. You are currently running BigFrames version 1.41.0." ] } ], @@ -484,11 +484,11 @@ { "data": { "text/plain": [ - "0 1014426111.476457\n", - "1 1196896004.730286\n", - "2 25794235993.165642\n", - "3 1242002056.351685\n", - "4 2381217221.963739\n", + "0 1851741847.416806\n", + "1 4018075889.856168\n", + "2 2652483302.084653\n", + "3 1167209931.07698\n", + "4 1124055521.2818\n", "dtype: Float64" ] }, @@ -524,11 +524,11 @@ { "data": { "text/plain": [ - "0 POINT (-86.80185 38.70532)\n", - "1 POINT (-83.47042 30.44723)\n", - "2 POINT (-94.33925 38.25722)\n", - "3 POINT (-78.88532 38.50758)\n", - "4 POINT (-95.6191 41.0337)\n", + "0 POINT (-83.91172 42.60253)\n", + "1 POINT (-90.13369 43.00102)\n", + "2 POINT (-117.23219 48.54382)\n", + "3 POINT (-84.50352 36.43523)\n", + "4 POINT (-91.85079 43.29299)\n", "dtype: geometry" ] }, @@ -563,11 +563,11 @@ { "data": { "text/plain": [ - "0 POINT(-86.8018468 38.705322)\n", - "1 POINT(-83.4704159 30.4472325)\n", - "2 POINT(-94.3392459 38.2572171)\n", - "3 POINT(-78.8853213 38.5075848)\n", - "4 POINT(-95.619101 41.0337028)\n", + "0 POINT(-83.9117183 42.6025316)\n", + "1 POINT(-90.1336915 43.0010208)\n", + "2 POINT(-117.2321913 48.5438247)\n", + "3 POINT(-84.50352 36.435234)\n", + "4 POINT(-91.850788 43.2929889)\n", "dtype: string" ] }, @@ -603,11 
+603,11 @@ { "data": { "text/plain": [ - "0 POINT (-86.80185 38.70532)\n", - "1 POINT (-83.47042 30.44723)\n", - "2 POINT (-94.33925 38.25722)\n", - "3 POINT (-78.88532 38.50758)\n", - "4 POINT (-95.6191 41.0337)\n", + "0 POINT (-83.91172 42.60253)\n", + "1 POINT (-90.13369 43.00102)\n", + "2 POINT (-117.23219 48.54382)\n", + "3 POINT (-84.50352 36.43523)\n", + "4 POINT (-91.85079 43.29299)\n", "dtype: geometry" ] }, @@ -699,7 +699,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "#### Reuse `wkts_from_geo` and `geom_obj` to find the difference between the geometry objects" + "#### Reuse `five_geom` and `geom_obj` to find the difference between the geometry objects" ] }, { @@ -714,11 +714,11 @@ { "data": { "text/plain": [ - "0 POINT (-86.80185 38.70532)\n", - "1 POINT (-83.47042 30.44723)\n", - "2 GEOMETRYCOLLECTION EMPTY\n", - "3 POINT (-78.88532 38.50758)\n", - "4 POINT (-95.6191 41.0337)\n", + "0 POLYGON ((-88.69875 38.56219, -88.69876 38.562...\n", + "1 POLYGON ((-100.55792 46.24588, -100.5579 46.24...\n", + "2 GEOMETRYCOLLECTION EMPTY\n", + "3 POLYGON ((-90.33573 41.67043, -90.33592 41.669...\n", + "4 POLYGON ((-85.98402 35.6552, -85.98402 35.6551...\n", "dtype: geometry" ] }, @@ -728,7 +728,7 @@ } ], "source": [ - "wkts_from_geo.difference(geom_obj)" + "five_geom.difference(geom_obj)" ] }, { @@ -746,11 +746,11 @@ { "data": { "text/plain": [ - "0 POINT (-86.80185 38.70532)\n", - "1 None\n", - "2 None\n", - "3 None\n", - "4 None\n", + "0 POLYGON ((-88.69875 38.56219, -88.69876 38.562...\n", + "1 None\n", + "2 None\n", + "3 None\n", + "4 None\n", "dtype: geometry" ] }, @@ -760,7 +760,7 @@ } ], "source": [ - "wkts_from_geo.difference([Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])])" + "five_geom.difference([Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])])" ] }, { @@ -792,7 +792,7 @@ } ], "source": [ - "geom_obj.difference(geom_obj)" + "five_geom.difference(five_geom)" ] }, { @@ -810,11 +810,11 @@ { "data": { "text/plain": [ - "0 POINT (-86.80185 
38.70532)\n", - "1 POINT (-83.47042 30.44723)\n", - "2 GEOMETRYCOLLECTION EMPTY\n", - "3 POINT (-78.88532 38.50758)\n", - "4 POINT (-95.6191 41.0337)\n", + "0 POLYGON ((-88.69875 38.56219, -88.69876 38.562...\n", + "1 POLYGON ((-100.55792 46.24588, -100.5579 46.24...\n", + "2 GEOMETRYCOLLECTION EMPTY\n", + "3 POLYGON ((-90.33573 41.67043, -90.33592 41.669...\n", + "4 POLYGON ((-85.98402 35.6552, -85.98402 35.6551...\n", "dtype: geometry" ] }, @@ -824,7 +824,7 @@ } ], "source": [ - "bbq.st_difference(wkts_from_geo, geom_obj)" + "bbq.st_difference(five_geom, geom_obj)" ] }, { @@ -842,11 +842,11 @@ { "data": { "text/plain": [ - "0 POINT (-86.80185 38.70532)\n", - "1 None\n", - "2 None\n", - "3 None\n", - "4 None\n", + "0 POLYGON ((-88.69875 38.56219, -88.69876 38.562...\n", + "1 None\n", + "2 None\n", + "3 None\n", + "4 None\n", "dtype: geometry" ] }, @@ -856,7 +856,7 @@ } ], "source": [ - "bbq.st_difference(wkts_from_geo, [Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])])" + "bbq.st_difference(five_geom, [Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])])" ] }, { @@ -890,6 +890,141 @@ "source": [ "bbq.st_difference(geom_obj, geom_obj)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Use `GeoSeries.intersection()` to find the intersecting points in two geometry shapes " + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "#### Reuse `five_geom` and `geom_obj`" + ] + }, + { + "cell_type": "code", + "execution_count": 27, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 GEOMETRYCOLLECTION EMPTY\n", + "1 GEOMETRYCOLLECTION EMPTY\n", + "2 POLYGON ((-98.09779 30.49744, -98.0978 30.4971...\n", + "3 GEOMETRYCOLLECTION EMPTY\n", + "4 GEOMETRYCOLLECTION EMPTY\n", + "dtype: geometry" + ] + }, + "execution_count": 27, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "five_geom.intersection(geom_obj)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find 
the intersection between a `GeoSeries` and a single geometry shape." + ] + }, + { + "cell_type": "code", + "execution_count": 28, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 GEOMETRYCOLLECTION EMPTY\n", + "1 None\n", + "2 None\n", + "3 None\n", + "4 None\n", + "dtype: geometry" + ] + }, + "execution_count": 28, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "five_geom.intersection([Polygon([(0, 0), (10, 0), (10, 10), (0, 0)])])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## You can also use `BigQuery.st_intersection()` to find the intersecting points between two `GeoSeries`. See, https://cloud.google.com/bigquery/docs/reference/standard-sql/geography_functions#st_intersection" + ] + }, + { + "cell_type": "code", + "execution_count": 29, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 GEOMETRYCOLLECTION EMPTY\n", + "1 GEOMETRYCOLLECTION EMPTY\n", + "2 POLYGON ((-98.09779 30.49744, -98.0978 30.4971...\n", + "3 GEOMETRYCOLLECTION EMPTY\n", + "4 GEOMETRYCOLLECTION EMPTY\n", + "dtype: geometry" + ] + }, + "execution_count": 29, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bbq.st_intersection(five_geom, geom_obj)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Find the intersection between a `GeoSeries` and a single geometry shape." 
+ ] + }, + { + "cell_type": "code", + "execution_count": 30, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0 GEOMETRYCOLLECTION EMPTY\n", + "1 None\n", + "2 None\n", + "3 None\n", + "4 None\n", + "dtype: geometry" + ] + }, + "execution_count": 30, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "bbq.st_intersection(five_geom, [Polygon([(0, 0), (1, 0), (10, 10), (0, 0)])])" + ] } ], "metadata": { diff --git a/tests/system/small/bigquery/test_geo.py b/tests/system/small/bigquery/test_geo.py index 538099e80a..c842f1c99d 100644 --- a/tests/system/small/bigquery/test_geo.py +++ b/tests/system/small/bigquery/test_geo.py @@ -145,3 +145,96 @@ def test_geo_st_difference_with_similar_geometry_objects(): assert expected.iloc[0].equals(geobf_s_result.iloc[0]) assert expected.iloc[1].equals(geobf_s_result.iloc[1]) assert expected.iloc[2].equals(geobf_s_result.iloc[2]) + + +def test_geo_st_intersection_with_geometry_objects(): + data1 = [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), + Point(0, 1), + ] + + data2 = [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), + LineString([(2, 0), (0, 2)]), + ] + + geobf_s1 = bigframes.geopandas.GeoSeries(data=data1) + geobf_s2 = bigframes.geopandas.GeoSeries(data=data2) + geobf_s_result = bbq.st_intersection(geobf_s1, geobf_s2).to_pandas() + + expected = bigframes.series.Series( + [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), + GeometryCollection([]), + ], + index=[0, 1, 2], + dtype=geopandas.array.GeometryDtype(), + ).to_pandas() + + assert geobf_s_result.dtype == "geometry" + assert expected.iloc[0].equals(geobf_s_result.iloc[0]) + assert expected.iloc[1].equals(geobf_s_result.iloc[1]) + assert expected.iloc[2].equals(geobf_s_result.iloc[2]) + + +def test_geo_st_intersection_with_single_geometry_object(): + data1 = [ + Polygon([(0, 0), (10, 0), 
(10, 10), (0, 0)]), + Polygon([(4, 2), (6, 2), (8, 6), (4, 2)]), + Point(0, 1), + ] + + geobf_s1 = bigframes.geopandas.GeoSeries(data=data1) + geobf_s_result = bbq.st_intersection( + geobf_s1, + bigframes.geopandas.GeoSeries( + [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(1, 0), (0, 5), (0, 0), (1, 0)]), + ] + ), + ).to_pandas() + + expected = bigframes.series.Series( + [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + GeometryCollection([]), + None, + ], + index=[0, 1, 2], + dtype=geopandas.array.GeometryDtype(), + ).to_pandas() + + assert geobf_s_result.dtype == "geometry" + assert (expected.iloc[0]).equals(geobf_s_result.iloc[0]) + assert expected.iloc[1] == geobf_s_result.iloc[1] + assert expected.iloc[2] == geobf_s_result.iloc[2] + + +def test_geo_st_intersection_with_similar_geometry_objects(): + data1 = [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(0, 0), (1, 1), (0, 1)]), + Point(0, 1), + ] + + geobf_s1 = bigframes.geopandas.GeoSeries(data=data1) + geobf_s_result = bbq.st_intersection(geobf_s1, geobf_s1).to_pandas() + + expected = bigframes.series.Series( + [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(0, 0), (1, 1), (0, 1)]), + Point(0, 1), + ], + index=[0, 1, 2], + dtype=geopandas.array.GeometryDtype(), + ).to_pandas() + + assert geobf_s_result.dtype == "geometry" + assert expected.iloc[0].equals(geobf_s_result.iloc[0]) + assert expected.iloc[1].equals(geobf_s_result.iloc[1]) + assert expected.iloc[2].equals(geobf_s_result.iloc[2]) diff --git a/tests/system/small/geopandas/test_geoseries.py b/tests/system/small/geopandas/test_geoseries.py index fb101dea89..b075817b07 100644 --- a/tests/system/small/geopandas/test_geoseries.py +++ b/tests/system/small/geopandas/test_geoseries.py @@ -306,3 +306,96 @@ def test_geo_drop_duplicates(): pd.testing.assert_series_equal( geopandas.GeoSeries(bf_result), pd_result, check_index=False ) + + +# the GeoSeries and GeoPandas results are not always the same. 
+# For example, when the intersection between two polygons is empty, +# GeoPandas returns 'POLYGON EMPTY' while GeoSeries returns 'GeometryCollection([])'. +# This is why we are hard-coding the expected results. +def test_geo_intersection_with_geometry_objects(): + data1 = [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), + Point(0, 1), + ] + + data2 = [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), + LineString([(2, 0), (0, 2)]), + ] + + bf_s1 = bigframes.geopandas.GeoSeries(data=data1) + bf_s2 = bigframes.geopandas.GeoSeries(data=data2) + + bf_result = bf_s1.intersection(bf_s2).to_pandas() + + expected = bigframes.geopandas.GeoSeries( + [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(0, 0), (1, 1), (0, 1), (0, 0)]), + GeometryCollection([]), + ], + ).to_pandas() + + assert bf_result.dtype == "geometry" + assert expected.iloc[0].equals(bf_result.iloc[0]) + assert expected.iloc[1].equals(bf_result.iloc[1]) + assert expected.iloc[2].equals(bf_result.iloc[2]) + + +def test_geo_intersection_with_single_geometry_object(): + data1 = [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(4, 2), (6, 2), (8, 6), (4, 2)]), + Point(0, 1), + ] + + bf_s1 = bigframes.geopandas.GeoSeries(data=data1) + bf_result = bf_s1.intersection( + bigframes.geopandas.GeoSeries( + [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(1, 0), (0, 5), (0, 0), (1, 0)]), + ] + ), + ).to_pandas() + + expected = bigframes.geopandas.GeoSeries( + [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + GeometryCollection([]), + None, + ], + index=[0, 1, 2], + ).to_pandas() + + assert bf_result.dtype == "geometry" + assert (expected.iloc[0]).equals(bf_result.iloc[0]) + assert expected.iloc[1] == bf_result.iloc[1] + assert expected.iloc[2] == bf_result.iloc[2] + + +def test_geo_intersection_with_similar_geometry_objects(): + data1 = [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + 
Polygon([(0, 0), (1, 1), (0, 1)]), + Point(0, 1), + ] + + bf_s1 = bigframes.geopandas.GeoSeries(data=data1) + bf_result = bf_s1.intersection(bf_s1).to_pandas() + + expected = bigframes.geopandas.GeoSeries( + [ + Polygon([(0, 0), (10, 0), (10, 10), (0, 0)]), + Polygon([(0, 0), (1, 1), (0, 1)]), + Point(0, 1), + ], + index=[0, 1, 2], + ).to_pandas() + + assert bf_result.dtype == "geometry" + assert expected.iloc[0].equals(bf_result.iloc[0]) + assert expected.iloc[1].equals(bf_result.iloc[1]) + assert expected.iloc[2].equals(bf_result.iloc[2]) diff --git a/third_party/bigframes_vendored/geopandas/geoseries.py b/third_party/bigframes_vendored/geopandas/geoseries.py index 0d6b74671e..e75bdf81e0 100644 --- a/third_party/bigframes_vendored/geopandas/geoseries.py +++ b/third_party/bigframes_vendored/geopandas/geoseries.py @@ -347,3 +347,88 @@ def difference(self: GeoSeries, other: GeoSeries) -> GeoSeries: # type: ignore in other. """ raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE) + + def intersection(self: GeoSeries, other: GeoSeries) -> GeoSeries: # type: ignore + """ + Returns a GeoSeries of the intersection of points in each aligned + geometry with other. + + The operation works on a 1-to-1 row-wise manner. + + **Examples:** + + >>> import bigframes as bpd + >>> import bigframes.geopandas + >>> from shapely.geometry import Polygon, LineString, Point + >>> bpd.options.display.progress_bar = None + + We can check two GeoSeries against each other, row by row. + + >>> s1 = bigframes.geopandas.GeoSeries( + ... [ + ... Polygon([(0, 0), (2, 2), (0, 2)]), + ... Polygon([(0, 0), (2, 2), (0, 2)]), + ... LineString([(0, 0), (2, 2)]), + ... LineString([(2, 0), (0, 2)]), + ... Point(0, 1), + ... ], + ... ) + >>> s2 = bigframes.geopandas.GeoSeries( + ... [ + ... Polygon([(0, 0), (1, 1), (0, 1)]), + ... LineString([(1, 0), (1, 3)]), + ... LineString([(2, 0), (0, 2)]), + ... Point(1, 1), + ... Point(0, 1), + ... ], + ... index=range(1, 6), + ... 
) + + >>> s1 + 0 POLYGON ((0 0, 2 2, 0 2, 0 0)) + 1 POLYGON ((0 0, 2 2, 0 2, 0 0)) + 2 LINESTRING (0 0, 2 2) + 3 LINESTRING (2 0, 0 2) + 4 POINT (0 1) + dtype: geometry + + >>> s2 + 1 POLYGON ((0 0, 1 1, 0 1, 0 0)) + 2 LINESTRING (1 0, 1 3) + 3 LINESTRING (2 0, 0 2) + 4 POINT (1 1) + 5 POINT (0 1) + dtype: geometry + + >>> s1.intersection(s2) + 0 None + 1 POLYGON ((0 0, 0.99954 1, 0 1, 0 0)) + 2 POINT (1 1.00046) + 3 LINESTRING (2 0, 0 2) + 4 GEOMETRYCOLLECTION EMPTY + 5 None + dtype: geometry + + + We can also do intersection of each geometry and a single shapely geometry: + + >>> s1.intersection(bigframes.geopandas.GeoSeries([Polygon([(0, 0), (1, 1), (0, 1)])])) + 0 POLYGON ((0 0, 0.99954 1, 0 1, 0 0)) + 1 None + 2 None + 3 None + 4 None + dtype: geometry + + + Args: + other (GeoSeries or geometric object): + The Geoseries (elementwise) or geometric object to find the + intersection with. + + Returns: + bigframes.geopandas.GeoSeries: + The Geoseries (elementwise) of the intersection of points in + each aligned geometry with other. + """ + raise NotImplementedError(constants.ABSTRACT_METHOD_ERROR_MESSAGE)