Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 19 additions & 19 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

111 changes: 56 additions & 55 deletions python/rustac/rustac.pyi
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
"""The power of Rust for the Python STAC ecosystem."""

from collections.abc import AsyncIterator
from pathlib import Path
from typing import Any, AsyncIterator, Literal, Optional, Tuple
from typing import Any, Literal

import arro3.core

Expand Down Expand Up @@ -45,18 +46,18 @@ class DuckdbClient:
self,
href: str,
*,
ids: Optional[str | list[str]] = None,
collections: Optional[str | list[str]] = None,
intersects: Optional[str | dict[str, Any]] = None,
limit: Optional[int] = None,
offset: Optional[int] = None,
bbox: Optional[list[float]] = None,
datetime: Optional[str] = None,
include: Optional[str | list[str]] = None,
exclude: Optional[str | list[str]] = None,
sortby: Optional[str | list[str | dict[str, str]]] = None,
filter: Optional[str | dict[str, Any]] = None,
query: Optional[dict[str, Any]] = None,
ids: str | list[str] | None = None,
collections: str | list[str] | None = None,
intersects: str | dict[str, Any] | None = None,
limit: int | None = None,
offset: int | None = None,
bbox: list[float] | None = None,
datetime: str | None = None,
include: str | list[str] | None = None,
exclude: str | list[str] | None = None,
sortby: str | list[str | dict[str, str]] | None = None,
filter: str | dict[str, Any] | None = None,
query: dict[str, Any] | None = None,
**kwargs: str,
) -> list[dict[str, Any]]:
"""Search a stac-geoparquet file with duckdb, returning a list of items.
Expand Down Expand Up @@ -94,18 +95,18 @@ class DuckdbClient:
self,
href: str,
*,
ids: Optional[str | list[str]] = None,
collections: Optional[str | list[str]] = None,
intersects: Optional[str | dict[str, Any]] = None,
limit: Optional[int] = None,
offset: Optional[int] = None,
bbox: Optional[list[float]] = None,
datetime: Optional[str] = None,
include: Optional[str | list[str]] = None,
exclude: Optional[str | list[str]] = None,
sortby: Optional[str | list[str | dict[str, str]]] = None,
filter: Optional[str | dict[str, Any]] = None,
query: Optional[dict[str, Any]] = None,
ids: str | list[str] | None = None,
collections: str | list[str] | None = None,
intersects: str | dict[str, Any] | None = None,
limit: int | None = None,
offset: int | None = None,
bbox: list[float] | None = None,
datetime: str | None = None,
include: str | list[str] | None = None,
exclude: str | list[str] | None = None,
sortby: str | list[str | dict[str, str]] | None = None,
filter: str | dict[str, Any] | None = None,
query: dict[str, Any] | None = None,
**kwargs: str,
) -> arro3.core.Table | None:
"""Search a stac-geoparquet file with duckdb, returning an arrow table
Expand Down Expand Up @@ -179,7 +180,7 @@ def collection_from_id_and_items(id: str, items: list[Item]) -> Collection:
A STAC collection
"""

def migrate(value: dict[str, Any], version: Optional[str] = None) -> dict[str, Any]:
def migrate(value: dict[str, Any], version: str | None = None) -> dict[str, Any]:
"""
Migrates a STAC dictionary to another version.

Expand Down Expand Up @@ -264,19 +265,19 @@ def to_arrow(
async def search(
href: str,
*,
intersects: Optional[str | dict[str, Any]] = None,
ids: Optional[str | list[str]] = None,
collections: Optional[str | list[str]] = None,
max_items: Optional[int] = None,
limit: Optional[int] = None,
bbox: Optional[list[float]] = None,
datetime: Optional[str] = None,
include: Optional[str | list[str]] = None,
exclude: Optional[str | list[str]] = None,
sortby: Optional[str | list[str | dict[str, str]]] = None,
filter: Optional[str | dict[str, Any]] = None,
query: Optional[dict[str, Any]] = None,
use_duckdb: Optional[bool] = None,
intersects: str | dict[str, Any] | None = None,
ids: str | list[str] | None = None,
collections: str | list[str] | None = None,
max_items: int | None = None,
limit: int | None = None,
bbox: list[float] | None = None,
datetime: str | None = None,
include: str | list[str] | None = None,
exclude: str | list[str] | None = None,
sortby: str | list[str | dict[str, str]] | None = None,
filter: str | dict[str, Any] | None = None,
query: dict[str, Any] | None = None,
use_duckdb: bool | None = None,
**kwargs: str,
) -> list[dict[str, Any]]:
"""
Expand Down Expand Up @@ -333,21 +334,21 @@ async def search_to(
outfile: str,
href: str,
*,
intersects: Optional[str | dict[str, Any]] = None,
ids: Optional[str | list[str]] = None,
collections: Optional[str | list[str]] = None,
max_items: Optional[int] = None,
limit: Optional[int] = None,
bbox: Optional[list[float]] = None,
datetime: Optional[str] = None,
include: Optional[str | list[str]] = None,
exclude: Optional[str | list[str]] = None,
sortby: Optional[str | list[str | dict[str, str]]] = None,
filter: Optional[str | dict[str, Any]] = None,
query: Optional[dict[str, Any]] = None,
format: Optional[str] = None,
options: Optional[list[Tuple[str, str]]] = None,
use_duckdb: Optional[bool] = None,
intersects: str | dict[str, Any] | None = None,
ids: str | list[str] | None = None,
collections: str | list[str] | None = None,
max_items: int | None = None,
limit: int | None = None,
bbox: list[float] | None = None,
datetime: str | None = None,
include: str | list[str] | None = None,
exclude: str | list[str] | None = None,
sortby: str | list[str | dict[str, str]] | None = None,
filter: str | dict[str, Any] | None = None,
query: dict[str, Any] | None = None,
format: str | None = None,
store: ObjectStore | None = None,
use_duckdb: bool | None = None,
) -> int:
"""
Searches a STAC API server and saves the result to an output file.
Expand Down Expand Up @@ -385,7 +386,7 @@ async def search_to(
It is recommended to use filter instead, if possible.
format: The output format. If none, will be inferred from
the outfile extension, and if that fails will fall back to compact JSON.
options: Configuration values to pass to the object store backend.
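store: An optional [ObjectStore][]. If provided, the results are written
to outfile within that store; if None, outfile is written to directly.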
use_duckdb: Query with DuckDB. If None and the href has a
'parquet' or 'geoparquet' extension, will be set to True. Defaults
to None.
Expand Down
45 changes: 27 additions & 18 deletions src/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ use crate::{Error, Json, Result};
use geojson::Geometry;
use pyo3::prelude::*;
use pyo3::{Bound, FromPyObject, PyErr, PyResult, exceptions::PyValueError, types::PyDict};
use pyo3_object_store::AnyObjectStore;
use stac::Bbox;
use stac::Format;
use stac_api::{Fields, Filter, Items, Search, Sortby};
Expand Down Expand Up @@ -57,7 +58,7 @@ pub fn search<'py>(
}

#[pyfunction]
#[pyo3(signature = (outfile, href, *, intersects=None, ids=None, collections=None, max_items=None, limit=None, bbox=None, datetime=None, include=None, exclude=None, sortby=None, filter=None, query=None, format=None, options=None, use_duckdb=None, **kwargs))]
#[pyo3(signature = (outfile, href, *, intersects=None, ids=None, collections=None, max_items=None, limit=None, bbox=None, datetime=None, include=None, exclude=None, sortby=None, filter=None, query=None, format=None, store=None, use_duckdb=None, **kwargs))]
#[allow(clippy::too_many_arguments)]
pub fn search_to<'py>(
py: Python<'py>,
Expand All @@ -76,7 +77,7 @@ pub fn search_to<'py>(
filter: Option<StringOrDict>,
query: Option<Bound<'py, PyDict>>,
format: Option<String>,
options: Option<Vec<(String, String)>>,
store: Option<AnyObjectStore>,
use_duckdb: Option<bool>,
kwargs: Option<Bound<'_, PyDict>>,
) -> PyResult<Bound<'py, PyAny>> {
Expand Down Expand Up @@ -106,28 +107,36 @@ pub fn search_to<'py>(
pyo3_async_runtimes::tokio::future_into_py(py, async move {
let value = search_duckdb(href, search, max_items)?;
let count = value.items.len();
let _ = format
.put_opts(
outfile,
serde_json::to_value(value).map_err(Error::from)?,
options.unwrap_or_default(),
)
.await
.map_err(Error::from)?;
let value = serde_json::to_value(value).map_err(Error::from)?;
if let Some(store) = store {
format
.put_store(store.into_dyn(), outfile, value)
.await
.map_err(Error::from)?;
} else {
format
.put_opts(outfile, value, [] as [(&str, &str); 0])
.await
.map_err(Error::from)?;
}
Ok(count)
})
} else {
pyo3_async_runtimes::tokio::future_into_py(py, async move {
let value = search_api(href, search, max_items).await?;
let count = value.items.len();
let _ = format
.put_opts(
outfile,
serde_json::to_value(value).map_err(Error::from)?,
options.unwrap_or_default(),
)
.await
.map_err(Error::from)?;
let value = serde_json::to_value(value).map_err(Error::from)?;
if let Some(store) = store {
format
.put_store(store.into_dyn(), outfile, value)
.await
.map_err(Error::from)?;
} else {
format
.put_opts(outfile, value, [] as [(&str, &str); 0])
.await
.map_err(Error::from)?;
}
Ok(count)
})
}
Expand Down
11 changes: 11 additions & 0 deletions tests/test_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import pyarrow.parquet
import rustac
import stac_geoparquet.arrow
from rustac.store import MemoryStore


async def test_search() -> None:
Expand Down Expand Up @@ -64,3 +65,13 @@ async def test_sortby_list_of_dict() -> None:

async def test_proj_geometry(maxar_items: list[dict[str, Any]], tmp_path: Path) -> None:
    """Write the ``maxar_items`` fixture to a parquet file under ``tmp_path``.

    There are no assertions; the test passes as long as the write does not
    raise.
    """
    destination = tmp_path / "out.parquet"
    await rustac.write(str(destination), maxar_items)


async def test_search_to_store(data: Path) -> None:
    """Run ``search_to`` against a ``MemoryStore`` and read the result back.

    Searches the 100-item sentinel-2 parquet file, writes the output to
    ``items.json`` in the store, and checks both the returned count and the
    number of features read back from the same store.
    """
    memory_store = MemoryStore()
    source_href = str(data / "100-sentinel-2-items.parquet")

    written = await rustac.search_to("items.json", source_href, store=memory_store)
    assert written == 100

    # Read from the same store to confirm the items actually landed there.
    round_tripped = await rustac.read("items.json", store=memory_store)
    assert len(round_tripped["features"]) == 100
Loading