Skip to content

Commit 55dd87e

Browse files
pedro-cf and jonhealy1 authored
Paginated search queries now don't return a token on the last page (#243)
**Related Issue(s):** - #242 **Merge dependency(ies):** - #241 **Description:** - Paginated search queries now don't return a token on the last page. - Made some fixes to the respective tests. In particular `test_pagination_token_idempotent` had an indentation issue - Improved `execute_search` to make use of `es_response["hits"]["total"]["value"]` **PR Checklist:** - [x] Code is formatted and linted (run `pre-commit run --all-files`) - [x] Tests pass (run `make test`) - [x] Documentation has been updated to reflect changes, if applicable - [x] Changes are added to the changelog --------- Co-authored-by: Jonathan Healy <[email protected]>
1 parent c5c96c9 commit 55dd87e

File tree

4 files changed

+56
-39
lines changed

4 files changed

+56
-39
lines changed

CHANGELOG.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
1515

1616
### Fixed
1717

18+
- Fixed issue where paginated search queries would return a `next_token` on the last page [#243](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/243)
1819
- Fixed issue where searches return an empty `links` array [#241](https://github.com/stac-utils/stac-fastapi-elasticsearch-opensearch/pull/241)
1920

2021
## [v2.4.0]

stac_fastapi/elasticsearch/stac_fastapi/elasticsearch/database_logic.py

Lines changed: 21 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@
88
import attr
99
from elasticsearch_dsl import Q, Search
1010

11+
import stac_fastapi.types.search
1112
from elasticsearch import exceptions, helpers # type: ignore
1213
from stac_fastapi.core.extensions import filter
1314
from stac_fastapi.core.serializers import CollectionSerializer, ItemSerializer
@@ -552,21 +553,26 @@ async def execute_search(
552553
NotFoundError: If the collections specified in `collection_ids` do not exist.
553554
"""
554555
search_after = None
556+
555557
if token:
556558
search_after = urlsafe_b64decode(token.encode()).decode().split(",")
557559

558560
query = search.query.to_dict() if search.query else None
559561

560562
index_param = indices(collection_ids)
561563

564+
max_result_window = stac_fastapi.types.search.Limit.le
565+
566+
size_limit = min(limit + 1, max_result_window)
567+
562568
search_task = asyncio.create_task(
563569
self.client.search(
564570
index=index_param,
565571
ignore_unavailable=ignore_unavailable,
566572
query=query,
567573
sort=sort or DEFAULT_SORT,
568574
search_after=search_after,
569-
size=limit,
575+
size=size_limit,
570576
)
571577
)
572578

@@ -584,24 +590,27 @@ async def execute_search(
584590
raise NotFoundError(f"Collections '{collection_ids}' do not exist")
585591

586592
hits = es_response["hits"]["hits"]
587-
items = (hit["_source"] for hit in hits)
593+
items = (hit["_source"] for hit in hits[:limit])
588594

589595
next_token = None
590-
if hits and (sort_array := hits[-1].get("sort")):
591-
next_token = urlsafe_b64encode(
592-
",".join([str(x) for x in sort_array]).encode()
593-
).decode()
594-
595-
# (1) count should not block returning results, so don't wait for it to be done
596-
# (2) don't cancel the task so that it will populate the ES cache for subsequent counts
597-
maybe_count = None
596+
if len(hits) > limit and limit < max_result_window:
597+
if hits and (sort_array := hits[limit - 1].get("sort")):
598+
next_token = urlsafe_b64encode(
599+
",".join([str(x) for x in sort_array]).encode()
600+
).decode()
601+
602+
matched = (
603+
es_response["hits"]["total"]["value"]
604+
if es_response["hits"]["total"]["relation"] == "eq"
605+
else None
606+
)
598607
if count_task.done():
599608
try:
600-
maybe_count = count_task.result().get("count")
609+
matched = count_task.result().get("count")
601610
except Exception as e:
602611
logger.error(f"Count task failed: {e}")
603612

604-
return items, maybe_count, next_token
613+
return items, matched, next_token
605614

606615
""" TRANSACTION LOGIC """
607616

stac_fastapi/opensearch/stac_fastapi/opensearch/database_logic.py

Lines changed: 25 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@
1111
from opensearchpy.helpers.query import Q
1212
from opensearchpy.helpers.search import Search
1313

14+
import stac_fastapi.types.search
1415
from stac_fastapi.core import serializers
1516
from stac_fastapi.core.extensions import filter
1617
from stac_fastapi.core.utilities import bbox2polygon
@@ -582,19 +583,28 @@ async def execute_search(
582583
query = search.query.to_dict() if search.query else None
583584
if query:
584585
search_body["query"] = query
586+
587+
search_after = None
588+
585589
if token:
586590
search_after = urlsafe_b64decode(token.encode()).decode().split(",")
591+
if search_after:
587592
search_body["search_after"] = search_after
593+
588594
search_body["sort"] = sort if sort else DEFAULT_SORT
589595

590596
index_param = indices(collection_ids)
591597

598+
max_result_window = stac_fastapi.types.search.Limit.le
599+
600+
size_limit = min(limit + 1, max_result_window)
601+
592602
search_task = asyncio.create_task(
593603
self.client.search(
594604
index=index_param,
595605
ignore_unavailable=ignore_unavailable,
596606
body=search_body,
597-
size=limit,
607+
size=size_limit,
598608
)
599609
)
600610

@@ -612,24 +622,27 @@ async def execute_search(
612622
raise NotFoundError(f"Collections '{collection_ids}' do not exist")
613623

614624
hits = es_response["hits"]["hits"]
615-
items = (hit["_source"] for hit in hits)
625+
items = (hit["_source"] for hit in hits[:limit])
616626

617627
next_token = None
618-
if hits and (sort_array := hits[-1].get("sort")):
619-
next_token = urlsafe_b64encode(
620-
",".join([str(x) for x in sort_array]).encode()
621-
).decode()
622-
623-
# (1) count should not block returning results, so don't wait for it to be done
624-
# (2) don't cancel the task so that it will populate the ES cache for subsequent counts
625-
maybe_count = None
628+
if len(hits) > limit and limit < max_result_window:
629+
if hits and (sort_array := hits[limit - 1].get("sort")):
630+
next_token = urlsafe_b64encode(
631+
",".join([str(x) for x in sort_array]).encode()
632+
).decode()
633+
634+
matched = (
635+
es_response["hits"]["total"]["value"]
636+
if es_response["hits"]["total"]["relation"] == "eq"
637+
else None
638+
)
626639
if count_task.done():
627640
try:
628-
maybe_count = count_task.result().get("count")
641+
matched = count_task.result().get("count")
629642
except Exception as e:
630643
logger.error(f"Count task failed: {e}")
631644

632-
return items, maybe_count, next_token
645+
return items, matched, next_token
633646

634647
""" TRANSACTION LOGIC """
635648

stac_fastapi/tests/resources/test_item.py

Lines changed: 9 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -492,12 +492,9 @@ async def test_item_search_temporal_window_timezone_get(app_client, ctx):
492492
"datetime": f"{datetime_to_str(item_date_before)}/{datetime_to_str(item_date_after)}",
493493
}
494494
resp = await app_client.get("/search", params=params)
495-
resp_json = resp.json()
496-
next_link = next(link for link in resp_json["links"] if link["rel"] == "next")[
497-
"href"
498-
]
499-
resp = await app_client.get(next_link)
500495
assert resp.status_code == 200
496+
resp_json = resp.json()
497+
assert resp_json["features"][0]["id"] == test_item["id"]
501498

502499

503500
@pytest.mark.asyncio
@@ -632,18 +629,17 @@ async def test_pagination_item_collection(app_client, ctx, txn_client):
632629
await create_item(txn_client, item=ctx.item)
633630
ids.append(ctx.item["id"])
634631

635-
# Paginate through all 6 items with a limit of 1 (expecting 7 requests)
632+
# Paginate through all 6 items with a limit of 1 (expecting 6 requests)
636633
page = await app_client.get(
637634
f"/collections/{ctx.item['collection']}/items", params={"limit": 1}
638635
)
639636

640637
item_ids = []
641-
idx = 0
642-
for idx in range(100):
638+
for idx in range(1, 100):
643639
page_data = page.json()
644640
next_link = list(filter(lambda link: link["rel"] == "next", page_data["links"]))
645641
if not next_link:
646-
assert not page_data["features"]
642+
assert idx == 6
647643
break
648644

649645
assert len(page_data["features"]) == 1
@@ -672,10 +668,8 @@ async def test_pagination_post(app_client, ctx, txn_client):
672668
# Paginate through all 5 items with a limit of 1 (expecting 5 requests)
673669
request_body = {"ids": ids, "limit": 1}
674670
page = await app_client.post("/search", json=request_body)
675-
idx = 0
676671
item_ids = []
677-
for _ in range(100):
678-
idx += 1
672+
for idx in range(1, 100):
679673
page_data = page.json()
680674
next_link = list(filter(lambda link: link["rel"] == "next", page_data["links"]))
681675
if not next_link:
@@ -688,7 +682,7 @@ async def test_pagination_post(app_client, ctx, txn_client):
688682
page = await app_client.post("/search", json=request_body)
689683

690684
# Our limit is 1, so we expect len(ids) number of requests before we run out of pages
691-
assert idx == len(ids) + 1
685+
assert idx == len(ids)
692686

693687
# Confirm we have paginated through all items
694688
assert not set(item_ids) - set(ids)
@@ -702,8 +696,8 @@ async def test_pagination_token_idempotent(app_client, ctx, txn_client):
702696
# Ingest 5 items
703697
for _ in range(5):
704698
ctx.item["id"] = str(uuid.uuid4())
705-
await create_item(txn_client, ctx.item)
706-
ids.append(ctx.item["id"])
699+
await create_item(txn_client, ctx.item)
700+
ids.append(ctx.item["id"])
707701

708702
page = await app_client.get("/search", params={"ids": ",".join(ids), "limit": 3})
709703
page_data = page.json()

0 commit comments

Comments
 (0)