From 229c5ca0abb07acc06973d34aadf94186b415652 Mon Sep 17 00:00:00 2001 From: Evgenii_Kazannik Date: Fri, 16 May 2025 14:29:39 +0200 Subject: [PATCH 1/3] Update specification for Hugging Face rerank --- package.json | 2 +- specification/inference/_types/CommonTypes.ts | 1 + .../put_hugging_face/PutHuggingFaceRequest.ts | 11 ++++++++ .../PutHuggingFaceRequestExample2.yaml | 15 +++++++++++ .../request/RerankRequestExample2.yaml | 11 ++++++++ .../request/RerankRequestExample3.yaml | 11 ++++++++ .../response/RerankResponseExample2.yaml | 18 +++++++++++++ .../response/RerankResponseExample3.yaml | 25 +++++++++++++++++++ 8 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample2.yaml create mode 100644 specification/inference/rerank/examples/request/RerankRequestExample2.yaml create mode 100644 specification/inference/rerank/examples/request/RerankRequestExample3.yaml create mode 100644 specification/inference/rerank/examples/response/RerankResponseExample2.yaml create mode 100644 specification/inference/rerank/examples/response/RerankResponseExample3.yaml diff --git a/package.json b/package.json index 65aafaa259..3284efbed5 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "dependencies": { - "@redocly/cli": "^1.34.1", + "@redocly/cli": "^1.34.3", "@stoplight/spectral-cli": "^6.14.2" } } diff --git a/specification/inference/_types/CommonTypes.ts b/specification/inference/_types/CommonTypes.ts index d99ff4b33c..ac5ff2bf2b 100644 --- a/specification/inference/_types/CommonTypes.ts +++ b/specification/inference/_types/CommonTypes.ts @@ -892,6 +892,7 @@ export class HuggingFaceServiceSettings { } export enum HuggingFaceTaskType { + rerank, text_embedding } diff --git a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts index 8229d3c32e..8fb6d2981a 100644 --- 
a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts +++ b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts @@ -44,6 +44,17 @@ import { Id } from '@_types/common' * * `e5-small-v2` * * `multilingual-e5-base` * * `multilingual-e5-small` + * + * For Elastic's `rerank` task: + * The selected model must support the `sentence-ranking` task and expose OpenAI API. + * HuggingFace supports only dedicated (not serverless) endpoints for `Rerank` so far. + * After the endpoint is initialized, copy the full endpoint URL for use. + * Must include task_settings. + * Tested models for `rerank` task: + * + * * `bge-reranker-base` + * * `jina-reranker-v1-turbo-en-GGUF` + * * @rest_spec_name inference.put_hugging_face * @availability stack since=8.12.0 stability=stable visibility=public * @availability serverless stability=stable visibility=public diff --git a/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample2.yaml b/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample2.yaml new file mode 100644 index 0000000000..e34223fc5f --- /dev/null +++ b/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample2.yaml @@ -0,0 +1,15 @@ +summary: A rerank task +description: Run `PUT _inference/rerank/hugging-face-rerank` to create an inference endpoint that performs a `rerank` task type. 
+# method_request: "PUT _inference/rerank/hugging-face-rerank" +# type: "request" +value: |- + { + "service": "hugging_face", + "service_settings": { + "api_key": "hugging-face-access-token", + "url": "url-endpoint" + }, + "task_settings": { + "return_text": true + } + } diff --git a/specification/inference/rerank/examples/request/RerankRequestExample2.yaml b/specification/inference/rerank/examples/request/RerankRequestExample2.yaml new file mode 100644 index 0000000000..4489ae9045 --- /dev/null +++ b/specification/inference/rerank/examples/request/RerankRequestExample2.yaml @@ -0,0 +1,11 @@ +summary: Rerank task +description: Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face +# method_request: "POST _inference/rerank/bge-reranker-base-mkn" +# type: "request" +value: |- + { + "input": ["luke", "like", "leia", "chewy","r2d2", "star", "wars"], + "query": "star wars main character", + "return_documents": false, + "top_n": 2 + } diff --git a/specification/inference/rerank/examples/request/RerankRequestExample3.yaml b/specification/inference/rerank/examples/request/RerankRequestExample3.yaml new file mode 100644 index 0000000000..f7cca4324d --- /dev/null +++ b/specification/inference/rerank/examples/request/RerankRequestExample3.yaml @@ -0,0 +1,11 @@ +summary: Rerank task +description: Run `POST _inference/rerank/bge-reranker-base-mkn` to perform reranking on the example input via Hugging Face +# method_request: "POST _inference/rerank/bge-reranker-base-mkn" +# type: "request" +value: |- + { + "input": ["luke", "like", "leia", "chewy","r2d2", "star", "wars"], + "query": "star wars main character", + "return_documents": true, + "top_n": 3 + } diff --git a/specification/inference/rerank/examples/response/RerankResponseExample2.yaml b/specification/inference/rerank/examples/response/RerankResponseExample2.yaml new file mode 100644 index 0000000000..98af71f40a --- /dev/null +++ 
b/specification/inference/rerank/examples/response/RerankResponseExample2.yaml @@ -0,0 +1,18 @@ +summary: Rerank task +description: > + A successful response from `POST _inference/rerank/bge-reranker-base-mkn`. +# type: "response" +# response_code: +value: |- + { + "rerank": [ + { + "index": 6, + "relevance_score": 0.50955844 + }, + { + "index": 5, + "relevance_score": 0.084341794 + } + ] + } diff --git a/specification/inference/rerank/examples/response/RerankResponseExample3.yaml b/specification/inference/rerank/examples/response/RerankResponseExample3.yaml new file mode 100644 index 0000000000..a8790df956 --- /dev/null +++ b/specification/inference/rerank/examples/response/RerankResponseExample3.yaml @@ -0,0 +1,25 @@ +summary: Rerank task +description: > + A successful response from `POST _inference/rerank/bge-reranker-base-mkn`. +# type: "response" +# response_code: +value: |- + { + "rerank": [ + { + "index": 6, + "relevance_score": 0.50955844, + "text": "wars" + }, + { + "index": 5, + "relevance_score": 0.084341794, + "text": "star" + }, + { + "index": 3, + "relevance_score": 0.004520818, + "text": "chewy" + } + ] + } From a408ec180854c6f0f35e278145400492ff3f385b Mon Sep 17 00:00:00 2001 From: Evgenii_Kazannik Date: Wed, 21 May 2025 16:05:10 +0200 Subject: [PATCH 2/3] Update PutHuggingFaceRequest ts - task settings optional --- .../inference/put_hugging_face/PutHuggingFaceRequest.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts index 8fb6d2981a..0f00ec1119 100644 --- a/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts +++ b/specification/inference/put_hugging_face/PutHuggingFaceRequest.ts @@ -49,7 +49,7 @@ import { Id } from '@_types/common' * The selected model must support the `sentence-ranking` task and expose OpenAI API. 
* HuggingFace supports only dedicated (not serverless) endpoints for `Rerank` so far. * After the endpoint is initialized, copy the full endpoint URL for use. - * Must include task_settings. + * Tested models for `rerank` task: * * * `bge-reranker-base` From fcc897e47f862f0fe1b2de677010d13499361b98 Mon Sep 17 00:00:00 2001 From: Evgenii_Kazannik Date: Thu, 22 May 2025 04:39:38 +0200 Subject: [PATCH 3/3] Update PutHuggingFaceRequestExample2 yaml - task settings optional --- .../examples/request/PutHuggingFaceRequestExample2.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample2.yaml b/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample2.yaml index e34223fc5f..bc7d80d377 100644 --- a/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample2.yaml +++ b/specification/inference/put_hugging_face/examples/request/PutHuggingFaceRequestExample2.yaml @@ -10,6 +10,7 @@ value: |- "url": "url-endpoint" }, "task_settings": { - "return_text": true + "return_documents": true, + "top_n": 3 } }