56
56
PoolingChatRequest ,
57
57
PoolingCompletionRequest ,
58
58
PoolingRequest , PoolingResponse ,
59
+ RerankRequest , RerankResponse ,
59
60
ScoreRequest , ScoreResponse ,
60
61
TokenizeRequest ,
61
62
TokenizeResponse ,
68
69
from vllm .entrypoints .openai .serving_models import (BaseModelPath ,
69
70
OpenAIServingModels )
70
71
from vllm .entrypoints .openai .serving_pooling import OpenAIServingPooling
72
+ from vllm .entrypoints .openai .serving_rerank import JinaAIServingRerank
71
73
from vllm .entrypoints .openai .serving_score import OpenAIServingScores
72
74
from vllm .entrypoints .openai .serving_tokenization import (
73
75
OpenAIServingTokenization )
@@ -306,6 +308,10 @@ def score(request: Request) -> Optional[OpenAIServingScores]:
306
308
return request .app .state .openai_serving_scores
307
309
308
310
311
def rerank(request: Request) -> Optional[JinaAIServingRerank]:
    """Return the rerank handler stored on the app state (None if unset)."""
    handler = request.app.state.jinaai_serving_reranking
    return handler
309
315
def tokenization(request: Request) -> OpenAIServingTokenization:
    """Return the tokenization handler stored on the app state."""
    serving_tokenization = request.app.state.openai_serving_tokenization
    return serving_tokenization
311
317
@@ -502,6 +508,40 @@ async def create_score_v1(request: ScoreRequest, raw_request: Request):
502
508
return await create_score (request , raw_request )
503
509
504
510
511
@router.post("/rerank")
@with_cancellation
async def do_rerank(request: RerankRequest, raw_request: Request):
    """Serve a JinaAI-style rerank request.

    Responds with an error payload when no rerank handler is configured or
    when the handler itself reports an error; otherwise returns the rerank
    result as JSON.
    """
    handler = rerank(raw_request)
    if handler is None:
        # No rerank-capable model is being served.
        return base(raw_request).create_error_response(
            message="The model does not support Rerank (Score) API")

    result = await handler.do_rerank(request, raw_request)
    if isinstance(result, RerankResponse):
        return JSONResponse(content=result.model_dump())
    if isinstance(result, ErrorResponse):
        return JSONResponse(content=result.model_dump(),
                            status_code=result.code)

    # Exhaustiveness check: the handler must return one of the types above.
    assert_never(result)
526
+
527
+
528
+ @router .post ("/v1/rerank" )
529
+ @with_cancellation
530
+ async def do_rerank_v1 (request : RerankRequest , raw_request : Request ):
531
+ logger .warning (
532
+ "To indicate that the rerank API is not part of the standard OpenAI"
533
+ " API, we have located it at `/rerank`. Please update your client"
534
+ "accordingly. (Note: Conforms to JinaAI rerank API)" )
535
+
536
+ return await do_rerank (request , raw_request )
537
+
538
+
539
+ @router .post ("/v2/rerank" )
540
+ @with_cancellation
541
+ async def do_rerank_v2 (request : RerankRequest , raw_request : Request ):
542
+ return await do_rerank (request , raw_request )
543
+
544
+
505
545
TASK_HANDLERS : Dict [str , Dict [str , tuple ]] = {
506
546
"generate" : {
507
547
"messages" : (ChatCompletionRequest , create_chat_completion ),
@@ -512,7 +552,10 @@ async def create_score_v1(request: ScoreRequest, raw_request: Request):
512
552
"default" : (EmbeddingCompletionRequest , create_embedding ),
513
553
},
514
554
"score" : {
515
- "default" : (ScoreRequest , create_score ),
555
+ "default" : (RerankRequest , do_rerank )
556
+ },
557
+ "rerank" : {
558
+ "default" : (RerankRequest , do_rerank )
516
559
},
517
560
"reward" : {
518
561
"messages" : (PoolingChatRequest , create_pooling ),
@@ -759,6 +802,12 @@ async def init_app_state(
759
802
state .openai_serving_models ,
760
803
request_logger = request_logger
761
804
) if model_config .task == "score" else None
805
+ state .jinaai_serving_reranking = JinaAIServingRerank (
806
+ engine_client ,
807
+ model_config ,
808
+ state .openai_serving_models ,
809
+ request_logger = request_logger
810
+ ) if model_config .task == "score" else None
762
811
state .openai_serving_tokenization = OpenAIServingTokenization (
763
812
engine_client ,
764
813
model_config ,
0 commit comments