Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 3 additions & 2 deletions tensorrt_llm/serve/openai_disagg_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import copy
import os
import signal
import traceback
from contextlib import asynccontextmanager
from http import HTTPStatus
from typing import List, Optional, Type, Union
Expand Down Expand Up @@ -168,12 +169,12 @@ async def openai_chat_completion(self, req: ChatCompletionRequest) -> Response:

async def _handle_exception(self, exception):
    """Translate an exception raised while serving a request into the right action.

    Dispatch, by exception type:
      * ``CppExecutorError`` — unrecoverable executor failure: log the full
        traceback, then raise ``SIGINT`` so the server process shuts down.
      * ``HTTPException`` — re-raise unchanged so the framework returns the
        original status code to the client.
      * anything else — log the full traceback and surface a generic 500.

    NOTE(review): the source viewed here is a rendered diff; the duplicated
    ``logger.error(exception)`` lines were the pre-change side of the diff
    and are dropped, keeping only the post-change
    ``logger.error(traceback.format_exc())`` calls.
    """
    if isinstance(exception, CppExecutorError):
        # Log the full traceback (not just the message) for postmortem debugging.
        logger.error(traceback.format_exc())
        signal.raise_signal(signal.SIGINT)
    elif isinstance(exception, HTTPException):
        raise exception  # Re-raise HTTP exceptions properly
    else:
        logger.error(traceback.format_exc())
        raise HTTPException(status_code=500, detail=f"Internal server error {str(exception)}")

async def _send_context_request(self, ctx_server: str, ctx_req: Union[CompletionRequest, ChatCompletionRequest]):
Expand Down
5 changes: 4 additions & 1 deletion tensorrt_llm/serve/openai_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,9 +310,11 @@ async def create_chat_response(
response = await create_chat_response(promise, postproc_params, disaggregated_params)
return JSONResponse(content=response.model_dump())
except CppExecutorError:
logger.error(traceback.format_exc())
# If internal executor error is raised, shutdown the server
signal.raise_signal(signal.SIGINT)
except Exception as e:
logger.error(traceback.format_exc())
return self.create_error_response(str(e))

async def openai_completion(self, request: CompletionRequest, raw_request: Request) -> Response:
Expand Down Expand Up @@ -436,10 +438,11 @@ async def create_completion_response(
generator, disaggregated_params)
return JSONResponse(content=response.model_dump())
except CppExecutorError:
logger.error(traceback.format_exc())
# If internal executor error is raised, shutdown the server
signal.raise_signal(signal.SIGINT)
except Exception as e:
traceback.print_exc()
logger.error(traceback.format_exc())
return self.create_error_response(str(e))

async def __call__(self, host, port):
Expand Down