diff --git a/vllm/v1/executor/uniproc_executor.py b/vllm/v1/executor/uniproc_executor.py
index be058318de58..9b7b25196ff5 100644
--- a/vllm/v1/executor/uniproc_executor.py
+++ b/vllm/v1/executor/uniproc_executor.py
@@ -70,6 +70,7 @@ def execute_model(
         scheduler_output,
     ) -> ModelRunnerOutput:
         output = self.worker.execute_model(scheduler_output)
+        assert output is not None
         return output
 
     def profile(self, is_start: bool = True):
diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py
index e83bce428355..eb2f39e00a99 100644
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -199,7 +199,7 @@ def compile_or_warm_up_model(self) -> None:
     def execute_model(
         self,
         scheduler_output: "SchedulerOutput",
-    ) -> ModelRunnerOutput:
+    ) -> Optional[ModelRunnerOutput]:
         output = self.model_runner.execute_model(scheduler_output)
         return output if self.rank == 0 else None
 