diff --git a/tests/lora/test_long_context.py b/tests/lora/test_long_context.py
index e7a34f2ced7e..756af145f165 100644
--- a/tests/lora/test_long_context.py
+++ b/tests/lora/test_long_context.py
@@ -1,4 +1,5 @@
 import ast
+import os
 from typing import List, Optional, Tuple
 
 import numpy as np
@@ -113,7 +114,10 @@ def lora_llm(long_context_infos):
         context_len_to_scaling_factor[info["context_length"]]
         for info in long_context_infos.values()
     ]
-
+    # Since dist_init sets CUDA_VISIBLE_DEVICES and affects LLM initialization,
+    # remove this env if it exists.
+    if "CUDA_VISIBLE_DEVICES" in os.environ:
+        del os.environ["CUDA_VISIBLE_DEVICES"]
     llm = vllm.LLM(
         "meta-llama/Llama-2-13b-chat-hf",
         enable_lora=True,
diff --git a/vllm/model_executor/layers/activation.py b/vllm/model_executor/layers/activation.py
index af7894b42c56..d55dc935d794 100644
--- a/vllm/model_executor/layers/activation.py
+++ b/vllm/model_executor/layers/activation.py
@@ -30,7 +30,7 @@ class FatreluAndMul(CustomOp):
     def __init__(self, threshold: float = 0.):
         super().__init__()
         self.threshold = threshold
-        if current_platform.is_cuda_alike() or current_platform.is_cpu():
+        if current_platform.is_cuda_alike():
             self.op = torch.ops._C.fatrelu_and_mul
 
     def forward_native(self, x: torch.Tensor) -> torch.Tensor:
@@ -100,7 +100,7 @@ class MulAndSilu(CustomOp):
 
     def __init__(self):
         super().__init__()
-        if current_platform.is_cuda_alike() or current_platform.is_cpu():
+        if current_platform.is_cuda_alike():
             self.op = torch.ops._C.mul_and_silu
         elif current_platform.is_xpu():
             from vllm._ipex_ops import ipex_ops