diff --git a/vllm/config.py b/vllm/config.py
index b51f9783008b..b0ed88cb7f42 100644
--- a/vllm/config.py
+++ b/vllm/config.py
@@ -1015,11 +1015,6 @@ def _verify_args(self) -> None:
             raise ValueError(
                 "GPU memory utilization must be less than 1.0. Got "
                 f"{self.gpu_memory_utilization}.")
-        from vllm.platforms import current_platform
-        if (current_platform.is_cuda() and self.block_size is not None
-                and self.block_size > 32):
-            raise ValueError("CUDA Paged Attention kernel only supports "
-                             f"block sizes up to 32. Got {self.block_size}.")
 
     def _verify_cache_dtype(self) -> None:
         if self.cache_dtype == "auto":
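
For context, this hunk removes the config-level cap that rejected KV-cache block sizes above 32 on CUDA in `CacheConfig._verify_args`; support for a given block size is presumably left to the individual attention backends to validate. A minimal before/after sketch of the observable effect, assuming a `CacheConfig` constructor taking `block_size`, `gpu_memory_utilization`, `swap_space`, and `cache_dtype` (names taken from the surrounding file; the exact signature may differ by vLLM version):

```python
from vllm.config import CacheConfig

# Before this change, on a CUDA platform, block_size > 32 failed validation:
#   ValueError: CUDA Paged Attention kernel only supports block sizes
#   up to 32. Got 64.
# After this change, the value passes the config-time check; whether a
# particular attention backend actually supports it is checked elsewhere.
cfg = CacheConfig(
    block_size=64,                # previously rejected on CUDA
    gpu_memory_utilization=0.9,   # must stay below 1.0 (that check is kept)
    swap_space=4,                 # CPU swap space, in GiB
    cache_dtype="auto",           # use the model's dtype for the KV cache
)
print(cfg.block_size)  # -> 64
```

Note that the `gpu_memory_utilization < 1.0` check immediately above the removed block is untouched, so this change only widens the accepted range of `block_size`.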