vllm/utils.py (1 addition, 1 deletion)

@@ -757,7 +757,7 @@ def is_pin_memory_available() -> bool:
     return True


-class CudaMemoryProfiler:
+class DeviceMemoryProfiler:

     def __init__(self, device: Optional[torch.types.Device] = None):
         self.device = device
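For context only (not part of the diff): the renamed class is a small context manager that snapshots device memory on entry and exit so callers can report how much memory a block consumed. A minimal sketch of how such a profiler might look, assuming torch's CUDA memory-stats API and a consumed_memory attribute; everything beyond the two lines shown in the diff above is an assumption, not the actual vLLM implementation:

import gc
from typing import Optional

import torch


class DeviceMemoryProfiler:
    """Sketch: report device memory consumed inside a `with` block."""

    def __init__(self, device: Optional[torch.types.Device] = None):
        self.device = device

    def current_memory_usage(self) -> float:
        # Assumption: peak allocated CUDA memory is the measured quantity;
        # a truly device-agnostic version would dispatch on the device type.
        torch.cuda.reset_peak_memory_stats(self.device)
        return torch.cuda.max_memory_allocated(self.device)

    def __enter__(self):
        self.initial_memory = self.current_memory_usage()
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        self.final_memory = self.current_memory_usage()
        self.consumed_memory = self.final_memory - self.initial_memory
        gc.collect()  # release temporaries created inside the block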
vllm/worker/model_runner.py (2 additions, 2 deletions)

@@ -45,7 +45,7 @@
                                               LRUCacheWorkerPromptAdapterManager)
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
-from vllm.utils import (CudaMemoryProfiler, PyObjectCache, async_tensor_h2d,
+from vllm.utils import (DeviceMemoryProfiler, PyObjectCache, async_tensor_h2d,
                         flatten_2d_lists, is_hip, is_pin_memory_available,
                         supports_dynamo)
 from vllm.worker.model_runner_base import (

@@ -1012,7 +1012,7 @@ def __init__(

     def load_model(self) -> None:
         logger.info("Starting to load model %s...", self.model_config.model)
-        with CudaMemoryProfiler() as m:
+        with DeviceMemoryProfiler() as m:
             self.model = get_model(model_config=self.model_config,
                                    device_config=self.device_config,
                                    load_config=self.load_config,
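A usage sketch of how the measurement might be read back after the `with` block in load_model; the consumed_memory attribute and the log line are assumptions consistent with the sketch above, not lines shown in this diff:

        with DeviceMemoryProfiler() as m:
            self.model = get_model(model_config=self.model_config,
                                   device_config=self.device_config,
                                   load_config=self.load_config)

        # Assumed attribute set in __exit__: bytes consumed between entry and exit.
        self.model_memory_usage = m.consumed_memory
        logger.info("Loading model weights took %.4f GB",
                    self.model_memory_usage / float(2**30))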
vllm/worker/xpu_model_runner.py (2 additions, 2 deletions)

@@ -21,7 +21,7 @@
                              MultiModalInputs, MultiModalRegistry)
 from vllm.sampling_params import SamplingParams
 from vllm.sequence import IntermediateTensors, SequenceGroupMetadata
-from vllm.utils import CudaMemoryProfiler, make_tensor_with_pad
+from vllm.utils import DeviceMemoryProfiler, make_tensor_with_pad
 from vllm.worker.model_runner import AttentionMetadata, SamplingMetadata
 from vllm.worker.model_runner_base import (
     ModelRunnerBase, ModelRunnerInputBase, ModelRunnerInputBuilderBase,

@@ -391,7 +391,7 @@ def __init__(
         self.model: nn.Module  # Set after init_Model

     def load_model(self) -> None:
-        with CudaMemoryProfiler() as m:
+        with DeviceMemoryProfiler() as m:
             self.model = get_model(
                 model_config=self.model_config,
                 device_config=self.device_config,