37 changes: 29 additions & 8 deletions vllm/model_executor/layers/fused_moe/layer.py
@@ -372,10 +372,18 @@
         logical_replica_count: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
         if enable_eplb:
-            assert expert_load_view is not None
-            assert logical_to_physical_map is not None
-            assert logical_replica_count is not None
-            assert isinstance(layer, FusedMoE)
+            if expert_load_view is None:
+                raise ValueError(
+                    "expert_load_view must be provided when enable_eplb is True")
+            if logical_to_physical_map is None:
+                raise ValueError(
+                    "logical_to_physical_map must be provided when enable_eplb is True")
+            if logical_replica_count is None:
+                raise ValueError(
+                    "logical_replica_count must be provided when enable_eplb is True")
+            if not isinstance(layer, FusedMoE):
+                raise TypeError(
+                    f"Expected layer to be FusedMoE, but got {type(layer)}")

         return self.forward(
             x=x,

Check failure (GitHub Actions / pre-commit): Ruff E501 at vllm/model_executor/layers/fused_moe/layer.py:381:81: Line too long (87 > 80)
Check failure (GitHub Actions / pre-commit): Ruff E501 at vllm/model_executor/layers/fused_moe/layer.py:385:81: Line too long (85 > 80)
@@ -421,8 +429,21 @@
         logical_to_physical_map: Optional[torch.Tensor] = None,
         logical_replica_count: Optional[torch.Tensor] = None,
     ) -> torch.Tensor:
+        if enable_eplb:
+            if expert_load_view is None:
+                raise ValueError(
+                    "expert_load_view must be provided when enable_eplb is True")
+            if logical_to_physical_map is None:
+                raise ValueError(
+                    "logical_to_physical_map must be provided when enable_eplb is True")
+            if logical_replica_count is None:
+                raise ValueError(
+                    "logical_replica_count must be provided when enable_eplb is True")
+            if not isinstance(layer, FusedMoE):
+                raise TypeError(
+                    f"Expected layer to be FusedMoE, but got {type(layer)}")

         topk_weights, topk_ids = FusedMoE.select_experts(
             hidden_states=x,
             router_logits=router_logits,
             use_grouped_topk=use_grouped_topk,

Check failure (GitHub Actions / pre-commit): Ruff E501 at vllm/model_executor/layers/fused_moe/layer.py:442:81: Line too long (87 > 80)
Check failure (GitHub Actions / pre-commit): Ruff E501 at vllm/model_executor/layers/fused_moe/layer.py:446:81: Line too long (85 > 80)
@@ -1411,10 +1432,10 @@
                     >= chunk_size)
             assert (self.batched_router_logits.size(0)  # type: ignore
                     >= chunk_size)
-            staged_hidden_states = self.batched_hidden_states[:
-                                                              chunk_size, :]  # type: ignore
-            staged_router_logits = self.batched_router_logits[:
-                                                              chunk_size, :]  # type: ignore
+            staged_hidden_states = self.batched_hidden_states[
+                :chunk_size, :]  # type: ignore
+            staged_router_logits = self.batched_router_logits[
+                :chunk_size, :]  # type: ignore
             staged_hidden_states.copy_(hidden_states, non_blocking=True)
             staged_router_logits.copy_(router_logits, non_blocking=True)

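The two EPLB hunks make the same change in two methods: the old `assert` guards become explicit exceptions, so a missing `expert_load_view`, `logical_to_physical_map`, or `logical_replica_count` raises a `ValueError` that names the argument, and a layer that is not a `FusedMoE` raises a `TypeError`. Unlike `assert`, these checks also survive `python -O`. Since the four checks are duplicated verbatim at both call sites, they could be factored into a shared helper; the sketch below is one hypothetical way to do that (the helper name `_check_eplb_args` and its placement are not part of this PR):

```python
from typing import Optional

import torch

# If this helper lived in fused_moe/layer.py itself, the import below
# would be unnecessary (and would be circular).
from vllm.model_executor.layers.fused_moe.layer import FusedMoE


def _check_eplb_args(
    layer: torch.nn.Module,
    expert_load_view: Optional[torch.Tensor],
    logical_to_physical_map: Optional[torch.Tensor],
    logical_replica_count: Optional[torch.Tensor],
) -> None:
    """Hypothetical consolidation of the checks added in this diff.

    Raises ValueError/TypeError rather than asserting, so the checks
    are still enforced when the interpreter runs with -O.
    """
    required = {
        "expert_load_view": expert_load_view,
        "logical_to_physical_map": logical_to_physical_map,
        "logical_replica_count": logical_replica_count,
    }
    for name, value in required.items():
        if value is None:
            raise ValueError(
                f"{name} must be provided when enable_eplb is True")
    if not isinstance(layer, FusedMoE):
        raise TypeError(
            f"Expected layer to be FusedMoE, but got {type(layer)}")
```

Both call sites would then reduce to `if enable_eplb: _check_eplb_args(layer, expert_load_view, logical_to_physical_map, logical_replica_count)`, keeping the long error messages in one place.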
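The final hunk is purely a formatting change: `x[:chunk_size, :]` is the same slice whether the `:` ends the first line or starts the continuation line; the new layout just drops the deep column alignment. For context, here is a minimal, self-contained sketch of the staging pattern those lines implement; the buffer sizes and module-level names are illustrative, not taken from vLLM:

```python
import torch

# Illustrative sizes; vLLM sizes its staging buffers from the model and
# scheduler configuration.
MAX_CHUNK, HIDDEN, NUM_EXPERTS = 256, 128, 8

# Preallocated, fixed-size buffers reused across forward passes.
batched_hidden_states = torch.zeros(MAX_CHUNK, HIDDEN)
batched_router_logits = torch.zeros(MAX_CHUNK, NUM_EXPERTS)


def stage_chunk(hidden_states: torch.Tensor,
                router_logits: torch.Tensor) -> None:
    """Copy one chunk into the head of the preallocated buffers.

    [:chunk_size, :] returns a view, so copy_ writes directly into the
    preallocated storage; non_blocking=True lets a pinned-host-to-device
    copy overlap with other work when the buffers live on a GPU.
    """
    chunk_size = hidden_states.size(0)
    assert batched_hidden_states.size(0) >= chunk_size
    assert batched_router_logits.size(0) >= chunk_size
    staged_hidden_states = batched_hidden_states[:chunk_size, :]
    staged_router_logits = batched_router_logits[:chunk_size, :]
    staged_hidden_states.copy_(hidden_states, non_blocking=True)
    staged_router_logits.copy_(router_logits, non_blocking=True)


stage_chunk(torch.randn(64, HIDDEN), torch.randn(64, NUM_EXPERTS))
```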