```diff
@@ -16,8 +16,8 @@
 from ...utils import AuxStreamType, EventType, Fp4QuantizedTensor
 from .deep_ep_utils import buffer_pool, deep_ep_installed
 from .interface import MoE
-from .moe_backend import MoEBackend, MoEBackendSelection
 from .moe_load_balancer import get_moe_load_balancer
+from .ops import MoEOp, MoEOpSelector
 from .quantization import (DeepSeekFP8BlockScalesFusedMoEMethod,
                            DeepSeekFP8BlockScalesFusedMoEMethodDeepGemm,
                            FP8QDQFusedMoEMethod, MoEWeightLoadingMode,
```
```diff
@@ -233,8 +233,8 @@ def __init__(
         self.enable_dummy_allreduce = os.environ.get(
             "TRTLLM_ENABLE_DUMMY_ALLREDUCE", "0") == "1"

-        # MoE backend will be lazily initialized when first accessed (see moe_backend property)
-        self._moe_backend_impl = None
+        # MoE op will be lazily initialized when first accessed (see moe_op_impl property)
+        self._moe_op_impl = None

     def _check_configs(self):
         assert self._weights_created
```
```diff
@@ -352,17 +352,17 @@ def create_weights(self):
         self._check_configs()

     @property
-    def moe_backend_impl(self) -> MoEBackend:
+    def moe_op_impl(self) -> MoEOp:
         """
-        Lazily initialize and return the MoE backend.
+        Lazily initialize and return the MoE op.

-        The backend is selected based on hardware capabilities and quantization
+        The op is selected based on hardware capabilities and quantization
         configuration, which are only available after weights are created.
         """
-        if self._moe_backend_impl is None:
-            assert self._weights_created, "Weights must be created before accessing moe_backend"
-            self._moe_backend_impl = MoEBackendSelection.select_backend(self)
-        return self._moe_backend_impl
+        if self._moe_op_impl is None:
+            assert self._weights_created, "Weights must be created before accessing moe_op"
+            self._moe_op_impl = MoEOpSelector.select_op(self)
+        return self._moe_op_impl

     def dummy_allreduce(self):
         """
```
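The renamed property keeps the same lazy-selection pattern: the concrete op is resolved only once, after weights (and thus the quantization config) exist, and is then cached on the instance. Below is a minimal, self-contained sketch of that pattern, not the real TensorRT-LLM implementation; `ToyMoEModule`, `DenseFallbackOp`, and the trivial `select_op` body are hypothetical placeholders for the actual hardware- and quantization-aware selection.

```python
# Sketch of the lazy op-selection pattern shown in the diff above.
# MoEOp / MoEOpSelector are simplified stand-ins; DenseFallbackOp and
# ToyMoEModule are hypothetical placeholders, not TensorRT-LLM classes.
from typing import Optional


class MoEOp:
    """Interface every concrete MoE op implements."""

    def run_moe(self, module, x, token_selected_slots, *args, **kwargs):
        raise NotImplementedError


class DenseFallbackOp(MoEOp):
    def run_moe(self, module, x, token_selected_slots, *args, **kwargs):
        # Placeholder compute: a real op would launch the fused MoE kernel.
        return x


class MoEOpSelector:
    @staticmethod
    def select_op(module) -> MoEOp:
        # Real selection would branch on GPU capability and quant mode;
        # this sketch always returns the fallback op.
        return DenseFallbackOp()


class ToyMoEModule:
    def __init__(self):
        self._weights_created = False
        # Lazily initialized on first access (see moe_op_impl property).
        self._moe_op_impl: Optional[MoEOp] = None

    def create_weights(self):
        self._weights_created = True

    @property
    def moe_op_impl(self) -> MoEOp:
        # Selection depends on state produced by create_weights(), hence the
        # lazy initialization guarded by the assert.
        if self._moe_op_impl is None:
            assert self._weights_created, "Weights must be created before accessing moe_op"
            self._moe_op_impl = MoEOpSelector.select_op(self)
        return self._moe_op_impl
```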
```diff
@@ -658,7 +658,7 @@ def forward_chunk(
                 f"Not available alltoall method type: {self.alltoall_method_type!r}"
             )

-        final_hidden_states = self.moe_backend_impl.run_moe(
+        final_hidden_states = self.moe_op_impl.run_moe(
             self,
             x,
             token_selected_slots,
```
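At the `forward_chunk` call site the module simply delegates to whatever op was selected. A short usage continuation of the sketch above (same hypothetical toy classes; plain Python lists stand in for tensors):

```python
# Usage continuation of the sketch above (hypothetical toy classes only).
module = ToyMoEModule()
module.create_weights()          # op selection is only valid after this point

x = [[0.1, 0.2], [0.3, 0.4]]     # stand-in for the hidden-states tensor
token_selected_slots = [0, 1]    # stand-in for the routed expert slots

# Mirrors the forward_chunk change: the module passes itself so the op can
# read its weights and quantization config.
final_hidden_states = module.moe_op_impl.run_moe(module, x, token_selected_slots)
print(type(module.moe_op_impl).__name__)  # -> DenseFallbackOp
```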
|