[Hardware][Gaudi][BugFix] fix arguments of hpu fused moe (#15945)

Signed-off-by: zhenwei <zhenweiliu@habana.ai>
Author: liuzhenwei
Date:   2025-04-05 00:38:55 +08:00 (committed by GitHub)
Parent: bf7e3c51ae
Commit: 0812d8dd41


@@ -254,9 +254,12 @@ class UnquantizedFusedMoEMethod(FusedMoEMethodBase, CustomOp):
         renormalize: bool,
         topk_group: Optional[int] = None,
         num_expert_group: Optional[int] = None,
+        global_num_experts: int = -1,
+        expert_map: Optional[torch.Tensor] = None,
         custom_routing_function: Optional[Callable] = None,
         scoring_func: str = "softmax",
-        e_score_correction_bias: Optional[torch.Tensor] = None
+        e_score_correction_bias: Optional[torch.Tensor] = None,
+        activation: str = "silu",
     ) -> torch.Tensor:
         assert not use_grouped_topk
         assert num_expert_group is None
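
A minimal sketch of the failure mode this hunk presumably fixes (the function names, shapes, and dispatch pattern below are hypothetical, not the vLLM code): when the caller forwards keywords such as activation, global_num_experts, and expert_map, an HPU forward method whose signature lacks them raises a TypeError before any kernel runs; widening the signature with defaulted parameters lets the same call succeed.

from typing import Optional

import torch


def forward_hpu_old(x: torch.Tensor,
                    router_logits: torch.Tensor,
                    top_k: int,
                    renormalize: bool,
                    e_score_correction_bias: Optional[torch.Tensor] = None
                    ) -> torch.Tensor:
    # Pre-fix shape of the signature: no `activation` keyword accepted.
    return x


def forward_hpu_new(x: torch.Tensor,
                    router_logits: torch.Tensor,
                    top_k: int,
                    renormalize: bool,
                    e_score_correction_bias: Optional[torch.Tensor] = None,
                    activation: str = "silu") -> torch.Tensor:
    # Post-fix shape: the new keyword is accepted and defaulted.
    assert activation == "silu"
    return x


# Hypothetical keyword set a shared dispatcher might forward to the backend.
common_kwargs = dict(router_logits=torch.randn(2, 8),
                     top_k=2,
                     renormalize=True,
                     activation="silu")
x = torch.randn(2, 16)

try:
    forward_hpu_old(x, **common_kwargs)
except TypeError as err:
    print(err)  # unexpected keyword argument 'activation'

forward_hpu_new(x, **common_kwargs)  # succeeds once the signatures agree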
@@ -472,7 +475,7 @@ class FusedMoE(torch.nn.Module):
                              "non-grouped topk.")
         if current_platform.is_hpu():
             from vllm_hpu_extension.ops import DynamicFusedMOE
-            self.hpu_fused_moe = DynamicFusedMOE(self.num_experts)
+            self.hpu_fused_moe = DynamicFusedMOE(self.global_num_experts)

         # Note: get_quant_method will look at the layer's local_num_experts
         # for heuristic purposes, so it must be initialized first.
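
A rough sketch of the local/global distinction behind the second change, with made-up numbers (not the vLLM implementation): under expert parallelism each rank stores only a slice of the experts, while routing ids produced by the router span the full global range, so sizing the HPU fused-MoE wrapper with global_num_experts matches the routing space rather than the per-rank slice.

import torch

# Hypothetical expert-parallel setup; the numbers are for illustration only.
global_num_experts = 16                              # experts in the model
ep_size = 4                                          # expert-parallel ranks
local_num_experts = global_num_experts // ep_size    # experts held per rank

# The router scores every expert in the model, so routing tensors are sized
# by the global count, not by the per-rank slice.
router_logits = torch.randn(8, global_num_experts)   # [tokens, global experts]
topk_ids = router_logits.topk(2, dim=-1).indices     # ids fall in [0, 16)

# A wrapper sized with only the local count would not cover this id range,
# which is why the layer now passes self.global_num_experts.
print(local_num_experts, int(topk_ids.max()))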