[Bugfix]: Assertion error when using FlashInfer backend (#25933)

Signed-off-by: simondanielsson <simon.danielsson99@hotmail.com>
Co-authored-by: Cyrus Leung <tlleungac@connect.ust.hk>
This commit is contained in:
Simon Danielsson
2025-10-05 09:46:36 +01:00
committed by GitHub
parent 201c971e96
commit 432e1cbc23

View File

@@ -508,7 +508,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
self.layer = layer
self.quant_config = quant_config
self.weight_block_size = self.quant_config.weight_block_size
- self.block_quant = self.weight_block_size is not None
+ self.block_quant: bool = self.weight_block_size is not None
self.fused_experts: Optional[
mk.FusedMoEModularKernel] = None # type: ignore
@@ -1094,7 +1094,7 @@ class Fp8MoEMethod(FusedMoEMethodBase):
expert_map=expert_map,
)
elif self.flashinfer_moe_backend == FlashinferMoeBackend.CUTLASS:
- assert self.block_quant is None
+ assert not self.block_quant
assert (not renormalize and custom_routing_function is not None)
assert activation == 'silu', (
f"Expected 'silu' activation but got {activation}")