[NVIDIA] Explicitly disable shuffled weights for flashinfer blockscale moe fp8 kernels (#21411)

Signed-off-by: kaixih <kaixih@nvidia.com>
2025-10-20 14:53:52 +08:00 · 2025-07-26 07:10:36 -07:00
parent e7c4f9ee86
commit de509ae8eb
1 changed file with 1 addition and 0 deletions
--- a/vllm/model_executor/layers/fused_moe/fused_moe.py
+++ b/vllm/model_executor/layers/fused_moe/fused_moe.py
@@ -1127,6 +1127,7 @@ def flashinfer_fused_moe_blockscale_fp8(
        tile_tokens_dim=_get_tile_tokens_dim(x.shape[0], top_k,
                                             global_num_experts),
        routing_method_type=2,  # DeepSeek-styled routing method
+        use_shuffled_weight=False,
    )