mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[NVIDIA] Explicitly disable shuffled weights for flashinfer blockscale moe fp8 kernels (#21411)
Signed-off-by: kaixih <kaixih@nvidia.com>
This commit is contained in:
@@ -1127,6 +1127,7 @@ def flashinfer_fused_moe_blockscale_fp8(
         tile_tokens_dim=_get_tile_tokens_dim(x.shape[0], top_k,
                                              global_num_experts),
         routing_method_type=2,  # DeepSeek-styled routing method
+        use_shuffled_weight=False,
     )
 
 
Reference in New Issue
Block a user