mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[Misc] Expand SUPPORTED_HIDDEN_SIZES for DeepEP low-latency kernels (#21818)
Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
This commit is contained in:
@ -40,7 +40,7 @@ class DeepEPLLPrepareAndFinalize(mk.FusedMoEPrepareAndFinalize):
|
||||
|
||||
# DeepEP low-latency kernels are compiled only for certain
|
||||
# specific hidden sizes.
|
||||
SUPPORTED_HIDDEN_SIZES = [2048, 2560, 4096, 5120, 7168]
|
||||
SUPPORTED_HIDDEN_SIZES = [2048, 2560, 4096, 5120, 6144, 7168]
|
||||
|
||||
def __init__(self,
|
||||
buffer: deep_ep.Buffer,
|
||||
|
Reference in New Issue
Block a user