mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[CPU] Disable oneDNN linear on non-x86 platforms (#25166)
Signed-off-by: jiang1.li <jiang1.li@intel.com>
This commit is contained in:
@@ -7,7 +7,7 @@ import torch
|
||||
|
||||
from vllm import _custom_ops as ops
|
||||
from vllm import envs
|
||||
from vllm.platforms import current_platform
|
||||
from vllm.platforms import CpuArchEnum, current_platform
|
||||
from vllm.utils import direct_register_custom_op
|
||||
|
||||
|
||||
@@ -167,7 +167,8 @@ def dispatch_cpu_unquantized_gemm(
|
||||
if remove_weight:
|
||||
layer.weight = torch.nn.Parameter(torch.empty(0),
|
||||
requires_grad=False)
|
||||
elif ops._supports_onednn:
|
||||
elif (ops._supports_onednn
|
||||
and current_platform.get_cpu_architecture() == CpuArchEnum.X86):
|
||||
origin_weight = layer.weight
|
||||
if remove_weight:
|
||||
layer.weight = torch.nn.Parameter(torch.empty(0),
|
||||
|
Reference in New Issue
Block a user