[CPU] Disable oneDNN linear on non-x86 platforms (#25166)

Signed-off-by: jiang1.li <jiang1.li@intel.com>
This commit is contained in:
Li, Jiang
2025-09-19 15:27:22 +08:00
committed by GitHub
parent 486c5599e3
commit 8c1d4acbfe

View File

@@ -7,7 +7,7 @@ import torch
from vllm import _custom_ops as ops
from vllm import envs
-from vllm.platforms import current_platform
+from vllm.platforms import CpuArchEnum, current_platform
from vllm.utils import direct_register_custom_op
@@ -167,7 +167,8 @@ def dispatch_cpu_unquantized_gemm(
if remove_weight:
layer.weight = torch.nn.Parameter(torch.empty(0),
requires_grad=False)
-elif ops._supports_onednn:
+elif (ops._supports_onednn
+and current_platform.get_cpu_architecture() == CpuArchEnum.X86):
origin_weight = layer.weight
if remove_weight:
layer.weight = torch.nn.Parameter(torch.empty(0),