mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[Kernel] Disable CUTLASS kernels for fp8 (#5505)
This commit is contained in:
committed by
GitHub
parent
33e3b37242
commit
e38042d4af
@@ -257,7 +257,9 @@ class Fp8LinearMethod(LinearMethodBase):
|
||||
# If dynamic, layer.input_scale is None and x_scale computed from x.
|
||||
# If static, layer.input_scale is scalar and x_scale is input_scale.
|
||||
|
||||
if bias is None and self.cutlass_fp8_supported:
|
||||
# Temporarily disable CUTLASS kernels due to an illegal memory access
|
||||
#if bias is None and self.cutlass_fp8_supported:
|
||||
if False:
|
||||
qinput, x_scale = ops.scaled_fp8_quant(x, layer.input_scale)
|
||||
|
||||
# Fused GEMM_DQ
|
||||
|
Reference in New Issue
Block a user