[Kernel] Disable CUTLASS kernels for fp8 (#5505)

This commit is contained in:
Tyler Michael Smith
2024-06-13 16:38:05 -04:00
committed by GitHub
parent 33e3b37242
commit e38042d4af

View File

@@ -257,7 +257,9 @@ class Fp8LinearMethod(LinearMethodBase):
# If dynamic, layer.input_scale is None and x_scale computed from x.
# If static, layer.input_scale is scalar and x_scale is input_scale.
- if bias is None and self.cutlass_fp8_supported:
+ # Temporarily disable CUTLASS kernels due to an illegal memory access
+ #if bias is None and self.cutlass_fp8_supported:
+ if False:
qinput, x_scale = ops.scaled_fp8_quant(x, layer.input_scale)
# Fused GEMM_DQ