Reorder kv dtype check to avoid nvcc not found error on AMD platform (#3104)

This commit is contained in:
cloudhan
2024-03-02 14:34:48 +08:00
committed by GitHub
parent 29e70e3e88
commit baee28c46c

View File

@@ -330,15 +330,14 @@ class CacheConfig:
if self.cache_dtype == "auto":
pass
elif self.cache_dtype == "fp8_e5m2":
if is_hip():
raise NotImplementedError(
"FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
nvcc_cuda_version = get_nvcc_cuda_version()
if nvcc_cuda_version and nvcc_cuda_version < Version("11.8"):
raise ValueError(
"FP8 is not supported when cuda version is lower than 11.8."
)
device_name = torch.cuda.get_device_name()
if "AMD" in device_name:
raise NotImplementedError(
"FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
logger.info(
"Using fp8_e5m2 data type to store kv cache. It reduces "
"the GPU memory footprint and boosts the performance. "