Mirror of https://github.com/vllm-project/vllm.git
Reorder kv dtype check to avoid nvcc not found error on AMD platform (#3104)
vllm/config.py
@@ -330,15 +330,14 @@ class CacheConfig:
         if self.cache_dtype == "auto":
             pass
         elif self.cache_dtype == "fp8_e5m2":
+            if is_hip():
+                raise NotImplementedError(
+                    "FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
             nvcc_cuda_version = get_nvcc_cuda_version()
             if nvcc_cuda_version and nvcc_cuda_version < Version("11.8"):
                 raise ValueError(
                     "FP8 is not supported when cuda version is lower than 11.8."
                 )
-            device_name = torch.cuda.get_device_name()
-            if "AMD" in device_name:
-                raise NotImplementedError(
-                    "FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
             logger.info(
                 "Using fp8_e5m2 data type to store kv cache. It reduces "
                 "the GPU memory footprint and boosts the performance. "
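Why the order matters: on a ROCm host the CUDA toolkit is absent, so the old code crashed inside get_nvcc_cuda_version() (nvcc not found) before it ever reached the AMD device-name check. Moving an is_hip() check ahead of the nvcc probe short-circuits validation on AMD. The sketch below illustrates the failure mode and the fixed ordering; is_hip() mirrors vLLM's torch.version.hip test, while get_nvcc_cuda_version() and verify_fp8_e5m2_support() are simplified hypothetical stand-ins for the real helpers, not vLLM's actual implementations.

import re
import subprocess
from typing import Optional

import torch
from packaging.version import Version


def is_hip() -> bool:
    # vLLM detects a ROCm build of PyTorch via torch.version.hip.
    return torch.version.hip is not None


def get_nvcc_cuda_version() -> Optional[Version]:
    # Simplified stand-in: invoking nvcc raises FileNotFoundError on
    # ROCm hosts, where the CUDA toolkit is not installed. This is the
    # "nvcc not found" error the commit avoids.
    output = subprocess.check_output(["nvcc", "-V"], text=True)
    match = re.search(r"release (\d+\.\d+)", output)
    return Version(match.group(1)) if match else None


def verify_fp8_e5m2_support() -> None:
    # Post-#3104 ordering: reject AMD GPUs first, so the nvcc probe
    # below is never reached on ROCm.
    if is_hip():
        raise NotImplementedError(
            "FP8_E5M2 KV Cache on AMD GPU has not been supported yet.")
    nvcc_cuda_version = get_nvcc_cuda_version()
    if nvcc_cuda_version and nvcc_cuda_version < Version("11.8"):
        raise ValueError(
            "FP8 is not supported when cuda version is lower than 11.8.")

With this ordering, an AMD user sees the intended NotImplementedError instead of an unrelated FileNotFoundError from the nvcc lookup.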