[XPU][Bugfix] minor fix for XPU (#15591)

Signed-off-by: yan ma <yan.ma@intel.com>
Author:    Yan Ma
Date:      2025-04-22 00:02:57 +08:00
Committed: GitHub
Parent:    3b34fd5273
Commit:    fe3462c774

2 changed files with 8 additions and 6 deletions

@@ -23,6 +23,8 @@ Currently, there are no pre-built XPU wheels.
 - Second, install Python packages for vLLM XPU backend building:
 ```console
+git clone https://github.com/vllm-project/vllm.git
+cd vllm
 pip install --upgrade pip
 pip install -v -r requirements/xpu.txt
 ```

@@ -220,8 +220,8 @@ class IpexAttnBackendImpl(AttentionImpl[IpexAttnMetadata]):
                 value_cache,
                 attn_metadata.slot_mapping.flatten(),
                 self.kv_cache_dtype,
-                layer._k_scale,
-                layer._v_scale,
+                layer._k_scale_float,
+                layer._v_scale_float,
             )
         if attn_metadata.is_prompt:
@@ -306,8 +306,8 @@ class IpexAttnBackendImpl(AttentionImpl[IpexAttnMetadata]):
                     max_seq_len,
                     self.alibi_slopes,
                     self.kv_cache_dtype,
-                    layer._k_scale,
-                    layer._v_scale,
+                    layer._k_scale_float,
+                    layer._v_scale_float,
                 )
             else:
                 # Run PagedAttention V2.
@@ -339,8 +339,8 @@ class IpexAttnBackendImpl(AttentionImpl[IpexAttnMetadata]):
                     max_seq_len,
                     self.alibi_slopes,
                     self.kv_cache_dtype,
-                    layer._k_scale,
-                    layer._v_scale,
+                    layer._k_scale_float,
+                    layer._v_scale_float,
                 )
         # Reshape the output tensor.
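
All three hunks make the same substitution: the IPEX attention ops are now passed the plain-float scale mirrors (`layer._k_scale_float`, `layer._v_scale_float`) instead of the tensor-valued `layer._k_scale` / `layer._v_scale`. Below is a minimal sketch of the distinction, using a hypothetical stand-in class (not vLLM's actual `Attention` layer); it assumes, as in upstream vLLM, that the `_float` attributes hold the Python-float values of the corresponding scale tensors, which is what a kernel binding that expects a scalar `float` needs.

```python
import torch

# Hypothetical stand-in for the scale-related attributes of an attention
# layer. The same KV-cache scale is kept in two forms: as a tensor (for
# kernels that take tensor arguments) and as a plain Python float (for
# kernels, like the IPEX ops above, whose bindings take a scalar).
class _ScaledAttentionLayer:
    def __init__(self, k_scale: float = 1.0, v_scale: float = 1.0):
        self._k_scale = torch.tensor(k_scale, dtype=torch.float32)
        self._v_scale = torch.tensor(v_scale, dtype=torch.float32)
        # Float mirrors of the tensors above; passing these avoids handing
        # a torch.Tensor to a binding that expects a scalar float.
        self._k_scale_float = k_scale
        self._v_scale_float = v_scale


layer = _ScaledAttentionLayer(k_scale=0.5, v_scale=0.5)
assert isinstance(layer._k_scale, torch.Tensor)
assert isinstance(layer._k_scale_float, float)
```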