mirror of
https://github.com/vllm-project/vllm-ascend.git
synced 2025-10-20 21:53:54 +08:00
[BugFix] fix qwenVL quant assertion error (#3466)
### What this PR does / why we need it?

This PR fixes two issues:

1. In multimodal scenarios, weight prefetching could not be performed and an assertion error was raised.
2. The `grid_thw` data type for Qwen2-VL is standardized to `torch.int32`.

### Does this PR introduce _any_ user-facing change?

None.

### How was this patch tested?

- CI & e2e
- vLLM version: v0.11.0rc3
- vLLM main: https://github.com/vllm-project/vllm/commit/v0.11.0

---------

Signed-off-by: elilzhu <2435754260@qq.com>
Co-authored-by: zhulei (AK) <z00692222@china.huawei.com>
This commit is contained in:
@ -314,6 +314,7 @@ class AscendQwen2VisionTransformer(Qwen2VisionTransformer):
|
|||||||
x: torch.Tensor,
|
x: torch.Tensor,
|
||||||
grid_thw: torch.Tensor,
|
grid_thw: torch.Tensor,
|
||||||
) -> torch.Tensor:
|
) -> torch.Tensor:
|
||||||
|
grid_thw = torch.tensor(grid_thw, dtype=torch.int32)
|
||||||
# compute cu_seqlens and avoid cumsum to fit operator unpadFA
|
# compute cu_seqlens and avoid cumsum to fit operator unpadFA
|
||||||
cu_seqlens = torch.repeat_interleave(grid_thw[:, 1] * grid_thw[:, 2],
|
cu_seqlens = torch.repeat_interleave(grid_thw[:, 1] * grid_thw[:, 2],
|
||||||
grid_thw[:,
|
grid_thw[:,
|
||||||
|
@ -99,8 +99,11 @@ class AscendW8A8LinearMethod:
|
|||||||
) -> torch.Tensor:
|
) -> torch.Tensor:
|
||||||
if x.dtype != torch.int8:
|
if x.dtype != torch.int8:
|
||||||
layer_cls_name = layer.__class__.__name__
|
layer_cls_name = layer.__class__.__name__
|
||||||
|
try:
|
||||||
weight_prefetch_method = get_forward_context(
|
weight_prefetch_method = get_forward_context(
|
||||||
).weight_prefetch_method
|
).weight_prefetch_method
|
||||||
|
except AssertionError:
|
||||||
|
weight_prefetch_method = None
|
||||||
|
|
||||||
# prefetch qkvo_proj.weight preprocess
|
# prefetch qkvo_proj.weight preprocess
|
||||||
if weight_prefetch_method:
|
if weight_prefetch_method:
|
||||||
|
Reference in New Issue
Block a user