Mirror of https://github.com/vllm-project/vllm.git (synced 2025-10-20 23:03:52 +08:00)
[BugFix] gemma loading after quantization or LoRA. (#3553)
@@ -340,6 +340,10 @@ class GemmaForCausalLM(nn.Module):
                 weight_loader(param, loaded_weight, shard_id)
                 break
             else:
+                # lm_head is not used in vllm as it is tied with embed_token.
+                # To prevent errors, skip loading lm_head.weight.
+                if "lm_head.weight" in name:
+                    continue
                 # Skip loading extra bias for GPTQ models.
                 if name.endswith(".bias") and name not in params_dict:
                     continue
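For context, here is a minimal, self-contained sketch of why the skip is needed. It is not vLLM's actual loader; the names TinyGemmaLike and load_weights below are hypothetical. Gemma ties its output projection (lm_head) to the input embedding, so the vLLM module has no separate lm_head parameter, while checkpoints exported after quantization or LoRA merging may still serialize an lm_head.weight tensor; looking that name up in params_dict would then fail without the skip.

# Hypothetical, simplified sketch -- not vLLM's actual loader.
import torch
import torch.nn as nn


class TinyGemmaLike(nn.Module):
    """Toy model whose output projection is tied to the input embedding."""

    def __init__(self, vocab_size: int = 8, hidden_size: int = 4) -> None:
        super().__init__()
        self.embed_tokens = nn.Embedding(vocab_size, hidden_size)
        # No separate lm_head parameter: logits reuse embed_tokens.weight.

    def logits(self, hidden_states: torch.Tensor) -> torch.Tensor:
        return hidden_states @ self.embed_tokens.weight.t()


def load_weights(model: nn.Module, checkpoint: dict) -> None:
    params_dict = dict(model.named_parameters())
    for name, loaded_weight in checkpoint.items():
        # Checkpoints written after quantization or LoRA merging can still
        # contain an explicit lm_head.weight; this model has no such
        # parameter, so looking it up would raise KeyError. Skip it instead.
        if "lm_head.weight" in name:
            continue
        params_dict[name].data.copy_(loaded_weight)


model = TinyGemmaLike()
checkpoint = {
    "embed_tokens.weight": torch.randn(8, 4),
    "lm_head.weight": torch.randn(8, 4),  # tied copy written by the export tool
}
load_weights(model, checkpoint)  # succeeds because lm_head.weight is skipped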