mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
Fix the bug related to loading GPTQ INT3 weights. (#23328)
Signed-off-by: JunHowie <JunHowie@aliyun.com> Co-authored-by: JunHowie <JunHowie@aliyun.com> Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
from copy import deepcopy
|
||||
from fractions import Fraction
|
||||
from typing import Optional, Union
|
||||
|
||||
import regex as re
|
||||
@ -29,7 +30,7 @@ def override_config(config: QuantizationConfig, prefix: str):
|
||||
if isinstance(desc_act, bool):
|
||||
config.desc_act = desc_act
|
||||
|
||||
config.pack_factor = 32 // config.weight_bits # packed into int32
|
||||
config.pack_factor = Fraction(32, config.weight_bits) # packed into int32
|
||||
if config.get_name() == "gptq_marlin":
|
||||
is_sym = get_dynamic_override(config, prefix, "sym", config.is_sym)
|
||||
if isinstance(is_sym, bool):
|
||||
|
Reference in New Issue
Block a user