Fix the bug related to loading GPTQ INT3 weights. (#23328)

Signed-off-by: JunHowie <JunHowie@aliyun.com>
Co-authored-by: JunHowie <JunHowie@aliyun.com>
Co-authored-by: Isotr0py <mozf@mail2.sysu.edu.cn>
This commit is contained in:
Jun-Howie
2025-09-01 13:39:57 +08:00
committed by GitHub
parent 8c742a66d1
commit acc1a6e10a

View File

@@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from copy import deepcopy
from fractions import Fraction
from typing import Optional, Union
import regex as re
@@ -29,7 +30,7 @@ def override_config(config: QuantizationConfig, prefix: str):
if isinstance(desc_act, bool):
config.desc_act = desc_act
config.pack_factor = 32 // config.weight_bits # packed into int32
config.pack_factor = Fraction(32, config.weight_bits) # packed into int32
if config.get_name() == "gptq_marlin":
is_sym = get_dynamic_override(config, prefix, "sym", config.is_sym)
if isinstance(is_sym, bool):