Enable 4bit bnb prequant MOE (#21548)

Signed-off-by: Jee Jee Li <pandaleefree@gmail.com>
Co-authored-by: Jee Jee Li <pandaleefree@gmail.com>
Authored by Andy Chen on 2025-08-11 19:02:14 -07:00, committed via GitHub
parent 1891a265d3
commit 9b94d6ec8f
2 changed files with 4 additions and 8 deletions

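For context, a minimal usage sketch of what this change enables: serving an MoE checkpoint that was pre-quantized to 4-bit with bitsandbytes. The model name below is a placeholder for any bnb-4bit MoE checkpoint, and on recent vLLM versions the bitsandbytes load format may be detected automatically from the checkpoint config, so treat this as a sketch rather than the canonical invocation.

from vllm import LLM, SamplingParams

# Placeholder name: any MoE checkpoint pre-quantized with bitsandbytes 4-bit.
llm = LLM(
    model="some-org/Qwen3-MoE-bnb-4bit",
    quantization="bitsandbytes",   # route weights through the BitsAndBytes loader
    load_format="bitsandbytes",    # may be auto-detected on newer vLLM versions
)

outputs = llm.generate(["Hello, my name is"],
                       SamplingParams(max_tokens=32))
print(outputs[0].outputs[0].text)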

@@ -427,14 +427,10 @@ class BitsAndBytesModelLoader(BaseModelLoader):
 elif isinstance(module, FusedMoE) and hasattr(
         module.quant_method, "quant_config"):
     # TODO: support FusedMoE with prequant and 8bit.
-    if self.pre_quant:
+    if self.pre_quant and self.load_8bit:
         raise ValueError(
-            "Prequant BitsAndBytes models with FusedMoE is not "
-            "supported yet.")
-    if self.load_8bit:
-        raise ValueError(
-            "BitsAndBytes 8bit quantization with FusedMoE is not "
-            "supported yet.")
+            "Prequant BitsAndBytes 8bit models with FusedMoE "
+            "is not supported yet.")
     # Get the corresponding weight name using module name and
     # expert_params_mapping.

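A minimal sketch (not the loader code itself) of the behavior this hunk installs, using a hypothetical stand-in for the FusedMoE branch of the check: only the combination of a pre-quantized checkpoint and 8-bit loading is rejected, so pre-quantized 4-bit MoE weights now fall through and continue loading.

def check_fused_moe_bnb(pre_quant: bool, load_8bit: bool) -> None:
    # Mirrors the new guard: only prequant + 8bit is rejected for FusedMoE.
    if pre_quant and load_8bit:
        raise ValueError(
            "Prequant BitsAndBytes 8bit models with FusedMoE "
            "is not supported yet.")

check_fused_moe_bnb(pre_quant=True, load_8bit=False)   # 4-bit prequant MoE: passes
check_fused_moe_bnb(pre_quant=False, load_8bit=True)   # passes this check (handled elsewhere, if at all)
# check_fused_moe_bnb(pre_quant=True, load_8bit=True)  # would raise ValueError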

@@ -684,4 +684,4 @@ class Qwen3MoeForCausalLM(nn.Module, SupportsPP, SupportsLoRA,
         return loader.load_weights(weights)
 
     def get_expert_mapping(self) -> list[tuple[str, str, int, str]]:
-        return self.model.get_expert_mapping()
+        return self.model.get_expert_mapping()