Mirror of https://github.com/vllm-project/vllm.git
[Bugfix] Fix dynamic FP8 quantization for Mixtral (#4793)
@@ -95,7 +95,7 @@ class MixtralMoE(nn.Module):
                                      params_dtype=self.params_dtype,
                                      quant_config=None)
 
-        if self.use_fp8:
+        if self.use_fp8 and self.quant_config.is_checkpoint_fp8_serialized:
             params_dtype = torch.float8_e4m3fn
 
         self.w13_weight = nn.Parameter(
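The gist of the fix: with dynamic FP8 quantization the checkpoint still stores fp16/bf16 weights, so the MoE expert parameters must be created in the original dtype and quantized after loading; only fp8-serialized checkpoints should allocate the parameters directly in `torch.float8_e4m3fn`. The sketch below is a simplified illustration of that dtype selection, not the actual vLLM code; `make_expert_weight` and its arguments are hypothetical stand-ins for the logic in `MixtralMoE.__init__`.

```python
import torch
import torch.nn as nn


def make_expert_weight(num_experts: int,
                       hidden_size: int,
                       intermediate_size: int,
                       params_dtype: torch.dtype,
                       use_fp8: bool,
                       checkpoint_fp8_serialized: bool) -> nn.Parameter:
    """Allocate a fused w1/w3 expert weight with the appropriate dtype."""
    # Only allocate in float8 when the checkpoint itself is serialized in
    # fp8. Under dynamic quantization the checkpoint holds fp16/bf16
    # weights, so the buffer keeps the original dtype and the weights are
    # quantized later, after they have been loaded.
    if use_fp8 and checkpoint_fp8_serialized:
        params_dtype = torch.float8_e4m3fn
    return nn.Parameter(torch.empty(num_experts,
                                    2 * intermediate_size,
                                    hidden_size,
                                    dtype=params_dtype),
                        requires_grad=False)
```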