mirror of https://github.com/vllm-project/vllm.git
[Llama4] Update attn_temperature_tuning (#19997)
Signed-off-by: Brayden Zhong <b8zhong@uwaterloo.ca>
@@ -148,9 +148,8 @@ class Llama4Attention(nn.Module):
         self.q_size = self.num_heads * self.head_dim
         self.kv_size = self.num_kv_heads * self.head_dim
         self.scaling = self.head_dim**-0.5
-        # TODO: attn_temperature_tuning should be a bool in huggingface
         self.attn_temperature_tuning = self.nope and \
-            config.attn_temperature_tuning > 0
+            config.attn_temperature_tuning

         self.floor_scale = getattr(config, "floor_scale", 8192.0)
         self.attn_scale = getattr(config, "attn_scale", 0.1)