Compare commits

...

1 Commit

SHA1        Message  Date
b1292bca69  nits     2024-01-02 15:50:31 +01:00
3 changed files with 0 additions and 9 deletions
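All three hunks below delete the same three lines: the _shape reshape helper defined on each model's attention class.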

@@ -345,9 +345,6 @@ class LlamaAttention(nn.Module):
             else:
                 raise ValueError(f"Unknown RoPE scaling type {scaling_type}")
 
-    def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
-        return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
-
     def forward(
         self,
         hidden_states: torch.Tensor,

@@ -233,9 +233,6 @@ class MistralAttention(nn.Module):
             base=self.rope_theta,
         )
 
-    def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
-        return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
-
     def forward(
         self,
         hidden_states: torch.Tensor,

@@ -281,9 +281,6 @@ class MixtralAttention(nn.Module):
             base=self.rope_theta,
         )
 
-    def _shape(self, tensor: torch.Tensor, seq_len: int, bsz: int):
-        return tensor.view(bsz, seq_len, self.num_heads, self.head_dim).transpose(1, 2).contiguous()
-
     def forward(
         self,
         hidden_states: torch.Tensor,
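
For context, the removed _shape helper only rearranged a projected hidden state from (bsz, seq_len, num_heads * head_dim) into the (bsz, num_heads, seq_len, head_dim) layout that attention kernels consume. A minimal standalone sketch of that operation, with illustrative dimension values that are not taken from the diff:

import torch

bsz, seq_len, num_heads, head_dim = 2, 16, 8, 64  # illustrative sizes, not from the diff

# A projected hidden state, shaped like the output of the q/k/v linear
# projections in these attention modules.
tensor = torch.randn(bsz, seq_len, num_heads * head_dim)

# Body of the removed helper: split heads out of the last dimension,
# then swap the seq_len and head axes.
shaped = tensor.view(bsz, seq_len, num_heads, head_dim).transpose(1, 2).contiguous()

assert shaped.shape == (bsz, num_heads, seq_len, head_dim)

With 0 additions, nothing replaces the helper, which indicates each forward already builds this layout inline; the deletion is dead-code cleanup and should not change behavior.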