From de533ab2a14192e461900a4950e2b426d99a6862 Mon Sep 17 00:00:00 2001 From: Lukas Geiger Date: Fri, 29 Aug 2025 02:26:34 +0100 Subject: [PATCH] [Models] Improve iteration over layers (#19497) Signed-off-by: Lukas Geiger --- vllm/model_executor/models/arcee.py | 3 ++- vllm/model_executor/models/arctic.py | 3 ++- vllm/model_executor/models/baichuan.py | 3 ++- vllm/model_executor/models/bailing_moe.py | 4 ++-- vllm/model_executor/models/bamba.py | 3 +-- vllm/model_executor/models/bloom.py | 3 ++- vllm/model_executor/models/chameleon.py | 3 ++- vllm/model_executor/models/chatglm.py | 3 ++- vllm/model_executor/models/commandr.py | 3 ++- vllm/model_executor/models/dbrx.py | 3 ++- vllm/model_executor/models/deepseek.py | 5 +++-- vllm/model_executor/models/deepseek_v2.py | 3 ++- vllm/model_executor/models/dots1.py | 3 ++- vllm/model_executor/models/ernie45_moe.py | 4 ++-- vllm/model_executor/models/ernie45_vl_moe.py | 4 ++-- vllm/model_executor/models/exaone.py | 3 ++- vllm/model_executor/models/exaone4.py | 3 ++- vllm/model_executor/models/falcon.py | 3 ++- vllm/model_executor/models/gemma.py | 3 ++- vllm/model_executor/models/gemma2.py | 3 ++- vllm/model_executor/models/gemma3.py | 3 ++- vllm/model_executor/models/glm4_moe.py | 4 ++-- vllm/model_executor/models/gpt2.py | 3 ++- vllm/model_executor/models/gpt_bigcode.py | 3 ++- vllm/model_executor/models/gpt_j.py | 5 +++-- vllm/model_executor/models/gpt_neox.py | 3 ++- vllm/model_executor/models/granite.py | 3 ++- vllm/model_executor/models/granitemoe.py | 3 ++- vllm/model_executor/models/granitemoehybrid.py | 3 +-- vllm/model_executor/models/granitemoeshared.py | 4 ++-- vllm/model_executor/models/grok1.py | 4 ++-- vllm/model_executor/models/internlm2.py | 3 ++- vllm/model_executor/models/internlm2_ve.py | 3 ++- vllm/model_executor/models/jais.py | 3 ++- vllm/model_executor/models/jamba.py | 3 ++- vllm/model_executor/models/lfm2.py | 5 +++-- vllm/model_executor/models/llama.py | 3 ++- vllm/model_executor/models/mamba2.py | 4 +--- vllm/model_executor/models/mimo.py | 3 ++- vllm/model_executor/models/minicpm.py | 3 ++- vllm/model_executor/models/minimax_text_01.py | 4 ++-- vllm/model_executor/models/mixtral.py | 3 ++- vllm/model_executor/models/mixtral_quant.py | 3 ++- vllm/model_executor/models/molmo.py | 3 ++- vllm/model_executor/models/mpt.py | 3 ++- vllm/model_executor/models/nemotron.py | 3 ++- vllm/model_executor/models/nemotron_h.py | 3 +-- vllm/model_executor/models/nemotron_nas.py | 4 ++-- vllm/model_executor/models/olmo.py | 3 ++- vllm/model_executor/models/olmo2.py | 3 ++- vllm/model_executor/models/olmoe.py | 3 ++- vllm/model_executor/models/opt.py | 3 ++- vllm/model_executor/models/orion.py | 3 ++- vllm/model_executor/models/persimmon.py | 3 ++- vllm/model_executor/models/phi.py | 3 ++- vllm/model_executor/models/phimoe.py | 3 ++- vllm/model_executor/models/plamo2.py | 3 ++- vllm/model_executor/models/qwen.py | 3 ++- vllm/model_executor/models/qwen2.py | 3 ++- vllm/model_executor/models/qwen2_moe.py | 3 ++- vllm/model_executor/models/qwen3_moe.py | 4 ++-- vllm/model_executor/models/seed_oss.py | 3 ++- vllm/model_executor/models/stablelm.py | 3 ++- vllm/model_executor/models/starcoder2.py | 3 ++- vllm/model_executor/models/step3_text.py | 4 ++-- 65 files changed, 129 insertions(+), 83 deletions(-) diff --git a/vllm/model_executor/models/arcee.py b/vllm/model_executor/models/arcee.py index 4cf73e2e0e..13ed4da060 100644 --- a/vllm/model_executor/models/arcee.py +++ b/vllm/model_executor/models/arcee.py @@ -9,6 +9,7 @@ # activation. from collections.abc import Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -243,7 +244,7 @@ class ArceeModel(nn.Module): aux_hidden_states: list[torch.Tensor] = [] for idx, layer in enumerate( - self.layers[self.start_layer:self.end_layer]): + islice(self.layers, self.start_layer, self.end_layer)): if idx in self.aux_hidden_state_layers: aux_hidden_states.append( hidden_states + diff --git a/vllm/model_executor/models/arctic.py b/vllm/model_executor/models/arctic.py index 4693c9487a..c566611266 100644 --- a/vllm/model_executor/models/arctic.py +++ b/vllm/model_executor/models/arctic.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """Inference-only Snowflake Arctic model.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -403,7 +404,7 @@ class ArcticModel(nn.Module): else: assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states = layer(positions, hidden_states) if not get_pp_group().is_last_rank: return IntermediateTensors({"hidden_states": hidden_states}) diff --git a/vllm/model_executor/models/baichuan.py b/vllm/model_executor/models/baichuan.py index 804a2f1785..4563c35666 100644 --- a/vllm/model_executor/models/baichuan.py +++ b/vllm/model_executor/models/baichuan.py @@ -22,6 +22,7 @@ """Inference-only BaiChuan model compatible with HuggingFace weights.""" import math from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -309,7 +310,7 @@ class BaiChuanModel(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/bailing_moe.py b/vllm/model_executor/models/bailing_moe.py index 23cab3509c..a42640cef9 100644 --- a/vllm/model_executor/models/bailing_moe.py +++ b/vllm/model_executor/models/bailing_moe.py @@ -24,6 +24,7 @@ # limitations under the License. """Inference-only BailingMoE model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -359,8 +360,7 @@ class BailingMoeModel(nn.Module): hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for i in range(self.start_layer, self.end_layer): - layer = self.layers[i] + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( hidden_states, position_ids, diff --git a/vllm/model_executor/models/bamba.py b/vllm/model_executor/models/bamba.py index e2cd31af53..a72bbdebe5 100644 --- a/vllm/model_executor/models/bamba.py +++ b/vllm/model_executor/models/bamba.py @@ -345,8 +345,7 @@ class BambaModel(nn.Module): residual = None num_attn = 0 - for i in range(len(self.layers)): - layer = self.layers[i] + for i, layer in enumerate(self.layers): if isinstance(layer, BambaAttentionDecoderLayer): num_attn += 1 diff --git a/vllm/model_executor/models/bloom.py b/vllm/model_executor/models/bloom.py index 1264045848..13ecda0122 100644 --- a/vllm/model_executor/models/bloom.py +++ b/vllm/model_executor/models/bloom.py @@ -20,6 +20,7 @@ """Inference-only BLOOM model compatible with HuggingFace weights.""" import math from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -273,7 +274,7 @@ class BloomModel(nn.Module): else: assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] - for layer in self.h[self.start_layer:self.end_layer]: + for layer in islice(self.h, self.start_layer, self.end_layer): hidden_states = layer(position_ids, hidden_states) if not get_pp_group().is_last_rank: return IntermediateTensors({"hidden_states": hidden_states}) diff --git a/vllm/model_executor/models/chameleon.py b/vllm/model_executor/models/chameleon.py index e6914ad4c4..28a1a66c23 100644 --- a/vllm/model_executor/models/chameleon.py +++ b/vllm/model_executor/models/chameleon.py @@ -3,6 +3,7 @@ from collections.abc import Iterable, Mapping, Sequence from functools import cached_property +from itertools import islice from typing import Annotated, Any, Literal, Optional, Union import torch @@ -914,7 +915,7 @@ class ChameleonModel(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/chatglm.py b/vllm/model_executor/models/chatglm.py index 5470ff3e8b..1fc2da3e4d 100644 --- a/vllm/model_executor/models/chatglm.py +++ b/vllm/model_executor/models/chatglm.py @@ -5,6 +5,7 @@ """Inference-only ChatGLM model compatible with THUDM weights.""" import json from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -281,7 +282,7 @@ class GLMTransformer(nn.Module): hidden_states: torch.Tensor, position_ids: torch.Tensor, ) -> Union[torch.Tensor, IntermediateTensors]: - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states = layer(hidden_states=hidden_states, position_ids=position_ids) diff --git a/vllm/model_executor/models/commandr.py b/vllm/model_executor/models/commandr.py index 4dd84b8f8f..7f87e31abd 100644 --- a/vllm/model_executor/models/commandr.py +++ b/vllm/model_executor/models/commandr.py @@ -23,6 +23,7 @@ # This file is based on the LLama model definition file in transformers """PyTorch Cohere model.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -322,7 +323,7 @@ class CohereModel(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/dbrx.py b/vllm/model_executor/models/dbrx.py index e74d90e0b1..519cd52221 100644 --- a/vllm/model_executor/models/dbrx.py +++ b/vllm/model_executor/models/dbrx.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -359,7 +360,7 @@ class DbrxModel(nn.Module): else: assert intermediate_tensors hidden_states = intermediate_tensors["hidden_states"] - for block in self.blocks[self.start_layer:self.end_layer]: + for block in islice(self.blocks, self.start_layer, self.end_layer): hidden_states = block(position_ids, hidden_states) if not get_pp_group().is_last_rank: return IntermediateTensors({"hidden_states": hidden_states}) diff --git a/vllm/model_executor/models/deepseek.py b/vllm/model_executor/models/deepseek.py index 2f0202f1e0..e815f13d66 100644 --- a/vllm/model_executor/models/deepseek.py +++ b/vllm/model_executor/models/deepseek.py @@ -24,6 +24,7 @@ # limitations under the License. """Inference-only Deepseek model.""" from collections.abc import Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -377,7 +378,7 @@ class DeepseekModel(nn.Module): else: hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer(positions, hidden_states, residual) if not get_pp_group().is_last_rank: return IntermediateTensors({ @@ -483,4 +484,4 @@ class DeepseekForCausalLM(nn.Module, SupportsPP): def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: loader = AutoWeightsLoader(self) - return loader.load_weights(weights) \ No newline at end of file + return loader.load_weights(weights) diff --git a/vllm/model_executor/models/deepseek_v2.py b/vllm/model_executor/models/deepseek_v2.py index 7657e7cb00..ed033954f7 100644 --- a/vllm/model_executor/models/deepseek_v2.py +++ b/vllm/model_executor/models/deepseek_v2.py @@ -25,6 +25,7 @@ """Inference-only DeepseekV2/DeepseekV3 model.""" import typing from collections.abc import Callable, Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -712,7 +713,7 @@ class DeepseekV2Model(nn.Module): hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer(positions, hidden_states, residual) if not get_pp_group().is_last_rank: diff --git a/vllm/model_executor/models/dots1.py b/vllm/model_executor/models/dots1.py index 5f410c0ae5..c386f8db9e 100644 --- a/vllm/model_executor/models/dots1.py +++ b/vllm/model_executor/models/dots1.py @@ -25,6 +25,7 @@ # limitations under the License. """Inference-only dots1 model.""" from collections.abc import Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -391,7 +392,7 @@ class Dots1Model(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/ernie45_moe.py b/vllm/model_executor/models/ernie45_moe.py index 4780ea931e..33ec27fc63 100644 --- a/vllm/model_executor/models/ernie45_moe.py +++ b/vllm/model_executor/models/ernie45_moe.py @@ -23,6 +23,7 @@ # limitations under the License. """Inference-only ErineMoE model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -419,8 +420,7 @@ class Ernie4_5_MoeModel(nn.Module): hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for i in range(self.start_layer, self.end_layer): - layer = self.layers[i] + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer(positions, hidden_states, residual) if not get_pp_group().is_last_rank: diff --git a/vllm/model_executor/models/ernie45_vl_moe.py b/vllm/model_executor/models/ernie45_vl_moe.py index f56c098435..780974c3b7 100644 --- a/vllm/model_executor/models/ernie45_vl_moe.py +++ b/vllm/model_executor/models/ernie45_vl_moe.py @@ -23,6 +23,7 @@ # limitations under the License. """Inference-only Erine VL model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -508,8 +509,7 @@ class Ernie4_5_VLMoeModel(nn.Module): hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for i in range(self.start_layer, self.end_layer): - layer = self.layers[i] + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer(positions, hidden_states, residual, visual_token_mask, **kwargs) diff --git a/vllm/model_executor/models/exaone.py b/vllm/model_executor/models/exaone.py index 8052b6bb82..942db0143a 100644 --- a/vllm/model_executor/models/exaone.py +++ b/vllm/model_executor/models/exaone.py @@ -26,6 +26,7 @@ """Inference-only Exaone model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -371,7 +372,7 @@ class ExaoneModel(nn.Module): hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.h[self.start_layer:self.end_layer]: + for layer in islice(self.h, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/exaone4.py b/vllm/model_executor/models/exaone4.py index 827e901418..971fcbd2aa 100644 --- a/vllm/model_executor/models/exaone4.py +++ b/vllm/model_executor/models/exaone4.py @@ -22,6 +22,7 @@ """Inference-only Exaone model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -354,7 +355,7 @@ class Exaone4Model(nn.Module): hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/falcon.py b/vllm/model_executor/models/falcon.py index 62a93dabd5..a9fe0924ba 100644 --- a/vllm/model_executor/models/falcon.py +++ b/vllm/model_executor/models/falcon.py @@ -22,6 +22,7 @@ import math from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -389,7 +390,7 @@ class FalconModel(nn.Module): hidden_states = self.get_input_embeddings(input_ids) else: hidden_states = intermediate_tensors["hidden_states"] - for layer in self.h[self.start_layer:self.end_layer]: + for layer in islice(self.h, self.start_layer, self.end_layer): hidden_states = layer(positions, hidden_states) if not get_pp_group().is_last_rank: return IntermediateTensors({"hidden_states": hidden_states}) diff --git a/vllm/model_executor/models/gemma.py b/vllm/model_executor/models/gemma.py index 59c3102add..12eb275038 100644 --- a/vllm/model_executor/models/gemma.py +++ b/vllm/model_executor/models/gemma.py @@ -18,6 +18,7 @@ """Inference-only Gemma model compatible with HuggingFace weights.""" from collections.abc import Iterable from functools import cache +from itertools import islice from typing import Optional, Union import torch @@ -308,7 +309,7 @@ class GemmaModel(nn.Module): else: hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/gemma2.py b/vllm/model_executor/models/gemma2.py index 8cfe92c645..0bdb6c6bf7 100644 --- a/vllm/model_executor/models/gemma2.py +++ b/vllm/model_executor/models/gemma2.py @@ -17,6 +17,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -292,7 +293,7 @@ class Gemma2Model(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/gemma3.py b/vllm/model_executor/models/gemma3.py index b762be3c52..410c715d52 100644 --- a/vllm/model_executor/models/gemma3.py +++ b/vllm/model_executor/models/gemma3.py @@ -16,6 +16,7 @@ # See the License for the specific language governing permissions and # limitations under the License. from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -398,7 +399,7 @@ class Gemma3Model(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/glm4_moe.py b/vllm/model_executor/models/glm4_moe.py index fe5e46a998..fcc63815ac 100644 --- a/vllm/model_executor/models/glm4_moe.py +++ b/vllm/model_executor/models/glm4_moe.py @@ -24,6 +24,7 @@ """Inference-only GLM-4.5 model compatible with HuggingFace weights.""" import typing from collections.abc import Callable, Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -440,8 +441,7 @@ class Glm4MoeModel(nn.Module): hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for i in range(self.start_layer, self.end_layer): - layer = self.layers[i] + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer(positions, hidden_states, residual) if not get_pp_group().is_last_rank: diff --git a/vllm/model_executor/models/gpt2.py b/vllm/model_executor/models/gpt2.py index 98d7633739..4446b5ab18 100644 --- a/vllm/model_executor/models/gpt2.py +++ b/vllm/model_executor/models/gpt2.py @@ -20,6 +20,7 @@ # limitations under the License. """Inference-only GPT-2 model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -228,7 +229,7 @@ class GPT2Model(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] - for layer in self.h[self.start_layer:self.end_layer]: + for layer in islice(self.h, self.start_layer, self.end_layer): hidden_states = layer(hidden_states) if not get_pp_group().is_last_rank: diff --git a/vllm/model_executor/models/gpt_bigcode.py b/vllm/model_executor/models/gpt_bigcode.py index 036ded530f..d5c2604145 100644 --- a/vllm/model_executor/models/gpt_bigcode.py +++ b/vllm/model_executor/models/gpt_bigcode.py @@ -21,6 +21,7 @@ # limitations under the License. """Inference-only GPTBigCode model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -246,7 +247,7 @@ class GPTBigCodeModel(nn.Module): else: hidden_states = intermediate_tensors["hidden_states"] - for layer in self.h[self.start_layer:self.end_layer]: + for layer in islice(self.h, self.start_layer, self.end_layer): hidden_states = layer(hidden_states) if not get_pp_group().is_last_rank: diff --git a/vllm/model_executor/models/gpt_j.py b/vllm/model_executor/models/gpt_j.py index bd162a5e57..584c7f5d8a 100644 --- a/vllm/model_executor/models/gpt_j.py +++ b/vllm/model_executor/models/gpt_j.py @@ -19,6 +19,7 @@ # limitations under the License. """Inference-only GPT-J model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -223,7 +224,7 @@ class GPTJModel(nn.Module): hidden_states = self.get_input_embeddings(input_ids) else: hidden_states = intermediate_tensors["hidden_states"] - for layer in self.h[self.start_layer:self.end_layer]: + for layer in islice(self.h, self.start_layer, self.end_layer): hidden_states = layer(position_ids, hidden_states) if not get_pp_group().is_last_rank: return IntermediateTensors({"hidden_states": hidden_states}) @@ -336,4 +337,4 @@ class GPTJForCausalLM(nn.Module, SupportsPP): def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]: loader = AutoWeightsLoader(self) - return loader.load_weights(weights) \ No newline at end of file + return loader.load_weights(weights) diff --git a/vllm/model_executor/models/gpt_neox.py b/vllm/model_executor/models/gpt_neox.py index d418d8bb86..e97db188e2 100644 --- a/vllm/model_executor/models/gpt_neox.py +++ b/vllm/model_executor/models/gpt_neox.py @@ -19,6 +19,7 @@ # limitations under the License. """Inference-only GPT-NeoX model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -235,7 +236,7 @@ class GPTNeoXModel(nn.Module): hidden_states = self.get_input_embeddings(input_ids) else: hidden_states = intermediate_tensors["hidden_states"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states = layer(position_ids, hidden_states) if not get_pp_group().is_last_rank: return IntermediateTensors({"hidden_states": hidden_states}) diff --git a/vllm/model_executor/models/granite.py b/vllm/model_executor/models/granite.py index 507a9206c4..f8ba022921 100644 --- a/vllm/model_executor/models/granite.py +++ b/vllm/model_executor/models/granite.py @@ -24,6 +24,7 @@ # limitations under the License. """Inference-only IBM Granite model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -316,7 +317,7 @@ class GraniteModel(nn.Module): hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states = layer(positions, hidden_states) if not get_pp_group().is_last_rank: diff --git a/vllm/model_executor/models/granitemoe.py b/vllm/model_executor/models/granitemoe.py index 7d31854dce..07ad75bcf1 100644 --- a/vllm/model_executor/models/granitemoe.py +++ b/vllm/model_executor/models/granitemoe.py @@ -24,6 +24,7 @@ # limitations under the License. """Inference-only GraniteMoe model.""" from collections.abc import Iterable +from itertools import islice from typing import Any, Optional import torch @@ -303,7 +304,7 @@ class GraniteMoeModel(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states = layer(positions, hidden_states) if not get_pp_group().is_last_rank: return IntermediateTensors({ diff --git a/vllm/model_executor/models/granitemoehybrid.py b/vllm/model_executor/models/granitemoehybrid.py index f451e65338..79c6d8146b 100644 --- a/vllm/model_executor/models/granitemoehybrid.py +++ b/vllm/model_executor/models/granitemoehybrid.py @@ -397,8 +397,7 @@ class GraniteMoeHybridModel(nn.Module): residual = intermediate_tensors["residual"] num_attn = 0 - for i in range(len(self.layers)): - layer = self.layers[i] + for i, layer in enumerate(self.layers): if isinstance(layer, GraniteMoeHybridAttentionDecoderLayer): num_attn += 1 diff --git a/vllm/model_executor/models/granitemoeshared.py b/vllm/model_executor/models/granitemoeshared.py index 1e2e854417..0b568a4b22 100644 --- a/vllm/model_executor/models/granitemoeshared.py +++ b/vllm/model_executor/models/granitemoeshared.py @@ -6,6 +6,7 @@ The architecture is the same as granitemoe but with the addition of shared experts. """ from collections.abc import Iterable +from itertools import islice from typing import Optional import torch @@ -200,8 +201,7 @@ class GraniteMoeSharedModel(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for i in range(self.start_layer, self.end_layer): - layer = self.layers[i] + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states = layer(positions, hidden_states) if not get_pp_group().is_last_rank: return IntermediateTensors({ diff --git a/vllm/model_executor/models/grok1.py b/vllm/model_executor/models/grok1.py index 3659249cd8..a591134383 100644 --- a/vllm/model_executor/models/grok1.py +++ b/vllm/model_executor/models/grok1.py @@ -23,6 +23,7 @@ # limitations under the License. """Inference-only Grok1 model.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -347,8 +348,7 @@ class Grok1Model(nn.Module): hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for i in range(self.start_layer, self.end_layer): - layer = self.layers[i] + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer(positions, hidden_states, residual) if not get_pp_group().is_last_rank: diff --git a/vllm/model_executor/models/internlm2.py b/vllm/model_executor/models/internlm2.py index 26bc48ffbd..320e8d9d48 100644 --- a/vllm/model_executor/models/internlm2.py +++ b/vllm/model_executor/models/internlm2.py @@ -3,6 +3,7 @@ from collections.abc import Iterable from functools import partial +from itertools import islice from typing import Any, Optional, Union import torch @@ -297,7 +298,7 @@ class InternLM2Model(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer(positions, hidden_states, residual) if not get_pp_group().is_last_rank: return IntermediateTensors({ diff --git a/vllm/model_executor/models/internlm2_ve.py b/vllm/model_executor/models/internlm2_ve.py index 4bbb49da0e..d41ac2b70b 100644 --- a/vllm/model_executor/models/internlm2_ve.py +++ b/vllm/model_executor/models/internlm2_ve.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project +from itertools import islice from typing import Optional, Union import torch @@ -123,7 +124,7 @@ class InternLM2VEModel(InternLM2Model): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/jais.py b/vllm/model_executor/models/jais.py index bed4a5dff2..91a06dd502 100644 --- a/vllm/model_executor/models/jais.py +++ b/vllm/model_executor/models/jais.py @@ -23,6 +23,7 @@ import math from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -276,7 +277,7 @@ class JAISModel(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] - for layer in self.h[self.start_layer:self.end_layer]: + for layer in islice(self.h, self.start_layer, self.end_layer): hidden_states = layer(hidden_states) if not get_pp_group().is_last_rank: diff --git a/vllm/model_executor/models/jamba.py b/vllm/model_executor/models/jamba.py index 3c1a0b68df..aebd2cbe2e 100644 --- a/vllm/model_executor/models/jamba.py +++ b/vllm/model_executor/models/jamba.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """Inference-only Jamba model.""" from collections.abc import Iterable +from itertools import islice from typing import Optional import torch @@ -350,7 +351,7 @@ class JambaModel(nn.Module): kv_cache_index = 0 mamba_cache_index = 0 - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): layer_mamba_cache_params = None if isinstance(layer, JambaAttentionDecoderLayer): kv_cache_index += 1 diff --git a/vllm/model_executor/models/lfm2.py b/vllm/model_executor/models/lfm2.py index 5f3148b47e..927f78c4e4 100644 --- a/vllm/model_executor/models/lfm2.py +++ b/vllm/model_executor/models/lfm2.py @@ -1,6 +1,7 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project from collections.abc import Iterable +from itertools import islice from typing import Any, Optional import torch @@ -374,7 +375,7 @@ class Lfm2Model(nn.Module): hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( positions=positions, hidden_states=hidden_states, @@ -554,4 +555,4 @@ class Lfm2ForCausalLM(nn.Module, HasInnerState, SupportsLoRA, SupportsPP, skip_prefixes=(["lm_head."] if self.config.tie_word_embeddings else None), ) - return loader.load_weights(weights) \ No newline at end of file + return loader.load_weights(weights) diff --git a/vllm/model_executor/models/llama.py b/vllm/model_executor/models/llama.py index e39a6df843..a22bde194f 100644 --- a/vllm/model_executor/models/llama.py +++ b/vllm/model_executor/models/llama.py @@ -24,6 +24,7 @@ # limitations under the License. """Inference-only LLaMA model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -383,7 +384,7 @@ class LlamaModel(nn.Module): aux_hidden_states = [] for idx, layer in enumerate( - self.layers[self.start_layer:self.end_layer]): + islice(self.layers, self.start_layer, self.end_layer)): if idx in self.aux_hidden_state_layers: aux_hidden_states.append(hidden_states + residual) hidden_states, residual = layer(positions, hidden_states, residual) diff --git a/vllm/model_executor/models/mamba2.py b/vllm/model_executor/models/mamba2.py index 3432cf29fe..81b9a12538 100644 --- a/vllm/model_executor/models/mamba2.py +++ b/vllm/model_executor/models/mamba2.py @@ -164,9 +164,7 @@ class Mamba2Model(nn.Module): # v1 get mamba2_metadata from forward_context mamba2_metadata = None - for i in range(len(self.layers)): - layer = self.layers[i] - + for i, layer in enumerate(self.layers): hidden_states, residual = layer( positions=positions, hidden_states=hidden_states, diff --git a/vllm/model_executor/models/mimo.py b/vllm/model_executor/models/mimo.py index 5b497dd9d8..ea5292d0df 100644 --- a/vllm/model_executor/models/mimo.py +++ b/vllm/model_executor/models/mimo.py @@ -26,6 +26,7 @@ # limitations under the License. """Inference-only MiMo model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -74,7 +75,7 @@ class MiMoModel(Qwen2Model): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/minicpm.py b/vllm/model_executor/models/minicpm.py index d398a5d12b..5632f8c8cc 100644 --- a/vllm/model_executor/models/minicpm.py +++ b/vllm/model_executor/models/minicpm.py @@ -25,6 +25,7 @@ """Inference-only MiniCPM model compatible with HuggingFace weights.""" import math from collections.abc import Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -414,7 +415,7 @@ class MiniCPMModel(nn.Module): hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/minimax_text_01.py b/vllm/model_executor/models/minimax_text_01.py index 176a40179b..93ef13d5d1 100644 --- a/vllm/model_executor/models/minimax_text_01.py +++ b/vllm/model_executor/models/minimax_text_01.py @@ -3,6 +3,7 @@ """Inference-only MiniMaxText01 model.""" import math from collections.abc import Iterable +from itertools import islice from typing import TYPE_CHECKING, Optional, Union if TYPE_CHECKING: @@ -1019,8 +1020,7 @@ class MiniMaxText01Model(nn.Module): minimax_cache_index = 0 - for i in range(self.start_layer, self.end_layer): - layer = self.layers[i] + for layer in islice(self.layers, self.start_layer, self.end_layer): _caches = None if not envs.VLLM_USE_V1 and isinstance( layer.self_attn, MiniMaxText01LinearAttention): diff --git a/vllm/model_executor/models/mixtral.py b/vllm/model_executor/models/mixtral.py index 30de83da49..52fcbbfc58 100644 --- a/vllm/model_executor/models/mixtral.py +++ b/vllm/model_executor/models/mixtral.py @@ -24,6 +24,7 @@ # limitations under the License. """Inference-only Mixtral model.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -307,7 +308,7 @@ class MixtralModel(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer(positions, hidden_states, residual) if not get_pp_group().is_last_rank: return IntermediateTensors({ diff --git a/vllm/model_executor/models/mixtral_quant.py b/vllm/model_executor/models/mixtral_quant.py index c8ad358c62..692267b4d7 100644 --- a/vllm/model_executor/models/mixtral_quant.py +++ b/vllm/model_executor/models/mixtral_quant.py @@ -24,6 +24,7 @@ # limitations under the License. """Inference-only Mixtral model.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import numpy as np @@ -346,7 +347,7 @@ class MixtralModel(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer(positions, hidden_states, residual) if not get_pp_group().is_last_rank: return IntermediateTensors({ diff --git a/vllm/model_executor/models/molmo.py b/vllm/model_executor/models/molmo.py index 5fc28ed0e4..b2fc7be1af 100644 --- a/vllm/model_executor/models/molmo.py +++ b/vllm/model_executor/models/molmo.py @@ -5,6 +5,7 @@ import math from collections.abc import Iterable, Mapping, Sequence from dataclasses import dataclass from functools import cached_property, partial +from itertools import islice from typing import Annotated, Optional, Union import numpy as np @@ -842,7 +843,7 @@ class MolmoModel(nn.Module, SupportsQuant): residual = intermediate_tensors["residual"] # Apply blocks one-by-one. - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/mpt.py b/vllm/model_executor/models/mpt.py index 8db52a6992..48ac91fa6d 100644 --- a/vllm/model_executor/models/mpt.py +++ b/vllm/model_executor/models/mpt.py @@ -4,6 +4,7 @@ # Adapted from https://huggingface.co/mosaicml/mpt-7b/tree/main import math from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -260,7 +261,7 @@ class MPTModel(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] - for block in self.blocks[self.start_layer:self.end_layer]: + for block in islice(self.blocks, self.start_layer, self.end_layer): hidden_states = block(position_ids, hidden_states) if not get_pp_group().is_last_rank: return IntermediateTensors({"hidden_states": hidden_states}) diff --git a/vllm/model_executor/models/nemotron.py b/vllm/model_executor/models/nemotron.py index eabf47b1ae..10adc62d3d 100644 --- a/vllm/model_executor/models/nemotron.py +++ b/vllm/model_executor/models/nemotron.py @@ -24,6 +24,7 @@ # limitations under the License. """Inference-only Nemotron model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -353,7 +354,7 @@ class NemotronModel(nn.Module): hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer(positions, hidden_states, residual) if not get_pp_group().is_last_rank: diff --git a/vllm/model_executor/models/nemotron_h.py b/vllm/model_executor/models/nemotron_h.py index 07cd5a4c6e..8a563288cb 100644 --- a/vllm/model_executor/models/nemotron_h.py +++ b/vllm/model_executor/models/nemotron_h.py @@ -399,8 +399,7 @@ class NemotronHModel(nn.Module): residual = None num_non_mamba_layers = 0 - for i in range(len(self.layers)): - layer = self.layers[i] + for i, layer in enumerate(self.layers): layer_mamba_cache_params = None if isinstance(layer, NemotronHMambaDecoderLayer) and mamba_cache_params: diff --git a/vllm/model_executor/models/nemotron_nas.py b/vllm/model_executor/models/nemotron_nas.py index a766ed9476..f8e38dcd80 100644 --- a/vllm/model_executor/models/nemotron_nas.py +++ b/vllm/model_executor/models/nemotron_nas.py @@ -24,6 +24,7 @@ # limitations under the License. """Inference-only deci model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -287,8 +288,7 @@ class DeciModel(nn.Module): residual = intermediate_tensors["residual"] kv_cache_index = 0 - for i in range(self.start_layer, self.end_layer): - layer = self.layers[i] + for layer in islice(self.layers, self.start_layer, self.end_layer): if not layer._is_no_op_attention: hidden_states, residual = layer(positions, hidden_states, residual) diff --git a/vllm/model_executor/models/olmo.py b/vllm/model_executor/models/olmo.py index 01639d3981..7157598956 100644 --- a/vllm/model_executor/models/olmo.py +++ b/vllm/model_executor/models/olmo.py @@ -24,6 +24,7 @@ # limitations under the License. """Inference-only OLMo model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -280,7 +281,7 @@ class OlmoModel(nn.Module): hidden_states = intermediate_tensors["hidden_states"] # Apply blocks one-by-one. - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): # shape: (batch_size, seq_len, d_model) hidden_states = layer(positions, hidden_states) diff --git a/vllm/model_executor/models/olmo2.py b/vllm/model_executor/models/olmo2.py index 66a0f91155..bccd1b8704 100644 --- a/vllm/model_executor/models/olmo2.py +++ b/vllm/model_executor/models/olmo2.py @@ -26,6 +26,7 @@ from collections.abc import Iterable from functools import partial +from itertools import islice from typing import Optional, Union import torch @@ -305,7 +306,7 @@ class Olmo2Model(nn.Module): assert isinstance(hidden_states, torch.Tensor) # Apply blocks one-by-one. - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): # shape: (batch_size, seq_len, d_model) hidden_states = layer(positions, hidden_states) diff --git a/vllm/model_executor/models/olmoe.py b/vllm/model_executor/models/olmoe.py index a47c3bd416..9b8525bfad 100644 --- a/vllm/model_executor/models/olmoe.py +++ b/vllm/model_executor/models/olmoe.py @@ -15,6 +15,7 @@ """Inference-only OLMoE model compatible with HuggingFace weights.""" from collections.abc import Iterable from functools import partial +from itertools import islice from typing import Any, Optional, Union import torch @@ -314,7 +315,7 @@ class OlmoeModel(nn.Module): hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/opt.py b/vllm/model_executor/models/opt.py index 9eaac1e28d..b92e586f0b 100644 --- a/vllm/model_executor/models/opt.py +++ b/vllm/model_executor/models/opt.py @@ -20,6 +20,7 @@ # limitations under the License. """Inference-only OPT model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -269,7 +270,7 @@ class OPTDecoder(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states = layer(hidden_states) if not get_pp_group().is_last_rank: diff --git a/vllm/model_executor/models/orion.py b/vllm/model_executor/models/orion.py index d121188ba5..add751ebf0 100644 --- a/vllm/model_executor/models/orion.py +++ b/vllm/model_executor/models/orion.py @@ -7,6 +7,7 @@ # LICENSE: https://huggingface.co/OrionStarAI/Orion-14B-Base/blob/main/LICENSE """Inference-only Orion-14B model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -252,7 +253,7 @@ class OrionModel(nn.Module): else: assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states = layer(positions, hidden_states) if not get_pp_group().is_last_rank: return IntermediateTensors({ diff --git a/vllm/model_executor/models/persimmon.py b/vllm/model_executor/models/persimmon.py index f8db99eb92..6bdd38d068 100644 --- a/vllm/model_executor/models/persimmon.py +++ b/vllm/model_executor/models/persimmon.py @@ -23,6 +23,7 @@ # limitations under the License. """Inference-only persimmon model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -255,7 +256,7 @@ class PersimmonModel(nn.Module): else: assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states = layer(positions, hidden_states) if not get_pp_group().is_last_rank: return IntermediateTensors({"hidden_states": hidden_states}) diff --git a/vllm/model_executor/models/phi.py b/vllm/model_executor/models/phi.py index 21d517b3a4..789b24eb0f 100644 --- a/vllm/model_executor/models/phi.py +++ b/vllm/model_executor/models/phi.py @@ -38,6 +38,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. """Inference-only Phi-1.5 model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -240,7 +241,7 @@ class PhiModel(nn.Module): else: assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states = layer(positions, hidden_states) if not get_pp_group().is_last_rank: diff --git a/vllm/model_executor/models/phimoe.py b/vllm/model_executor/models/phimoe.py index cfe0982204..15ae081a9f 100644 --- a/vllm/model_executor/models/phimoe.py +++ b/vllm/model_executor/models/phimoe.py @@ -24,6 +24,7 @@ # limitations under the License. """Inference-only PhiMoE model.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -500,7 +501,7 @@ class PhiMoEModel(nn.Module): hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/plamo2.py b/vllm/model_executor/models/plamo2.py index e5034b5362..7f70e44b10 100644 --- a/vllm/model_executor/models/plamo2.py +++ b/vllm/model_executor/models/plamo2.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """Inference-only PLaMo2 model.""" from collections.abc import Iterable +from itertools import islice from typing import Optional import torch @@ -614,7 +615,7 @@ class Plamo2Decoder(torch.nn.Module): mamba2_metadata: Mamba2Metadata, ) -> torch.Tensor: mamba_cache_index = 0 - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): layer_mamba_cache_params = None if layer.is_mamba: layer_mamba_cache_params = mamba_cache_params.at_layer_idx( diff --git a/vllm/model_executor/models/qwen.py b/vllm/model_executor/models/qwen.py index e804f03e01..e32dc51f00 100644 --- a/vllm/model_executor/models/qwen.py +++ b/vllm/model_executor/models/qwen.py @@ -8,6 +8,7 @@ """Inference-only QWen model compatible with HuggingFace weights.""" import json from collections.abc import Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -234,7 +235,7 @@ class QWenModel(nn.Module): hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.h[self.start_layer:self.end_layer]: + for layer in islice(self.h, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/qwen2.py b/vllm/model_executor/models/qwen2.py index 27c1e68c67..54dc0bebd9 100644 --- a/vllm/model_executor/models/qwen2.py +++ b/vllm/model_executor/models/qwen2.py @@ -25,6 +25,7 @@ # limitations under the License. """Inference-only Qwen2 model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -358,7 +359,7 @@ class Qwen2Model(nn.Module): aux_hidden_states = [] for idx, layer in enumerate( - self.layers[self.start_layer:self.end_layer]): + islice(self.layers, self.start_layer, self.end_layer)): if idx in self.aux_hidden_state_layers: aux_hidden_states.append(hidden_states + residual) hidden_states, residual = layer(positions, hidden_states, residual) diff --git a/vllm/model_executor/models/qwen2_moe.py b/vllm/model_executor/models/qwen2_moe.py index 5c4ad34246..5551ad8c32 100644 --- a/vllm/model_executor/models/qwen2_moe.py +++ b/vllm/model_executor/models/qwen2_moe.py @@ -25,6 +25,7 @@ # limitations under the License. """Inference-only Qwen2MoE model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -381,7 +382,7 @@ class Qwen2MoeModel(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer(positions, hidden_states, residual) if not get_pp_group().is_last_rank: return IntermediateTensors({ diff --git a/vllm/model_executor/models/qwen3_moe.py b/vllm/model_executor/models/qwen3_moe.py index 8498f61b35..94e6a66bea 100644 --- a/vllm/model_executor/models/qwen3_moe.py +++ b/vllm/model_executor/models/qwen3_moe.py @@ -24,6 +24,7 @@ """Inference-only Qwen3MoE model compatible with HuggingFace weights.""" import typing from collections.abc import Callable, Iterable +from itertools import islice from typing import Any, Optional, Union import torch @@ -420,8 +421,7 @@ class Qwen3MoeModel(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for i in range(self.start_layer, self.end_layer): - layer = self.layers[i] + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer(positions, hidden_states, residual) if not get_pp_group().is_last_rank: return IntermediateTensors({ diff --git a/vllm/model_executor/models/seed_oss.py b/vllm/model_executor/models/seed_oss.py index 34a87a6a69..e3c7c700f8 100644 --- a/vllm/model_executor/models/seed_oss.py +++ b/vllm/model_executor/models/seed_oss.py @@ -23,6 +23,7 @@ # limitations under the License. """Inference-only SeedOss model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -340,7 +341,7 @@ class SeedOssModel(nn.Module): assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer( positions, hidden_states, diff --git a/vllm/model_executor/models/stablelm.py b/vllm/model_executor/models/stablelm.py index d6ec743ce8..9e880ebd50 100644 --- a/vllm/model_executor/models/stablelm.py +++ b/vllm/model_executor/models/stablelm.py @@ -22,6 +22,7 @@ """Inference-only StabeLM (https://github.com/Stability-AI/StableLM) model compatible with HuggingFace weights.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -247,7 +248,7 @@ class StableLMEpochModel(nn.Module): else: assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer(positions, hidden_states) if not get_pp_group().is_last_rank: return IntermediateTensors({"hidden_states": hidden_states}) diff --git a/vllm/model_executor/models/starcoder2.py b/vllm/model_executor/models/starcoder2.py index 9d9a2bff0e..62ff9b6182 100644 --- a/vllm/model_executor/models/starcoder2.py +++ b/vllm/model_executor/models/starcoder2.py @@ -21,6 +21,7 @@ # limitations under the License. """ PyTorch Starcoder2 model.""" from collections.abc import Iterable +from itertools import islice from typing import Optional, Union import torch @@ -250,7 +251,7 @@ class Starcoder2Model(nn.Module): else: assert intermediate_tensors is not None hidden_states = intermediate_tensors["hidden_states"] - for layer in self.layers[self.start_layer:self.end_layer]: + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states = layer(positions, hidden_states) if not get_pp_group().is_last_rank: return IntermediateTensors({"hidden_states": hidden_states}) diff --git a/vllm/model_executor/models/step3_text.py b/vllm/model_executor/models/step3_text.py index 47d2af5c2a..97611d3e14 100644 --- a/vllm/model_executor/models/step3_text.py +++ b/vllm/model_executor/models/step3_text.py @@ -2,6 +2,7 @@ # SPDX-FileCopyrightText: Copyright contributors to the vLLM project """Inference-only Jurassic model.""" from collections.abc import Iterable +from itertools import islice from typing import Any, Optional import torch @@ -346,8 +347,7 @@ class Step3TextModel(nn.Module): hidden_states = intermediate_tensors["hidden_states"] residual = intermediate_tensors["residual"] - for i in range(self.start_layer, self.end_layer): - layer = self.layers[i] + for layer in islice(self.layers, self.start_layer, self.end_layer): hidden_states, residual = layer(positions, hidden_states, residual) if not get_pp_group().is_last_rank: