[Models] Improve iteration over layers (#19497)

Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com>

Author: Lukas Geiger (committed by GitHub)
Date: 2025-08-29 02:26:34 +01:00
Commit: de533ab2a1 (parent 235c9db8a7)

65 changed files with 129 additions and 83 deletions
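
The change is the same in every file below: instead of slicing the layer container, which for an nn.ModuleList builds a brand-new ModuleList on every forward pass, the loops now use itertools.islice to walk the pipeline-parallel layer range lazily over the existing container. A minimal sketch of the before/after pattern (the model, layer sizes, and start/end values here are illustrative, not taken from vLLM):

    from itertools import islice

    import torch
    from torch import nn


    class ToyModel(nn.Module):

        def __init__(self, num_layers: int = 8) -> None:
            super().__init__()
            self.layers = nn.ModuleList(
                nn.Linear(16, 16) for _ in range(num_layers))
            # With pipeline parallelism, each rank runs only a slice of layers.
            self.start_layer, self.end_layer = 2, 6

        def forward_old(self, hidden_states: torch.Tensor) -> torch.Tensor:
            # Slicing a ModuleList constructs a new ModuleList on every call.
            for layer in self.layers[self.start_layer:self.end_layer]:
                hidden_states = layer(hidden_states)
            return hidden_states

        def forward_new(self, hidden_states: torch.Tensor) -> torch.Tensor:
            # islice iterates the existing container lazily, with no copy.
            for layer in islice(self.layers, self.start_layer, self.end_layer):
                hidden_states = layer(hidden_states)
            return hidden_states


    model = ToyModel()
    x = torch.randn(1, 16)
    assert torch.allclose(model.forward_old(x), model.forward_new(x))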

View File

@ -9,6 +9,7 @@
# activation.
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -243,7 +244,7 @@ class ArceeModel(nn.Module):
aux_hidden_states: list[torch.Tensor] = []
for idx, layer in enumerate(
-self.layers[self.start_layer:self.end_layer]):
+islice(self.layers, self.start_layer, self.end_layer)):
if idx in self.aux_hidden_state_layers:
aux_hidden_states.append(
hidden_states +

View File

@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Inference-only Snowflake Arctic model."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -403,7 +404,7 @@ class ArcticModel(nn.Module):
else:
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states = layer(positions, hidden_states)
if not get_pp_group().is_last_rank:
return IntermediateTensors({"hidden_states": hidden_states})

View File

@ -22,6 +22,7 @@
"""Inference-only BaiChuan model compatible with HuggingFace weights."""
import math
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -309,7 +310,7 @@ class BaiChuanModel(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only BailingMoE model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -359,8 +360,7 @@ class BailingMoeModel(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for i in range(self.start_layer, self.end_layer):
-layer = self.layers[i]
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
hidden_states,
position_ids,

View File

@ -345,8 +345,7 @@ class BambaModel(nn.Module):
residual = None
num_attn = 0
-for i in range(len(self.layers)):
-layer = self.layers[i]
+for i, layer in enumerate(self.layers):
if isinstance(layer, BambaAttentionDecoderLayer):
num_attn += 1
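
A few hybrid models (like the Bamba hunk above, and the Mamba2, NemotronH, and GraniteMoeHybrid hunks further down) iterate over all of self.layers rather than a pipeline slice; there the manual index loop is replaced by enumerate, which yields the same (index, layer) pairs without an explicit lookup per step. An illustrative sketch only, not code from this commit:

    from torch import nn

    layers = nn.ModuleList(nn.Identity() for _ in range(4))

    # Before: look up each layer by index.
    for i in range(len(layers)):
        layer = layers[i]
        print(i, type(layer).__name__)

    # After: enumerate() yields (index, layer) pairs directly.
    for i, layer in enumerate(layers):
        print(i, type(layer).__name__)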

View File

@ -20,6 +20,7 @@
"""Inference-only BLOOM model compatible with HuggingFace weights."""
import math
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -273,7 +274,7 @@ class BloomModel(nn.Module):
else:
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
-for layer in self.h[self.start_layer:self.end_layer]:
+for layer in islice(self.h, self.start_layer, self.end_layer):
hidden_states = layer(position_ids, hidden_states)
if not get_pp_group().is_last_rank:
return IntermediateTensors({"hidden_states": hidden_states})

View File

@ -3,6 +3,7 @@
from collections.abc import Iterable, Mapping, Sequence
from functools import cached_property
from itertools import islice
from typing import Annotated, Any, Literal, Optional, Union
import torch
@ -914,7 +915,7 @@ class ChameleonModel(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -5,6 +5,7 @@
"""Inference-only ChatGLM model compatible with THUDM weights."""
import json
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -281,7 +282,7 @@ class GLMTransformer(nn.Module):
hidden_states: torch.Tensor,
position_ids: torch.Tensor,
) -> Union[torch.Tensor, IntermediateTensors]:
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states = layer(hidden_states=hidden_states,
position_ids=position_ids)

View File

@ -23,6 +23,7 @@
# This file is based on the LLama model definition file in transformers
"""PyTorch Cohere model."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -322,7 +323,7 @@ class CohereModel(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -359,7 +360,7 @@ class DbrxModel(nn.Module):
else:
assert intermediate_tensors
hidden_states = intermediate_tensors["hidden_states"]
-for block in self.blocks[self.start_layer:self.end_layer]:
+for block in islice(self.blocks, self.start_layer, self.end_layer):
hidden_states = block(position_ids, hidden_states)
if not get_pp_group().is_last_rank:
return IntermediateTensors({"hidden_states": hidden_states})

View File

@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only Deepseek model."""
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -377,7 +378,7 @@ class DeepseekModel(nn.Module):
else:
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(positions, hidden_states, residual)
if not get_pp_group().is_last_rank:
return IntermediateTensors({

View File

@ -25,6 +25,7 @@
"""Inference-only DeepseekV2/DeepseekV3 model."""
import typing
from collections.abc import Callable, Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -712,7 +713,7 @@ class DeepseekV2Model(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(positions, hidden_states, residual)
if not get_pp_group().is_last_rank:

View File

@ -25,6 +25,7 @@
# limitations under the License.
"""Inference-only dots1 model."""
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -391,7 +392,7 @@ class Dots1Model(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -23,6 +23,7 @@
# limitations under the License.
"""Inference-only ErineMoE model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -419,8 +420,7 @@ class Ernie4_5_MoeModel(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for i in range(self.start_layer, self.end_layer):
-layer = self.layers[i]
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(positions, hidden_states, residual)
if not get_pp_group().is_last_rank:

View File

@ -23,6 +23,7 @@
# limitations under the License.
"""Inference-only Erine VL model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -508,8 +509,7 @@ class Ernie4_5_VLMoeModel(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for i in range(self.start_layer, self.end_layer):
-layer = self.layers[i]
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(positions, hidden_states, residual,
visual_token_mask, **kwargs)

View File

@ -26,6 +26,7 @@
"""Inference-only Exaone model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -371,7 +372,7 @@ class ExaoneModel(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.h[self.start_layer:self.end_layer]:
+for layer in islice(self.h, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -22,6 +22,7 @@
"""Inference-only Exaone model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -354,7 +355,7 @@ class Exaone4Model(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -22,6 +22,7 @@
import math
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -389,7 +390,7 @@ class FalconModel(nn.Module):
hidden_states = self.get_input_embeddings(input_ids)
else:
hidden_states = intermediate_tensors["hidden_states"]
-for layer in self.h[self.start_layer:self.end_layer]:
+for layer in islice(self.h, self.start_layer, self.end_layer):
hidden_states = layer(positions, hidden_states)
if not get_pp_group().is_last_rank:
return IntermediateTensors({"hidden_states": hidden_states})

View File

@ -18,6 +18,7 @@
"""Inference-only Gemma model compatible with HuggingFace weights."""
from collections.abc import Iterable
from functools import cache
from itertools import islice
from typing import Optional, Union
import torch
@ -308,7 +309,7 @@ class GemmaModel(nn.Module):
else:
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -17,6 +17,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -292,7 +293,7 @@ class Gemma2Model(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -16,6 +16,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -398,7 +399,7 @@ class Gemma3Model(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -24,6 +24,7 @@
"""Inference-only GLM-4.5 model compatible with HuggingFace weights."""
import typing
from collections.abc import Callable, Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -440,8 +441,7 @@ class Glm4MoeModel(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for i in range(self.start_layer, self.end_layer):
-layer = self.layers[i]
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(positions, hidden_states, residual)
if not get_pp_group().is_last_rank:

View File

@ -20,6 +20,7 @@
# limitations under the License.
"""Inference-only GPT-2 model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -228,7 +229,7 @@ class GPT2Model(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
-for layer in self.h[self.start_layer:self.end_layer]:
+for layer in islice(self.h, self.start_layer, self.end_layer):
hidden_states = layer(hidden_states)
if not get_pp_group().is_last_rank:

View File

@ -21,6 +21,7 @@
# limitations under the License.
"""Inference-only GPTBigCode model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -246,7 +247,7 @@ class GPTBigCodeModel(nn.Module):
else:
hidden_states = intermediate_tensors["hidden_states"]
-for layer in self.h[self.start_layer:self.end_layer]:
+for layer in islice(self.h, self.start_layer, self.end_layer):
hidden_states = layer(hidden_states)
if not get_pp_group().is_last_rank:

View File

@ -19,6 +19,7 @@
# limitations under the License.
"""Inference-only GPT-J model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -223,7 +224,7 @@ class GPTJModel(nn.Module):
hidden_states = self.get_input_embeddings(input_ids)
else:
hidden_states = intermediate_tensors["hidden_states"]
-for layer in self.h[self.start_layer:self.end_layer]:
+for layer in islice(self.h, self.start_layer, self.end_layer):
hidden_states = layer(position_ids, hidden_states)
if not get_pp_group().is_last_rank:
return IntermediateTensors({"hidden_states": hidden_states})

View File

@ -19,6 +19,7 @@
# limitations under the License.
"""Inference-only GPT-NeoX model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -235,7 +236,7 @@ class GPTNeoXModel(nn.Module):
hidden_states = self.get_input_embeddings(input_ids)
else:
hidden_states = intermediate_tensors["hidden_states"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states = layer(position_ids, hidden_states)
if not get_pp_group().is_last_rank:
return IntermediateTensors({"hidden_states": hidden_states})

View File

@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only IBM Granite model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -316,7 +317,7 @@ class GraniteModel(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states = layer(positions, hidden_states)
if not get_pp_group().is_last_rank:

View File

@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only GraniteMoe model."""
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional
import torch
@ -303,7 +304,7 @@ class GraniteMoeModel(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states = layer(positions, hidden_states)
if not get_pp_group().is_last_rank:
return IntermediateTensors({

View File

@ -397,8 +397,7 @@ class GraniteMoeHybridModel(nn.Module):
residual = intermediate_tensors["residual"]
num_attn = 0
-for i in range(len(self.layers)):
-layer = self.layers[i]
+for i, layer in enumerate(self.layers):
if isinstance(layer, GraniteMoeHybridAttentionDecoderLayer):
num_attn += 1

View File

@ -6,6 +6,7 @@ The architecture is the same as granitemoe but with the addition of shared
experts.
"""
from collections.abc import Iterable
from itertools import islice
from typing import Optional
import torch
@ -200,8 +201,7 @@ class GraniteMoeSharedModel(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for i in range(self.start_layer, self.end_layer):
-layer = self.layers[i]
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states = layer(positions, hidden_states)
if not get_pp_group().is_last_rank:
return IntermediateTensors({

View File

@ -23,6 +23,7 @@
# limitations under the License.
"""Inference-only Grok1 model."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -347,8 +348,7 @@ class Grok1Model(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for i in range(self.start_layer, self.end_layer):
-layer = self.layers[i]
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(positions, hidden_states, residual)
if not get_pp_group().is_last_rank:

View File

@ -3,6 +3,7 @@
from collections.abc import Iterable
from functools import partial
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -297,7 +298,7 @@ class InternLM2Model(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(positions, hidden_states, residual)
if not get_pp_group().is_last_rank:
return IntermediateTensors({

View File

@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from itertools import islice
from typing import Optional, Union
import torch
@ -123,7 +124,7 @@ class InternLM2VEModel(InternLM2Model):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -23,6 +23,7 @@
import math
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -276,7 +277,7 @@ class JAISModel(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
-for layer in self.h[self.start_layer:self.end_layer]:
+for layer in islice(self.h, self.start_layer, self.end_layer):
hidden_states = layer(hidden_states)
if not get_pp_group().is_last_rank:

View File

@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Inference-only Jamba model."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional
import torch
@ -350,7 +351,7 @@ class JambaModel(nn.Module):
kv_cache_index = 0
mamba_cache_index = 0
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
layer_mamba_cache_params = None
if isinstance(layer, JambaAttentionDecoderLayer):
kv_cache_index += 1

View File

@ -1,6 +1,7 @@
# SPDX-License-Identifier: Apache-2.0
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional
import torch
@ -374,7 +375,7 @@ class Lfm2Model(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions=positions,
hidden_states=hidden_states,

View File

@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only LLaMA model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -383,7 +384,7 @@ class LlamaModel(nn.Module):
aux_hidden_states = []
for idx, layer in enumerate(
-self.layers[self.start_layer:self.end_layer]):
+islice(self.layers, self.start_layer, self.end_layer)):
if idx in self.aux_hidden_state_layers:
aux_hidden_states.append(hidden_states + residual)
hidden_states, residual = layer(positions, hidden_states, residual)

View File

@ -164,9 +164,7 @@ class Mamba2Model(nn.Module):
# v1 get mamba2_metadata from forward_context
mamba2_metadata = None
-for i in range(len(self.layers)):
-layer = self.layers[i]
+for i, layer in enumerate(self.layers):
hidden_states, residual = layer(
positions=positions,
hidden_states=hidden_states,

View File

@ -26,6 +26,7 @@
# limitations under the License.
"""Inference-only MiMo model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -74,7 +75,7 @@ class MiMoModel(Qwen2Model):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -25,6 +25,7 @@
"""Inference-only MiniCPM model compatible with HuggingFace weights."""
import math
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -414,7 +415,7 @@ class MiniCPMModel(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -3,6 +3,7 @@
"""Inference-only MiniMaxText01 model."""
import math
from collections.abc import Iterable
from itertools import islice
from typing import TYPE_CHECKING, Optional, Union
if TYPE_CHECKING:
@ -1019,8 +1020,7 @@ class MiniMaxText01Model(nn.Module):
minimax_cache_index = 0
-for i in range(self.start_layer, self.end_layer):
-layer = self.layers[i]
+for layer in islice(self.layers, self.start_layer, self.end_layer):
_caches = None
if not envs.VLLM_USE_V1 and isinstance(
layer.self_attn, MiniMaxText01LinearAttention):

View File

@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only Mixtral model."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -307,7 +308,7 @@ class MixtralModel(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(positions, hidden_states, residual)
if not get_pp_group().is_last_rank:
return IntermediateTensors({

View File

@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only Mixtral model."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import numpy as np
@ -346,7 +347,7 @@ class MixtralModel(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(positions, hidden_states, residual)
if not get_pp_group().is_last_rank:
return IntermediateTensors({

View File

@ -5,6 +5,7 @@ import math
from collections.abc import Iterable, Mapping, Sequence
from dataclasses import dataclass
from functools import cached_property, partial
from itertools import islice
from typing import Annotated, Optional, Union
import numpy as np
@ -842,7 +843,7 @@ class MolmoModel(nn.Module, SupportsQuant):
residual = intermediate_tensors["residual"]
# Apply blocks one-by-one.
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -4,6 +4,7 @@
# Adapted from https://huggingface.co/mosaicml/mpt-7b/tree/main
import math
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -260,7 +261,7 @@ class MPTModel(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
-for block in self.blocks[self.start_layer:self.end_layer]:
+for block in islice(self.blocks, self.start_layer, self.end_layer):
hidden_states = block(position_ids, hidden_states)
if not get_pp_group().is_last_rank:
return IntermediateTensors({"hidden_states": hidden_states})

View File

@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only Nemotron model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -353,7 +354,7 @@ class NemotronModel(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(positions, hidden_states, residual)
if not get_pp_group().is_last_rank:

View File

@ -399,8 +399,7 @@ class NemotronHModel(nn.Module):
residual = None
num_non_mamba_layers = 0
-for i in range(len(self.layers)):
-layer = self.layers[i]
+for i, layer in enumerate(self.layers):
layer_mamba_cache_params = None
if isinstance(layer,
NemotronHMambaDecoderLayer) and mamba_cache_params:

View File

@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only deci model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -287,8 +288,7 @@ class DeciModel(nn.Module):
residual = intermediate_tensors["residual"]
kv_cache_index = 0
-for i in range(self.start_layer, self.end_layer):
-layer = self.layers[i]
+for layer in islice(self.layers, self.start_layer, self.end_layer):
if not layer._is_no_op_attention:
hidden_states, residual = layer(positions, hidden_states,
residual)

View File

@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only OLMo model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -280,7 +281,7 @@ class OlmoModel(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
# Apply blocks one-by-one.
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
# shape: (batch_size, seq_len, d_model)
hidden_states = layer(positions, hidden_states)

View File

@ -26,6 +26,7 @@
from collections.abc import Iterable
from functools import partial
from itertools import islice
from typing import Optional, Union
import torch
@ -305,7 +306,7 @@ class Olmo2Model(nn.Module):
assert isinstance(hidden_states, torch.Tensor)
# Apply blocks one-by-one.
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
# shape: (batch_size, seq_len, d_model)
hidden_states = layer(positions, hidden_states)

View File

@ -15,6 +15,7 @@
"""Inference-only OLMoE model compatible with HuggingFace weights."""
from collections.abc import Iterable
from functools import partial
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -314,7 +315,7 @@ class OlmoeModel(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -20,6 +20,7 @@
# limitations under the License.
"""Inference-only OPT model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -269,7 +270,7 @@ class OPTDecoder(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states = layer(hidden_states)
if not get_pp_group().is_last_rank:

View File

@ -7,6 +7,7 @@
# LICENSE: https://huggingface.co/OrionStarAI/Orion-14B-Base/blob/main/LICENSE
"""Inference-only Orion-14B model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -252,7 +253,7 @@ class OrionModel(nn.Module):
else:
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states = layer(positions, hidden_states)
if not get_pp_group().is_last_rank:
return IntermediateTensors({

View File

@ -23,6 +23,7 @@
# limitations under the License.
"""Inference-only persimmon model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -255,7 +256,7 @@ class PersimmonModel(nn.Module):
else:
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states = layer(positions, hidden_states)
if not get_pp_group().is_last_rank:
return IntermediateTensors({"hidden_states": hidden_states})

View File

@ -38,6 +38,7 @@
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
"""Inference-only Phi-1.5 model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -240,7 +241,7 @@ class PhiModel(nn.Module):
else:
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states = layer(positions, hidden_states)
if not get_pp_group().is_last_rank:

View File

@ -24,6 +24,7 @@
# limitations under the License.
"""Inference-only PhiMoE model."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -500,7 +501,7 @@ class PhiMoEModel(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Inference-only PLaMo2 model."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional
import torch
@ -614,7 +615,7 @@ class Plamo2Decoder(torch.nn.Module):
mamba2_metadata: Mamba2Metadata,
) -> torch.Tensor:
mamba_cache_index = 0
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
layer_mamba_cache_params = None
if layer.is_mamba:
layer_mamba_cache_params = mamba_cache_params.at_layer_idx(

View File

@ -8,6 +8,7 @@
"""Inference-only QWen model compatible with HuggingFace weights."""
import json
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -234,7 +235,7 @@ class QWenModel(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.h[self.start_layer:self.end_layer]:
+for layer in islice(self.h, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -25,6 +25,7 @@
# limitations under the License.
"""Inference-only Qwen2 model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -358,7 +359,7 @@ class Qwen2Model(nn.Module):
aux_hidden_states = []
for idx, layer in enumerate(
-self.layers[self.start_layer:self.end_layer]):
+islice(self.layers, self.start_layer, self.end_layer)):
if idx in self.aux_hidden_state_layers:
aux_hidden_states.append(hidden_states + residual)
hidden_states, residual = layer(positions, hidden_states, residual)

View File

@ -25,6 +25,7 @@
# limitations under the License.
"""Inference-only Qwen2MoE model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -381,7 +382,7 @@ class Qwen2MoeModel(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(positions, hidden_states, residual)
if not get_pp_group().is_last_rank:
return IntermediateTensors({

View File

@ -24,6 +24,7 @@
"""Inference-only Qwen3MoE model compatible with HuggingFace weights."""
import typing
from collections.abc import Callable, Iterable
from itertools import islice
from typing import Any, Optional, Union
import torch
@ -420,8 +421,7 @@ class Qwen3MoeModel(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for i in range(self.start_layer, self.end_layer):
-layer = self.layers[i]
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(positions, hidden_states, residual)
if not get_pp_group().is_last_rank:
return IntermediateTensors({

View File

@ -23,6 +23,7 @@
# limitations under the License.
"""Inference-only SeedOss model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -340,7 +341,7 @@ class SeedOssModel(nn.Module):
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(
positions,
hidden_states,

View File

@ -22,6 +22,7 @@
"""Inference-only StabeLM (https://github.com/Stability-AI/StableLM)
model compatible with HuggingFace weights."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -247,7 +248,7 @@ class StableLMEpochModel(nn.Module):
else:
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(positions, hidden_states)
if not get_pp_group().is_last_rank:
return IntermediateTensors({"hidden_states": hidden_states})

View File

@ -21,6 +21,7 @@
# limitations under the License.
""" PyTorch Starcoder2 model."""
from collections.abc import Iterable
from itertools import islice
from typing import Optional, Union
import torch
@ -250,7 +251,7 @@ class Starcoder2Model(nn.Module):
else:
assert intermediate_tensors is not None
hidden_states = intermediate_tensors["hidden_states"]
-for layer in self.layers[self.start_layer:self.end_layer]:
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states = layer(positions, hidden_states)
if not get_pp_group().is_last_rank:
return IntermediateTensors({"hidden_states": hidden_states})

View File

@ -2,6 +2,7 @@
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
"""Inference-only Jurassic model."""
from collections.abc import Iterable
from itertools import islice
from typing import Any, Optional
import torch
@ -346,8 +347,7 @@ class Step3TextModel(nn.Module):
hidden_states = intermediate_tensors["hidden_states"]
residual = intermediate_tensors["residual"]
-for i in range(self.start_layer, self.end_layer):
-layer = self.layers[i]
+for layer in islice(self.layers, self.start_layer, self.end_layer):
hidden_states, residual = layer(positions, hidden_states, residual)
if not get_pp_group().is_last_rank: