[Models] Improve iteration over layers (#19497)
Signed-off-by: Lukas Geiger <lukas.geiger94@gmail.com>
This commit is contained in:
@ -9,6 +9,7 @@
|
||||
# activation.
|
||||
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -243,7 +244,7 @@ class ArceeModel(nn.Module):
|
||||
|
||||
aux_hidden_states: list[torch.Tensor] = []
|
||||
for idx, layer in enumerate(
|
||||
self.layers[self.start_layer:self.end_layer]):
|
||||
islice(self.layers, self.start_layer, self.end_layer)):
|
||||
if idx in self.aux_hidden_state_layers:
|
||||
aux_hidden_states.append(
|
||||
hidden_states +
|
||||
|
@ -2,6 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""Inference-only Snowflake Arctic model."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -403,7 +404,7 @@ class ArcticModel(nn.Module):
|
||||
else:
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(positions, hidden_states)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({"hidden_states": hidden_states})
|
||||
|
@ -22,6 +22,7 @@
|
||||
"""Inference-only BaiChuan model compatible with HuggingFace weights."""
|
||||
import math
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -309,7 +310,7 @@ class BaiChuanModel(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -24,6 +24,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only BailingMoE model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -359,8 +360,7 @@ class BailingMoeModel(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
for i in range(self.start_layer, self.end_layer):
|
||||
layer = self.layers[i]
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
hidden_states,
|
||||
position_ids,
|
||||
|
@ -345,8 +345,7 @@ class BambaModel(nn.Module):
|
||||
|
||||
residual = None
|
||||
num_attn = 0
|
||||
for i in range(len(self.layers)):
|
||||
layer = self.layers[i]
|
||||
for i, layer in enumerate(self.layers):
|
||||
if isinstance(layer, BambaAttentionDecoderLayer):
|
||||
num_attn += 1
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
"""Inference-only BLOOM model compatible with HuggingFace weights."""
|
||||
import math
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -273,7 +274,7 @@ class BloomModel(nn.Module):
|
||||
else:
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
for layer in self.h[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.h, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(position_ids, hidden_states)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({"hidden_states": hidden_states})
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
from collections.abc import Iterable, Mapping, Sequence
|
||||
from functools import cached_property
|
||||
from itertools import islice
|
||||
from typing import Annotated, Any, Literal, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -914,7 +915,7 @@ class ChameleonModel(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -5,6 +5,7 @@
|
||||
"""Inference-only ChatGLM model compatible with THUDM weights."""
|
||||
import json
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -281,7 +282,7 @@ class GLMTransformer(nn.Module):
|
||||
hidden_states: torch.Tensor,
|
||||
position_ids: torch.Tensor,
|
||||
) -> Union[torch.Tensor, IntermediateTensors]:
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(hidden_states=hidden_states,
|
||||
position_ids=position_ids)
|
||||
|
||||
|
@ -23,6 +23,7 @@
|
||||
# This file is based on the LLama model definition file in transformers
|
||||
"""PyTorch Cohere model."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -322,7 +323,7 @@ class CohereModel(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -2,6 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -359,7 +360,7 @@ class DbrxModel(nn.Module):
|
||||
else:
|
||||
assert intermediate_tensors
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
for block in self.blocks[self.start_layer:self.end_layer]:
|
||||
for block in islice(self.blocks, self.start_layer, self.end_layer):
|
||||
hidden_states = block(position_ids, hidden_states)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({"hidden_states": hidden_states})
|
||||
|
@ -24,6 +24,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only Deepseek model."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -377,7 +378,7 @@ class DeepseekModel(nn.Module):
|
||||
else:
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(positions, hidden_states, residual)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({
|
||||
|
@ -25,6 +25,7 @@
|
||||
"""Inference-only DeepseekV2/DeepseekV3 model."""
|
||||
import typing
|
||||
from collections.abc import Callable, Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -712,7 +713,7 @@ class DeepseekV2Model(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(positions, hidden_states, residual)
|
||||
|
||||
if not get_pp_group().is_last_rank:
|
||||
|
@ -25,6 +25,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only dots1 model."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -391,7 +392,7 @@ class Dots1Model(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -23,6 +23,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only ErnieMoE model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -419,8 +420,7 @@ class Ernie4_5_MoeModel(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
for i in range(self.start_layer, self.end_layer):
|
||||
layer = self.layers[i]
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(positions, hidden_states, residual)
|
||||
|
||||
if not get_pp_group().is_last_rank:
|
||||
|
@ -23,6 +23,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only Ernie VL model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -508,8 +509,7 @@ class Ernie4_5_VLMoeModel(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
for i in range(self.start_layer, self.end_layer):
|
||||
layer = self.layers[i]
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(positions, hidden_states, residual,
|
||||
visual_token_mask, **kwargs)
|
||||
|
||||
|
@ -26,6 +26,7 @@
|
||||
"""Inference-only Exaone model compatible with HuggingFace weights."""
|
||||
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -371,7 +372,7 @@ class ExaoneModel(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
for layer in self.h[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.h, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -22,6 +22,7 @@
|
||||
"""Inference-only Exaone model compatible with HuggingFace weights."""
|
||||
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -354,7 +355,7 @@ class Exaone4Model(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -22,6 +22,7 @@
|
||||
|
||||
import math
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -389,7 +390,7 @@ class FalconModel(nn.Module):
|
||||
hidden_states = self.get_input_embeddings(input_ids)
|
||||
else:
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
for layer in self.h[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.h, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(positions, hidden_states)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({"hidden_states": hidden_states})
|
||||
|
@ -18,6 +18,7 @@
|
||||
"""Inference-only Gemma model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from functools import cache
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -308,7 +309,7 @@ class GemmaModel(nn.Module):
|
||||
else:
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -17,6 +17,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -292,7 +293,7 @@ class Gemma2Model(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -16,6 +16,7 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -398,7 +399,7 @@ class Gemma3Model(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -24,6 +24,7 @@
|
||||
"""Inference-only GLM-4.5 model compatible with HuggingFace weights."""
|
||||
import typing
|
||||
from collections.abc import Callable, Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -440,8 +441,7 @@ class Glm4MoeModel(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
for i in range(self.start_layer, self.end_layer):
|
||||
layer = self.layers[i]
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(positions, hidden_states, residual)
|
||||
|
||||
if not get_pp_group().is_last_rank:
|
||||
|
@ -20,6 +20,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only GPT-2 model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -228,7 +229,7 @@ class GPT2Model(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
|
||||
for layer in self.h[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.h, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(hidden_states)
|
||||
|
||||
if not get_pp_group().is_last_rank:
|
||||
|
@ -21,6 +21,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only GPTBigCode model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -246,7 +247,7 @@ class GPTBigCodeModel(nn.Module):
|
||||
else:
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
|
||||
for layer in self.h[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.h, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(hidden_states)
|
||||
|
||||
if not get_pp_group().is_last_rank:
|
||||
|
@ -19,6 +19,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only GPT-J model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -223,7 +224,7 @@ class GPTJModel(nn.Module):
|
||||
hidden_states = self.get_input_embeddings(input_ids)
|
||||
else:
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
for layer in self.h[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.h, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(position_ids, hidden_states)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({"hidden_states": hidden_states})
|
||||
|
@ -19,6 +19,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only GPT-NeoX model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -235,7 +236,7 @@ class GPTNeoXModel(nn.Module):
|
||||
hidden_states = self.get_input_embeddings(input_ids)
|
||||
else:
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(position_ids, hidden_states)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({"hidden_states": hidden_states})
|
||||
|
@ -24,6 +24,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only IBM Granite model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -316,7 +317,7 @@ class GraniteModel(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(positions, hidden_states)
|
||||
|
||||
if not get_pp_group().is_last_rank:
|
||||
|
@ -24,6 +24,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only GraniteMoe model."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional
|
||||
|
||||
import torch
|
||||
@ -303,7 +304,7 @@ class GraniteMoeModel(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(positions, hidden_states)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({
|
||||
|
@ -397,8 +397,7 @@ class GraniteMoeHybridModel(nn.Module):
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
num_attn = 0
|
||||
for i in range(len(self.layers)):
|
||||
layer = self.layers[i]
|
||||
for i, layer in enumerate(self.layers):
|
||||
if isinstance(layer, GraniteMoeHybridAttentionDecoderLayer):
|
||||
num_attn += 1
|
||||
|
||||
|
@ -6,6 +6,7 @@ The architecture is the same as granitemoe but with the addition of shared
|
||||
experts.
|
||||
"""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
@ -200,8 +201,7 @@ class GraniteMoeSharedModel(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for i in range(self.start_layer, self.end_layer):
|
||||
layer = self.layers[i]
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(positions, hidden_states)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({
|
||||
|
@ -23,6 +23,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only Grok1 model."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -347,8 +348,7 @@ class Grok1Model(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
for i in range(self.start_layer, self.end_layer):
|
||||
layer = self.layers[i]
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(positions, hidden_states, residual)
|
||||
|
||||
if not get_pp_group().is_last_rank:
|
||||
|
@ -3,6 +3,7 @@
|
||||
|
||||
from collections.abc import Iterable
|
||||
from functools import partial
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -297,7 +298,7 @@ class InternLM2Model(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(positions, hidden_states, residual)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({
|
||||
|
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -123,7 +124,7 @@ class InternLM2VEModel(InternLM2Model):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -23,6 +23,7 @@
|
||||
|
||||
import math
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -276,7 +277,7 @@ class JAISModel(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
|
||||
for layer in self.h[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.h, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(hidden_states)
|
||||
|
||||
if not get_pp_group().is_last_rank:
|
||||
|
@ -2,6 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""Inference-only Jamba model."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
@ -350,7 +351,7 @@ class JambaModel(nn.Module):
|
||||
|
||||
kv_cache_index = 0
|
||||
mamba_cache_index = 0
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
layer_mamba_cache_params = None
|
||||
if isinstance(layer, JambaAttentionDecoderLayer):
|
||||
kv_cache_index += 1
|
||||
|
@ -1,6 +1,7 @@
|
||||
# SPDX-License-Identifier: Apache-2.0
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional
|
||||
|
||||
import torch
|
||||
@ -374,7 +375,7 @@ class Lfm2Model(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions=positions,
|
||||
hidden_states=hidden_states,
|
||||
|
@ -24,6 +24,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only LLaMA model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -383,7 +384,7 @@ class LlamaModel(nn.Module):
|
||||
|
||||
aux_hidden_states = []
|
||||
for idx, layer in enumerate(
|
||||
self.layers[self.start_layer:self.end_layer]):
|
||||
islice(self.layers, self.start_layer, self.end_layer)):
|
||||
if idx in self.aux_hidden_state_layers:
|
||||
aux_hidden_states.append(hidden_states + residual)
|
||||
hidden_states, residual = layer(positions, hidden_states, residual)
|
||||
|
@ -164,9 +164,7 @@ class Mamba2Model(nn.Module):
|
||||
# v1 get mamba2_metadata from forward_context
|
||||
mamba2_metadata = None
|
||||
|
||||
for i in range(len(self.layers)):
|
||||
layer = self.layers[i]
|
||||
|
||||
for i, layer in enumerate(self.layers):
|
||||
hidden_states, residual = layer(
|
||||
positions=positions,
|
||||
hidden_states=hidden_states,
|
||||
|
@ -26,6 +26,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only MiMo model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -74,7 +75,7 @@ class MiMoModel(Qwen2Model):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -25,6 +25,7 @@
|
||||
"""Inference-only MiniCPM model compatible with HuggingFace weights."""
|
||||
import math
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -414,7 +415,7 @@ class MiniCPMModel(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -3,6 +3,7 @@
|
||||
"""Inference-only MiniMaxText01 model."""
|
||||
import math
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import TYPE_CHECKING, Optional, Union
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@ -1019,8 +1020,7 @@ class MiniMaxText01Model(nn.Module):
|
||||
|
||||
minimax_cache_index = 0
|
||||
|
||||
for i in range(self.start_layer, self.end_layer):
|
||||
layer = self.layers[i]
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
_caches = None
|
||||
if not envs.VLLM_USE_V1 and isinstance(
|
||||
layer.self_attn, MiniMaxText01LinearAttention):
|
||||
|
@ -24,6 +24,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only Mixtral model."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -307,7 +308,7 @@ class MixtralModel(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(positions, hidden_states, residual)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({
|
||||
|
@ -24,6 +24,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only Mixtral model."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import numpy as np
|
||||
@ -346,7 +347,7 @@ class MixtralModel(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(positions, hidden_states, residual)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({
|
||||
|
@ -5,6 +5,7 @@ import math
|
||||
from collections.abc import Iterable, Mapping, Sequence
|
||||
from dataclasses import dataclass
|
||||
from functools import cached_property, partial
|
||||
from itertools import islice
|
||||
from typing import Annotated, Optional, Union
|
||||
|
||||
import numpy as np
|
||||
@ -842,7 +843,7 @@ class MolmoModel(nn.Module, SupportsQuant):
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
# Apply blocks one-by-one.
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -4,6 +4,7 @@
|
||||
# Adapted from https://huggingface.co/mosaicml/mpt-7b/tree/main
|
||||
import math
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -260,7 +261,7 @@ class MPTModel(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
|
||||
for block in self.blocks[self.start_layer:self.end_layer]:
|
||||
for block in islice(self.blocks, self.start_layer, self.end_layer):
|
||||
hidden_states = block(position_ids, hidden_states)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({"hidden_states": hidden_states})
|
||||
|
@ -24,6 +24,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only Nemotron model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -353,7 +354,7 @@ class NemotronModel(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(positions, hidden_states, residual)
|
||||
|
||||
if not get_pp_group().is_last_rank:
|
||||
|
@ -399,8 +399,7 @@ class NemotronHModel(nn.Module):
|
||||
|
||||
residual = None
|
||||
num_non_mamba_layers = 0
|
||||
for i in range(len(self.layers)):
|
||||
layer = self.layers[i]
|
||||
for i, layer in enumerate(self.layers):
|
||||
layer_mamba_cache_params = None
|
||||
if isinstance(layer,
|
||||
NemotronHMambaDecoderLayer) and mamba_cache_params:
|
||||
|
@ -24,6 +24,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only deci model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -287,8 +288,7 @@ class DeciModel(nn.Module):
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
kv_cache_index = 0
|
||||
for i in range(self.start_layer, self.end_layer):
|
||||
layer = self.layers[i]
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
if not layer._is_no_op_attention:
|
||||
hidden_states, residual = layer(positions, hidden_states,
|
||||
residual)
|
||||
|
@ -24,6 +24,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only OLMo model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -280,7 +281,7 @@ class OlmoModel(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
|
||||
# Apply blocks one-by-one.
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
# shape: (batch_size, seq_len, d_model)
|
||||
hidden_states = layer(positions, hidden_states)
|
||||
|
||||
|
@ -26,6 +26,7 @@
|
||||
|
||||
from collections.abc import Iterable
|
||||
from functools import partial
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -305,7 +306,7 @@ class Olmo2Model(nn.Module):
|
||||
assert isinstance(hidden_states, torch.Tensor)
|
||||
|
||||
# Apply blocks one-by-one.
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
# shape: (batch_size, seq_len, d_model)
|
||||
hidden_states = layer(positions, hidden_states)
|
||||
|
||||
|
@ -15,6 +15,7 @@
|
||||
"""Inference-only OLMoE model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from functools import partial
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -314,7 +315,7 @@ class OlmoeModel(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -20,6 +20,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only OPT model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -269,7 +270,7 @@ class OPTDecoder(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(hidden_states)
|
||||
|
||||
if not get_pp_group().is_last_rank:
|
||||
|
@ -7,6 +7,7 @@
|
||||
# LICENSE: https://huggingface.co/OrionStarAI/Orion-14B-Base/blob/main/LICENSE
|
||||
"""Inference-only Orion-14B model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -252,7 +253,7 @@ class OrionModel(nn.Module):
|
||||
else:
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(positions, hidden_states)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({
|
||||
|
@ -23,6 +23,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only persimmon model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -255,7 +256,7 @@ class PersimmonModel(nn.Module):
|
||||
else:
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(positions, hidden_states)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({"hidden_states": hidden_states})
|
||||
|
@ -38,6 +38,7 @@
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
"""Inference-only Phi-1.5 model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -240,7 +241,7 @@ class PhiModel(nn.Module):
|
||||
else:
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(positions, hidden_states)
|
||||
|
||||
if not get_pp_group().is_last_rank:
|
||||
|
@ -24,6 +24,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only PhiMoE model."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -500,7 +501,7 @@ class PhiMoEModel(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -2,6 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""Inference-only PLaMo2 model."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
@ -614,7 +615,7 @@ class Plamo2Decoder(torch.nn.Module):
|
||||
mamba2_metadata: Mamba2Metadata,
|
||||
) -> torch.Tensor:
|
||||
mamba_cache_index = 0
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
layer_mamba_cache_params = None
|
||||
if layer.is_mamba:
|
||||
layer_mamba_cache_params = mamba_cache_params.at_layer_idx(
|
||||
|
@ -8,6 +8,7 @@
|
||||
"""Inference-only QWen model compatible with HuggingFace weights."""
|
||||
import json
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -234,7 +235,7 @@ class QWenModel(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
for layer in self.h[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.h, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -25,6 +25,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only Qwen2 model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -358,7 +359,7 @@ class Qwen2Model(nn.Module):
|
||||
|
||||
aux_hidden_states = []
|
||||
for idx, layer in enumerate(
|
||||
self.layers[self.start_layer:self.end_layer]):
|
||||
islice(self.layers, self.start_layer, self.end_layer)):
|
||||
if idx in self.aux_hidden_state_layers:
|
||||
aux_hidden_states.append(hidden_states + residual)
|
||||
hidden_states, residual = layer(positions, hidden_states, residual)
|
||||
|
@ -25,6 +25,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only Qwen2MoE model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -381,7 +382,7 @@ class Qwen2MoeModel(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(positions, hidden_states, residual)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({
|
||||
|
@ -24,6 +24,7 @@
|
||||
"""Inference-only Qwen3MoE model compatible with HuggingFace weights."""
|
||||
import typing
|
||||
from collections.abc import Callable, Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
@ -420,8 +421,7 @@ class Qwen3MoeModel(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for i in range(self.start_layer, self.end_layer):
|
||||
layer = self.layers[i]
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(positions, hidden_states, residual)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({
|
||||
|
@ -23,6 +23,7 @@
|
||||
# limitations under the License.
|
||||
"""Inference-only SeedOss model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -340,7 +341,7 @@ class SeedOssModel(nn.Module):
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(
|
||||
positions,
|
||||
hidden_states,
|
||||
|
@ -22,6 +22,7 @@
|
||||
"""Inference-only StableLM (https://github.com/Stability-AI/StableLM)
|
||||
model compatible with HuggingFace weights."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -247,7 +248,7 @@ class StableLMEpochModel(nn.Module):
|
||||
else:
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(positions, hidden_states)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({"hidden_states": hidden_states})
|
||||
|
@ -21,6 +21,7 @@
|
||||
# limitations under the License.
|
||||
""" PyTorch Starcoder2 model."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Optional, Union
|
||||
|
||||
import torch
|
||||
@ -250,7 +251,7 @@ class Starcoder2Model(nn.Module):
|
||||
else:
|
||||
assert intermediate_tensors is not None
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
for layer in self.layers[self.start_layer:self.end_layer]:
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states = layer(positions, hidden_states)
|
||||
if not get_pp_group().is_last_rank:
|
||||
return IntermediateTensors({"hidden_states": hidden_states})
|
||||
|
@ -2,6 +2,7 @@
|
||||
# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
|
||||
"""Inference-only Step3 text model."""
|
||||
from collections.abc import Iterable
|
||||
from itertools import islice
|
||||
from typing import Any, Optional
|
||||
|
||||
import torch
|
||||
@ -346,8 +347,7 @@ class Step3TextModel(nn.Module):
|
||||
hidden_states = intermediate_tensors["hidden_states"]
|
||||
residual = intermediate_tensors["residual"]
|
||||
|
||||
for i in range(self.start_layer, self.end_layer):
|
||||
layer = self.layers[i]
|
||||
for layer in islice(self.layers, self.start_layer, self.end_layer):
|
||||
hidden_states, residual = layer(positions, hidden_states, residual)
|
||||
|
||||
if not get_pp_group().is_last_rank:
|
||||
|
Reference in New Issue
Block a user