[Doc]: fix typos in various files (#24821)

Signed-off-by: Didier Durand <durand.didier@gmail.com>
Author: Didier Durand
Date: 2025-09-15 10:08:52 +02:00
Committed by: GitHub
Parent: a8c0f59973
Commit: 4979eb79da

7 changed files with 7 additions and 7 deletions

@@ -8,7 +8,7 @@ This benchmark aims to:
 Latest results: [results link](https://blog.vllm.ai/2024/09/05/perf-update.html), scroll to the end.
-Latest reproduction guilde: [github issue link](https://github.com/vllm-project/vllm/issues/8176)
+Latest reproduction guide: [github issue link](https://github.com/vllm-project/vllm/issues/8176)
 ## Setup

@@ -190,7 +190,7 @@ class MoeWNA16Method(FusedMoEMethodBase):
         group_size = self.quant_config.group_size
         group_size_div_factor = 1
-        # make intermediate_size and hidden_size diviable by group_size
+        # make intermediate_size and hidden_size divisible by group_size
         # we reduce the group size to ensure that
         # and we would repeat the loaded_weight later
         while intermediate_size_per_partition % group_size or \
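
For context, a minimal standalone sketch of what this loop is doing, with hypothetical names and values (the real `MoeWNA16Method` also repeats the already-loaded quantization parameters by the resulting factor):

```python
# Hedged sketch of the group-size reduction idea; not the actual vLLM code.
def reduce_group_size(intermediate_size: int, hidden_size: int,
                      group_size: int) -> tuple[int, int]:
    """Halve group_size until it divides both dimensions.

    Returns the adjusted group size and the factor by which the
    loaded weights/scales must later be repeated.
    """
    div_factor = 1
    while intermediate_size % group_size or hidden_size % group_size:
        group_size //= 2
        div_factor *= 2
        assert group_size >= 1, "dimensions share no power-of-two group size"
    return group_size, div_factor

# Example: 128 does not divide 320, so the group size drops to 64
# and the loaded parameters are repeated twice.
print(reduce_group_size(320, 768, 128))  # -> (64, 2)
```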

@@ -19,7 +19,7 @@ class MarlinWorkspace:
     def __init__(self, out_features, min_thread_n, max_parallel):
         assert (out_features % min_thread_n == 0), (
-            "out_features = {} is undivisible by min_thread_n = {}".format(
+            "out_features = {} is indivisible by min_thread_n = {}".format(
                 out_features, min_thread_n))
         max_workspace_size = ((out_features // min_thread_n) * max_parallel)
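
A quick worked example of the size computation above, with illustrative numbers (not taken from any particular Marlin configuration):

```python
# Mirrors the arithmetic in MarlinWorkspace.__init__; values are made up.
out_features, min_thread_n, max_parallel = 4096, 64, 16
assert out_features % min_thread_n == 0          # 4096 is divisible by 64
max_workspace_size = (out_features // min_thread_n) * max_parallel
print(max_workspace_size)                        # (4096 // 64) * 16 = 1024
```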

@@ -649,7 +649,7 @@ def _sample_with_torch(
     else:
         sampled_token_ids_tensor = None
-    # Counterintiutively, having two loops here is actually faster.
+    # Counterintuitively, having two loops here is actually faster.
     # The first loop can run without waiting on GPU<->CPU sync.
     for sampling_type in SamplingType:
         sample_indices = categorized_sample_indices[sampling_type]
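
A hedged, schematic illustration of the two-loop pattern the comment describes; the real `_sample_with_torch` does far more per sampling type, and the names below are invented for the sketch:

```python
# Sketch only: enqueue all GPU work first, then do the synchronizing reads.
import torch

def sample_two_phase(logits_per_type: dict[str, torch.Tensor]) -> dict[str, list[int]]:
    # Phase 1: launch kernels for every sampling type back to back.
    # Nothing here forces a GPU<->CPU sync.
    gpu_results: dict[str, torch.Tensor] = {}
    for name, logits in logits_per_type.items():
        probs = torch.softmax(logits, dim=-1)
        gpu_results[name] = torch.multinomial(probs, num_samples=1)

    # Phase 2: pull results to the CPU. For GPU tensors each .tolist()
    # synchronizes, but all GPU work is already in flight by this point,
    # so the stalls overlap with useful computation.
    return {name: ids.flatten().tolist() for name, ids in gpu_results.items()}
```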

@@ -1524,7 +1524,7 @@ class Glm4vForConditionalGeneration(nn.Module, SupportsMultiModal,
             return None
         # The result multimodal_embeddings is tuple of tensors, with each
-        # tensor correspoending to a multimodal data item (image or video).
+        # tensor corresponding to a multimodal data item (image or video).
         multimodal_embeddings: tuple[torch.Tensor, ...] = ()
         # NOTE: It is important to iterate over the keys in this dictionary
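
A minimal sketch of the accumulation pattern the comment describes, assuming a per-modality dictionary of embedding tensors; the keys and shapes below are made up for illustration:

```python
# Sketch: one tensor per multimodal item, flattened into a single tuple
# whose order follows the dictionary's key order.
import torch

embeddings_by_modality = {
    "image": [torch.randn(576, 1024), torch.randn(576, 1024)],  # two images
    "video": [torch.randn(2304, 1024)],                         # one video
}

multimodal_embeddings: tuple[torch.Tensor, ...] = ()
# Iterate modalities in a fixed key order so the flattened tuple lines up
# with the order of the multimodal items in the prompt.
for modality in embeddings_by_modality:
    for item_embedding in embeddings_by_modality[modality]:
        multimodal_embeddings += (item_embedding,)
```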

@@ -738,7 +738,7 @@ class InternS1ForConditionalGeneration(nn.Module, SupportsMultiModal,
             return []
         # The result multimodal_embeddings is tuple of tensors, with each
-        # tensor correspoending to a multimodal data item (image or video).
+        # tensor corresponding to a multimodal data item (image or video).
         multimodal_embeddings: tuple[torch.Tensor, ...] = ()
         # NOTE: It is important to iterate over the keys in this dictionary

@@ -662,7 +662,7 @@ def pad_and_concat_to_dim3(
     max_len = max(f.shape[-1] for f in features)
     # Ensure all features have dim=3
     features = [f.view(-1, *f.shape[-2:]) for f in features]
-    # Pad and oncatenate:
+    # Pad and concatenate:
     # [[B1, 80, M1], [B2, 80, M2]] -> [B1+B2, 80, max(M1, M2)]
     features = [F.pad(f, (0, max_len - f.shape[-1])) for f in features]
     return torch.cat(features)
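
A self-contained illustration of the pad-and-concatenate step above, with toy shapes (the 80 stands in for the fixed middle dimension in the comment):

```python
import torch
import torch.nn.functional as F

features = [torch.randn(2, 80, 10), torch.randn(3, 80, 15)]  # [B1,80,M1], [B2,80,M2]
max_len = max(f.shape[-1] for f in features)                 # 15
# Right-pad the last dim with zeros so every feature reaches max_len ...
padded = [F.pad(f, (0, max_len - f.shape[-1])) for f in features]
# ... then stack along the batch dim: result is [B1+B2, 80, max(M1, M2)].
out = torch.cat(padded)
print(out.shape)  # torch.Size([5, 80, 15])
```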