[Doc]: fix typos in various files (#24798)
Signed-off-by: Didier Durand <durand.didier@gmail.com>
@@ -42,7 +42,7 @@ def main():
     llm_args["model"] = "meta-llama/Llama-3.1-8B-Instruct"

     # Set `enforce_eager=True` to avoid ahead-of-time compilation.
-    # In real workloads, `enforace_eager` should be `False`.
+    # In real workloads, `enforce_eager` should be `False`.
     llm = LLM(**llm_args)
     outputs = llm.generate(prompts, sampling_params)
     print("-" * 50)
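For context on the line being fixed, here is a minimal sketch of how such an offline-inference script fits together, assuming vLLM is installed and the model weights are accessible; the prompts and sampling settings are illustrative, not from the repo:

# Minimal sketch of the script this hunk edits (hypothetical prompts).
from vllm import LLM, SamplingParams

prompts = ["Hello, my name is", "The capital of France is"]
sampling_params = SamplingParams(temperature=0.8, max_tokens=32)

llm_args = {"model": "meta-llama/Llama-3.1-8B-Instruct"}
# Set `enforce_eager=True` to avoid ahead-of-time compilation.
# In real workloads, `enforce_eager` should be `False`.
llm_args["enforce_eager"] = True

llm = LLM(**llm_args)
outputs = llm.generate(prompts, sampling_params)
for output in outputs:
    print(output.outputs[0].text)
    print("-" * 50)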
@@ -182,7 +182,7 @@ class NaiveBlockAllocator(BlockAllocator):
             # Increment refcount for each block.
             assert block.block_id is not None
             refcount = self._refcounter.incr(block.block_id)
-            assert refcount != 1, "can't fork free'd block"
+            assert refcount != 1, "can't fork freed block"

             forked_block = self._block_pool.init_block(
                 prev_block=prev_block,
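The assertion being reworded guards a fork of an existing block: if incrementing the refcount returns 1, the block had already dropped to refcount 0 (i.e. was freed) before the fork. A toy sketch of that invariant, with a simplified stand-in for the real RefCounter:

# Simplified stand-in for vLLM's refcounter; names are illustrative.
class RefCounter:
    def __init__(self):
        self._refcounts: dict[int, int] = {}

    def incr(self, block_id: int) -> int:
        self._refcounts[block_id] = self._refcounts.get(block_id, 0) + 1
        return self._refcounts[block_id]

    def decr(self, block_id: int) -> int:
        self._refcounts[block_id] -= 1
        return self._refcounts[block_id]

refcounter = RefCounter()
block_id = 7
refcounter.incr(block_id)  # block allocated: refcount == 1

# Forking bumps the refcount. If incr() returned 1 here, the block had
# already been freed (refcount 0), so forking it would be a bug.
refcount = refcounter.incr(block_id)
assert refcount != 1, "can't fork freed block"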
@@ -58,7 +58,7 @@ class Evictor(ABC):

 class BlockMetaData:
     """Data structure for storing key data describe cached block, so that
-    evitor could use to make its decision which one to choose for eviction
+    evictor could use to make its decision which one to choose for eviction

     Here we use physical block id as the dict key, as there maybe several
     blocks with the same content hash, but their physical id is unique.
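The docstring describes per-block metadata kept in a dict keyed by physical block id, which the evictor consults when picking a victim. A rough sketch of that structure with a simplified least-recently-used pick; the field names here are illustrative, not the class's real ones:

# Illustrative sketch: metadata keyed by physical block id, since several
# cached blocks can share a content hash but each physical id is unique.
class BlockMetaData:
    def __init__(self, content_hash: int, num_hashed_tokens: int,
                 last_accessed: float):
        self.content_hash = content_hash
        self.num_hashed_tokens = num_hashed_tokens
        self.last_accessed = last_accessed

free_table: dict[int, BlockMetaData] = {
    0: BlockMetaData(content_hash=111, num_hashed_tokens=16, last_accessed=1.0),
    1: BlockMetaData(content_hash=111, num_hashed_tokens=16, last_accessed=3.0),
    2: BlockMetaData(content_hash=222, num_hashed_tokens=32, last_accessed=2.0),
}

# Evict the least-recently-used physical block.
victim_id = min(free_table, key=lambda bid: free_table[bid].last_accessed)
assert victim_id == 0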
@@ -379,7 +379,7 @@ class LoggingStatLogger(StatLoggerBase):
         if local_interval_elapsed(stats.now, self.last_local_log,
                                   self.local_interval):
             # Compute summary metrics for tracked stats (and log them
-            # to promethus if applicable).
+            # to prometheus if applicable).
             prompt_throughput = get_throughput(self.num_prompt_tokens,
                                                now=stats.now,
                                                last_log=self.last_local_log)
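A small sketch of the interval gating and throughput math in this hunk; the standalone helpers below are hypothetical stand-ins for the module's own `local_interval_elapsed` and `get_throughput`:

# Hypothetical stand-ins, assuming throughput = tokens / elapsed seconds.
def local_interval_elapsed(now: float, last_log: float,
                           local_interval: float) -> bool:
    return now - last_log >= local_interval

def get_throughput(tracked: list[int], now: float, last_log: float) -> float:
    elapsed = now - last_log
    return sum(tracked) / elapsed if elapsed > 0 else 0.0

now, last_local_log, local_interval = 12.0, 10.0, 1.0
num_prompt_tokens = [256, 512, 128]
if local_interval_elapsed(now, last_local_log, local_interval):
    # 896 tokens over 2 seconds -> 448.0 tokens/s
    print(get_throughput(num_prompt_tokens, now=now, last_log=last_local_log))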
@@ -432,7 +432,7 @@ class LoggingStatLogger(StatLoggerBase):


 class PrometheusStatLogger(StatLoggerBase):
-    """PrometheusStatLogger is used LLMEngine to log to Promethus."""
+    """PrometheusStatLogger is used LLMEngine to log to Prometheus."""
     _metrics_cls = Metrics
     _gauge_cls = prometheus_client.Gauge

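For reference, this is roughly how a gauge is created and set with `prometheus_client`, the library this class builds on; the metric name and label value are illustrative only:

# Minimal prometheus_client usage sketch (illustrative metric name).
import prometheus_client

gauge = prometheus_client.Gauge(
    "vllm_example_num_requests_running",
    "Number of requests currently running (illustrative).",
    labelnames=["model_name"],
)
gauge.labels(model_name="meta-llama/Llama-3.1-8B-Instruct").set(3)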
@@ -740,7 +740,7 @@ class MergedColumnParallelLinear(ColumnParallelLinear):
         """
         Handle special case for models where MLP layers are already
         fused on disk. In this case, we have no shard id. This function
-        determmines the shard id by splitting these layers and then calls
+        determines the shard id by splitting these layers and then calls
         the weight loader using the shard id.

         An example of a model with these fused layers:
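A sketch of the "fused on disk" case the docstring describes: a checkpoint stores the gate and up projections as a single tensor, so the loader derives shard ids by splitting it along the output dimension. Shapes below are made up for illustration:

# Illustrative shapes; a real fused gate_up_proj weight is much larger.
import torch

intermediate_size, hidden_size = 8, 4
fused_gate_up = torch.randn(2 * intermediate_size, hidden_size)

# Split the fused weight into its logical shards, then load each one with
# its shard id (0 = gate projection, 1 = up projection).
shards = torch.split(fused_gate_up, intermediate_size, dim=0)
for shard_id, shard_weight in enumerate(shards):
    print(shard_id, tuple(shard_weight.shape))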
@@ -914,7 +914,7 @@ class QKVParallelLinear(ColumnParallelLinear):
         """
         Handle special case for models where QKV layers are already
         fused on disk. In this case, we have no shard id. This function
-        determmines the shard id by splitting these layers and then calls
+        determines the shard id by splitting these layers and then calls
         the weight loader using the shard id.

         An example of a model with these fused layers:
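The QKV variant follows the same pattern: a fused checkpoint tensor is split into query, key, and value shards before the weight loader runs. Sizes below are illustrative, assuming equal q/k/v sizes (no grouped-query attention):

# Illustrative fused-QKV split; real head counts and dims differ per model.
import torch

hidden_size, head_dim, num_heads = 16, 4, 4
q_size = k_size = v_size = num_heads * head_dim
fused_qkv = torch.randn(q_size + k_size + v_size, hidden_size)

q, k, v = torch.split(fused_qkv, [q_size, k_size, v_size], dim=0)
for shard_id, shard_weight in zip(("q", "k", "v"), (q, k, v)):
    print(shard_id, tuple(shard_weight.shape))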
@@ -258,7 +258,7 @@ class VocabParallelEmbedding(CustomOp):

         if params_dtype is None:
             params_dtype = torch.get_default_dtype()
-        # Divide the weight matrix along the vocaburaly dimension.
+        # Divide the weight matrix along the vocabulary dimension.
         self.num_added_embeddings = self.num_embeddings - self.org_vocab_size
         self.num_embeddings_per_partition = divide(self.num_embeddings_padded,
                                                    self.tp_size)
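The fixed comment refers to partitioning the embedding table's vocabulary dimension across tensor-parallel ranks. A small sketch, assuming the `divide` helper simply enforces an even split:

# Illustrative partitioning of a padded vocab across TP ranks.
def divide(numerator: int, denominator: int) -> int:
    assert numerator % denominator == 0, "vocab must split evenly across TP"
    return numerator // denominator

num_embeddings_padded, tp_size = 128256, 4
num_embeddings_per_partition = divide(num_embeddings_padded, tp_size)

# Each rank owns a contiguous slice of vocabulary ids.
tp_rank = 1
vocab_start = tp_rank * num_embeddings_per_partition
vocab_end = vocab_start + num_embeddings_per_partition
print(num_embeddings_per_partition, (vocab_start, vocab_end))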
@@ -1446,7 +1446,7 @@ class Ernie4_5_VLMoeForConditionalGeneration(nn.Module, SupportsMultiModal,
             return None

         # The result multimodal_embeddings is tuple of tensors, with each
-        # tensor correspoending to a multimodal data item (image or video).
+        # tensor corresponding to a multimodal data item (image or video).
         multimodal_embeddings: tuple[torch.Tensor, ...] = ()

         # NOTE: It is important to iterate over the keys in this dictionary
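A minimal sketch of the structure the corrected comment describes (the same comment recurs in two more hunks below): one embedding tensor per multimodal item, accumulated into a tuple. Shapes are invented for illustration:

# One tensor per multimodal item; shapes are illustrative.
import torch

multimodal_embeddings: tuple[torch.Tensor, ...] = ()
image_embeds = torch.randn(576, 4096)   # one image  -> 576 patch embeddings
video_embeds = torch.randn(1280, 4096)  # one video  -> 1280 frame-patch embeddings

for item_embeds in (image_embeds, video_embeds):
    multimodal_embeddings += (item_embeds,)

assert len(multimodal_embeddings) == 2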
@@ -586,10 +586,10 @@ class Gemma3nForConditionalGeneration(nn.Module, SupportsMultiModal,

         # ruff: noqa
         # The Gemma3nProcessor expects all audio will be 30s in length and inserts 188 audio soft tokens into the
-        # text to account for this. However, the audio preprocessing and encoder do not gurarantee they will
+        # text to account for this. However, the audio preprocessing and encoder do not guarantee they will
         # produce 188 soft tokens; they will produce at most that many tokens, but they may produce fewer tokens
         # depending on the length of the longest audio input in the batch. When we encounter this situation, we pad
-        # the audio feature out to 188 soft tokens with the emebedding of the last token in the embed_audio vocab.
+        # the audio feature out to 188 soft tokens with the embedding of the last token in the embed_audio vocab.
         # TODO precompute and cache padding
         audio_padding_toks = torch.tensor([[self.vocab_size - 1]],
                                           dtype=torch.long,
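A sketch of the padding these comments describe: an audio feature sequence shorter than 188 soft tokens is padded out with the embedding of the last vocabulary entry. The dimensions and embedding table below are illustrative, not Gemma3n's real ones:

# Illustrative padding to a fixed soft-token count (toy dimensions).
import torch

num_soft_tokens, hidden = 188, 32
vocab_size = 1024  # toy vocab; the real audio vocab is much larger
embed_audio = torch.nn.Embedding(vocab_size, hidden)

audio_features = torch.randn(1, 173, hidden)  # encoder produced fewer than 188
audio_padding_toks = torch.tensor([[vocab_size - 1]], dtype=torch.long)
audio_padding_embs = embed_audio(audio_padding_toks)  # shape (1, 1, hidden)

# Repeat the padding embedding to fill the remaining positions.
pad_len = num_soft_tokens - audio_features.shape[1]
padded = torch.cat(
    [audio_features, audio_padding_embs.expand(1, pad_len, hidden)], dim=1)
assert padded.shape == (1, 188, hidden)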
@@ -560,7 +560,7 @@ class LlamaNemotronVLChatModel(nn.Module, SupportsMultiModal, SupportsPP,
             return []

         # The result multimodal_embeddings is tuple of tensors, with each
-        # tensor correspoending to a multimodal data item (image).
+        # tensor corresponding to a multimodal data item (image).
         multimodal_embeddings: tuple[torch.Tensor, ...] = ()

         # NOTE: It is important to iterate over the keys in this dictionary
@@ -1154,7 +1154,7 @@ class Phi4MMForCausalLM(nn.Module, SupportsLoRA, SupportsMultiModal):
             return None

         # The result multimodal_embeddings is tuple of tensors, with each
-        # tensor correspoending to a multimodal data item (image or video).
+        # tensor corresponding to a multimodal data item (image or video).
         multimodal_embeddings: tuple[torch.Tensor, ...] = ()

         # NOTE: It is important to iterate over the keys in this dictionary