From 9701352e4ba75e89b5d35a5cb74d0a55567319d9 Mon Sep 17 00:00:00 2001
From: Didier Durand <2927957+didier-durand@users.noreply.github.com>
Date: Sun, 31 Aug 2025 10:21:59 +0200
Subject: [PATCH] [Doc]: fix typos in Python comments (#24001)

Signed-off-by: Didier Durand
---
 vllm/compilation/monitor.py                     |  2 +-
 vllm/core/evictor.py                            |  2 +-
 vllm/engine/llm_engine.py                       |  2 +-
 vllm/entrypoints/llm.py                         | 10 +++++-----
 vllm/executor/mp_distributed_executor.py        |  2 +-
 vllm/lora/layers.py                             |  2 +-
 vllm/platforms/interface.py                     |  2 +-
 vllm/reasoning/hunyuan_a13b_reasoning_parser.py |  2 +-
 vllm/v1/worker/gpu_model_runner.py              |  2 +-
 vllm/v1/worker/gpu_worker.py                    |  2 +-
 10 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/vllm/compilation/monitor.py b/vllm/compilation/monitor.py
index 9047bf3cbf..c46721ab2d 100644
--- a/vllm/compilation/monitor.py
+++ b/vllm/compilation/monitor.py
@@ -43,7 +43,7 @@ cudagraph_capturing_enabled: bool = True
 
 
 def validate_cudagraph_capturing_enabled():
-    # used to monitor whether an cudagraph capturing is legal at runtime.
+    # used to monitor whether a cudagraph capturing is legal at runtime.
     # should be called before any cudagraph capturing.
     # if an illegal cudagraph capturing happens, raise an error.
     global cudagraph_capturing_enabled
diff --git a/vllm/core/evictor.py b/vllm/core/evictor.py
index 7ec4768e90..7a4a836ee3 100644
--- a/vllm/core/evictor.py
+++ b/vllm/core/evictor.py
@@ -76,7 +76,7 @@ class LRUEvictor(Evictor):
     that's recorded in the Block. If there are multiple blocks with the same
     last_accessed time, then the one with the largest num_hashed_tokens will
     be evicted. If two blocks each have the lowest last_accessed time and
-    highest num_hashed_tokens value, then one will be chose arbitrarily
+    highest num_hashed_tokens value, then one will be chosen arbitrarily
     """
 
     # CLEANUP_THRESHOLD determines the maximum allowable size of the priority
diff --git a/vllm/engine/llm_engine.py b/vllm/engine/llm_engine.py
index 7a5130af0b..10ded6f16d 100644
--- a/vllm/engine/llm_engine.py
+++ b/vllm/engine/llm_engine.py
@@ -1239,7 +1239,7 @@ class LLMEngine:
 
         # Stop the execute model loop in parallel workers until there are
         # more requests to process. This avoids waiting indefinitely in
-        # torch.distributed ops which may otherwise timeout, and unblocks
+        # torch.distributed ops which may otherwise time out, and unblocks
        # the RPC thread in the workers so that they can process any other
        # queued control plane messages, such as add/remove lora adapters.
         logger.debug("Stopping remote worker execution loop.")
diff --git a/vllm/entrypoints/llm.py b/vllm/entrypoints/llm.py
index 479524a117..cab761b8ea 100644
--- a/vllm/entrypoints/llm.py
+++ b/vllm/entrypoints/llm.py
@@ -329,7 +329,7 @@ class LLM:
         Args:
             prompts: The prompts to the LLM. You may pass a sequence of prompts
                 for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompts.
+                for more details about the format of each prompt.
             sampling_params: The sampling parameters for text generation. If
                 None, we use the default sampling parameters.
                 When it is a single value, it is applied to every prompt.
@@ -853,7 +853,7 @@ class LLM:
         Args:
             prompts: The prompts to the LLM. You may pass a sequence of prompts
                 for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompts.
+                for more details about the format of each prompt.
             pooling_params: The pooling parameters for pooling. If None, we
                 use the default pooling parameters.
             use_tqdm: If `True`, shows a tqdm progress bar.
@@ -946,7 +946,7 @@ class LLM:
         Args:
             prompts: The prompts to the LLM. You may pass a sequence of prompts
                 for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompts.
+                for more details about the format of each prompt.
             pooling_params: The pooling parameters for pooling. If None, we
                 use the default pooling parameters.
             use_tqdm: If `True`, shows a tqdm progress bar.
@@ -994,7 +994,7 @@ class LLM:
         Args:
             prompts: The prompts to the LLM. You may pass a sequence of prompts
                 for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompts.
+                for more details about the format of each prompt.
             use_tqdm: If `True`, shows a tqdm progress bar.
                 If a callable (e.g., `functools.partial(tqdm, leave=False)`),
                 it is used to create the progress bar.
@@ -1038,7 +1038,7 @@ class LLM:
         Args:
             prompts: The prompts to the LLM. You may pass a sequence of prompts
                 for batch inference. See [PromptType][vllm.inputs.PromptType]
-                for more details about the format of each prompts.
+                for more details about the format of each prompt.
             use_tqdm: If `True`, shows a tqdm progress bar.
                 If a callable (e.g., `functools.partial(tqdm, leave=False)`),
                 it is used to create the progress bar.
diff --git a/vllm/executor/mp_distributed_executor.py b/vllm/executor/mp_distributed_executor.py
index 4e8c6d7909..136dca54e6 100644
--- a/vllm/executor/mp_distributed_executor.py
+++ b/vllm/executor/mp_distributed_executor.py
@@ -101,7 +101,7 @@ class MultiprocessingDistributedExecutor(DistributedExecutorBase):
             result_handler.start()
             self.worker_monitor.start()
 
-        # Set up signal handlers to shutdown the executor cleanly
+        # Set up signal handlers to shut down the executor cleanly
         # sometimes gc does not work well
 
         self.driver_worker = WorkerWrapperBase(self.vllm_config, 0)
diff --git a/vllm/lora/layers.py b/vllm/lora/layers.py
index 24a05d310d..d8503b2045 100644
--- a/vllm/lora/layers.py
+++ b/vllm/lora/layers.py
@@ -605,7 +605,7 @@ class ColumnParallelLinearWithLoRA(BaseLinearLayerWithLoRA):
 
 class MergedColumnParallelLinearWithLoRA(ColumnParallelLinearWithLoRA):
     """ColumnParallelLinear layer that is composed of 2 sublayers (slices)
-    packed together (eg. gate_proj + up_proj -> gate_up_proj).
+    packed together (e.g. gate_proj + up_proj -> gate_up_proj).
 
     This means we have 2 LoRAs, each applied to one half of the layer.
 
diff --git a/vllm/platforms/interface.py b/vllm/platforms/interface.py
index 01f3e2d977..ad12f7f788 100644
--- a/vllm/platforms/interface.py
+++ b/vllm/platforms/interface.py
@@ -537,7 +537,7 @@ class Platform:
 
     def get_global_graph_pool(self) -> Any:
         """
-        Return the global graph pool for the this platform.
+        Return the global graph pool for this platform.
         """
         cls = self.__class__
         if cls._global_graph_pool is None:
diff --git a/vllm/reasoning/hunyuan_a13b_reasoning_parser.py b/vllm/reasoning/hunyuan_a13b_reasoning_parser.py
index b2452b95c1..9deec8a1e8 100644
--- a/vllm/reasoning/hunyuan_a13b_reasoning_parser.py
+++ b/vllm/reasoning/hunyuan_a13b_reasoning_parser.py
@@ -30,7 +30,7 @@ class HunyuanA13BReasoningParser(ReasoningParser):
     Key Features:
     - For non-stream output , Recognizes and extracts reasoning ("think")
       and answer ("answer") sections from text using regular expressions.
-    - For stream process, it require a token id sequences to change the
+    - For stream process, it requires a token id sequences to change the
       reasoning state and other state so it maintains internal state to
       manage parsing across multiple token.
 
diff --git a/vllm/v1/worker/gpu_model_runner.py b/vllm/v1/worker/gpu_model_runner.py
index d6717892d4..f77373e8ad 100644
--- a/vllm/v1/worker/gpu_model_runner.py
+++ b/vllm/v1/worker/gpu_model_runner.py
@@ -2734,7 +2734,7 @@ class GPUModelRunner(LoRAModelRunnerMixin, KVConnectorModelRunnerMixin):
                                                    layer_names)
         attn_backends = {}
         attn_backend_layers = defaultdict(list)
-        # Dedupe based on full class name; this is a bit safer than using
+        # Dedupe based on full class name; this is a bit safer than
         # using the class itself as the key because when we create dynamic
         # attention backend subclasses (e.g. ChunkedLocalAttention) unless
         # they are cached correctly, there will be different objects per
diff --git a/vllm/v1/worker/gpu_worker.py b/vllm/v1/worker/gpu_worker.py
index 2e7d668537..f49f5bdd97 100644
--- a/vllm/v1/worker/gpu_worker.py
+++ b/vllm/v1/worker/gpu_worker.py
@@ -224,7 +224,7 @@ class Worker(WorkerBase):
         memory can be used for KV cache without OOMs.
 
         The engine will first conduct a profiling of the existing memory usage.
-        Then, it calculate the free memory that can be used for KV cache in
+        Then, it calculates the free memory that can be used for KV cache in
         bytes.
 
         Tip: