mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[Minor] Add some clarifying comments to recent changes (#27130)
Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
@ -236,7 +236,9 @@ class MessageQueue:
|
||||
n_reader, # number of all readers
|
||||
n_local_reader, # number of local readers through shared memory
|
||||
local_reader_ranks: list[int] | None = None,
|
||||
max_chunk_bytes: int = 1024 * 1024 * 24, # 24MiB
|
||||
# Default of 24MiB chosen to be large enough to accommodate grammar
|
||||
# bitmask tensors for large batches (1024 requests).
|
||||
max_chunk_bytes: int = 1024 * 1024 * 24,
|
||||
max_chunks: int = 10,
|
||||
connect_ip: str | None = None,
|
||||
):
|
||||
@ -538,6 +540,10 @@ class MessageQueue:
|
||||
buf[0] = 1 # overflow
|
||||
self.local_socket.send_multipart(all_buffers, copy=False)
|
||||
else:
|
||||
# Byte 0: 0
|
||||
# Bytes 1-2: Count of buffers
|
||||
# Then each buffer follows, preceded by 4 bytes containing its length:
|
||||
# [4 byte int L][L bytes of buffer content] ...
|
||||
with self.acquire_write(timeout) as buf:
|
||||
buf[0] = 0 # not overflow
|
||||
offset = 3
|
||||
|
@ -165,7 +165,9 @@ class SchedulerOutput:
|
||||
# freed from the encoder cache.
|
||||
free_encoder_mm_hashes: list[str]
|
||||
|
||||
# ids of structured outputs requests included in the bitmask, in order.
|
||||
# ids of structured outputs requests included in the bitmask, in the
|
||||
# same order as the corresponding stacked rows of the bitmask.
|
||||
# There may be more than one row per request in the case of speculative decoding.
|
||||
structured_output_request_ids: list[str]
|
||||
# the bitmask for the whole batch
|
||||
grammar_bitmask: "npt.NDArray[np.int32] | None"
|
||||
|
Reference in New Issue
Block a user