[Minor] Add some clarifying comments to recent changes (#27130)

Signed-off-by: Nick Hill <nhill@redhat.com>
This commit is contained in:
Nick Hill
2025-10-18 09:52:45 -07:00
committed by GitHub
parent 168e578efc
commit 3b45075206
2 changed files with 10 additions and 2 deletions

View File

@@ -236,7 +236,9 @@ class MessageQueue:
n_reader, # number of all readers n_reader, # number of all readers
n_local_reader, # number of local readers through shared memory n_local_reader, # number of local readers through shared memory
local_reader_ranks: list[int] | None = None, local_reader_ranks: list[int] | None = None,
max_chunk_bytes: int = 1024 * 1024 * 24, # 24MiB # Default of 24MiB chosen to be large enough to accommodate grammar
# bitmask tensors for large batches (1024 requests).
max_chunk_bytes: int = 1024 * 1024 * 24,
max_chunks: int = 10, max_chunks: int = 10,
connect_ip: str | None = None, connect_ip: str | None = None,
): ):
@@ -538,6 +540,10 @@ class MessageQueue:
buf[0] = 1 # overflow buf[0] = 1 # overflow
self.local_socket.send_multipart(all_buffers, copy=False) self.local_socket.send_multipart(all_buffers, copy=False)
else: else:
# Byte 0: 0
# Bytes 1-2: Count of buffers
# Then each buffer follows, preceded by 4 bytes containing its length:
# [4 byte int L][L bytes of buffer content] ...
with self.acquire_write(timeout) as buf: with self.acquire_write(timeout) as buf:
buf[0] = 0 # not overflow buf[0] = 0 # not overflow
offset = 3 offset = 3

View File

@@ -165,7 +165,9 @@ class SchedulerOutput:
# freed from the encoder cache. # freed from the encoder cache.
free_encoder_mm_hashes: list[str] free_encoder_mm_hashes: list[str]
# ids of structured outputs requests included in the bitmask, in order. # ids of structured outputs requests included in the bitmask, in the
# same order as the corresponding stacked rows of the bitmask.
# There may be more than one row per request in the case of speculative decoding.
structured_output_request_ids: list[str] structured_output_request_ids: list[str]
# the bitmask for the whole batch # the bitmask for the whole batch
grammar_bitmask: "npt.NDArray[np.int32] | None" grammar_bitmask: "npt.NDArray[np.int32] | None"