mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 23:03:52 +08:00
Initialize io_thread_pool attribute in the beginning. (#18331)
Signed-off-by: rabi <ramishra@redhat.com>
This commit is contained in:
@ -50,6 +50,7 @@ class MultiprocExecutor(Executor):
|
|||||||
self.is_failed = False
|
self.is_failed = False
|
||||||
self.shutdown_event = threading.Event()
|
self.shutdown_event = threading.Event()
|
||||||
self.failure_callback: Optional[FailureCallback] = None
|
self.failure_callback: Optional[FailureCallback] = None
|
||||||
|
self.io_thread_pool: Optional[ThreadPoolExecutor] = None
|
||||||
|
|
||||||
self.world_size = self.parallel_config.world_size
|
self.world_size = self.parallel_config.world_size
|
||||||
tensor_parallel_size = self.parallel_config.tensor_parallel_size
|
tensor_parallel_size = self.parallel_config.tensor_parallel_size
|
||||||
@ -107,7 +108,6 @@ class MultiprocExecutor(Executor):
|
|||||||
|
|
||||||
# For pipeline parallel, we use a thread pool for asynchronous
|
# For pipeline parallel, we use a thread pool for asynchronous
|
||||||
# execute_model.
|
# execute_model.
|
||||||
self.io_thread_pool: Optional[ThreadPoolExecutor] = None
|
|
||||||
if self.max_concurrent_batches > 1:
|
if self.max_concurrent_batches > 1:
|
||||||
# Note: must use only 1 IO thread to keep dequeue sequence
|
# Note: must use only 1 IO thread to keep dequeue sequence
|
||||||
# from the response queue
|
# from the response queue
|
||||||
|
Reference in New Issue
Block a user