Mirror of https://github.com/vllm-project/vllm.git (synced 2025-10-20 14:53:52 +08:00).
Commit: [Perf] Optimize `_update_states` for GPU model runner (#16910)
Signed-off-by: snowcharm <snowcharmqq@gmail.com>
This commit is contained in:
@@ -454,7 +454,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
         # Add the new or resumed requests to the persistent batch.
         # The smaller empty indices are filled first.
-        removed_req_indices = sorted(removed_req_indices, reverse=True)
+        removed_req_indices.sort(reverse=True)
         for req_id in req_ids_to_add:
             req_state = self.requests[req_id]
             if removed_req_indices:
Reference in New Issue
Block a user