[Perf] Optimize _update_states for GPU model runner (#16910)

Signed-off-by: snowcharm <snowcharmqq@gmail.com>
This commit is contained in:
SnowCharm
2025-04-22 14:01:54 +08:00
committed by GitHub
parent 3097ce3a32
commit a114bf20a3

View File

@@ -454,7 +454,7 @@ class GPUModelRunner(LoRAModelRunnerMixin):
# Add the new or resumed requests to the persistent batch.
# The smaller empty indices are filled first.
- removed_req_indices = sorted(removed_req_indices, reverse=True)
+ removed_req_indices.sort(reverse=True)
for req_id in req_ids_to_add:
req_state = self.requests[req_id]
if removed_req_indices: