mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[Core] Refactor: Clean up unused argument in Scheduler._preempt (#9696)
Signed-off-by: André Jonasson <andre.jonasson@gmail.com>
This commit is contained in:
@@ -828,8 +828,7 @@ class Scheduler:
|
||||
num_running_seqs)
|
||||
|
||||
#Preempt out the victim sequence group
|
||||
self._preempt(vseq_group, blocks_to_swap_out,
|
||||
PreemptionMode.RECOMPUTE)
|
||||
self._preempt(vseq_group, blocks_to_swap_out)
|
||||
waiting_queue.appendleft(vseq_group)
|
||||
force_preemption_count += 1
|
||||
#Put the sequence back into the waiting queue
|
||||
@@ -1451,12 +1450,8 @@ class Scheduler:
|
||||
if len(cows) > 0:
|
||||
blocks_to_copy.extend(cows)
|
||||
|
||||
def _preempt(
|
||||
self,
|
||||
seq_group: SequenceGroup,
|
||||
blocks_to_swap_out: List[Tuple[int, int]],
|
||||
preemption_mode: Optional[PreemptionMode] = None,
|
||||
) -> PreemptionMode:
|
||||
def _preempt(self, seq_group: SequenceGroup,
|
||||
blocks_to_swap_out: List[Tuple[int, int]]) -> PreemptionMode:
|
||||
# If preemption mode is not specified, we determine the mode as follows:
|
||||
# We use recomputation by default since it incurs lower overhead than
|
||||
# swapping. However, when the sequence group has multiple sequences
|
||||
|
Reference in New Issue
Block a user