[Misc] add reorder_batch AttentionMetadataBuilder (#23798)
Signed-off-by: Andy Xie <andy.xning@gmail.com>
This commit is contained in:
@ -212,6 +212,23 @@ class AttentionMetadataBuilder(abc.ABC, Generic[M]):
|
|||||||
"""
|
"""
|
||||||
raise NotImplementedError
|
raise NotImplementedError
|
||||||
|
|
||||||
|
def reorder_batch(
    self,
    input_batch: "InputBatch",
    scheduler_output: "SchedulerOutput",
) -> bool:
    """
    Give the attention backend a chance to rearrange the batch's requests.

    Some attention backends prefer a particular request ordering; MLA,
    for instance, may want to separate requests according to whether
    their attention computation will be compute-bound or memory-bound.

    This base implementation always raises ``NotImplementedError``.

    Args:
        input_batch: the batch whose request order may be updated.
        scheduler_output: the scheduler output.

    Returns:
        True when the batch was modified, False otherwise.

    Raises:
        NotImplementedError: always, in this implementation.
    """
    raise NotImplementedError()
|
||||||
|
|
||||||
def build_for_cudagraph_capture(
|
def build_for_cudagraph_capture(
|
||||||
self, common_attn_metadata: CommonAttentionMetadata) -> M:
|
self, common_attn_metadata: CommonAttentionMetadata) -> M:
|
||||||
"""
|
"""
|
||||||
|
Reference in New Issue
Block a user