mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[Misc] fix docstrings (#4191)
Co-authored-by: Zhong Wang <wangzhong@infini-ai.com>
This commit is contained in:
@@ -160,7 +160,7 @@ class SequenceData:
|
||||
self._stage = SequenceStage.PREFILL
|
||||
|
||||
def get_num_uncomputed_tokens(self) -> int:
|
||||
"""Return the number of prefil tokens that are not computed."""
|
||||
"""Return the number of prefill tokens that are not computed."""
|
||||
# we use `get_len()` which includes prompt_len + output_len instead
|
||||
# of prompt_len here. This is because during recompute we need to
|
||||
# prefill for both prompt and output.
|
||||
@@ -345,12 +345,9 @@ class Sequence:
|
||||
def get_num_new_tokens(self) -> int:
|
||||
"""Get the number of new tokens to be computed.
|
||||
|
||||
Args:
|
||||
remainig_token_budget: The remaining token budgets.
|
||||
Returns:
|
||||
The new number of tokens to be computed. I.e., 1 for decode, prompt
|
||||
size for prefill. If there's not enough remainig_token_budget, it
|
||||
can return the chunked number of new tokens.
|
||||
The new number of tokens to be computed. I.e., 1 for decode, or
|
||||
the remaining prompt size for prefill.
|
||||
"""
|
||||
if self.data.stage == SequenceStage.DECODE:
|
||||
return 1
|
||||
|
Reference in New Issue
Block a user