mirror of
https://github.com/vllm-project/vllm.git
synced 2025-10-20 14:53:52 +08:00
[Speculative decoding] [Bugfix] Fix overallocation in ngram + spec logprobs (#4672)
This commit is contained in:
@@ -138,7 +138,7 @@ class NGramWorker(LoraNotSupportedWorkerBase):
|
||||
SamplerOutput(
|
||||
outputs=None,
|
||||
sampled_token_probs=token_probs[i],
|
||||
-logprobs=token_logprobs,
|
||||
+logprobs=token_logprobs[i],
|
||||
sampled_token_ids=token_ids[i],
|
||||
))
|
||||
return outputs, False
|
||||
|
Reference in New Issue
Block a user