[TPU][V1][Bugfix] Fix chunked prefill with padding (#15037)

Signed-off-by: NickLucche <nlucches@redhat.com>
This commit is contained in:
Nicolò Lucchesi
2025-03-18 15:34:45 +01:00
committed by GitHub
parent 3b457143d2
commit af35d3a3cc

View File

@ -410,6 +410,9 @@ class TPUModelRunner:
# Do the padding and copy the tensors to the TPU.
padded_total_num_scheduled_tokens = _get_padded_token_len(
total_num_scheduled_tokens)
# Zero out the padding region to avoid spurious values left over from the previous iteration (last chunked-prefill chunk)
self.input_ids_cpu[
total_num_scheduled_tokens:padded_total_num_scheduled_tokens] = 0
self.input_ids = self.input_ids_cpu[:
padded_total_num_scheduled_tokens].to(
self.device)