mirror of https://github.com/volcengine/verl.git, synced 2025-10-20 21:53:50 +08:00
[misc] fix: SFT E2E CI test failure due to megatron engine (#3786)
@@ -1,3 +1,5 @@
#!/usr/bin/env bash
set -xeuo pipefail

rm -rf ~/verl/test/log
mkdir -p ~/verl/test/log
@@ -183,7 +183,6 @@ def gptmodel_forward_no_padding(
            k: preprocess_packed_seqs_no_padding(v, pre_process=True)[0] for k, v in logits_processor_args.items()
        }
        output_dict = logits_processor(output_orig, **args)
        # print(f'gptmodel_forward_no_padding: {output_dict=}')
        output = {
            k: postprocess_packed_seqs_no_padding(
                v, packed_seq_params, input_ids, batch_size, post_process=post_process
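This hunk applies the same pack-then-unpack treatment to every extra processor argument as to the hidden states. A minimal, self-contained sketch of that flow is below; `pack`, `unpack`, and the `logits_processor` body here are illustrative stand-ins for the `*_packed_seqs_no_padding` helpers, not the verl API.

```python
import torch

def pack(x):
    return x.reshape(1, -1)            # flatten the batch into one packed row

def unpack(x, batch_size):
    return x.reshape(batch_size, -1)   # restore the per-sequence view

def logits_processor(packed_output, labels):
    return {"log_probs": packed_output - labels}  # placeholder computation

batch_size, seqlen = 2, 4
packed_output = pack(torch.randn(batch_size, seqlen))
logits_processor_args = {"labels": torch.randn(batch_size, seqlen)}

# Mirror of the hunk: pack every extra argument, run the processor on the
# packed view, then unpack each result back to (batch_size, seqlen).
args = {k: pack(v) for k, v in logits_processor_args.items()}
output_dict = logits_processor(packed_output, **args)
output = {k: unpack(v, batch_size) for k, v in output_dict.items()}
assert output["log_probs"].shape == (batch_size, seqlen)
```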
@@ -208,6 +208,7 @@ def preprocess_packed_seqs_no_padding(
            seqlen = seqlens_in_batch_cpu[i]
            start_idx = cu_seqlens_padded_cpu[i]
            input_ids_rmpad[start_idx : start_idx + seqlen] = input_ids[i]
            continue

        seqlen_padded_i = seqlens_in_batch_padded_cpu[i]
        seqlen = seqlen_padded_i // cp_size
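The copy in this hunk follows the standard packing pattern: each variable-length sequence is written into a flat buffer at its cumulative offset. A minimal sketch of that pattern, with illustrative names and shapes rather than the verl signatures:

```python
import torch

def pack_sequences(seqs):
    # Cumulative offsets: offsets[i] is where sequence i starts in the buffer.
    seqlens = torch.tensor([len(s) for s in seqs])
    cu_seqlens = torch.cat([torch.zeros(1, dtype=torch.long), seqlens.cumsum(0)])
    input_ids_rmpad = torch.empty(int(cu_seqlens[-1]), dtype=torch.long)
    for i, seq in enumerate(seqs):
        start_idx = int(cu_seqlens[i])
        input_ids_rmpad[start_idx : start_idx + len(seq)] = seq
    return input_ids_rmpad, cu_seqlens

packed, offsets = pack_sequences([torch.tensor([1, 2, 3]), torch.tensor([4, 5])])
# packed -> tensor([1, 2, 3, 4, 5]); offsets -> tensor([0, 3, 5])
```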
@@ -974,7 +974,8 @@ class FSDPEngineWithValueHead(FSDPEngineWithLMHead):
        else:
            values_rmpad = output.logits
            values_rmpad = values_rmpad.squeeze(0)  # (total_nnz, 1)
        # FIXME(houmin): confirm why we should squeeze here
        # critic model arch is like Qwen3ForTokenClassification and num_labels=1,
        # so we squeeze the last dimension here to get the value for each token
        values_rmpad = values_rmpad.squeeze(-1)

        # gather output if sp > 1
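A short shape walk-through of the comment above: a token-classification head with num_labels=1 yields one logit per unpadded token, so the trailing dimension is squeezed away. `total_nnz` below is an arbitrary illustrative size, not a value from the repo.

```python
import torch

total_nnz = 7
values_rmpad = torch.randn(1, total_nnz, 1)  # hypothetical critic head output
values_rmpad = values_rmpad.squeeze(0)       # (total_nnz, 1)
values_rmpad = values_rmpad.squeeze(-1)      # (total_nnz,), one value per token
assert values_rmpad.shape == (total_nnz,)
```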
@@ -595,7 +595,13 @@ class MegatronEngineWithLMHead(MegatronEngine):
        else:
            logits_bak = logits

        # FIXME(houmin): maybe shift label in another place
        # Create the final labels for next-token prediction.
        # The `label` tensor starts as a clone of `input_ids`. `torch.roll` is not applied
        # earlier because `input_ids` is a nested tensor, which is incompatible with the operation.
        # The `preprocess_packed_seqs_no_padding` function unnests and flattens the tensor
        # into `input_ids_rmpad` (shape: [1, total_seqlen]).
        # Now, on this simple, unpadded tensor, we can perform the standard left shift
        # to align the target token `t+1` with the prediction for token `t`.
        label = torch.roll(label, shifts=-1, dims=1)

        log_probs = vocab_parallel_log_probs_from_logits(logits_bak, label)
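A toy demonstration of the left shift described in that comment: once the nested input_ids are flattened to shape (1, total_seqlen), rolling by -1 puts the target token t+1 at position t. The token values below are made up for illustration.

```python
import torch

input_ids_rmpad = torch.tensor([[10, 11, 12, 13]])  # (1, total_seqlen)
label = torch.roll(input_ids_rmpad.clone(), shifts=-1, dims=1)
# label -> tensor([[11, 12, 13, 10]]); the wrapped-around last position has
# no real next token and is normally masked out of the loss.
```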