[misc] chore: fix typo in function name (#2525)

### What does this PR do?

Fix the typo in the function name: rename `gather_outpus_and_unpad` to `gather_outputs_and_unpad` in `verl.utils.ulysses` and update all call sites. The misspelled name is kept as a stub that raises a `RuntimeError` pointing to the new name.

### Checklist Before Starting

- [x] Search for similar PRs. Paste at least one query link here: ...
- [x] Format the PR title as `[{modules}] {type}: {description}` (This will be checked by the CI)
  - `{modules}` include `fsdp`, `megatron`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data`
  - If this PR involves multiple modules, separate them with `,`, like `[megatron, fsdp, doc]`
  - `{type}` is in `feat`, `fix`, `refactor`, `chore`, `test`
  - If this PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title.
    - Example: `[BREAKING][fsdp, megatron] feat: dynamic batching`

### Test

> For changes that cannot be tested by CI (e.g., algorithm implementation, new model support), validate by experiment(s) and show results such as training curve plots, evaluation results, etc.

### API and Usage Example

> Demonstrate how the API changes if any, and provide usage example(s) if possible.

```python
# The signature is unchanged; call sites only need the new name.

# Before (the old name now raises a RuntimeError):
# from verl.utils.ulysses import gather_outpus_and_unpad

# After:
from verl.utils.ulysses import gather_outputs_and_unpad

# Illustrative call mirroring the existing call sites (`loss` and `pad_size`
# are placeholders from the surrounding training code):
loss = gather_outputs_and_unpad(loss, gather_dim=0, unpad_dim=0, padding_size=pad_size)
```
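
Calling the old `gather_outpus_and_unpad` after this PR raises a `RuntimeError` that points to `gather_outputs_and_unpad`, so stale imports fail loudly instead of silently.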

### Design & Code Changes

> Demonstrate the high-level design if this PR is complex, and list the specific changes.

- Rename `gather_outpus_and_unpad` to `gather_outputs_and_unpad` in `verl.utils.ulysses`, keeping the old name as a stub that raises a `RuntimeError` directing callers to the new name.
- Update the docs (`automodule` members list) and every import/call site: the PRIME reward model, the reward model workers, the FSDP SFT trainer, the PPO actor and critic, and the Ulysses sequence-parallel test.

### Checklist Before Submitting

> [!IMPORTANT]
> Please check all the following items before requesting a review, otherwise the reviewer might deprioritize this PR for review.

- [ ] Read the [Contribute Guide](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md).
- [ ] Apply [pre-commit checks](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md#code-linting-and-formatting): `pre-commit install && pre-commit run --all-files --show-diff-on-failure --color=always`
- [ ] Add / Update [the documentation](https://github.com/volcengine/verl/tree/main/docs).
- [ ] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/volcengine/verl/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: ...
- [ ] Once your PR is ready for CI, send a message in [the `ci-request` channel](https://verl-project.slack.com/archives/C091TCESWB1) in [the `verl` Slack workspace](https://join.slack.com/t/verl-project/shared_invite/zt-3855yhg8g-CTkqXu~hKojPCmo7k_yXTQ).

---------

Signed-off-by: ShareLer <ShareLe@163.com>
Commit 10f4eb8cfc (parent 473d8ff0c1), authored by ShareLer on 2025-07-15 19:06:20 +08:00 and committed by GitHub.
9 changed files with 27 additions and 19 deletions.


@@ -60,7 +60,7 @@ Ulysses Utilities
 --------------------
 .. automodule:: verl.utils.ulysses
-    :members: gather_outpus_and_unpad, ulysses_pad_and_slice_inputs
+    :members: gather_outputs_and_unpad, ulysses_pad_and_slice_inputs
 FSDP Utilities
 ------------------


@@ -28,7 +28,7 @@ from verl import DataProto
 from verl.utils.device import get_device_name
 from verl.utils.py_functional import append_to_dict
 from verl.utils.seqlen_balancing import get_reverse_idx, rearrange_micro_batches
-from verl.utils.ulysses import gather_outpus_and_unpad, ulysses_pad_and_slice_inputs
+from verl.utils.ulysses import gather_outputs_and_unpad, ulysses_pad_and_slice_inputs
 from .prime_core_algos import compute_ce_dpo_loss_rm, compute_detach_dpo_loss_rm
@@ -101,7 +101,9 @@ class DataParallelPRIMERewardModel:
 )
 if self.ulysses_sequence_parallel_size > 1:
-    rm_log_labels = gather_outpus_and_unpad(rm_log_labels, gather_dim=0, unpad_dim=0, padding_size=pad_size)
+    rm_log_labels = gather_outputs_and_unpad(
+        rm_log_labels, gather_dim=0, unpad_dim=0, padding_size=pad_size
+    )
 rm_log_labels = pad_input(
     hidden_states=rm_log_labels.unsqueeze(-1), indices=indices, batch=batch_size, seqlen=seqlen
 ).squeeze(-1)[:, -num_actions - 1 : -1]
@@ -149,7 +151,7 @@ class DataParallelPRIMERewardModel:
     logits=ref_output_logits, labels=input_ids_rmpad_rolled
 )
-ref_log_labels = gather_outpus_and_unpad(
+ref_log_labels = gather_outputs_and_unpad(
     ref_log_labels, gather_dim=0, unpad_dim=0, padding_size=pad_size
 )
 ref_log_labels = pad_input(


@@ -409,7 +409,7 @@ class RewardModelWorker(Worker):
 def _forward_micro_batch(self, micro_batch):
     from flash_attn.bert_padding import index_first_axis, pad_input, rearrange, unpad_input
-    from verl.utils.ulysses import gather_outpus_and_unpad, ulysses_pad_and_slice_inputs
+    from verl.utils.ulysses import gather_outputs_and_unpad, ulysses_pad_and_slice_inputs
     with torch.no_grad(), torch.autocast(device_type=get_device_name(), dtype=torch.bfloat16):
         input_ids = micro_batch["input_ids"]
@@ -443,7 +443,7 @@ class RewardModelWorker(Worker):
 # gather output if sp > 1
 if self.ulysses_sequence_parallel_size > 1:
-    reward_rmpad = gather_outpus_and_unpad(
+    reward_rmpad = gather_outputs_and_unpad(
         reward_rmpad, gather_dim=0, unpad_dim=0, padding_size=pad_size
     )


@@ -27,7 +27,7 @@ from verl.protocol import DataProto
 from verl.utils.distributed import initialize_global_process_group
 from verl.utils.model import compute_position_id_with_mask, create_random_mask
 from verl.utils.ulysses import (
-    gather_outpus_and_unpad,
+    gather_outputs_and_unpad,
     get_ulysses_sequence_parallel_world_size,
     set_ulysses_sequence_parallel_group,
     ulysses_pad_and_slice_inputs,
@@ -155,7 +155,7 @@ def _hf_casual_fwd(config, sp_size, dp_size):
 ).logits # (1, total_nnz/n, vocab_size)
 # all_gather output
-logits_full = gather_outpus_and_unpad(logits_split_in_seq, gather_dim=1, unpad_dim=1, padding_size=pad_size)
+logits_full = gather_outputs_and_unpad(logits_split_in_seq, gather_dim=1, unpad_dim=1, padding_size=pad_size)
 # 2. perform normal forward
 set_ulysses_sequence_parallel_group(None)
@@ -234,7 +234,7 @@ def _hf_casual_fwd_bwd(config, sp_size, dp_size):
 ).logits # (1, total_nnz/n, vocab_size)
 # all_gather output
-logits_full = gather_outpus_and_unpad(logits_split_in_seq, gather_dim=1, unpad_dim=1, padding_size=pad_size)
+logits_full = gather_outputs_and_unpad(logits_split_in_seq, gather_dim=1, unpad_dim=1, padding_size=pad_size)
 # 2. perform normal forward
 set_ulysses_sequence_parallel_group(None)


@@ -62,7 +62,7 @@ from verl.utils.torch_dtypes import PrecisionType
 from verl.utils.torch_functional import get_cosine_schedule_with_warmup, get_wsd_schedule_with_warmup
 from verl.utils.tracking import Tracking
 from verl.utils.ulysses import (
-    gather_outpus_and_unpad,
+    gather_outputs_and_unpad,
     get_ulysses_sequence_parallel_world_size,
     ulysses_pad_and_slice_inputs,
 )
@@ -406,7 +406,7 @@ class FSDPSFTTrainer:
 input_ids_rmpad_rolled = input_ids_rmpad_rolled.to(logits_rmpad.device)
 loss = loss_fct(logits_rmpad, input_ids_rmpad_rolled)
 # Gather and unpad for sequence parallelism
-loss = gather_outpus_and_unpad(loss, gather_dim=0, unpad_dim=0, padding_size=pad_size)
+loss = gather_outputs_and_unpad(loss, gather_dim=0, unpad_dim=0, padding_size=pad_size)
 # This is the loss collected from all ulysses ranks
 full_loss = pad_input(


@@ -234,7 +234,13 @@ class Gather(torch.autograd.Function):
 )
-def gather_outpus_and_unpad(
+def gather_outpus_and_unpad(*args, **kwargs):
+    raise RuntimeError(
+        "please use verl.utils.ulysses.gather_outputs_and_unpad instead of verl.utils.ulysses.gather_outpus_and_unpad"
+    )
+def gather_outputs_and_unpad(
     x: Tensor,
     gather_dim: int,
     unpad_dim: int = None,


@@ -33,7 +33,7 @@ from verl.utils.profiler import GPUMemoryLogger
 from verl.utils.py_functional import append_to_dict
 from verl.utils.seqlen_balancing import prepare_dynamic_batch, restore_dynamic_batch
 from verl.utils.torch_functional import logprobs_from_logits
-from verl.utils.ulysses import gather_outpus_and_unpad, ulysses_pad, ulysses_pad_and_slice_inputs
+from verl.utils.ulysses import gather_outputs_and_unpad, ulysses_pad, ulysses_pad_and_slice_inputs
 from verl.workers.actor import BasePPOActor
 if is_cuda_available:
@@ -203,14 +203,14 @@ class DataParallelPPOActor(BasePPOActor):
 # gather log_prob if sp > 1
 if self.use_ulysses_sp:
     # gather and unpad for the ulysses sp
-    log_probs = gather_outpus_and_unpad(
+    log_probs = gather_outputs_and_unpad(
         log_probs,
         gather_dim=0,
         unpad_dim=0,
         padding_size=pad_size,
     )
     if calculate_entropy:
-        entropy_rmpad = gather_outpus_and_unpad(
+        entropy_rmpad = gather_outputs_and_unpad(
             entropy_rmpad,
             gather_dim=0,
             unpad_dim=0,


@@ -31,7 +31,7 @@ from verl.utils.profiler import GPUMemoryLogger
 from verl.utils.py_functional import append_to_dict
 from verl.utils.seqlen_balancing import prepare_dynamic_batch, restore_dynamic_batch
 from verl.utils.torch_functional import masked_mean
-from verl.utils.ulysses import gather_outpus_and_unpad, ulysses_pad_and_slice_inputs
+from verl.utils.ulysses import gather_outputs_and_unpad, ulysses_pad_and_slice_inputs
 from verl.workers.critic import BasePPOCritic
 if is_cuda_available:
@@ -113,7 +113,7 @@ class DataParallelPPOCritic(BasePPOCritic):
 # gather output if sp > 1
 if self.ulysses_sequence_parallel_size > 1:
-    values_rmpad = gather_outpus_and_unpad(
+    values_rmpad = gather_outputs_and_unpad(
         values_rmpad, gather_dim=0, unpad_dim=0, padding_size=pad_size
     )


@@ -1438,7 +1438,7 @@ class RewardModelWorker(Worker, DistProfilerExtension):
     unpad_input,
 )
-from verl.utils.ulysses import gather_outpus_and_unpad, ulysses_pad_and_slice_inputs
+from verl.utils.ulysses import gather_outputs_and_unpad, ulysses_pad_and_slice_inputs
 with torch.no_grad(), torch.autocast(device_type=device_name, dtype=torch.bfloat16):
     input_ids = micro_batch["input_ids"]
@@ -1481,7 +1481,7 @@ class RewardModelWorker(Worker, DistProfilerExtension):
 # gather output if sp > 1
 if self.ulysses_sequence_parallel_size > 1:
-    reward_rmpad = gather_outpus_and_unpad(
+    reward_rmpad = gather_outputs_and_unpad(
         reward_rmpad, gather_dim=0, unpad_dim=0, padding_size=pad_size
     )