mirror of https://github.com/volcengine/verl.git
synced 2025-10-20 13:43:50 +08:00
[misc] chore: fix typo in function name (#2525)
### What does this PR do?

Fix typo: `gather_outpus_and_unpad` -> `gather_outputs_and_unpad`.

### Checklist Before Starting

- [x] Search for similar PRs. Paste at least one query link here: ...
- [x] Format the PR title as `[{modules}] {type}: {description}` (This will be checked by the CI)
  - `{modules}` include `fsdp`, `megatron`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data`
  - If this PR involves multiple modules, separate them with `,`, like `[megatron, fsdp, doc]`
  - `{type}` is in `feat`, `fix`, `refactor`, `chore`, `test`
  - If this PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title.
    - Example: `[BREAKING][fsdp, megatron] feat: dynamic batching`

### Test

> For changes that cannot be tested by CI (e.g., algorithm implementation, new model support), validate by experiment(s) and show results like training curve plots, evaluation results, etc.

### API and Usage Example

> Demonstrate how the API changes if any, and provide usage example(s) if possible.

```python
# Add code snippet or script demonstrating how to use this
```

### Design & Code Changes

> Demonstrate the high-level design if this PR is complex, and list the specific changes.

### Checklist Before Submitting

> [!IMPORTANT]
> Please check all the following items before requesting a review, otherwise the reviewer might deprioritize this PR for review.

- [ ] Read the [Contribute Guide](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md).
- [ ] Apply [pre-commit checks](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md#code-linting-and-formatting): `pre-commit install && pre-commit run --all-files --show-diff-on-failure --color=always`
- [ ] Add / Update [the documentation](https://github.com/volcengine/verl/tree/main/docs).
- [ ] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/volcengine/verl/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: ...
- [ ] Once your PR is ready for CI, send a message in [the `ci-request` channel](https://verl-project.slack.com/archives/C091TCESWB1) in [the `verl` Slack workspace](https://join.slack.com/t/verl-project/shared_invite/zt-3855yhg8g-CTkqXu~hKojPCmo7k_yXTQ).

---------

Signed-off-by: ShareLer <ShareLe@163.com>
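For anyone wanting to sanity-check the rename locally, below is a minimal sketch (not part of the PR) assuming verl with this patch installed. It relies only on what the diff shows: the old name becomes a shim that raises `RuntimeError`, and call sites switch to `gather_outputs_and_unpad` with unchanged arguments.

```python
# Minimal post-patch check of the rename; `None` is a throwaway argument,
# since the deprecated shim raises before touching its inputs.
from verl.utils.ulysses import gather_outpus_and_unpad, gather_outputs_and_unpad

try:
    gather_outpus_and_unpad(None, gather_dim=0, unpad_dim=0, padding_size=0)
except RuntimeError as err:
    print(err)  # points to verl.utils.ulysses.gather_outputs_and_unpad

# Migrating a call site is a pure rename; arguments such as gather_dim,
# unpad_dim, and padding_size stay exactly as before, e.g.:
#   log_probs = gather_outputs_and_unpad(log_probs, gather_dim=0, unpad_dim=0, padding_size=pad_size)
```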
@@ -60,7 +60,7 @@ Ulysses Utilities
 --------------------
 
 .. automodule:: verl.utils.ulysses
-    :members: gather_outpus_and_unpad, ulysses_pad_and_slice_inputs
+    :members: gather_outputs_and_unpad, ulysses_pad_and_slice_inputs
 
 FSDP Utilities
 ------------------
@@ -28,7 +28,7 @@ from verl import DataProto
 from verl.utils.device import get_device_name
 from verl.utils.py_functional import append_to_dict
 from verl.utils.seqlen_balancing import get_reverse_idx, rearrange_micro_batches
-from verl.utils.ulysses import gather_outpus_and_unpad, ulysses_pad_and_slice_inputs
+from verl.utils.ulysses import gather_outputs_and_unpad, ulysses_pad_and_slice_inputs
 
 from .prime_core_algos import compute_ce_dpo_loss_rm, compute_detach_dpo_loss_rm
 
@@ -101,7 +101,9 @@ class DataParallelPRIMERewardModel:
             )
 
             if self.ulysses_sequence_parallel_size > 1:
-                rm_log_labels = gather_outpus_and_unpad(rm_log_labels, gather_dim=0, unpad_dim=0, padding_size=pad_size)
+                rm_log_labels = gather_outputs_and_unpad(
+                    rm_log_labels, gather_dim=0, unpad_dim=0, padding_size=pad_size
+                )
             rm_log_labels = pad_input(
                 hidden_states=rm_log_labels.unsqueeze(-1), indices=indices, batch=batch_size, seqlen=seqlen
             ).squeeze(-1)[:, -num_actions - 1 : -1]
@@ -149,7 +151,7 @@ class DataParallelPRIMERewardModel:
                 logits=ref_output_logits, labels=input_ids_rmpad_rolled
             )
 
-            ref_log_labels = gather_outpus_and_unpad(
+            ref_log_labels = gather_outputs_and_unpad(
                 ref_log_labels, gather_dim=0, unpad_dim=0, padding_size=pad_size
             )
             ref_log_labels = pad_input(
@@ -409,7 +409,7 @@ class RewardModelWorker(Worker):
     def _forward_micro_batch(self, micro_batch):
         from flash_attn.bert_padding import index_first_axis, pad_input, rearrange, unpad_input
 
-        from verl.utils.ulysses import gather_outpus_and_unpad, ulysses_pad_and_slice_inputs
+        from verl.utils.ulysses import gather_outputs_and_unpad, ulysses_pad_and_slice_inputs
 
         with torch.no_grad(), torch.autocast(device_type=get_device_name(), dtype=torch.bfloat16):
             input_ids = micro_batch["input_ids"]
@@ -443,7 +443,7 @@ class RewardModelWorker(Worker):
 
             # gather output if sp > 1
             if self.ulysses_sequence_parallel_size > 1:
-                reward_rmpad = gather_outpus_and_unpad(
+                reward_rmpad = gather_outputs_and_unpad(
                     reward_rmpad, gather_dim=0, unpad_dim=0, padding_size=pad_size
                 )
 
@@ -27,7 +27,7 @@ from verl.protocol import DataProto
 from verl.utils.distributed import initialize_global_process_group
 from verl.utils.model import compute_position_id_with_mask, create_random_mask
 from verl.utils.ulysses import (
-    gather_outpus_and_unpad,
+    gather_outputs_and_unpad,
     get_ulysses_sequence_parallel_world_size,
     set_ulysses_sequence_parallel_group,
     ulysses_pad_and_slice_inputs,
@@ -155,7 +155,7 @@ def _hf_casual_fwd(config, sp_size, dp_size):
     ).logits  # (1, total_nnz/n, vocab_size)
 
     # all_gather output
-    logits_full = gather_outpus_and_unpad(logits_split_in_seq, gather_dim=1, unpad_dim=1, padding_size=pad_size)
+    logits_full = gather_outputs_and_unpad(logits_split_in_seq, gather_dim=1, unpad_dim=1, padding_size=pad_size)
 
     # 2. perform normal forward
     set_ulysses_sequence_parallel_group(None)
@@ -234,7 +234,7 @@ def _hf_casual_fwd_bwd(config, sp_size, dp_size):
     ).logits  # (1, total_nnz/n, vocab_size)
 
     # all_gather output
-    logits_full = gather_outpus_and_unpad(logits_split_in_seq, gather_dim=1, unpad_dim=1, padding_size=pad_size)
+    logits_full = gather_outputs_and_unpad(logits_split_in_seq, gather_dim=1, unpad_dim=1, padding_size=pad_size)
 
     # 2. perform normal forward
     set_ulysses_sequence_parallel_group(None)
@@ -62,7 +62,7 @@ from verl.utils.torch_dtypes import PrecisionType
 from verl.utils.torch_functional import get_cosine_schedule_with_warmup, get_wsd_schedule_with_warmup
 from verl.utils.tracking import Tracking
 from verl.utils.ulysses import (
-    gather_outpus_and_unpad,
+    gather_outputs_and_unpad,
     get_ulysses_sequence_parallel_world_size,
     ulysses_pad_and_slice_inputs,
 )
@@ -406,7 +406,7 @@ class FSDPSFTTrainer:
                 input_ids_rmpad_rolled = input_ids_rmpad_rolled.to(logits_rmpad.device)
                 loss = loss_fct(logits_rmpad, input_ids_rmpad_rolled)
                 # Gather and unpad for sequence parallelism
-                loss = gather_outpus_and_unpad(loss, gather_dim=0, unpad_dim=0, padding_size=pad_size)
+                loss = gather_outputs_and_unpad(loss, gather_dim=0, unpad_dim=0, padding_size=pad_size)
 
                 # This is the loss collected from all ulysses ranks
                 full_loss = pad_input(
@@ -234,7 +234,13 @@ class Gather(torch.autograd.Function):
         )
 
 
-def gather_outpus_and_unpad(
+def gather_outpus_and_unpad(*args, **kwargs):
+    raise RuntimeError(
+        "please use verl.utils.ulysses.gather_outputs_and_unpad instead of verl.utils.ulysses.gather_outpus_and_unpad"
+    )
+
+
+def gather_outputs_and_unpad(
     x: Tensor,
     gather_dim: int,
     unpad_dim: int = None,
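As context for the hunk above, here is a small single-process illustration (an assumption about the helper's semantics, not verl code) of what `gather_dim`, `unpad_dim`, and `padding_size` mean at the call sites below: per-rank shards are gathered along `gather_dim`, then `padding_size` trailing pad positions are removed from `unpad_dim`.

```python
# Emulates the dim-0 all-gather with torch.cat, then drops the padding that
# ulysses-style slicing added so the sequence could be split evenly across ranks.
import torch

sp_size = 4
seq_len = 10                           # original (unpadded) length
pad_size = -seq_len % sp_size          # 2 pad positions make it divisible by sp_size
x = torch.arange(seq_len + pad_size)   # padded sequence of length 12
shards = x.chunk(sp_size, dim=0)       # what each sequence-parallel rank would hold

gathered = torch.cat(shards, dim=0)                 # stand-in for "gather_dim=0"
unpadded = gathered[: gathered.size(0) - pad_size]  # drop "padding_size" along "unpad_dim=0"
assert torch.equal(unpadded, torch.arange(seq_len))
```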
@@ -33,7 +33,7 @@ from verl.utils.profiler import GPUMemoryLogger
 from verl.utils.py_functional import append_to_dict
 from verl.utils.seqlen_balancing import prepare_dynamic_batch, restore_dynamic_batch
 from verl.utils.torch_functional import logprobs_from_logits
-from verl.utils.ulysses import gather_outpus_and_unpad, ulysses_pad, ulysses_pad_and_slice_inputs
+from verl.utils.ulysses import gather_outputs_and_unpad, ulysses_pad, ulysses_pad_and_slice_inputs
 from verl.workers.actor import BasePPOActor
 
 if is_cuda_available:
@@ -203,14 +203,14 @@ class DataParallelPPOActor(BasePPOActor):
                 # gather log_prob if sp > 1
                 if self.use_ulysses_sp:
                     # gather and unpad for the ulysses sp
-                    log_probs = gather_outpus_and_unpad(
+                    log_probs = gather_outputs_and_unpad(
                         log_probs,
                         gather_dim=0,
                         unpad_dim=0,
                         padding_size=pad_size,
                     )
                     if calculate_entropy:
-                        entropy_rmpad = gather_outpus_and_unpad(
+                        entropy_rmpad = gather_outputs_and_unpad(
                             entropy_rmpad,
                             gather_dim=0,
                             unpad_dim=0,
@@ -31,7 +31,7 @@ from verl.utils.profiler import GPUMemoryLogger
 from verl.utils.py_functional import append_to_dict
 from verl.utils.seqlen_balancing import prepare_dynamic_batch, restore_dynamic_batch
 from verl.utils.torch_functional import masked_mean
-from verl.utils.ulysses import gather_outpus_and_unpad, ulysses_pad_and_slice_inputs
+from verl.utils.ulysses import gather_outputs_and_unpad, ulysses_pad_and_slice_inputs
 from verl.workers.critic import BasePPOCritic
 
 if is_cuda_available:
@@ -113,7 +113,7 @@ class DataParallelPPOCritic(BasePPOCritic):
 
                 # gather output if sp > 1
                 if self.ulysses_sequence_parallel_size > 1:
-                    values_rmpad = gather_outpus_and_unpad(
+                    values_rmpad = gather_outputs_and_unpad(
                         values_rmpad, gather_dim=0, unpad_dim=0, padding_size=pad_size
                     )
 
@@ -1438,7 +1438,7 @@ class RewardModelWorker(Worker, DistProfilerExtension):
             unpad_input,
         )
 
-        from verl.utils.ulysses import gather_outpus_and_unpad, ulysses_pad_and_slice_inputs
+        from verl.utils.ulysses import gather_outputs_and_unpad, ulysses_pad_and_slice_inputs
 
         with torch.no_grad(), torch.autocast(device_type=device_name, dtype=torch.bfloat16):
            input_ids = micro_batch["input_ids"]
@@ -1481,7 +1481,7 @@ class RewardModelWorker(Worker, DistProfilerExtension):
 
             # gather output if sp > 1
             if self.ulysses_sequence_parallel_size > 1:
-                reward_rmpad = gather_outpus_and_unpad(
+                reward_rmpad = gather_outputs_and_unpad(
                     reward_rmpad, gather_dim=0, unpad_dim=0, padding_size=pad_size
                 )
 