mirror of https://github.com/volcengine/verl.git
synced 2025-10-20 13:43:50 +08:00
[misc] chore: fix typo in function name (#2525)
### What does this PR do?

Fix typo: `gather_outpus_and_unpad` -> `gather_outputs_and_unpad`.

### Checklist Before Starting

- [x] Search for similar PRs. Paste at least one query link here: ...
- [x] Format the PR title as `[{modules}] {type}: {description}` (This will be checked by the CI)
  - `{modules}` include `fsdp`, `megatron`, `sglang`, `vllm`, `rollout`, `trainer`, `ci`, `training_utils`, `recipe`, `hardware`, `deployment`, `ray`, `worker`, `single_controller`, `misc`, `perf`, `model`, `algo`, `env`, `tool`, `ckpt`, `doc`, `data`
  - If this PR involves multiple modules, separate them with `,`, like `[megatron, fsdp, doc]`
  - `{type}` is in `feat`, `fix`, `refactor`, `chore`, `test`
  - If this PR breaks any API (CLI arguments, config, function signature, etc.), add `[BREAKING]` to the beginning of the title.
    - Example: `[BREAKING][fsdp, megatron] feat: dynamic batching`

### Test

> For changes that cannot be tested by CI (e.g., algorithm implementation, new model support), validate by experiment(s) and show results like training curve plots, evaluation results, etc.

### API and Usage Example

> Demonstrate how the API changes if any, and provide usage example(s) if possible.

```python
# Add code snippet or script demonstrating how to use this
```

### Design & Code Changes

> Demonstrate the high-level design if this PR is complex, and list the specific changes.

### Checklist Before Submitting

> [!IMPORTANT]
> Please check all the following items before requesting a review, otherwise the reviewer might deprioritize this PR for review.

- [ ] Read the [Contribute Guide](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md).
- [ ] Apply [pre-commit checks](https://github.com/volcengine/verl/blob/main/CONTRIBUTING.md#code-linting-and-formatting): `pre-commit install && pre-commit run --all-files --show-diff-on-failure --color=always`
- [ ] Add / Update [the documentation](https://github.com/volcengine/verl/tree/main/docs).
- [ ] Add unit or end-to-end test(s) to [the CI workflow](https://github.com/volcengine/verl/tree/main/.github/workflows) to cover all the code. If not feasible, explain why: ...
- [ ] Once your PR is ready for CI, send a message in [the `ci-request` channel](https://verl-project.slack.com/archives/C091TCESWB1) in [the `verl` Slack workspace](https://join.slack.com/t/verl-project/shared_invite/zt-3855yhg8g-CTkqXu~hKojPCmo7k_yXTQ).

---------

Signed-off-by: ShareLer <ShareLe@163.com>
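For anyone wanting to sanity-check the rename locally, below is a minimal sketch (not part of the PR) assuming verl with this patch installed. It relies only on what the diff shows: the old name becomes a shim that raises `RuntimeError`, and call sites switch to `gather_outputs_and_unpad` with unchanged arguments.

```python
# Minimal post-patch check of the rename; `None` is a throwaway argument,
# since the deprecated shim raises before touching its inputs.
from verl.utils.ulysses import gather_outpus_and_unpad, gather_outputs_and_unpad

try:
    gather_outpus_and_unpad(None, gather_dim=0, unpad_dim=0, padding_size=0)
except RuntimeError as err:
    print(err)  # points to verl.utils.ulysses.gather_outputs_and_unpad

# Migrating a call site is a pure rename; arguments such as gather_dim,
# unpad_dim, and padding_size stay exactly as before, e.g.:
#   log_probs = gather_outputs_and_unpad(log_probs, gather_dim=0, unpad_dim=0, padding_size=pad_size)
```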
@@ -60,7 +60,7 @@ Ulysses Utilities
 --------------------
 
 .. automodule:: verl.utils.ulysses
-    :members: gather_outpus_and_unpad, ulysses_pad_and_slice_inputs
+    :members: gather_outputs_and_unpad, ulysses_pad_and_slice_inputs
 
 FSDP Utilities
 ------------------
@@ -28,7 +28,7 @@ from verl import DataProto
 from verl.utils.device import get_device_name
 from verl.utils.py_functional import append_to_dict
 from verl.utils.seqlen_balancing import get_reverse_idx, rearrange_micro_batches
-from verl.utils.ulysses import gather_outpus_and_unpad, ulysses_pad_and_slice_inputs
+from verl.utils.ulysses import gather_outputs_and_unpad, ulysses_pad_and_slice_inputs
 
 from .prime_core_algos import compute_ce_dpo_loss_rm, compute_detach_dpo_loss_rm
 
@@ -101,7 +101,9 @@ class DataParallelPRIMERewardModel:
             )
 
             if self.ulysses_sequence_parallel_size > 1:
-                rm_log_labels = gather_outpus_and_unpad(rm_log_labels, gather_dim=0, unpad_dim=0, padding_size=pad_size)
+                rm_log_labels = gather_outputs_and_unpad(
+                    rm_log_labels, gather_dim=0, unpad_dim=0, padding_size=pad_size
+                )
             rm_log_labels = pad_input(
                 hidden_states=rm_log_labels.unsqueeze(-1), indices=indices, batch=batch_size, seqlen=seqlen
             ).squeeze(-1)[:, -num_actions - 1 : -1]
@@ -149,7 +151,7 @@ class DataParallelPRIMERewardModel:
                 logits=ref_output_logits, labels=input_ids_rmpad_rolled
             )
 
-            ref_log_labels = gather_outpus_and_unpad(
+            ref_log_labels = gather_outputs_and_unpad(
                 ref_log_labels, gather_dim=0, unpad_dim=0, padding_size=pad_size
             )
             ref_log_labels = pad_input(
@@ -409,7 +409,7 @@ class RewardModelWorker(Worker):
     def _forward_micro_batch(self, micro_batch):
         from flash_attn.bert_padding import index_first_axis, pad_input, rearrange, unpad_input
 
-        from verl.utils.ulysses import gather_outpus_and_unpad, ulysses_pad_and_slice_inputs
+        from verl.utils.ulysses import gather_outputs_and_unpad, ulysses_pad_and_slice_inputs
 
         with torch.no_grad(), torch.autocast(device_type=get_device_name(), dtype=torch.bfloat16):
             input_ids = micro_batch["input_ids"]
@@ -443,7 +443,7 @@ class RewardModelWorker(Worker):
 
             # gather output if sp > 1
             if self.ulysses_sequence_parallel_size > 1:
-                reward_rmpad = gather_outpus_and_unpad(
+                reward_rmpad = gather_outputs_and_unpad(
                     reward_rmpad, gather_dim=0, unpad_dim=0, padding_size=pad_size
                 )
 
@@ -27,7 +27,7 @@ from verl.protocol import DataProto
 from verl.utils.distributed import initialize_global_process_group
 from verl.utils.model import compute_position_id_with_mask, create_random_mask
 from verl.utils.ulysses import (
-    gather_outpus_and_unpad,
+    gather_outputs_and_unpad,
     get_ulysses_sequence_parallel_world_size,
     set_ulysses_sequence_parallel_group,
     ulysses_pad_and_slice_inputs,
@@ -155,7 +155,7 @@ def _hf_casual_fwd(config, sp_size, dp_size):
     ).logits  # (1, total_nnz/n, vocab_size)
 
     # all_gather output
-    logits_full = gather_outpus_and_unpad(logits_split_in_seq, gather_dim=1, unpad_dim=1, padding_size=pad_size)
+    logits_full = gather_outputs_and_unpad(logits_split_in_seq, gather_dim=1, unpad_dim=1, padding_size=pad_size)
 
     # 2. perform normal forward
     set_ulysses_sequence_parallel_group(None)
@@ -234,7 +234,7 @@ def _hf_casual_fwd_bwd(config, sp_size, dp_size):
     ).logits  # (1, total_nnz/n, vocab_size)
 
     # all_gather output
-    logits_full = gather_outpus_and_unpad(logits_split_in_seq, gather_dim=1, unpad_dim=1, padding_size=pad_size)
+    logits_full = gather_outputs_and_unpad(logits_split_in_seq, gather_dim=1, unpad_dim=1, padding_size=pad_size)
 
     # 2. perform normal forward
     set_ulysses_sequence_parallel_group(None)
@@ -62,7 +62,7 @@ from verl.utils.torch_dtypes import PrecisionType
 from verl.utils.torch_functional import get_cosine_schedule_with_warmup, get_wsd_schedule_with_warmup
 from verl.utils.tracking import Tracking
 from verl.utils.ulysses import (
-    gather_outpus_and_unpad,
+    gather_outputs_and_unpad,
     get_ulysses_sequence_parallel_world_size,
     ulysses_pad_and_slice_inputs,
 )
@@ -406,7 +406,7 @@ class FSDPSFTTrainer:
                 input_ids_rmpad_rolled = input_ids_rmpad_rolled.to(logits_rmpad.device)
                 loss = loss_fct(logits_rmpad, input_ids_rmpad_rolled)
                 # Gather and unpad for sequence parallelism
-                loss = gather_outpus_and_unpad(loss, gather_dim=0, unpad_dim=0, padding_size=pad_size)
+                loss = gather_outputs_and_unpad(loss, gather_dim=0, unpad_dim=0, padding_size=pad_size)
 
                 # This is the loss collected from all ulysses ranks
                 full_loss = pad_input(
@@ -234,7 +234,13 @@ class Gather(torch.autograd.Function):
         )
 
 
-def gather_outpus_and_unpad(
+def gather_outpus_and_unpad(*args, **kwargs):
+    raise RuntimeError(
+        "please use verl.utils.ulysses.gather_outputs_and_unpad instead of verl.utils.ulysses.gather_outpus_and_unpad"
+    )
+
+
+def gather_outputs_and_unpad(
     x: Tensor,
     gather_dim: int,
     unpad_dim: int = None,
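As context for the hunk above, here is a small single-process illustration (an assumption about the helper's semantics, not verl code) of what `gather_dim`, `unpad_dim`, and `padding_size` mean at the call sites below: per-rank shards are gathered along `gather_dim`, then `padding_size` trailing pad positions are removed from `unpad_dim`.

```python
# Emulates the dim-0 all-gather with torch.cat, then drops the padding that
# ulysses-style slicing added so the sequence could be split evenly across ranks.
import torch

sp_size = 4
seq_len = 10                           # original (unpadded) length
pad_size = -seq_len % sp_size          # 2 pad positions make it divisible by sp_size
x = torch.arange(seq_len + pad_size)   # padded sequence of length 12
shards = x.chunk(sp_size, dim=0)       # what each sequence-parallel rank would hold

gathered = torch.cat(shards, dim=0)                 # stand-in for "gather_dim=0"
unpadded = gathered[: gathered.size(0) - pad_size]  # drop "padding_size" along "unpad_dim=0"
assert torch.equal(unpadded, torch.arange(seq_len))
```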
@@ -33,7 +33,7 @@ from verl.utils.profiler import GPUMemoryLogger
 from verl.utils.py_functional import append_to_dict
 from verl.utils.seqlen_balancing import prepare_dynamic_batch, restore_dynamic_batch
 from verl.utils.torch_functional import logprobs_from_logits
-from verl.utils.ulysses import gather_outpus_and_unpad, ulysses_pad, ulysses_pad_and_slice_inputs
+from verl.utils.ulysses import gather_outputs_and_unpad, ulysses_pad, ulysses_pad_and_slice_inputs
 from verl.workers.actor import BasePPOActor
 
 if is_cuda_available:
@@ -203,14 +203,14 @@ class DataParallelPPOActor(BasePPOActor):
                 # gather log_prob if sp > 1
                 if self.use_ulysses_sp:
                     # gather and unpad for the ulysses sp
-                    log_probs = gather_outpus_and_unpad(
+                    log_probs = gather_outputs_and_unpad(
                         log_probs,
                         gather_dim=0,
                         unpad_dim=0,
                         padding_size=pad_size,
                     )
                     if calculate_entropy:
-                        entropy_rmpad = gather_outpus_and_unpad(
+                        entropy_rmpad = gather_outputs_and_unpad(
                             entropy_rmpad,
                             gather_dim=0,
                             unpad_dim=0,
@@ -31,7 +31,7 @@ from verl.utils.profiler import GPUMemoryLogger
 from verl.utils.py_functional import append_to_dict
 from verl.utils.seqlen_balancing import prepare_dynamic_batch, restore_dynamic_batch
 from verl.utils.torch_functional import masked_mean
-from verl.utils.ulysses import gather_outpus_and_unpad, ulysses_pad_and_slice_inputs
+from verl.utils.ulysses import gather_outputs_and_unpad, ulysses_pad_and_slice_inputs
 from verl.workers.critic import BasePPOCritic
 
 if is_cuda_available:
@@ -113,7 +113,7 @@ class DataParallelPPOCritic(BasePPOCritic):
 
                 # gather output if sp > 1
                 if self.ulysses_sequence_parallel_size > 1:
-                    values_rmpad = gather_outpus_and_unpad(
+                    values_rmpad = gather_outputs_and_unpad(
                         values_rmpad, gather_dim=0, unpad_dim=0, padding_size=pad_size
                     )
 
@@ -1438,7 +1438,7 @@ class RewardModelWorker(Worker, DistProfilerExtension):
             unpad_input,
         )
 
-        from verl.utils.ulysses import gather_outpus_and_unpad, ulysses_pad_and_slice_inputs
+        from verl.utils.ulysses import gather_outputs_and_unpad, ulysses_pad_and_slice_inputs
 
         with torch.no_grad(), torch.autocast(device_type=device_name, dtype=torch.bfloat16):
            input_ids = micro_batch["input_ids"]
@@ -1481,7 +1481,7 @@ class RewardModelWorker(Worker, DistProfilerExtension):
 
             # gather output if sp > 1
             if self.ulysses_sequence_parallel_size > 1:
-                reward_rmpad = gather_outpus_and_unpad(
+                reward_rmpad = gather_outputs_and_unpad(
                     reward_rmpad, gather_dim=0, unpad_dim=0, padding_size=pad_size
                 )
 