set use_kl_in_reward=True in reinforce_plus_plus (#2580)

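Set algorithm.use_kl_in_reward=True in the reinforce_plus_plus example scripts, and set actor_rollout_ref.actor.use_kl_loss=False in the same scripts.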
This commit is contained in:
Titanpku
2025-07-17 12:10:54 +08:00
committed by GitHub
parent 503ea75f53
commit 64601e418c
2 changed files with 4 additions and 4 deletions

View File

@@ -23,7 +23,7 @@ python3 -m verl.trainer.main_ppo \
actor_rollout_ref.model.use_remove_padding=True \
actor_rollout_ref.actor.ppo_mini_batch_size=1024 \
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \
-actor_rollout_ref.actor.use_kl_loss=True \
+actor_rollout_ref.actor.use_kl_loss=False \
actor_rollout_ref.actor.kl_loss_coef=0.001 \
actor_rollout_ref.actor.kl_loss_type=mse \
actor_rollout_ref.actor.entropy_coeff=0 \
@@ -37,7 +37,7 @@ python3 -m verl.trainer.main_ppo \
actor_rollout_ref.rollout.n=8 \
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \
actor_rollout_ref.ref.fsdp_config.param_offload=True \
-algorithm.use_kl_in_reward=False \
+algorithm.use_kl_in_reward=True \
trainer.critic_warmup=0 \
trainer.logger='["console","wandb"]' \
trainer.project_name='verl_grpo_example_gsm8k' \

View File

@@ -23,7 +23,7 @@ python3 -m verl.trainer.main_ppo \
actor_rollout_ref.model.use_remove_padding=True \
actor_rollout_ref.actor.ppo_mini_batch_size=1024 \
actor_rollout_ref.actor.ppo_micro_batch_size_per_gpu=16 \
-actor_rollout_ref.actor.use_kl_loss=True \
+actor_rollout_ref.actor.use_kl_loss=False \
actor_rollout_ref.actor.kl_loss_coef=0.001 \
actor_rollout_ref.actor.kl_loss_type=mse \
actor_rollout_ref.actor.entropy_coeff=0 \
@@ -37,7 +37,7 @@ python3 -m verl.trainer.main_ppo \
actor_rollout_ref.rollout.n=8 \
actor_rollout_ref.ref.log_prob_micro_batch_size_per_gpu=16 \
actor_rollout_ref.ref.fsdp_config.param_offload=True \
-algorithm.use_kl_in_reward=False \
+algorithm.use_kl_in_reward=True \
trainer.critic_warmup=0 \
trainer.logger='["console","wandb"]' \
trainer.project_name='verl_grpo_example_gsm8k' \