Fix PPO/RLOO examples (#2100)

This commit is contained in:
lewtun
2024-09-23 11:49:36 +02:00
committed by GitHub
parent 92eea1f239
commit 6859e048da
2 changed files with 2 additions and 4 deletions

View File

@@ -54,7 +54,6 @@ accelerate launch --config_file examples/accelerate_configs/deepspeed_zero3.yaml
--sft_model_path EleutherAI/pythia-1b-deduped \
--reward_model_path EleutherAI/pythia-1b-deduped \
--local_rollout_forward_batch_size 1 \
--deepspeed3 \
--missing_eos_penalty 1.0
"""
@@ -89,7 +88,7 @@ if __name__ == "__main__":
# Dataset
################
dataset = load_dataset("trl-internal-testing/descriptiveness-sentiment-trl-style", split="descriptiveness")
eval_samples = 20
eval_samples = 100
train_dataset = dataset.select(range(len(dataset) - eval_samples))
eval_dataset = dataset.select(range(len(dataset) - eval_samples, len(dataset)))
dataset_text_field = "prompt"