🗳️ Remove logging_steps parameter for simpler setup (#3612)

Quentin Gallouédec authored on 2025-06-18 13:52:21 +02:00 (committed by GitHub)
parent d6a969ff7d
commit ed9b78a5f7
27 changed files with 11 additions and 51 deletions
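
The snippets below simply drop `logging_steps` and fall back to the logging cadence inherited from `transformers.TrainingArguments`. As a quick reference, here is a minimal sketch (not part of this commit; `CPOConfig` is used only as an example) of how a user can still opt back in to frequent logging with any of the affected configs:

```python
# Minimal sketch, not part of this commit: the TRL configs below subclass
# transformers.TrainingArguments, so logging_steps remains a valid keyword.
from trl import CPOConfig

# As in the updated docs: omit logging_steps and inherit the default cadence
# (500 optimizer steps in transformers.TrainingArguments).
training_args = CPOConfig(output_dir="Qwen2-0.5B-CPO")

# Opting back in to the frequent logging the docs previously hard-coded.
training_args = CPOConfig(output_dir="Qwen2-0.5B-CPO", logging_steps=10)
```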

View File

@ -31,7 +31,7 @@ model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
train_dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")
- training_args = CPOConfig(output_dir="Qwen2-0.5B-CPO", logging_steps=10)
+ training_args = CPOConfig(output_dir="Qwen2-0.5B-CPO")
trainer = CPOTrainer(model=model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset)
trainer.train()
```
@ -57,7 +57,6 @@ accelerate launch examples/scripts/cpo.py \
--model_name_or_path Qwen/Qwen2-0.5B-Instruct \
--dataset_name trl-lib/ultrafeedback_binarized \
--num_train_epochs 1 \
- --logging_steps 25 \
--output_dir Qwen2-0.5B-CPO
```

View File

@ -46,7 +46,7 @@ model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
train_dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")
- training_args = DPOConfig(output_dir="Qwen2-0.5B-DPO", logging_steps=10)
+ training_args = DPOConfig(output_dir="Qwen2-0.5B-DPO")
trainer = DPOTrainer(model=model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset)
trainer.train()
```
@ -113,7 +113,6 @@ accelerate launch trl/scripts/dpo.py \
--model_name_or_path Qwen/Qwen2-0.5B-Instruct \
--dataset_name trl-lib/ultrafeedback_binarized \
--num_train_epochs 1 \
- --logging_steps 25 \
--output_dir Qwen2-0.5B-DPO
```
@ -195,8 +194,8 @@ First install `unsloth` according to the [official documentation](https://github
+ model = FastLanguageModel.get_peft_model(model)
train_dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")
- - training_args = DPOConfig(output_dir="Qwen2-0.5B-DPO", logging_steps=10)
- + training_args = DPOConfig(output_dir="Qwen2-0.5B-DPO", logging_steps=10, bf16=True)
+ - training_args = DPOConfig(output_dir="Qwen2-0.5B-DPO")
+ + training_args = DPOConfig(output_dir="Qwen2-0.5B-DPO", bf16=True)
trainer = DPOTrainer(model=model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset)
trainer.train()

View File

@ -36,7 +36,7 @@ dataset = load_dataset("trl-lib/tldr", split="train")
def reward_len(completions, **kwargs):
return [-abs(20 - len(completion)) for completion in completions]
- training_args = GRPOConfig(output_dir="Qwen2-0.5B-GRPO", logging_steps=10)
+ training_args = GRPOConfig(output_dir="Qwen2-0.5B-GRPO")
trainer = GRPOTrainer(
model="Qwen/Qwen2-0.5B-Instruct",
reward_funcs=reward_len,
@ -294,7 +294,6 @@ def main():
per_device_train_batch_size=4,
bf16=True,
gradient_checkpointing=True,
- logging_steps=10,
use_vllm=True,
vllm_server_host=args.vllm_server_host.replace("ip-", "").replace("-", "."), # from ip-X-X-X-X to X.X.X.X
)

View File

@ -85,7 +85,6 @@ config = IterativeSFTConfig(
per_device_train_batch_size=4,
gradient_accumulation_steps=4,
max_steps=1000,
- logging_steps=10,
save_steps=100,
optim="adamw_torch",
report_to="wandb",

View File

@ -38,7 +38,7 @@ model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
train_dataset = load_dataset("trl-lib/kto-mix-14k", split="train")
- training_args = KTOConfig(output_dir="Qwen2-0.5B-KTO", logging_steps=10)
+ training_args = KTOConfig(output_dir="Qwen2-0.5B-KTO")
trainer = KTOTrainer(model=model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset)
trainer.train()
```
@ -89,7 +89,6 @@ accelerate launch trl/scripts/kto.py \
--model_name_or_path Qwen/Qwen2-0.5B-Instruct \
--dataset_name trl-lib/kto-mix-14k \
--num_train_epochs 1 \
- --logging_steps 25 \
--output_dir Qwen2-0.5B-KTO
```

View File

@ -36,7 +36,7 @@ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
judge = PairRMJudge()
train_dataset = load_dataset("trl-lib/ultrafeedback-prompt", split="train")
- training_args = NashMDConfig(output_dir="Qwen2-0.5B-NashMD", logging_steps=10)
+ training_args = NashMDConfig(output_dir="Qwen2-0.5B-NashMD")
trainer = NashMDTrainer(
model=model, judge=judge, args=training_args, processing_class=tokenizer, train_dataset=train_dataset
)
@ -125,7 +125,6 @@ python examples/scripts/nash_md.py \
--judge pair_rm \
--dataset_name trl-lib/ultrafeedback-prompt \
--learning_rate 5.0e-7 \
- --logging_steps 25 \
--output_dir Qwen2.5-0.5B-NashMD-PairRM \
--warmup_ratio 0.1 \
--push_to_hub

View File

@ -36,7 +36,7 @@ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
judge = PairRMJudge()
train_dataset = load_dataset("trl-lib/ultrafeedback-prompt", split="train")
- training_args = OnlineDPOConfig(output_dir="Qwen2-0.5B-OnlineDPO", logging_steps=10)
+ training_args = OnlineDPOConfig(output_dir="Qwen2-0.5B-OnlineDPO")
trainer = OnlineDPOTrainer(
model=model, judge=judge, args=training_args, processing_class=tokenizer, train_dataset=train_dataset
)
@ -125,7 +125,6 @@ python examples/scripts/dpo_online.py \
--judge pair_rm \
--dataset_name trl-lib/ultrafeedback-prompt \
--learning_rate 5.0e-7 \
- --logging_steps 25 \
--output_dir Qwen2.5-0.5B-Online-DPO-PairRM \
--warmup_ratio 0.1 \
--push_to_hub
@ -171,7 +170,6 @@ accelerate launch --config_file examples/accelerate_configs/multi_gpu.yaml \
--max_new_tokens 53 \
--warmup_ratio 0.1 \
--missing_eos_penalty 1.0 \
- --logging_steps 20 \
--save_steps 0.1 \
--push_to_hub
@ -191,7 +189,6 @@ accelerate launch --config_file examples/accelerate_configs/deepspeed_zero2.yaml
--warmup_ratio 0.1 \
--missing_eos_penalty 1.0 \
--bf16 \
- --logging_steps 20 \
--save_steps 0.1 \
--push_to_hub
@ -212,7 +209,6 @@ accelerate launch --config_file examples/accelerate_configs/deepspeed_zero2.yaml
--missing_eos_penalty 1.0 \
--bf16 \
--gradient_checkpointing \
- --logging_steps 20 \
--save_steps 0.1 \
--push_to_hub
```

View File

@ -41,7 +41,7 @@ model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
train_dataset = load_dataset("trl-lib/ultrafeedback_binarized", split="train")
- training_args = ORPOConfig(output_dir="Qwen2-0.5B-ORPO", logging_steps=10)
+ training_args = ORPOConfig(output_dir="Qwen2-0.5B-ORPO")
trainer = ORPOTrainer(model=model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset)
trainer.train()
```
@ -94,7 +94,6 @@ accelerate launch examples/scripts/orpo.py \
--model_name_or_path Qwen/Qwen2-0.5B-Instruct \
--dataset_name trl-lib/ultrafeedback_binarized \
--num_train_epochs 1 \
- --logging_steps 25 \
--output_dir Qwen2-0.5B-ORPO
```

View File

@ -42,7 +42,7 @@ model = AutoModelForTokenClassification.from_pretrained("Qwen/Qwen2-0.5B", num_l
tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B")
train_dataset = load_dataset("trl-lib/math_shepherd", split="train[:10%]")
- training_args = PRMConfig(output_dir="Qwen2-0.5B-Reward-Math-Sheperd", logging_steps=10)
+ training_args = PRMConfig(output_dir="Qwen2-0.5B-Reward-Math-Sheperd")
trainer = PRMTrainer(model=model, args=training_args, processing_class=tokenizer, train_dataset=train_dataset)
trainer.train()
```
@ -112,7 +112,6 @@ accelerate launch examples/scripts/prm.py \
--model_name_or_path Qwen/Qwen2-0.5B \
--dataset_name trl-lib/math_shepherd \
--num_train_epochs 1 \
- --logging_steps 25 \
--output_dir Qwen2-0.5B-Reward-Math-Sheperd
```

View File

@ -253,7 +253,6 @@ training_args = SFTConfig(
gradient_accumulation_steps=4, # Number of steps before performing a backward/update pass to accumulate gradients. multi-image -> gradient_accumulation_steps=1
gradient_checkpointing=True, # Enable gradient checkpointing to reduce memory usage during training.
optim="adamw_torch_fused", # Use the fused AdamW optimizer for better performance.
- logging_steps=10, # Frequency of logging training progress (log every 10 steps).
save_strategy="epoch", # Save checkpoints at the end of each epoch.
learning_rate=2e-05, # Learning rate for training.
bf16=True, # Enable bfloat16 precision for training to save memory and speed up computations.

View File

@ -39,7 +39,6 @@ training_args = GRPOConfig(
use_vllm=True,
bf16=True,
gradient_checkpointing=True,
- logging_steps=10,
)
trainer = GRPOTrainer(

View File

@ -35,7 +35,7 @@ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
judge = PairRMJudge()
train_dataset = load_dataset("trl-lib/ultrafeedback-prompt", split="train")
- training_args = XPOConfig(output_dir="Qwen2-0.5B-XPO", logging_steps=10)
+ training_args = XPOConfig(output_dir="Qwen2-0.5B-XPO")
trainer = XPOTrainer(
model=model, judge=judge, args=training_args, processing_class=tokenizer, train_dataset=train_dataset
)
@ -124,7 +124,6 @@ python examples/scripts/xpo.py \
--judge pair_rm \
--dataset_name trl-lib/ultrafeedback-prompt \
--learning_rate 5.0e-7 \
- --logging_steps 25 \
--output_dir Qwen2.5-0.5B-XPO-PairRM \
--warmup_ratio 0.1 \
--push_to_hub

View File

@ -75,7 +75,6 @@ if __name__ == "__main__":
push_to_hub=True,
hub_model_id=config.hub_model_id,
output_dir=config.output_dir,
- logging_steps=500,
save_steps=1000,
save_total_limit=2,
)

View File

@ -148,7 +148,6 @@ training_args = TrainingArguments(
label_names=[],
bf16=script_args.bf16,
logging_strategy="steps",
- logging_steps=10,
optim=script_args.optim,
lr_scheduler_type=script_args.lr_scheduler_type,
seed=script_args.seed,

View File

@ -22,7 +22,6 @@ There were two main steps to the DPO training process:
accelerate launch examples/research_projects/stack_llama_2/scripts/sft_llama2.py \
--output_dir="./sft" \
--max_steps=500 \
- --logging_steps=10 \
--save_steps=10 \
--per_device_train_batch_size=4 \
--per_device_eval_batch_size=1 \

View File

@ -26,7 +26,6 @@ python examples/scripts/bco.py \
--learning_rate 1e-6 \
--gradient_checkpointing \
--gradient_accumulation_steps 1 \
- --logging_steps 0.01 \
--eval_steps 0.2 \
--save_strategy no \
--output_dir=bco-aligned-model \
@ -48,7 +47,6 @@ python examples/scripts/bco.py \
--learning_rate 1e-6 \
--gradient_checkpointing \
--gradient_accumulation_steps 1 \
- --logging_steps 0.01 \
--eval_steps 0.2 \
--save_strategy no \
--output_dir=bco-aligned-model-lora \

View File

@ -24,7 +24,6 @@ python examples/scripts/cpo.py \
--max_steps 1000 \
--learning_rate 8e-6 \
--gradient_accumulation_steps 1 \
- --logging_steps 10 \
--eval_steps 500 \
--output_dir="gpt2-aligned-cpo" \
--warmup_steps 150 \
@ -41,7 +40,6 @@ python examples/scripts/cpo.py \
--max_steps 1000 \
--learning_rate 8e-5 \
--gradient_accumulation_steps 1 \
- --logging_steps 10 \
--eval_steps 500 \
--output_dir="gpt2-lora-aligned-cpo" \
--optim rmsprop \

View File

@ -22,7 +22,6 @@ python examples/scripts/gkd.py \
--per_device_train_batch_size 4 \
--gradient_accumulation_steps 8 \
--output_dir gkd-model \
- --logging_steps 10 \
--num_train_epochs 1 \
--push_to_hub \
--gradient_checkpointing
@ -36,7 +35,6 @@ python examples/scripts/gkd.py \
--per_device_train_batch_size 4 \
--gradient_accumulation_steps 8 \
--output_dir gkd-model \
- --logging_steps 10 \
--num_train_epochs 1 \
--push_to_hub \
--gradient_checkpointing \

View File

@ -24,7 +24,6 @@ python trl/scripts/kto.py \
--learning_rate 5e-7 \
--lr_scheduler_type=cosine \
--gradient_accumulation_steps 1 \
- --logging_steps 10 \
--eval_steps 500 \
--output_dir=kto-aligned-model \
--warmup_ratio 0.1 \
@ -41,7 +40,6 @@ python trl/scripts/kto.py \
--learning_rate 5e-7 \
--lr_scheduler_type=cosine \
--gradient_accumulation_steps 1 \
- --logging_steps 10 \
--eval_steps 500 \
--output_dir=kto-aligned-model-lora \
--warmup_ratio 0.1 \

View File

@ -24,7 +24,6 @@ python examples/scripts/orpo.py \
--max_steps 1000 \
--learning_rate 8e-6 \
--gradient_accumulation_steps 1 \
- --logging_steps 10 \
--eval_steps 500 \
--output_dir="gpt2-aligned-orpo" \
--warmup_steps 150 \
@ -41,7 +40,6 @@ python examples/scripts/orpo.py \
--max_steps 1000 \
--learning_rate 8e-5 \
--gradient_accumulation_steps 1 \
- --logging_steps 10 \
--eval_steps 500 \
--output_dir="gpt2-lora-aligned-orpo" \
--optim rmsprop \

View File

@ -22,7 +22,6 @@ python examples/scripts/prm.py \
--num_train_epochs 1 \
--gradient_checkpointing True \
--learning_rate 1.0e-5 \
- --logging_steps 25 \
--eval_strategy steps \
--eval_steps 50
@ -35,7 +34,6 @@ python examples/scripts/prm.py \
--num_train_epochs 1 \
--gradient_checkpointing True \
--learning_rate 1.0e-4 \
- --logging_steps 25 \
--eval_strategy steps \
--eval_steps 50
--use_peft \

View File

@ -22,7 +22,6 @@ python examples/scripts/reward_modeling.py \
--num_train_epochs 1 \
--gradient_checkpointing True \
--learning_rate 1.0e-5 \
- --logging_steps 25 \
--eval_strategy steps \
--eval_steps 50 \
--max_length 2048
@ -36,7 +35,6 @@ python examples/scripts/reward_modeling.py \
--num_train_epochs 1 \
--gradient_checkpointing True \
--learning_rate 1.0e-4 \
- --logging_steps 25 \
--eval_strategy steps \
--eval_steps 50 \
--max_length 2048 \

View File

@ -36,7 +36,6 @@ def main():
# Train model
training_args = SFTConfig(
output_dir=f"{model_id}-codeforces-SFT",
- logging_steps=10,
bf16=True,
use_liger_kernel=True,
gradient_checkpointing=True,

View File

@ -27,7 +27,6 @@ accelerate launch \
--gradient_accumulation_steps=4 \
--num_train_epochs=4 \
--optim="adamw_torch_fused" \
- --logging_steps=1 \
--log_level="debug" \
--log_level_replica="debug" \
--save_strategy="steps" \

View File

@ -23,7 +23,6 @@ python trl/scripts/dpo.py \
--per_device_train_batch_size 2 \
--gradient_accumulation_steps 8 \
--gradient_checkpointing \
- --logging_steps 25 \
--eval_strategy steps \
--eval_steps 50 \
--output_dir Qwen2-0.5B-DPO \
@ -40,7 +39,6 @@ python trl/scripts/dpo.py \
--per_device_train_batch_size 2 \
--gradient_accumulation_steps 8 \
--gradient_checkpointing \
- --logging_steps 25 \
--eval_strategy steps \
--eval_steps 50 \
--output_dir Qwen2-0.5B-DPO \

View File

@ -24,7 +24,6 @@ python trl/scripts/kto.py \
--learning_rate 5e-7 \
--lr_scheduler_type=cosine \
--gradient_accumulation_steps 1 \
- --logging_steps 10 \
--eval_steps 500 \
--output_dir=kto-aligned-model \
--warmup_ratio 0.1 \
@ -41,7 +40,6 @@ python trl/scripts/kto.py \
--learning_rate 5e-7 \
--lr_scheduler_type=cosine \
--gradient_accumulation_steps 1 \
- --logging_steps 10 \
--eval_steps 500 \
--output_dir=kto-aligned-model-lora \
--warmup_ratio 0.1 \

View File

@ -24,7 +24,6 @@ python trl/scripts/sft.py \
--gradient_accumulation_steps 8 \
--gradient_checkpointing \
--eos_token '<|im_end|>' \
- --logging_steps 25 \
--eval_strategy steps \
--eval_steps 100 \
--output_dir Qwen2-0.5B-SFT \
@ -41,7 +40,6 @@ python trl/scripts/sft.py \
--gradient_accumulation_steps 8 \
--gradient_checkpointing \
--eos_token '<|im_end|>' \
- --logging_steps 25 \
--eval_strategy steps \
--eval_steps 100 \
--use_peft \