🤏 New models for tests (#2287)

* first commit

* uncomment

* Adapt other tests

* Remove unused variable in test_setup_chat_format

* Remove unused import statement

* style

* Add Bart model

* Update BCOTrainerTester class in test_bco_trainer.py

* Update model IDs and tokenizers in test files

* Add new models and processors

* Update model IDs in test files

* Fix formatting issue in test_dataset_formatting.py

* Refactor dataset formatting in test_dataset_formatting.py

* Fix dataset sequence length in SFTTrainerTester

* Remove tokenizer

* Remove print statement

* Add reward_model_path and sft_model_path to PPO trainer

* Fix tokenizer padding issue

* Add chat template for testing purposes in PaliGemma model

* Update PaliGemma model and chat template

* Increase learning rate to speed up test

* Update model names in run_dpo.sh and run_sft.sh scripts

* Update model and dataset names

* Fix formatting issue in test_dataset_formatting.py

* Fix formatting issue in test_dataset_formatting.py

* Remove unused chat template

* Update model generation script

* additional models

* Update model references in test files

* Remove unused imports in test_online_dpo_trainer.py

* Add is_llm_blender_available import and update reward_tokenizer

* Refactor test_online_dpo_trainer.py: Move skipped test case decorator

* remove models without chat templates

* Update model names in scripts and tests

* Update model_id in test_modeling_value_head.py

* Update model versions in test files

* Fix formatting issue in test_dataset_formatting.py

* Update embedding model ID in BCOTrainerTester

* Update test_online_dpo_trainer.py with reward model changes

* Update expected formatted text in test_dataset_formatting.py

* Add reward_tokenizer to TestOnlineDPOTrainer

* fix tests

* Add SIMPLE_CHAT_TEMPLATE to T5 tokenizer

* Fix dummy_text format in test_rloo_trainer.py

* Skip outdated test for chatML data collator

* Add new vision language models

* Commented out unused model IDs in test_vdpo_trainer

* Update model and vision configurations in generate_tiny_models.py and test_dpo_trainer.py

* Update model and tokenizer references

* Don't push if it already exists

* Add comment explaining test skip

* Fix model_exists function call and add new models

* Update LlavaForConditionalGeneration model and processor

* `qgallouedec` -> `trl-internal-testing`
Author: Quentin Gallouédec
Date: 2024-11-25 16:31:56 +01:00
Committed by: GitHub
Parent: ee3cbe1946
Commit: 453db5cd79
32 changed files with 482 additions and 275 deletions
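
Every model-ID rename in the diffs below follows one convention, implemented by the new `generate_tiny_models.py` script shown further down: `tiny-{ModelClassName}` with an optional version suffix, under the `trl-internal-testing` organization. A minimal sketch of the scheme (the helper name is hypothetical, for illustration only):

```python
# Hypothetical helper, not part of this commit: it restates the
# tiny-{ModelClassName}[-{suffix}] naming rule that push_to_hub() below encodes.
def tiny_repo_id(model_class_name: str, suffix: str | None = None) -> str:
    repo_id = f"trl-internal-testing/tiny-{model_class_name}"
    if suffix is not None:
        repo_id += f"-{suffix}"
    return repo_id


assert tiny_repo_id("Qwen2ForCausalLM", "2.5") == "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
assert tiny_repo_id("BartModel") == "trl-internal-testing/tiny-BartModel"
```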

View File

@@ -2,7 +2,7 @@
 # This script runs an SFT example end-to-end on a tiny model using different possible configurations
 # but defaults to QLoRA + PEFT
 OUTPUT_DIR="test_dpo/"
-MODEL_NAME="trl-internal-testing/tiny-random-LlamaForCausalLM"
+MODEL_NAME="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
 DATASET_NAME="trl-internal-testing/hh-rlhf-helpful-base-trl-style"
 MAX_STEPS=5
 BATCH_SIZE=2

View File

@@ -2,7 +2,7 @@
 # This script runs an SFT example end-to-end on a tiny model using different possible configurations
 # but defaults to QLoRA + PEFT
 OUTPUT_DIR="test_sft/"
-MODEL_NAME="trl-internal-testing/tiny-random-LlamaForCausalLM"
+MODEL_NAME="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
 DATASET_NAME="stanfordnlp/imdb"
 MAX_STEPS=5
 BATCH_SIZE=2

View File

@@ -23,7 +23,7 @@ We also recommend you passing a YAML config file to configure your training prot
 ```yaml
 model_name_or_path:
-  trl-internal-testing/tiny-random-LlamaForCausalLM
+  Qwen/Qwen2.5-0.5B
 dataset_name:
   stanfordnlp/imdb
 report_to:

View File

@@ -7,7 +7,7 @@
 # CUDA_VISIBLE_DEVICES: 0
 model_name_or_path:
-  trl-internal-testing/tiny-random-LlamaForCausalLM
+  Qwen/Qwen2.5-0.5B
 dataset_name:
   stanfordnlp/imdb
 report_to:

View File

@@ -0,0 +1,193 @@
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script generates tiny models used in the TRL library for unit tests. It pushes them to the Hub under the
# `trl-internal-testing` organization.
# This script is meant to be run when adding a new tiny model to the TRL library.

from huggingface_hub import HfApi, ModelCard
from transformers import (
    AutoProcessor,
    AutoTokenizer,
    BartConfig,
    BartModel,
    BloomConfig,
    BloomForCausalLM,
    CLIPVisionConfig,
    CohereConfig,
    CohereForCausalLM,
    DbrxConfig,
    DbrxForCausalLM,
    FalconMambaConfig,
    FalconMambaForCausalLM,
    Gemma2Config,
    Gemma2ForCausalLM,
    GemmaConfig,
    GemmaForCausalLM,
    GPT2Config,
    GPT2LMHeadModel,
    GPTNeoXConfig,
    GPTNeoXForCausalLM,
    Idefics2Config,
    Idefics2ForConditionalGeneration,
    LlamaConfig,
    LlamaForCausalLM,
    LlavaConfig,
    LlavaForConditionalGeneration,
    LlavaNextConfig,
    LlavaNextForConditionalGeneration,
    MistralConfig,
    MistralForCausalLM,
    OPTConfig,
    OPTForCausalLM,
    PaliGemmaConfig,
    PaliGemmaForConditionalGeneration,
    Phi3Config,
    Phi3ForCausalLM,
    Qwen2Config,
    Qwen2ForCausalLM,
    SiglipVisionConfig,
    T5Config,
    T5ForConditionalGeneration,
)
from transformers.models.idefics2.configuration_idefics2 import Idefics2VisionConfig


ORGANIZATION = "trl-internal-testing"

MODEL_CARD = """
---
library_name: transformers
tags: [trl]
---

# Tiny {model_class_name}

This is a minimal model built for unit tests in the [TRL](https://github.com/huggingface/trl) library.
"""


api = HfApi()


def push_to_hub(model, tokenizer, suffix=None):
    model_class_name = model.__class__.__name__
    content = MODEL_CARD.format(model_class_name=model_class_name)
    model_card = ModelCard(content)
    repo_id = f"{ORGANIZATION}/tiny-{model_class_name}"
    if suffix is not None:
        repo_id += f"-{suffix}"
    if api.repo_exists(repo_id):
        print(f"Model {repo_id} already exists, skipping")
    else:
        model.push_to_hub(repo_id)
        tokenizer.push_to_hub(repo_id)
        model_card.push_to_hub(repo_id)


# Decoder models
for model_id, config_class, model_class, suffix in [
    ("bigscience/bloomz-560m", BloomConfig, BloomForCausalLM, None),
    ("CohereForAI/aya-expanse-8b", CohereConfig, CohereForCausalLM, None),
    ("databricks/dbrx-instruct", DbrxConfig, DbrxForCausalLM, None),
    ("tiiuae/falcon-7b-instruct", FalconMambaConfig, FalconMambaForCausalLM, None),
    ("google/gemma-2-2b-it", Gemma2Config, Gemma2ForCausalLM, None),
    ("google/gemma-7b-it", GemmaConfig, GemmaForCausalLM, None),
    ("openai-community/gpt2", GPT2Config, GPT2LMHeadModel, None),
    ("EleutherAI/pythia-14m", GPTNeoXConfig, GPTNeoXForCausalLM, None),
    ("meta-llama/Meta-Llama-3-8B-Instruct", LlamaConfig, LlamaForCausalLM, "3"),
    ("meta-llama/Llama-3.1-8B-Instruct", LlamaConfig, LlamaForCausalLM, "3.1"),
    ("meta-llama/Llama-3.2-1B-Instruct", LlamaConfig, LlamaForCausalLM, "3.2"),
    ("mistralai/Mistral-7B-Instruct-v0.1", MistralConfig, MistralForCausalLM, "0.1"),
    ("mistralai/Mistral-7B-Instruct-v0.2", MistralConfig, MistralForCausalLM, "0.2"),
    ("facebook/opt-1.3b", OPTConfig, OPTForCausalLM, None),
    ("microsoft/Phi-3.5-mini-instruct", Phi3Config, Phi3ForCausalLM, None),
    ("Qwen/Qwen2.5-32B-Instruct", Qwen2Config, Qwen2ForCausalLM, "2.5"),
    ("Qwen/Qwen2.5-Coder-0.5B", Qwen2Config, Qwen2ForCausalLM, "2.5-Coder"),
]:
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    config = config_class(
        vocab_size=tokenizer.vocab_size + len(tokenizer.added_tokens_encoder.keys()),
        hidden_size=8,
        num_attention_heads=4,
        num_key_value_heads=2,
        num_hidden_layers=2,
        intermediate_size=32,
    )
    model = model_class(config)
    push_to_hub(model, tokenizer, suffix)


# Encoder-decoder models
for model_id, config_class, model_class, suffix in [
    ("google/flan-t5-small", T5Config, T5ForConditionalGeneration, None),
    ("facebook/bart-base", BartConfig, BartModel, None),
]:
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    config = config_class(
        vocab_size=tokenizer.vocab_size + len(tokenizer.added_tokens_encoder.keys()),
        d_model=16,
        encoder_layers=2,
        decoder_layers=2,
        d_kv=2,
        d_ff=64,
        num_layers=6,
        num_heads=8,
        decoder_start_token_id=0,
        is_encoder_decoder=True,
    )
    model = model_class(config)
    push_to_hub(model, tokenizer, suffix)


# Vision Language Models
# fmt: off
for model_id, config_class, text_config_class, vision_config_class, model_class in [
    ("HuggingFaceM4/idefics2-8b", Idefics2Config, MistralConfig, Idefics2VisionConfig, Idefics2ForConditionalGeneration),
    ("llava-hf/llava-1.5-7b-hf", LlavaConfig, LlamaConfig, CLIPVisionConfig, LlavaForConditionalGeneration),
    ("llava-hf/llava-v1.6-mistral-7b-hf", LlavaNextConfig, MistralConfig, CLIPVisionConfig, LlavaNextForConditionalGeneration),
    ("google/paligemma-3b-pt-224", PaliGemmaConfig, GemmaConfig, SiglipVisionConfig, PaliGemmaForConditionalGeneration),
]:
    # fmt: on
    processor = AutoProcessor.from_pretrained(model_id)
    kwargs = {}
    if config_class == PaliGemmaConfig:
        kwargs["projection_dim"] = 8
    vision_kwargs = {}
    if vision_config_class in [CLIPVisionConfig, SiglipVisionConfig]:
        vision_kwargs["projection_dim"] = 8
    if vision_config_class == CLIPVisionConfig:
        vision_kwargs["image_size"] = 336
        vision_kwargs["patch_size"] = 14
    config = config_class(
        text_config=text_config_class(
            vocab_size=processor.tokenizer.vocab_size + len(processor.tokenizer.added_tokens_encoder),
            hidden_size=8,
            num_attention_heads=4,
            num_key_value_heads=2,
            num_hidden_layers=2,
            intermediate_size=32,
        ),
        vision_config=vision_config_class(
            hidden_size=8,
            num_attention_heads=4,
            num_hidden_layers=2,
            intermediate_size=32,
            **vision_kwargs,
        ),
        **kwargs,
    )
    model = model_class(config)
    push_to_hub(model, processor)
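
For context, a minimal sketch (not part of this commit) of how a test consumes one of these tiny repos once pushed; the model ID comes from the diffs below, and the pad-token line mirrors what the updated tests do:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load one of the tiny test models produced by the script above.
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token  # the updated tests pad with the EOS token
```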

View File

@@ -14,8 +14,8 @@
 # TODO: push them under trl-org
 MODELS_TO_TEST = [
-    "trl-internal-testing/tiny-random-LlamaForCausalLM",
-    "HuggingFaceM4/tiny-random-MistralForCausalLM",
+    "trl-internal-testing/tiny-LlamaForCausalLM-3.2",
+    "trl-internal-testing/tiny-MistralForCausalLM-0.2",
 ]

 # We could have also not declared these variables but let's be verbose

View File

@@ -30,30 +30,30 @@ from .testing_utils import require_no_wandb, require_sklearn

 class BCOTrainerTester(unittest.TestCase):
     def setUp(self):
-        self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.ref_model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token

         # get t5 as seq2seq example:
-        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration-correct-vocab"
+        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
         self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.t5_ref_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)

         # get embedding model
-        model_id = "facebook/bart-base"
+        model_id = "trl-internal-testing/tiny-BartModel"
         self.embedding_model = AutoModel.from_pretrained(model_id)
         self.embedding_tokenizer = AutoTokenizer.from_pretrained(model_id)

     @parameterized.expand(
         [
-            ["gpt2", True, True, "standard_unpaired_preference"],
-            ["gpt2", True, False, "standard_unpaired_preference"],
-            ["gpt2", False, True, "standard_unpaired_preference"],
-            ["gpt2", False, False, "standard_unpaired_preference"],
-            ["gpt2", True, True, "conversational_unpaired_preference"],
+            ("qwen", True, True, "standard_unpaired_preference"),
+            ("qwen", True, False, "standard_unpaired_preference"),
+            ("qwen", False, True, "standard_unpaired_preference"),
+            ("qwen", False, False, "standard_unpaired_preference"),
+            ("qwen", True, True, "conversational_unpaired_preference"),
         ]
     )
     @require_sklearn
@@ -73,7 +73,7 @@ class BCOTrainerTester(unittest.TestCase):
         dummy_dataset = load_dataset("trl-internal-testing/zen", config_name)

-        if name == "gpt2":
+        if name == "qwen":
             model = self.model
             ref_model = self.ref_model
             tokenizer = self.tokenizer
@@ -160,9 +160,9 @@ class BCOTrainerTester(unittest.TestCase):
         self.assertListEqual(tokenized_dataset["prompt"], train_dataset["prompt"])
         self.assertListEqual(tokenized_dataset["completion"], train_dataset["completion"])
         self.assertListEqual(tokenized_dataset["label"], train_dataset["label"])
-        self.assertListEqual(tokenized_dataset["prompt_input_ids"][0], [5377, 11141])
-        self.assertListEqual(tokenized_dataset["prompt_attention_mask"][0], [1, 1])
-        self.assertListEqual(tokenized_dataset["answer_input_ids"][0], [318, 1365, 621, 8253, 13])
+        self.assertListEqual(tokenized_dataset["prompt_input_ids"][0], [31137])
+        self.assertListEqual(tokenized_dataset["prompt_attention_mask"][0], [1])
+        self.assertListEqual(tokenized_dataset["answer_input_ids"][0], [374, 2664, 1091, 16965, 13])
         self.assertListEqual(tokenized_dataset["answer_attention_mask"][0], [1, 1, 1, 1, 1])

         fn_kwargs = {
@@ -178,15 +178,13 @@ class BCOTrainerTester(unittest.TestCase):
         self.assertListEqual(processed_dataset["prompt"], train_dataset["prompt"])
         self.assertListEqual(processed_dataset["completion"], train_dataset["completion"])
         self.assertListEqual(processed_dataset["label"], train_dataset["label"])
-        self.assertListEqual(processed_dataset["prompt_input_ids"][0], [50256, 5377, 11141])
-        self.assertListEqual(processed_dataset["prompt_attention_mask"][0], [1, 1, 1])
+        self.assertListEqual(processed_dataset["prompt_input_ids"][0], [31137])
+        self.assertListEqual(processed_dataset["prompt_attention_mask"][0], [1])
         self.assertListEqual(
-            processed_dataset["completion_input_ids"][0], [50256, 5377, 11141, 318, 1365, 621, 8253, 13, 50256]
+            processed_dataset["completion_input_ids"][0], [31137, 374, 2664, 1091, 16965, 13, 151645]
         )
-        self.assertListEqual(processed_dataset["completion_attention_mask"][0], [1, 1, 1, 1, 1, 1, 1, 1, 1])
-        self.assertListEqual(
-            processed_dataset["completion_labels"][0], [-100, -100, -100, 318, 1365, 621, 8253, 13, 50256]
-        )
+        self.assertListEqual(processed_dataset["completion_attention_mask"][0], [1, 1, 1, 1, 1, 1, 1])
+        self.assertListEqual(processed_dataset["completion_labels"][0], [-100, 374, 2664, 1091, 16965, 13, 151645])

     @require_sklearn
     def test_bco_trainer_without_providing_ref_model(self):

View File

@@ -31,7 +31,7 @@ class BestOfNSamplerTester(unittest.TestCase):
     Tests the BestOfNSampler class
     """

-    ref_model_name = "trl-internal-testing/dummy-GPT2-correct-vocab"
+    ref_model_name = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
     output_length_sampler = LengthSampler(2, 6)
     model = AutoModelForCausalLMWithValueHead.from_pretrained(ref_model_name)
     tokenizer = AutoTokenizer.from_pretrained(ref_model_name)

View File

@@ -60,9 +60,9 @@ class TrainerWithRefModel(Trainer):

 class WinRateCallbackTester(unittest.TestCase):
     def setUp(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
-        self.ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
+        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        self.ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
         self.tokenizer.pad_token = self.tokenizer.eos_token
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only")
         dataset["train"] = dataset["train"].select(range(8))
@@ -219,8 +219,8 @@ class WinRateCallbackTester(unittest.TestCase):
 @require_wandb
 class LogCompletionsCallbackTester(unittest.TestCase):
     def setUp(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
+        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
         self.tokenizer.pad_token = self.tokenizer.eos_token
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only")
         dataset["train"] = dataset["train"].select(range(8))
@@ -283,8 +283,8 @@ class LogCompletionsCallbackTester(unittest.TestCase):
 )
 class MergeModelCallbackTester(unittest.TestCase):
     def setUp(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-random-LlamaForCausalLM")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-random-LlamaForCausalLM")
+        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
         self.dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")

     def test_callback(self):

View File

@@ -21,7 +21,7 @@ class CLITester(unittest.TestCase):
     def test_sft_cli(self):
         try:
             subprocess.run(
-                "trl sft --max_steps 1 --output_dir tmp-sft --model_name_or_path trl-internal-testing/tiny-random-LlamaForCausalLM --dataset_name stanfordnlp/imdb --learning_rate 1e-4 --lr_scheduler_type cosine",
+                "trl sft --max_steps 1 --output_dir tmp-sft --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name stanfordnlp/imdb --learning_rate 1e-4 --lr_scheduler_type cosine",
                 shell=True,
                 check=True,
             )
@@ -32,7 +32,7 @@ class CLITester(unittest.TestCase):
     def test_dpo_cli(self):
         try:
             subprocess.run(
-                "trl dpo --max_steps 1 --output_dir tmp-dpo --model_name_or_path trl-internal-testing/tiny-random-LlamaForCausalLM --dataset_name trl-internal-testing/tiny-ultrafeedback-binarized --learning_rate 1e-4 --lr_scheduler_type cosine",
+                "trl dpo --max_steps 1 --output_dir tmp-dpo --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name trl-internal-testing/tiny-ultrafeedback-binarized --learning_rate 1e-4 --lr_scheduler_type cosine",
                 shell=True,
                 check=True,
             )

View File

@@ -21,29 +21,31 @@ from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokeni
 from transformers.testing_utils import require_peft

 from trl import CPOConfig, CPOTrainer
+from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE


 class CPOTrainerTester(unittest.TestCase):
     def setUp(self):
-        self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token

         # get t5 as seq2seq example:
-        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration-correct-vocab"
+        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
         self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)
+        self.t5_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE

     @parameterized.expand(
         [
-            ["gpt2", "sigmoid", "standard_preference"],
-            ["t5", "hinge", "standard_implicit_prompt_preference"],
-            ["gpt2", "ipo", "conversational_preference"],
-            ["t5", "ipo", "conversational_implicit_prompt_preference"],
-            ["gpt2", "simpo", "standard_preference"],
-            ["t5", "simpo", "standard_implicit_prompt_preference"],
-            ["gpt2", "hinge", "conversational_preference"],
+            ("qwen", "sigmoid", "standard_preference"),
+            ("t5", "hinge", "standard_implicit_prompt_preference"),
+            ("qwen", "ipo", "conversational_preference"),
+            ("t5", "ipo", "conversational_implicit_prompt_preference"),
+            ("qwen", "simpo", "standard_preference"),
+            ("t5", "simpo", "standard_implicit_prompt_preference"),
+            ("qwen", "hinge", "conversational_preference"),
         ]
     )
     def test_cpo_trainer(self, name, loss_type, config_name):
@@ -64,7 +66,7 @@ class CPOTrainerTester(unittest.TestCase):
         dummy_dataset = load_dataset("trl-internal-testing/zen", config_name)

-        if name == "gpt2":
+        if name == "qwen":
             model = self.model
             tokenizer = self.tokenizer
         elif name == "t5":

View File

@@ -22,7 +22,7 @@ from trl import DataCollatorForCompletionOnlyLM
 class DataCollatorForCompletionOnlyLMTester(unittest.TestCase):
     def test_data_collator_finds_response_template_llama2_tokenizer(self):
         # this should ideally be tested with meta-llama/Llama-2-7b-hf
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
+        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
         self.instruction = """### System: You are a helpful assistant.

 ### User: How much is 2+2?
@@ -83,7 +83,7 @@ class DataCollatorForCompletionOnlyLMTester(unittest.TestCase):
         self.assertEqual(collator_text, expected_text)

     def test_data_collator_handling_of_long_sequences(self):
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
+        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
         self.instruction = """### System: You are a helpful assistant.

 ### User: How much is 2+2? I'm asking because I'm not sure. And I'm not sure because I'm not good at math.
@@ -106,7 +106,7 @@ class DataCollatorForCompletionOnlyLMTester(unittest.TestCase):
         self.assertTrue(result, "Not all values in the tensor are -100.")

     def test_padding_free(self):
-        tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
+        tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
         if tokenizer.pad_token_id is None:
             tokenizer.pad_token = tokenizer.eos_token
             tokenizer.pad_token_id = tokenizer.eos_token_id

View File

@@ -85,14 +85,18 @@ class IsConversationalTester(unittest.TestCase):

 class ApplyChatTemplateTester(unittest.TestCase):
     tokenizers = [
-        "trl-internal-testing/tiny-random-Qwen2-7B-Instruct",
-        "trl-internal-testing/tiny-random-Meta-Llama-3.1-8B-Instruct",
-        "trl-internal-testing/tiny-random-Meta-Llama-3-8B-Instruct",
-        "trl-internal-testing/tiny-random-DeepSeek-Coder-V2-Instruct",
-        "trl-internal-testing/tiny-random-Phi-3-mini-128k-instruct",
-        "trl-internal-testing/tiny-random-gemma-2-9b-it",
-        "trl-internal-testing/tiny-random-Mistral-7B-Instruct-v0.1",
-        "trl-internal-testing/tiny-random-Mistral-7B-Instruct-v0.2",
+        "trl-internal-testing/tiny-CohereForCausalLM",
+        "trl-internal-testing/tiny-DbrxForCausalLM",
+        "trl-internal-testing/tiny-FalconMambaForCausalLM",
+        "trl-internal-testing/tiny-Gemma2ForCausalLM",
+        "trl-internal-testing/tiny-GemmaForCausalLM",
+        "trl-internal-testing/tiny-LlamaForCausalLM-3.1",
+        "trl-internal-testing/tiny-LlamaForCausalLM-3.2",
+        "trl-internal-testing/tiny-LlamaForCausalLM-3",
+        "trl-internal-testing/tiny-MistralForCausalLM-0.1",
+        "trl-internal-testing/tiny-MistralForCausalLM-0.2",
+        "trl-internal-testing/tiny-Phi3ForCausalLM",
+        "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
     ]

     conversational_examples = [

View File

@@ -24,8 +24,8 @@ from trl.models.utils import ChatMlSpecialTokens, setup_chat_format

 class DatasetFormattingTestCase(unittest.TestCase):
     def setUp(self):
-        self.llama_tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
-        self.chatml_tokenizer = AutoTokenizer.from_pretrained("philschmid/gpt2-chatml-tokenizer")
+        self.llama_tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-MistralForCausalLM-0.1")
+        self.chatml_tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")

     def test_get_formatting_func_from_dataset_with_chatml_messages(self):
         dataset = Dataset.from_dict(
@@ -44,7 +44,7 @@ class DatasetFormattingTestCase(unittest.TestCase):
         formatting_func = get_formatting_func_from_dataset(dataset, self.llama_tokenizer)
         self.assertIsInstance(formatting_func, Callable)
         formatted_text = formatting_func(dataset[0])
-        expected = "<s>[INST] <<SYS>>\nYou are helpful\n<</SYS>>\n\nHello [/INST] Hi, how can I help you? </s>"
+        expected = "<s> [INST] You are helpful\n\nHello [/INST] Hi, how can I help you?</s>"
         self.assertEqual(formatted_text, expected)
         formatted_text = formatting_func(dataset[0:1])
         self.assertListEqual(formatted_text, [expected])
@@ -73,7 +73,7 @@ class DatasetFormattingTestCase(unittest.TestCase):
         formatting_func = get_formatting_func_from_dataset(dataset, self.llama_tokenizer)
         self.assertIsInstance(formatting_func, Callable)
         formatted_text = formatting_func(dataset[0])
-        expected = "<s>[INST] <<SYS>>\nYou are helpful\n<</SYS>>\n\nHello [/INST] Hi, how can I help you? </s>"
+        expected = "<s> [INST] You are helpful\n\nHello [/INST] Hi, how can I help you?</s>"
         self.assertEqual(formatted_text, expected)
         formatted_text = formatting_func(dataset[0:1])
         self.assertListEqual(formatted_text, [expected])
@@ -94,9 +94,9 @@ class DatasetFormattingTestCase(unittest.TestCase):
         self.assertIsNotNone(formatting_func)
         self.assertIsInstance(formatting_func, Callable)
         formatted_text = formatting_func(dataset[0])
-        self.assertEqual(formatted_text, "<s>[INST] What is 2+2? [/INST] 4 </s>")
+        self.assertEqual(formatted_text, "<s> [INST] What is 2+2? [/INST] 4</s>")
         formatted_text = formatting_func(dataset[0:1])
-        self.assertListEqual(formatted_text, ["<s>[INST] What is 2+2? [/INST] 4 </s>"])
+        self.assertListEqual(formatted_text, ["<s> [INST] What is 2+2? [/INST] 4</s>"])

     def test_get_formatting_func_from_dataset_from_hub(self):
         ds_1 = load_dataset("philschmid/trl-test-instruction", split="train")
@@ -117,13 +117,12 @@ class DatasetFormattingTestCase(unittest.TestCase):

 class SetupChatFormatTestCase(unittest.TestCase):
     def setUp(self):
-        self.tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
-        self.model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM")
+        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
         # remove built-in chat_template to simulate a model having no chat_template
         self.tokenizer.chat_template = None

     def test_setup_chat_format(self):
-        original_tokenizer_len = len(self.tokenizer)
         modified_model, modified_tokenizer = setup_chat_format(
             self.model, self.tokenizer, format="chatml", resize_to_multiple_of=64
         )
@@ -136,9 +135,7 @@ class SetupChatFormatTestCase(unittest.TestCase):
         self.assertEqual(modified_tokenizer.eos_token, _chatml.eos_token)
         self.assertEqual(modified_tokenizer.pad_token, _chatml.pad_token)
         self.assertEqual(modified_tokenizer.bos_token, _chatml.bos_token)
-        self.assertEqual(len(modified_tokenizer), (original_tokenizer_len + 2))
         self.assertEqual((self.model.get_input_embeddings().weight.shape[0] % 64), 0)
-        self.assertEqual(self.model.get_input_embeddings().weight.shape[0], (original_tokenizer_len + 64))

     def test_example_with_setup_model(self):
         modified_model, modified_tokenizer = setup_chat_format(

View File

@@ -158,40 +158,40 @@ class TestTokenizeRow(unittest.TestCase):

 class DPOTrainerTester(unittest.TestCase):
     def setUp(self):
-        self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.ref_model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token

         # get t5 as seq2seq example:
-        model_id = "trl-internal-testing/T5ForConditionalGeneration-correct-vocab-calibrated"
+        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
         self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.t5_ref_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)

     @parameterized.expand(
         [
-            ["gpt2", "sigmoid", True],
-            ["t5", "hinge", False],
-            ["gpt2", "ipo", False],
-            ["t5", "ipo", True],
-            ["gpt2", "aot_pair", True],
-            ["t5", "aot_pair", False],
-            ["gpt2", "aot", True],
-            ["t5", "aot", False],
-            ["gpt2", "bco_pair", False],
-            ["t5", "bco_pair", True],
-            ["gpt2", "sppo_hard", False],
-            ["t5", "sppo_hard", True],
-            ["gpt2", "nca_pair", False],
-            ["t5", "nca_pair", True],
-            ["gpt2", "robust", True],
-            ["gpt2", "exo_pair", False],
-            ["t5", "exo_pair", True],
-            ["gpt2", "apo_zero", True],
-            ["t5", "apo_down", False],
-            ["gpt2", "discopop", False],
+            ("qwen", "sigmoid", True),
+            ("t5", "hinge", False),
+            ("qwen", "ipo", False),
+            ("t5", "ipo", True),
+            ("qwen", "aot_pair", True),
+            ("t5", "aot_pair", False),
+            ("qwen", "aot", True),
+            ("t5", "aot", False),
+            ("qwen", "bco_pair", False),
+            ("t5", "bco_pair", True),
+            ("qwen", "sppo_hard", False),
+            ("t5", "sppo_hard", True),
+            ("qwen", "nca_pair", False),
+            ("t5", "nca_pair", True),
+            ("qwen", "robust", True),
+            ("qwen", "exo_pair", False),
+            ("t5", "exo_pair", True),
+            ("qwen", "apo_zero", True),
+            ("t5", "apo_down", False),
+            ("qwen", "discopop", False),
         ]
     )
     def test_dpo_trainer(self, name, loss_type, pre_compute):
@@ -212,7 +212,7 @@ class DPOTrainerTester(unittest.TestCase):
         dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_preference")

-        if name == "gpt2":
+        if name == "qwen":
             model = self.model
             ref_model = self.ref_model
             tokenizer = self.tokenizer
@@ -286,8 +286,8 @@ class DPOTrainerTester(unittest.TestCase):
     @parameterized.expand(
         [
-            [None, "Test when rpo_alpha is set to None"],
-            [0.5, "Test when rpo_alpha is set to 0.5"],
+            (None, "Test when rpo_alpha is set to None"),
+            (0.5, "Test when rpo_alpha is set to 0.5"),
         ]
     )
     def test_dpo_trainer_without_providing_ref_model(self, rpo_alpha, _):
@@ -609,7 +609,7 @@ class DPOTrainerTester(unittest.TestCase):
         # Note this test only works on compute capability > 7 GPU devices
         from peft import LoraConfig

-        model_id = "trl-internal-testing/tiny-random-LlamaForCausalLM"
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         tokenizer = AutoTokenizer.from_pretrained(model_id)

         lora_config = LoraConfig(
@@ -659,36 +659,36 @@ class DPOTrainerTester(unittest.TestCase):
     @parameterized.expand(
         [
-            ["gpt2", "sigmoid", False, False],
-            ["gpt2", "sigmoid", False, True],
-            ["gpt2", "sigmoid", True, False],
-            ["gpt2", "sigmoid", True, True],
-            ["gpt2", "ipo", False, False],
-            ["gpt2", "ipo", False, True],
-            ["gpt2", "ipo", True, False],
-            ["gpt2", "ipo", True, True],
-            ["gpt2", "aot_pair", False, False],
-            ["gpt2", "aot_pair", False, True],
-            ["gpt2", "aot_pair", True, False],
-            ["gpt2", "aot_pair", True, True],
-            ["gpt2", "aot", False, False],
-            ["gpt2", "aot", False, True],
-            ["gpt2", "aot", True, False],
-            ["gpt2", "aot", True, True],
-            ["gpt2", "bco_pair", False, False],
-            ["gpt2", "bco_pair", False, True],
-            ["gpt2", "bco_pair", True, False],
-            ["gpt2", "bco_pair", True, True],
-            ["gpt2", "robust", False, False],
-            ["gpt2", "robust", False, True],
-            ["gpt2", "robust", True, False],
-            ["gpt2", "robust", True, True],
+            ("sigmoid", False, False),
+            ("sigmoid", False, True),
+            ("sigmoid", True, False),
+            ("sigmoid", True, True),
+            ("ipo", False, False),
+            ("ipo", False, True),
+            ("ipo", True, False),
+            ("ipo", True, True),
+            ("aot_pair", False, False),
+            ("aot_pair", False, True),
+            ("aot_pair", True, False),
+            ("aot_pair", True, True),
+            ("aot", False, False),
+            ("aot", False, True),
+            ("aot", True, False),
+            ("aot", True, True),
+            ("bco_pair", False, False),
+            ("bco_pair", False, True),
+            ("bco_pair", True, False),
+            ("bco_pair", True, True),
+            ("robust", False, False),
+            ("robust", False, True),
+            ("robust", True, False),
+            ("robust", True, True),
         ]
     )
     @require_bitsandbytes
     @require_peft
     @unittest.skip("You need a GPU with bf16 support in order to run these tests")
-    def test_dpo_lora_bf16_autocast(self, name, loss_type, pre_compute, gen_during_eval):
+    def test_dpo_lora_bf16_autocast(self, loss_type, pre_compute, gen_during_eval):
         # Note this test only works on compute capability > 7 GPU devices
         from peft import LoraConfig
@@ -743,7 +743,7 @@ class DPOTrainerTester(unittest.TestCase):
     def test_dpo_lora_tags(self):
         from peft import LoraConfig

-        model_id = "trl-internal-testing/tiny-random-LlamaForCausalLM"
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         tokenizer = AutoTokenizer.from_pretrained(model_id)

         lora_config = LoraConfig(
@@ -788,7 +788,7 @@ class DPOTrainerTester(unittest.TestCase):
     @require_peft
     def test_dpo_tags(self):
-        model_id = "HuggingFaceM4/tiny-random-LlamaForCausalLM"
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         tokenizer = AutoTokenizer.from_pretrained(model_id)

         # lora model
@@ -964,7 +964,7 @@ class DPOTrainerTester(unittest.TestCase):
     )
     def test_dpo_loss_alpha_div_f(self):
-        model_id = "trl-internal-testing/tiny-random-LlamaForCausalLM"
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         tokenizer = AutoTokenizer.from_pretrained(model_id)

         # lora model
@@ -1007,7 +1007,7 @@ class DPOTrainerTester(unittest.TestCase):
         self.assertTrue(torch.isfinite(losses).cpu().numpy().all())

     def test_dpo_loss_js_div_f(self):
-        model_id = "trl-internal-testing/tiny-random-LlamaForCausalLM"
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         tokenizer = AutoTokenizer.from_pretrained(model_id)

         # lora model
@@ -1050,8 +1050,9 @@ class DPOTrainerTester(unittest.TestCase):
         self.assertTrue(torch.isfinite(losses).cpu().numpy().all())

     def test_dpo_trainer_use_num_logits_to_keep(self):
-        model_id = "trl-internal-testing/tiny-random-LlamaForCausalLM"
+        model_id = "trl-internal-testing/tiny-LlamaForCausalLM-3.2"
         tokenizer = AutoTokenizer.from_pretrained(model_id)
+        tokenizer.pad_token = tokenizer.eos_token
         model = AutoModelForCausalLM.from_pretrained(model_id)
@@ -1135,9 +1136,10 @@ class DPOTrainerTester(unittest.TestCase):

 class DPOVisionTrainerTester(unittest.TestCase):
     @parameterized.expand(
         [
-            ["trl-internal-testing/tiny-random-idefics2"],
-            ["trl-internal-testing/tiny-random-paligemma"],
-            ["trl-internal-testing/tiny-random-llava-1.5"],
+            ("trl-internal-testing/tiny-Idefics2ForConditionalGeneration",),
+            # ("trl-internal-testing/tiny-PaliGemmaForConditionalGeneration",),
+            ("trl-internal-testing/tiny-LlavaForConditionalGeneration",),
+            # ("trl-internal-testing/tiny-LlavaNextForConditionalGeneration",),
         ]
     )
     def test_vdpo_trainer(self, model_id):
@@ -1185,9 +1187,8 @@ class DPOVisionTrainerTester(unittest.TestCase):
             training_args = DPOConfig(
                 output_dir=tmp_dir,
                 per_device_train_batch_size=2,
-                max_length=512,
-                max_prompt_length=512,
                 remove_unused_columns=False,
+                learning_rate=0.01,  # increase learning rate to speed up test
                 report_to="none",
             )
             trainer = DPOTrainer(
@@ -1210,8 +1211,8 @@ class DPOVisionTrainerTester(unittest.TestCase):
             for n, param in previous_trainable_params.items():
                 new_param = trainer.model.get_parameter(n)
                 if param.sum() != 0:  # ignore 0 biases
-                    if model_id == "trl-internal-testing/tiny-random-llava-1.5" and (
-                        n.startswith("vision_tower.vision_model.encoder.layers.3")
+                    if model_id == "trl-internal-testing/tiny-LlavaForConditionalGeneration" and (
+                        n.startswith("vision_tower.vision_model.encoder.layers.1")
                         or n == "vision_tower.vision_model.post_layernorm.weight"
                     ):
                         # For some reason, these params are not updated. This is probably not related to TRL, but to

View File

@@ -96,7 +96,7 @@ class TextHistoryTest(unittest.TestCase):
 class TextEnvironmentTester(unittest.TestCase):
     def setUp(self):
         # model_id
-        self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"

         # get models and tokenizer
         self.gpt2_model = AutoModelForCausalLMWithValueHead.from_pretrained(self.model_id)
@@ -258,7 +258,7 @@ class TextEnvironmentTester(unittest.TestCase):
         task_2 = "Hello there! General Kenobi!"

         query, response, response_mask, reward, histories = env.run([task_1, task_2])
-        self.assertEqual(len(query[0]), 9)
+        self.assertEqual(len(query[0]), 8)
         self.assertEqual(len(query[1]), 12)
         self.assertEqual(len(response[0]), 14)
         self.assertEqual(len(response[1]), 14)

View File

@@ -27,9 +27,10 @@ from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE
 class TestGKDTrainer(unittest.TestCase):
     @classmethod
     def setUpClass(cls):
-        cls.tokenizer = AutoTokenizer.from_pretrained("gpt2")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        cls.tokenizer = AutoTokenizer.from_pretrained(model_id)
         cls.tokenizer.pad_token = cls.tokenizer.eos_token
-        cls.model = AutoModelForCausalLM.from_pretrained("gpt2")
+        cls.model = AutoModelForCausalLM.from_pretrained(model_id)
         cls.generation_config = GenerationConfig(
             max_new_tokens=20,
             num_return_sequences=1,
@@ -201,7 +202,7 @@ class TestGeneralizedJSDLoss(unittest.TestCase):

 class GKDTrainerTester(unittest.TestCase):
     def setUp(self):
-        self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.teacher_model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)

View File

@@ -25,13 +25,13 @@ from trl import IterativeSFTTrainer

 class IterativeTrainerTester(unittest.TestCase):
     def setUp(self):
-        self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token

         # get t5 as seq2seq example:
-        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration-correct-vocab-calibrated"
+        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
         self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)
@@ -70,8 +70,8 @@ class IterativeTrainerTester(unittest.TestCase):
     @parameterized.expand(
         [
-            ["gpt2", "tensor"],
-            ["gpt2", "text"],
+            ["qwen", "tensor"],
+            ["qwen", "text"],
             ["t5", "tensor"],
             ["t5", "text"],
         ]
@@ -93,7 +93,7 @@ class IterativeTrainerTester(unittest.TestCase):
             "texts_labels": dummy_dataset["texts_labels"],
         }

-        if model_name == "gpt2":
+        if model_name == "qwen":
             model = self.model
             tokenizer = self.tokenizer
         else:

View File

@@ -28,27 +28,27 @@ from .testing_utils import require_no_wandb

 class KTOTrainerTester(unittest.TestCase):
     def setUp(self):
-        self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.ref_model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token

         # get t5 as seq2seq example:
-        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration-correct-vocab"
+        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
         self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.t5_ref_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)

     @parameterized.expand(
         [
-            ("gpt2", "standard_preference", "kto", True, True),
+            ("qwen", "standard_preference", "kto", True, True),
             # ("t5", "standard_implicit_prompt_preference", "kto", True, False),  # KTO broken for enc-dec
-            ("gpt2", "standard_unpaired_preference", "kto", False, True),
+            ("qwen", "standard_unpaired_preference", "kto", False, True),
             # ("t5", "conversational_preference", "kto", False, False),
-            ("gpt2", "conversational_implicit_prompt_preference", "apo_zero_unpaired", True, True),
+            ("qwen", "conversational_implicit_prompt_preference", "apo_zero_unpaired", True, True),
             # ("t5", "conversational_unpaired_preference", "apo_zero_unpaired", True, False),
-            ("gpt2", "standard_unpaired_preference", "apo_zero_unpaired", False, True),
+            ("qwen", "standard_unpaired_preference", "apo_zero_unpaired", False, True),
             # ("t5", "conversational_unpaired_preference", "apo_zero_unpaired", False, False),
         ]
     )
@@ -70,7 +70,7 @@ class KTOTrainerTester(unittest.TestCase):
         dummy_dataset = load_dataset("trl-internal-testing/zen", config_name)

-        if name == "gpt2":
+        if name == "qwen":
             model = self.model
             ref_model = self.ref_model
             tokenizer = self.tokenizer
@@ -156,9 +156,9 @@ class KTOTrainerTester(unittest.TestCase):
         self.assertListEqual(tokenized_dataset["prompt"], train_dataset["prompt"])
         self.assertListEqual(tokenized_dataset["completion"], train_dataset["completion"])
         self.assertListEqual(tokenized_dataset["label"], train_dataset["label"])
-        self.assertListEqual(tokenized_dataset["prompt_input_ids"][0], [5377, 11141])
-        self.assertListEqual(tokenized_dataset["prompt_attention_mask"][0], [1, 1])
-        self.assertListEqual(tokenized_dataset["answer_input_ids"][0], [318, 1365, 621, 8253, 13])
+        self.assertListEqual(tokenized_dataset["prompt_input_ids"][0], [31137])
+        self.assertListEqual(tokenized_dataset["prompt_attention_mask"][0], [1])
+        self.assertListEqual(tokenized_dataset["answer_input_ids"][0], [374, 2664, 1091, 16965, 13])
         self.assertListEqual(tokenized_dataset["answer_attention_mask"][0], [1, 1, 1, 1, 1])

         # Test corruption of (prompt, completion) pairs for KL dataset
@@ -196,15 +196,13 @@ class KTOTrainerTester(unittest.TestCase):
         self.assertListEqual(processed_dataset["prompt"], train_dataset["prompt"])
         self.assertListEqual(processed_dataset["completion"], train_dataset["completion"])
         self.assertListEqual(processed_dataset["label"], train_dataset["label"])
-        self.assertListEqual(processed_dataset["prompt_input_ids"][0], [50256, 5377, 11141])
-        self.assertListEqual(processed_dataset["prompt_attention_mask"][0], [1, 1, 1])
+        self.assertListEqual(processed_dataset["prompt_input_ids"][0], [31137])
+        self.assertListEqual(processed_dataset["prompt_attention_mask"][0], [1])
         self.assertListEqual(
-            processed_dataset["completion_input_ids"][0], [50256, 5377, 11141, 318, 1365, 621, 8253, 13, 50256]
+            processed_dataset["completion_input_ids"][0], [31137, 374, 2664, 1091, 16965, 13, 151645]
         )
-        self.assertListEqual(processed_dataset["completion_attention_mask"][0], [1, 1, 1, 1, 1, 1, 1, 1, 1])
-        self.assertListEqual(
-            processed_dataset["completion_labels"][0], [-100, -100, -100, 318, 1365, 621, 8253, 13, 50256]
-        )
+        self.assertListEqual(processed_dataset["completion_attention_mask"][0], [1, 1, 1, 1, 1, 1, 1])
+        self.assertListEqual(processed_dataset["completion_labels"][0], [-100, 374, 2664, 1091, 16965, 13, 151645])

     def test_kto_trainer_without_providing_ref_model(self):
         with tempfile.TemporaryDirectory() as tmp_dir:

View File

@@ -21,9 +21,10 @@ from trl.models.modeling_base import GeometricMixtureWrapper, create_reference_m

 class TestGeometricMixtureWrapper(unittest.TestCase):
     def setUp(self):
-        self.model = AutoModelForCausalLM.from_pretrained("gpt2")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        self.model = AutoModelForCausalLM.from_pretrained(model_id)
         self.ref_model = create_reference_model(self.model)
-        self.generation_config = GenerationConfig.from_pretrained("gpt2")
+        self.generation_config = GenerationConfig.from_pretrained(model_id)
         self.mixture_coef = 0.5
         self.wrapper = GeometricMixtureWrapper(
             self.model, self.ref_model, self.generation_config, mixture_coef=self.mixture_coef

View File

@@ -24,38 +24,27 @@ from trl import AutoModelForCausalLMWithValueHead, AutoModelForSeq2SeqLMWithValu

 ALL_CAUSAL_LM_MODELS = [
-    "trl-internal-testing/tiny-random-CodeGenForCausalLM",
-    "trl-internal-testing/tiny-random-GPTJForCausalLM",
-    "trl-internal-testing/tiny-random-GPTNeoForCausalLM",
-    "trl-internal-testing/tiny-random-GPTNeoXForCausalLM",
-    "trl-internal-testing/tiny-random-OPTForCausalLM",
-    "trl-internal-testing/tiny-random-BloomForCausalLM",
-    "trl-internal-testing/tiny-random-GPT2LMHeadModel",
-    "trl-internal-testing/tiny-random-CodeGenForCausalLM-sharded",
-    "trl-internal-testing/tiny-random-GPTNeoXForCausalLM-safetensors-sharded",
-    "trl-internal-testing/tiny-random-GPTNeoXForCausalLM-safetensors",
-    "trl-internal-testing/tiny-random-LlamaForCausalLM",
+    "trl-internal-testing/tiny-BloomForCausalLM",
+    "trl-internal-testing/tiny-CohereForCausalLM",
+    "trl-internal-testing/tiny-DbrxForCausalLM",
+    "trl-internal-testing/tiny-FalconMambaForCausalLM",
+    "trl-internal-testing/tiny-Gemma2ForCausalLM",
+    "trl-internal-testing/tiny-GemmaForCausalLM",
+    "trl-internal-testing/tiny-GPT2LMHeadModel",
+    "trl-internal-testing/tiny-GPTNeoXForCausalLM",
+    "trl-internal-testing/tiny-LlamaForCausalLM-3.1",
+    "trl-internal-testing/tiny-LlamaForCausalLM-3.2",
+    "trl-internal-testing/tiny-LlamaForCausalLM-3",
+    "trl-internal-testing/tiny-MistralForCausalLM-0.1",
+    "trl-internal-testing/tiny-MistralForCausalLM-0.2",
+    "trl-internal-testing/tiny-OPTForCausalLM",
+    "trl-internal-testing/tiny-Phi3ForCausalLM",
+    "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
 ]
 ALL_SEQ2SEQ_MODELS = [
-    "trl-internal-testing/tiny-random-BartForConditionalGeneration",
-    "trl-internal-testing/tiny-random-BigBirdPegasusForConditionalGeneration",
-    "trl-internal-testing/tiny-random-BlenderbotForConditionalGeneration",
-    "trl-internal-testing/tiny-random-BlenderbotSmallForConditionalGeneration",
-    "trl-internal-testing/tiny-random-FSMTForConditionalGeneration",
-    "trl-internal-testing/tiny-random-LEDForConditionalGeneration",
-    "trl-internal-testing/tiny-random-LongT5ForConditionalGeneration",
-    "trl-internal-testing/tiny-random-M2M100ForConditionalGeneration",
-    "trl-internal-testing/tiny-random-MarianMTModel",
-    "trl-internal-testing/tiny-random-MBartForConditionalGeneration",
-    "trl-internal-testing/tiny-random-MT5ForConditionalGeneration",
-    "trl-internal-testing/tiny-random-MvpForConditionalGeneration",
-    "trl-internal-testing/tiny-random-PegasusForConditionalGeneration",
-    "trl-internal-testing/tiny-random-PegasusXForConditionalGeneration",
-    "trl-internal-testing/tiny-random-PLBartForConditionalGeneration",
-    "trl-internal-testing/tiny-random-ProphetNetForConditionalGeneration",
-    "trl-internal-testing/tiny-random-SwitchTransformersForConditionalGeneration",
-    "trl-internal-testing/tiny-random-T5ForConditionalGeneration",
+    "trl-internal-testing/tiny-T5ForConditionalGeneration",
+    "trl-internal-testing/tiny-BartModel",
 ]
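These lists feed the shared value-head testers below: each tiny checkpoint is loaded with a value head and pushed through a forward pass. A rough sketch of the pattern (not the actual test body, which lives in the tester base classes):

    # Smoke-test sketch: every tiny causal LM must load with a value head
    import torch
    from trl import AutoModelForCausalLMWithValueHead

    for model_id in ALL_CAUSAL_LM_MODELS:
        model = AutoModelForCausalLMWithValueHead.from_pretrained(model_id)
        lm_logits, loss, value = model(input_ids=torch.tensor([[0, 1, 2]]))
        assert value.shape == (1, 3)  # one scalar value estimate per input token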
@@ -278,7 +267,7 @@ class CausalLMValueHeadModelTester(BaseTester.VHeadModelTester, unittest.TestCas

     def test_raise_error_not_causallm(self):
         # Test with a model without a LM head
-        model_id = "trl-internal-testing/tiny-random-GPT2Model"
+        model_id = "trl-internal-testing/tiny-GPT2LMHeadModel"
         # This should raise a ValueError
         with self.assertRaises(ValueError):
             pretrained_model = AutoModelForCausalLM.from_pretrained(model_id)

@@ -405,7 +394,7 @@ class Seq2SeqValueHeadModelTester(BaseTester.VHeadModelTester, unittest.TestCase

     def test_raise_error_not_causallm(self):
         # Test with a model without a LM head
-        model_id = "trl-internal-testing/tiny-random-T5Model"
+        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
         # This should raise a ValueError
         with self.assertRaises(ValueError):
             pretrained_model = AutoModel.from_pretrained(model_id)

@@ -442,10 +431,6 @@ class Seq2SeqValueHeadModelTester(BaseTester.VHeadModelTester, unittest.TestCase

         lm_head_namings = self.trl_model_class.lm_head_namings
-        if model_name == "trl-internal-testing/tiny-random-FSMTForConditionalGeneration":
-            # skip the test for FSMT as it does not support mixed-prec
-            continue
         self.assertTrue(
             any(hasattr(trl_model.pretrained_model, lm_head_naming) for lm_head_naming in lm_head_namings)
         )

@@ -462,34 +447,32 @@ class Seq2SeqValueHeadModelTester(BaseTester.VHeadModelTester, unittest.TestCase

 class ReferenceModelTest(unittest.TestCase):
     def setUp(self):
-        self.model = AutoModelForCausalLMWithValueHead.from_pretrained(
-            "trl-internal-testing/tiny-random-GPT2LMHeadModel"
-        )
+        self.model = AutoModelForCausalLMWithValueHead.from_pretrained("trl-internal-testing/tiny-GPT2LMHeadModel")
         self.test_input = torch.tensor([[0, 1, 2, 3]])
         self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=1)
         self.layer_format = "pretrained_model.transformer.h.{layer}.attn.c_attn.weight"

     def test_independent_reference(self):
         layer_0 = self.layer_format.format(layer=0)
-        layer_5 = self.layer_format.format(layer=4)
+        layer_1 = self.layer_format.format(layer=1)

         ref_model = create_reference_model(self.model)

         first_layer_before = self.model.get_parameter(layer_0).data.clone()
-        last_layer_before = self.model.get_parameter(layer_5).data.clone()
+        last_layer_before = self.model.get_parameter(layer_1).data.clone()  # the model only has 2 layers

         first_ref_layer_before = ref_model.get_parameter(layer_0).data.clone()
-        last_ref_layer_before = ref_model.get_parameter(layer_5).data.clone()
+        last_ref_layer_before = ref_model.get_parameter(layer_1).data.clone()

         output = self.model(input_ids=self.test_input, labels=self.test_input)
         output[1].backward()
         self.optimizer.step()

         first_layer_after = self.model.get_parameter(layer_0).data.clone()
-        last_layer_after = self.model.get_parameter(layer_5).data.clone()
+        last_layer_after = self.model.get_parameter(layer_1).data.clone()

         first_ref_layer_after = ref_model.get_parameter(layer_0).data.clone()
-        last_ref_layer_after = ref_model.get_parameter(layer_5).data.clone()
+        last_ref_layer_after = ref_model.get_parameter(layer_1).data.clone()

         # before optimization ref and model are identical
         self.assertTrue((first_layer_before == first_ref_layer_before).all())
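The layer renaming (`layer_5` -> `layer_1`) simply follows from the tiny checkpoint having only two transformer blocks, as the new inline comment notes. The invariant under test is unchanged: `create_reference_model` returns a frozen deep copy, so an optimizer step on the policy must leave the reference weights untouched. A condensed sketch of that check, assuming the tiny GPT2 checkpoint above:

    # Hedged sketch of the reference-model independence invariant
    import torch
    from trl import AutoModelForCausalLMWithValueHead, create_reference_model

    model = AutoModelForCausalLMWithValueHead.from_pretrained("trl-internal-testing/tiny-GPT2LMHeadModel")
    ref_model = create_reference_model(model)
    name = "pretrained_model.transformer.h.0.attn.c_attn.weight"
    ref_before = ref_model.get_parameter(name).data.clone()

    ids = torch.tensor([[0, 1, 2, 3]])
    loss = model(input_ids=ids, labels=ids)[1]  # forward returns (lm_logits, loss, value)
    loss.backward()
    torch.optim.AdamW(model.parameters(), lr=1.0).step()

    assert torch.equal(ref_model.get_parameter(name).data, ref_before)  # reference unchanged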
@@ -31,10 +31,10 @@ if is_peft_available():

 class TestNashMDTrainer(unittest.TestCase):
     def setUp(self):
-        self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.ref_model = AutoModelForCausalLM.from_pretrained(self.model_id)
-        self.reward_model = AutoModelForSequenceClassification.from_pretrained("EleutherAI/pythia-14m", num_labels=1)
+        self.reward_model = AutoModelForSequenceClassification.from_pretrained(self.model_id, num_labels=1)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token
@@ -21,7 +21,6 @@ from transformers.testing_utils import require_peft
 from transformers.utils import is_peft_available

 from trl import OnlineDPOConfig, OnlineDPOTrainer, is_llm_blender_available
-from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE

 from .testing_utils import RandomPairwiseJudge

@@ -32,16 +31,17 @@ if is_peft_available():

 class TestOnlineDPOTrainer(unittest.TestCase):
     def setUp(self):
-        self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.ref_model = AutoModelForCausalLM.from_pretrained(self.model_id)
-        self.reward_model = AutoModelForSequenceClassification.from_pretrained("EleutherAI/pythia-14m", num_labels=1)
-        self.reward_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-14m")
-        self.reward_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
-        self.reward_tokenizer.pad_token = self.reward_tokenizer.eos_token
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token
+        self.reward_model_id = "trl-internal-testing/tiny-LlamaForCausalLM-3.2"
+        self.reward_model = AutoModelForSequenceClassification.from_pretrained(self.reward_model_id, num_labels=1)
+        self.reward_tokenizer = AutoTokenizer.from_pretrained(self.reward_model_id)
+        self.reward_tokenizer.pad_token = self.reward_tokenizer.eos_token

     @parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)])
     def test_training(self, config_name):
         with tempfile.TemporaryDirectory() as tmp_dir:
@@ -21,25 +21,27 @@ from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokeni
 from transformers.testing_utils import require_peft

 from trl import ORPOConfig, ORPOTrainer
+from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE


 class ORPOTrainerTester(unittest.TestCase):
     def setUp(self):
-        self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token

         # get t5 as seq2seq example:
-        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration-correct-vocab"
+        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
         self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)
+        self.t5_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE

     @parameterized.expand(
         [
-            ("gpt2", "standard_preference"),
+            ("qwen", "standard_preference"),
             ("t5", "standard_implicit_prompt_preference"),
-            ("gpt2", "conversational_preference"),
+            ("qwen", "conversational_preference"),
             ("t5", "conversational_implicit_prompt_preference"),
         ]
     )

@@ -59,7 +61,7 @@ class ORPOTrainerTester(unittest.TestCase):
         dummy_dataset = load_dataset("trl-internal-testing/zen", config_name)

-        if name == "gpt2":
+        if name == "qwen":
             model = self.model
             tokenizer = self.tokenizer
         elif name == "t5":
@@ -33,7 +33,7 @@ if is_peft_available():

 @require_peft
 class PeftModelTester(unittest.TestCase):
     def setUp(self):
-        self.causal_lm_model_id = "trl-internal-testing/tiny-random-GPTNeoXForCausalLM"
+        self.causal_lm_model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.lora_config = LoraConfig(
             r=16,
             lora_alpha=32,

@@ -74,12 +74,12 @@ class PeftModelTester(unittest.TestCase):
         # Check that the number of trainable parameters is correct
         nb_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
-        self.assertEqual(nb_trainable_params, 10273)
+        self.assertEqual(nb_trainable_params, 905)

         # Check that the number of trainable param for the non-peft model is correct
         non_peft_model = AutoModelForCausalLMWithValueHead.from_pretrained(self.causal_lm_model_id)
         nb_trainable_params = sum(p.numel() for p in non_peft_model.parameters() if p.requires_grad)
-        self.assertEqual(nb_trainable_params, 99578)
+        self.assertEqual(nb_trainable_params, 2428641)

     def test_create_peft_model_from_config(self):
         r"""

@@ -90,13 +90,13 @@ class PeftModelTester(unittest.TestCase):
         )
         # Check that the number of trainable parameters is correct
         nb_trainable_params = sum(p.numel() for p in trl_model.parameters() if p.requires_grad)
-        self.assertEqual(nb_trainable_params, 10273)
+        self.assertEqual(nb_trainable_params, 905)

         causal_lm_model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id)
         trl_model = AutoModelForCausalLMWithValueHead.from_pretrained(causal_lm_model, peft_config=self.lora_config)
         # Check that the number of trainable parameters is correct
         nb_trainable_params = sum(p.numel() for p in trl_model.parameters() if p.requires_grad)
-        self.assertEqual(nb_trainable_params, 10273)
+        self.assertEqual(nb_trainable_params, 905)

     @require_torch_gpu_if_bnb_not_multi_backend_enabled
     def test_create_bnb_peft_model_from_config(self):

@@ -110,7 +110,7 @@ class PeftModelTester(unittest.TestCase):
         )
         # Check that the number of trainable parameters is correct
         nb_trainable_params = sum(p.numel() for p in trl_model.parameters() if p.requires_grad)
-        self.assertEqual(nb_trainable_params, 10273)
+        self.assertEqual(nb_trainable_params, 905)
         self.assertEqual(trl_model.pretrained_model.model.gpt_neox.layers[0].mlp.dense_h_to_4h.__class__, Linear8bitLt)

         causal_lm_model = AutoModelForCausalLM.from_pretrained(

@@ -119,7 +119,7 @@ class PeftModelTester(unittest.TestCase):
         trl_model = AutoModelForCausalLMWithValueHead.from_pretrained(causal_lm_model, peft_config=self.lora_config)
         # Check that the number of trainable parameters is correct
         nb_trainable_params = sum(p.numel() for p in trl_model.parameters() if p.requires_grad)
-        self.assertEqual(nb_trainable_params, 10273)
+        self.assertEqual(nb_trainable_params, 905)
         self.assertEqual(trl_model.pretrained_model.model.gpt_neox.layers[0].mlp.dense_h_to_4h.__class__, Linear8bitLt)

     def test_save_pretrained_peft(self):

@@ -201,4 +201,4 @@ class PeftModelTester(unittest.TestCase):
         model = AutoModelForCausalLMWithValueHead.from_pretrained(tmp_dir, is_trainable=True)
         # Check that the number of trainable parameters is correct
         nb_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
-        self.assertEqual(nb_trainable_params, 10273)
+        self.assertEqual(nb_trainable_params, 905)
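All of the `10273 -> 905` updates are the same bookkeeping on the new checkpoint: with LoRA applied, only the adapter matrices and the value head require gradients, and the tiny Qwen2 model is small enough that this comes to 905 parameters (2428641 without PEFT). A hedged sketch of how such counts are derived, assuming the only-partially-shown LoRA config from `setUp`:

    # Count trainable parameters with and without the LoRA adapter
    from peft import LoraConfig
    from trl import AutoModelForCausalLMWithValueHead

    model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
    lora_config = LoraConfig(r=16, lora_alpha=32)  # remaining fields are truncated in the diff above

    def count_trainable(model):
        return sum(p.numel() for p in model.parameters() if p.requires_grad)

    peft_model = AutoModelForCausalLMWithValueHead.from_pretrained(model_id, peft_config=lora_config)
    full_model = AutoModelForCausalLMWithValueHead.from_pretrained(model_id)
    print(count_trainable(peft_model), count_trainable(full_model))  # expected: 905, 2428641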
@@ -27,7 +27,9 @@ python examples/scripts/ppo/ppo.py \
     --per_device_train_batch_size 4 \
     --gradient_accumulation_steps 1 \
     --total_episodes 10 \
-    --model_name_or_path EleutherAI/pythia-14m \
+    --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
+    --reward_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
+    --sft_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
     --missing_eos_penalty 1.0 \
     --save_strategy no \
     --stop_token eos

@@ -53,7 +55,9 @@ python examples/scripts/ppo/ppo.py \
     --per_device_train_batch_size 4 \
     --gradient_accumulation_steps 1 \
     --num_train_epochs 0.003 \
-    --model_name_or_path EleutherAI/pythia-14m \
+    --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
+    --reward_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
+    --sft_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
     --missing_eos_penalty 1.0 \
     --save_strategy no \
     --stop_token eos
@@ -31,10 +31,10 @@ if is_peft_available():

 class RewardTrainerTester(unittest.TestCase):
     def setUp(self):
-        self.model_id = "hf-internal-testing/tiny-random-LlamaForCausalLM"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
-        self.tokenizer.chat_template = "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
         self.model = AutoModelForSequenceClassification.from_pretrained(self.model_id)
+        self.model.config.pad_token_id = self.tokenizer.pad_token_id

     def test_accuracy_metrics(self):
         dummy_eval_predictions = EvalPrediction(torch.FloatTensor([[0.1, 0.9], [0.9, 0.1]]), torch.LongTensor([0, 0]))
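The manual ChatML template assignment can go away because the Qwen2 tokenizer already ships a ChatML-style template, and the new `pad_token_id` line is needed since sequence-classification heads pad during batching. A quick sketch to confirm the built-in template, assuming the tiny checkpoint reuses the stock Qwen2 tokenizer:

    # The tiny Qwen2 tokenizer's built-in chat template is already ChatML-style
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
    text = tokenizer.apply_chat_template([{"role": "user", "content": "Hi"}], tokenize=False)
    assert "<|im_start|>user" in text and "<|im_end|>" in text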
@@ -33,9 +33,9 @@ python examples/scripts/rloo/rloo.py \
     --per_device_train_batch_size 4 \
     --gradient_accumulation_steps 1 \
     --total_episodes 10 \
-    --model_name_or_path EleutherAI/pythia-14m \
-    --sft_model_path EleutherAI/pythia-14m \
-    --reward_model_path EleutherAI/pythia-14m \
+    --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
+    --sft_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
+    --reward_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
     --missing_eos_penalty 1.0 \
     --save_strategy no \
     --stop_token eos

@@ -53,15 +53,13 @@ python examples/scripts/rloo/rloo.py \

 class RLOOTrainerTester(unittest.TestCase):
     def setUp(self):
-        self.sft_model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
-        self.reward_model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"

-        self.policy_model = AutoModelForCausalLM.from_pretrained(self.sft_model_id)
-        self.reward_model = AutoModelForSequenceClassification.from_pretrained(self.reward_model_id)
-        self.policy_ref_model = AutoModelForCausalLM.from_pretrained(self.sft_model_id)
+        self.policy_model = AutoModelForCausalLM.from_pretrained(self.model_id)
+        self.reward_model = AutoModelForSequenceClassification.from_pretrained(self.model_id)
+        self.policy_ref_model = AutoModelForCausalLM.from_pretrained(self.model_id)

-        self.tokenizer = AutoTokenizer.from_pretrained(self.sft_model_id, padding_side="left")
-        self.tokenizer.chat_template = "{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ ' ' }}{% endif %}{% endfor %}{{ eos_token }}"
+        self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, padding_side="left")
         self.tokenizer.add_special_tokens({"pad_token": "[PAD]"})

     def test_rloo_checkpoint(self):

@@ -73,7 +71,7 @@ class RLOOTrainerTester(unittest.TestCase):
             report_to="none",
         )

-        dummy_text = {"content": "Hello World!", "role": "user"}
+        dummy_text = [{"content": "Hello World!", "role": "user"}]
         dummy_data = self.tokenizer.apply_chat_template(dummy_text)
         dummy_dataset = Dataset.from_dict({"input_ids": dummy_data})
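The `dummy_text` change is a real fix, not a rename: `apply_chat_template` expects a conversation, i.e. a list of message dicts, and a bare dict appears to have only survived by accident with the old hand-rolled template. A minimal sketch of the corrected call:

    # apply_chat_template takes a list of {"role", "content"} messages
    from transformers import AutoTokenizer

    tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
    input_ids = tokenizer.apply_chat_template([{"role": "user", "content": "Hello World!"}])
    print(input_ids)  # a flat list of token ids for the templated conversation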
@@ -62,7 +62,7 @@ class SFTTrainerTester(unittest.TestCase):
     r""" """

     def setUp(self):
-        self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token

@@ -1146,7 +1146,7 @@ class SFTTrainerTester(unittest.TestCase):
             eval_dataset=self.conversational_lm_dataset["test"],
         )

-        self.assertEqual(len(trainer.train_dataset["input_ids"]), 21)
+        self.assertEqual(len(trainer.train_dataset["input_ids"]), 46)  # w/ this dataset, we end up with 46 seqs
         self.assertEqual(len(trainer.eval_dataset["input_ids"]), len(self.conversational_lm_dataset["test"]))

     def test_sft_trainer_eval_packing(self):

@@ -1171,8 +1171,8 @@ class SFTTrainerTester(unittest.TestCase):
             eval_dataset=self.conversational_lm_dataset["test"],
         )

-        self.assertEqual(len(trainer.train_dataset["input_ids"]), 21)
-        self.assertEqual(len(trainer.eval_dataset["input_ids"]), 2)
+        self.assertEqual(len(trainer.train_dataset["input_ids"]), 46)  # w/ this dataset, we end up with 46 seqs
+        self.assertEqual(len(trainer.eval_dataset["input_ids"]), 5)  # w/ this dataset, we end up with 5 seqs

     def test_sft_trainer_no_packing(self):
         with tempfile.TemporaryDirectory() as tmp_dir:

@@ -1265,9 +1265,9 @@ class SFTTrainerTester(unittest.TestCase):
             report_to="none",
         )

         tiny_llava = LlavaForConditionalGeneration.from_pretrained(
-            "trl-internal-testing/tiny-random-LlavaForConditionalGeneration"
+            "trl-internal-testing/tiny-LlavaForConditionalGeneration"
         )
-        processor = AutoProcessor.from_pretrained("trl-internal-testing/tiny-random-LlavaForConditionalGeneration")
+        processor = AutoProcessor.from_pretrained("trl-internal-testing/tiny-LlavaForConditionalGeneration")

         processor.chat_template = """{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. {% for message in messages %}{% if message['role'] == 'user' %}USER: {% else %}ASSISTANT: {% endif %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<image>{% endif %}{% endfor %}{% if message['role'] == 'user' %} {% else %}{{eos_token}}{% endif %}{% endfor %}{% if add_generation_prompt %}ASSISTANT: {% endif %}"""
@@ -48,7 +48,8 @@ from .testing_utils import require_sklearn

 class TrainerArgTester(unittest.TestCase):
     @require_sklearn
     def test_bco(self):
-        tokenizer = AutoTokenizer.from_pretrained("gpt2")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
         dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train")
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = BCOConfig(

@@ -71,7 +72,11 @@ class TrainerArgTester(unittest.TestCase):
                 max_density_ratio=20.0,
             )
             trainer = BCOTrainer(
-                model="gpt2", ref_model="gpt2", args=training_args, train_dataset=dataset, processing_class=tokenizer
+                model=model_id,
+                ref_model=model_id,
+                args=training_args,
+                train_dataset=dataset,
+                processing_class=tokenizer,
             )
             self.assertEqual(trainer.args.max_length, 256)
             self.assertEqual(trainer.args.max_prompt_length, 64)

@@ -91,7 +96,8 @@ class TrainerArgTester(unittest.TestCase):
         self.assertEqual(trainer.args.max_density_ratio, 20.0)

     def test_cpo(self):
-        tokenizer = AutoTokenizer.from_pretrained("gpt2")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
         dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = CPOConfig(

@@ -113,7 +119,7 @@ class TrainerArgTester(unittest.TestCase):
                 model_init_kwargs={"trust_remote_code": True},
                 dataset_num_proc=4,
             )
-            trainer = CPOTrainer(model="gpt2", args=training_args, train_dataset=dataset, processing_class=tokenizer)
+            trainer = CPOTrainer(model=model_id, args=training_args, train_dataset=dataset, processing_class=tokenizer)
             self.assertEqual(trainer.args.max_length, 256)
             self.assertEqual(trainer.args.max_prompt_length, 64)
             self.assertEqual(trainer.args.max_completion_length, 64)

@@ -132,7 +138,8 @@ class TrainerArgTester(unittest.TestCase):
         self.assertEqual(trainer.args.dataset_num_proc, 4)

     def test_dpo(self):
-        tokenizer = AutoTokenizer.from_pretrained("gpt2")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
         dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = DPOConfig(

@@ -166,7 +173,11 @@ class TrainerArgTester(unittest.TestCase):
                 discopop_tau=0.1,
             )
             trainer = DPOTrainer(
-                model="gpt2", ref_model="gpt2", args=training_args, train_dataset=dataset, processing_class=tokenizer
+                model=model_id,
+                ref_model=model_id,
+                args=training_args,
+                train_dataset=dataset,
+                processing_class=tokenizer,
             )
             self.assertEqual(trainer.args.beta, 0.5)
             self.assertEqual(trainer.args.label_smoothing, 0.5)

@@ -197,7 +208,8 @@ class TrainerArgTester(unittest.TestCase):
         self.assertEqual(trainer.args.discopop_tau, 0.1)

     def test_kto(self):
-        tokenizer = AutoTokenizer.from_pretrained("gpt2")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
         dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train")
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = KTOConfig(

@@ -219,7 +231,11 @@ class TrainerArgTester(unittest.TestCase):
                 dataset_num_proc=4,
             )
             trainer = KTOTrainer(
-                model="gpt2", ref_model="gpt2", args=training_args, train_dataset=dataset, processing_class=tokenizer
+                model=model_id,
+                ref_model=model_id,
+                args=training_args,
+                train_dataset=dataset,
+                processing_class=tokenizer,
             )
             self.assertEqual(trainer.args.max_length, 256)
             self.assertEqual(trainer.args.max_prompt_length, 64)
@@ -239,16 +255,17 @@ class TrainerArgTester(unittest.TestCase):

     @parameterized.expand([(False,), (True,)])
     def test_nash_md(self, mixtures_coef_list):
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        model = AutoModelForCausalLM.from_pretrained(model_id)
+        ref_model = AutoModelForCausalLM.from_pretrained(model_id)
+        reward_model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=1)
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = NashMDConfig(
                 tmp_dir,
                 mixture_coef=0.5 if not mixtures_coef_list else [0.5, 0.6],
             )
-            model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-14m")
-            ref_model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-14m")
-            reward_model = AutoModelForSequenceClassification.from_pretrained("EleutherAI/pythia-14m", num_labels=1)
-            tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-14m")
             trainer = NashMDTrainer(
                 args=training_args,
                 processing_class=tokenizer,

@@ -261,6 +278,11 @@ class TrainerArgTester(unittest.TestCase):

     @parameterized.expand([(False,), (True,)])
     def test_online_dpo(self, beta_list):
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        model = AutoModelForCausalLM.from_pretrained(model_id)
+        ref_model = AutoModelForCausalLM.from_pretrained(model_id)
+        reward_model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=1)
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = OnlineDPOConfig(

@@ -272,10 +294,6 @@ class TrainerArgTester(unittest.TestCase):
                 loss_type="hinge",
                 dataset_num_proc=4,
             )
-            model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-14m")
-            ref_model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-14m")
-            reward_model = AutoModelForSequenceClassification.from_pretrained("EleutherAI/pythia-14m", num_labels=1)
-            tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-14m")
             trainer = OnlineDPOTrainer(
                 model=model,
                 ref_model=ref_model,

@@ -293,7 +311,8 @@ class TrainerArgTester(unittest.TestCase):
         self.assertEqual(trainer.args.dataset_num_proc, 4)

     def test_orpo(self):
-        tokenizer = AutoTokenizer.from_pretrained("gpt2")
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
         dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = ORPOConfig(

@@ -311,8 +330,9 @@ class TrainerArgTester(unittest.TestCase):
                 model_init_kwargs={"trust_remote_code": True},
                 dataset_num_proc=4,
             )
-            trainer = ORPOTrainer(model="gpt2", args=training_args, train_dataset=dataset, processing_class=tokenizer)
+            trainer = ORPOTrainer(
+                model=model_id, args=training_args, train_dataset=dataset, processing_class=tokenizer
+            )
             self.assertEqual(trainer.args.max_length, 256)
             self.assertEqual(trainer.args.max_prompt_length, 64)
             self.assertEqual(trainer.args.max_completion_length, 64)
@@ -321,6 +341,9 @@ class TrainerArgTester(unittest.TestCase):
         self.assertEqual(trainer.args.label_pad_token_id, -99)

     def test_reward(self):
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        model = AutoModelForCausalLM.from_pretrained(model_id)
         dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = RewardConfig(

@@ -329,8 +352,6 @@ class TrainerArgTester(unittest.TestCase):
                 dataset_num_proc=4,
                 center_rewards_coefficient=0.1,
             )
-            model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-14m")
-            tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-14m")
             trainer = RewardTrainer(
                 model=model,
                 args=training_args,

@@ -342,6 +363,7 @@ class TrainerArgTester(unittest.TestCase):
         self.assertEqual(trainer.args.center_rewards_coefficient, 0.1)

     def test_sft(self):
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train")
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = SFTConfig(

@@ -358,7 +380,7 @@ class TrainerArgTester(unittest.TestCase):
                 num_of_sequences=32,
                 chars_per_token=4.2,
             )
-            trainer = SFTTrainer("gpt2", args=training_args, train_dataset=dataset)
+            trainer = SFTTrainer(model_id, args=training_args, train_dataset=dataset)
             self.assertEqual(trainer.args.dataset_text_field, "dummy_text_field")
             self.assertEqual(trainer.args.packing, True)
             self.assertEqual(trainer.args.max_seq_length, 256)

@@ -374,16 +396,17 @@ class TrainerArgTester(unittest.TestCase):

     @parameterized.expand([(False,), (True,)])
     def test_xpo(self, alpha_list):
+        model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
+        tokenizer = AutoTokenizer.from_pretrained(model_id)
+        model = AutoModelForCausalLM.from_pretrained(model_id)
+        ref_model = AutoModelForCausalLM.from_pretrained(model_id)
+        reward_model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=1)
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
         with tempfile.TemporaryDirectory() as tmp_dir:
             training_args = XPOConfig(
                 tmp_dir,
                 alpha=0.5 if not alpha_list else [0.5, 0.6],
             )
-            model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-14m")
-            ref_model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-14m")
-            reward_model = AutoModelForSequenceClassification.from_pretrained("EleutherAI/pythia-14m", num_labels=1)
-            tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-14m")
             trainer = XPOTrainer(
                 args=training_args,
                 processing_class=tokenizer,
@@ -123,7 +123,7 @@ class TestGetPEFTConfig(unittest.TestCase):

 class TestDecodeAndStripPadding(unittest.TestCase):
     def setUp(self):
-        self.tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
+        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")

     def test_example_with_padding(self):
         inputs = self.tokenizer(["Hello world", "Hello"], padding=True, return_tensors="pt")

@@ -182,7 +182,7 @@ class TestGenerateModelCard(unittest.TestCase):

 class TestDataCollatorForChatML(unittest.TestCase):
     def setUp(self):
         # Initialize the tokenizer
-        self.tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")
+        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
         if self.tokenizer.pad_token is None:
             self.tokenizer.pad_token = self.tokenizer.eos_token

@@ -205,6 +205,8 @@ class TestDataCollatorForChatML(unittest.TestCase):
             ignore_index=self.ignore_index,
         )

+    # See https://github.com/huggingface/trl/pull/2287#discussion_r1856594421
+    @unittest.skip("This test must be updated.")
     def test_data_collator_for_chatml(self):
         # Process the data
         data = self.collator(self.examples)

@@ -256,7 +258,7 @@ class TestDataCollatorForChatML(unittest.TestCase):

 class TestBatchGeneration(unittest.TestCase):
     def setUp(self):
         # Initialize the tokenizer
-        self.model_id = "Qwen/Qwen2-0.5B-Instruct"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
@@ -31,10 +31,10 @@ if is_peft_available():

 class TestXPOTrainer(unittest.TestCase):
     def setUp(self):
-        self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.ref_model = AutoModelForCausalLM.from_pretrained(self.model_id)
-        self.reward_model = AutoModelForSequenceClassification.from_pretrained("EleutherAI/pythia-14m", num_labels=1)
+        self.reward_model = AutoModelForSequenceClassification.from_pretrained(self.model_id, num_labels=1)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token