Mirror of https://github.com/huggingface/trl.git (synced 2025-10-20 10:03:51 +08:00)
🤏 New models for tests (#2287)
* first commit
* uncomment
* other tests adaptations
* Remove unused variable in test_setup_chat_format
* Remove unused import statement
* style
* Add Bart model
* Update BCOTrainerTester class in test_bco_trainer.py
* Update model IDs and tokenizers in test files
* Add new models and processors
* Update model IDs in test files
* Fix formatting issue in test_dataset_formatting.py
* Refactor dataset formatting in test_dataset_formatting.py
* Fix dataset sequence length in SFTTrainerTester
* Remove tokenizer
* Remove print statement
* Add reward_model_path and sft_model_path to PPO trainer
* Fix tokenizer padding issue
* Add chat template for testing purposes in PaliGemma model
* Update PaliGemma model and chat template
* Increase learning rate to speed up test
* Update model names in run_dpo.sh and run_sft.sh scripts
* Update model and dataset names
* Fix formatting issue in test_dataset_formatting.py
* Fix formatting issue in test_dataset_formatting.py
* Remove unused chat template
* Update model generation script
* additional models
* Update model references in test files
* Remove unused imports in test_online_dpo_trainer.py
* Add is_llm_blender_available import and update reward_tokenizer
* Refactor test_online_dpo_trainer.py: Move skipped test case decorator
* remove models without chat templates
* Update model names in scripts and tests
* Update model_id in test_modeling_value_head.py
* Update model versions in test files
* Fix formatting issue in test_dataset_formatting.py
* Update embedding model ID in BCOTrainerTester
* Update test_online_dpo_trainer.py with reward model changes
* Update expected formatted text in test_dataset_formatting.py
* Add reward_tokenizer to TestOnlineDPOTrainer
* fix tests
* Add SIMPLE_CHAT_TEMPLATE to T5 tokenizer
* Fix dummy_text format in test_rloo_trainer.py
* Skip outdated test for chatML data collator
* Add new vision language models
* Commented out unused model IDs in test_vdpo_trainer
* Update model and vision configurations in generate_tiny_models.py and test_dpo_trainer.py
* Update model and tokenizer references
* Don't push if it already exists
* Add comment explaining test skip
* Fix model_exists function call and add new models
* Update LlavaForConditionalGeneration model and processor
* `qgallouedec` -> `trl-internal-testing`
Committed by GitHub
parent ee3cbe1946
commit 453db5cd79
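The diff below replaces the old ad-hoc test checkpoints (for example `trl-internal-testing/dummy-GPT2-correct-vocab` and `trl-internal-testing/tiny-random-LlamaForCausalLM`) with freshly generated tiny models such as `trl-internal-testing/tiny-Qwen2ForCausalLM-2.5`. As a rough, illustrative sketch (not part of the diff itself), a test loads one of the new models like this:

```python
# Illustrative sketch only -- mirrors the pattern used throughout the updated tests.
from transformers import AutoModelForCausalLM, AutoTokenizer

model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
tokenizer.pad_token = tokenizer.eos_token  # the test setUp methods set this explicitly
```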
@@ -2,7 +2,7 @@
 # This script runs an SFT example end-to-end on a tiny model using different possible configurations
 # but defaults to QLoRA + PEFT
 OUTPUT_DIR="test_dpo/"
-MODEL_NAME="trl-internal-testing/tiny-random-LlamaForCausalLM"
+MODEL_NAME="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
 DATASET_NAME="trl-internal-testing/hh-rlhf-helpful-base-trl-style"
 MAX_STEPS=5
 BATCH_SIZE=2
@@ -2,7 +2,7 @@
 # This script runs an SFT example end-to-end on a tiny model using different possible configurations
 # but defaults to QLoRA + PEFT
 OUTPUT_DIR="test_sft/"
-MODEL_NAME="trl-internal-testing/tiny-random-LlamaForCausalLM"
+MODEL_NAME="trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
 DATASET_NAME="stanfordnlp/imdb"
 MAX_STEPS=5
 BATCH_SIZE=2
@@ -23,7 +23,7 @@ We also recommend you passing a YAML config file to configure your training prot

 ```yaml
 model_name_or_path:
-    trl-internal-testing/tiny-random-LlamaForCausalLM
+    Qwen/Qwen2.5-0.5B
 dataset_name:
     stanfordnlp/imdb
 report_to:
@@ -7,7 +7,7 @@
 # CUDA_VISIBLE_DEVICES: 0

 model_name_or_path:
-    trl-internal-testing/tiny-random-LlamaForCausalLM
+    Qwen/Qwen2.5-0.5B
 dataset_name:
     stanfordnlp/imdb
 report_to:
scripts/generate_tiny_models.py (new file, 193 lines)
@@ -0,0 +1,193 @@
# Copyright 2024 The HuggingFace Inc. team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This script generates tiny models used in the TRL library for unit tests. It pushes them to the Hub under the
# `trl-internal-testing` organization.
# This script is meant to be run when adding new tiny model to the TRL library.

from huggingface_hub import HfApi, ModelCard
from transformers import (
    AutoProcessor,
    AutoTokenizer,
    BartConfig,
    BartModel,
    BloomConfig,
    BloomForCausalLM,
    CLIPVisionConfig,
    CohereConfig,
    CohereForCausalLM,
    DbrxConfig,
    DbrxForCausalLM,
    FalconMambaConfig,
    FalconMambaForCausalLM,
    Gemma2Config,
    Gemma2ForCausalLM,
    GemmaConfig,
    GemmaForCausalLM,
    GPT2Config,
    GPT2LMHeadModel,
    GPTNeoXConfig,
    GPTNeoXForCausalLM,
    Idefics2Config,
    Idefics2ForConditionalGeneration,
    LlamaConfig,
    LlamaForCausalLM,
    LlavaConfig,
    LlavaForConditionalGeneration,
    LlavaNextConfig,
    LlavaNextForConditionalGeneration,
    MistralConfig,
    MistralForCausalLM,
    OPTConfig,
    OPTForCausalLM,
    PaliGemmaConfig,
    PaliGemmaForConditionalGeneration,
    Phi3Config,
    Phi3ForCausalLM,
    Qwen2Config,
    Qwen2ForCausalLM,
    SiglipVisionConfig,
    T5Config,
    T5ForConditionalGeneration,
)
from transformers.models.idefics2.configuration_idefics2 import Idefics2VisionConfig


ORGANIZATION = "trl-internal-testing"

MODEL_CARD = """
---
library_name: transformers
tags: [trl]
---

# Tiny {model_class_name}

This is a minimal model built for unit tests in the [TRL](https://github.com/huggingface/trl) library.
"""


api = HfApi()


def push_to_hub(model, tokenizer, suffix=None):
    model_class_name = model.__class__.__name__
    content = MODEL_CARD.format(model_class_name=model_class_name)
    model_card = ModelCard(content)
    repo_id = f"{ORGANIZATION}/tiny-{model_class_name}"
    if suffix is not None:
        repo_id += f"-{suffix}"

    if api.repo_exists(repo_id):
        print(f"Model {repo_id} already exists, skipping")
    else:
        model.push_to_hub(repo_id)
        tokenizer.push_to_hub(repo_id)
        model_card.push_to_hub(repo_id)


# Decoder models
for model_id, config_class, model_class, suffix in [
    ("bigscience/bloomz-560m", BloomConfig, BloomForCausalLM, None),
    ("CohereForAI/aya-expanse-8b", CohereConfig, CohereForCausalLM, None),
    ("databricks/dbrx-instruct", DbrxConfig, DbrxForCausalLM, None),
    ("tiiuae/falcon-7b-instruct", FalconMambaConfig, FalconMambaForCausalLM, None),
    ("google/gemma-2-2b-it", Gemma2Config, Gemma2ForCausalLM, None),
    ("google/gemma-7b-it", GemmaConfig, GemmaForCausalLM, None),
    ("openai-community/gpt2", GPT2Config, GPT2LMHeadModel, None),
    ("EleutherAI/pythia-14m", GPTNeoXConfig, GPTNeoXForCausalLM, None),
    ("meta-llama/Meta-Llama-3-8B-Instruct", LlamaConfig, LlamaForCausalLM, "3"),
    ("meta-llama/Llama-3.1-8B-Instruct", LlamaConfig, LlamaForCausalLM, "3.1"),
    ("meta-llama/Llama-3.2-1B-Instruct", LlamaConfig, LlamaForCausalLM, "3.2"),
    ("mistralai/Mistral-7B-Instruct-v0.1", MistralConfig, MistralForCausalLM, "0.1"),
    ("mistralai/Mistral-7B-Instruct-v0.2", MistralConfig, MistralForCausalLM, "0.2"),
    ("facebook/opt-1.3b", OPTConfig, OPTForCausalLM, None),
    ("microsoft/Phi-3.5-mini-instruct", Phi3Config, Phi3ForCausalLM, None),
    ("Qwen/Qwen2.5-32B-Instruct", Qwen2Config, Qwen2ForCausalLM, "2.5"),
    ("Qwen/Qwen2.5-Coder-0.5B", Qwen2Config, Qwen2ForCausalLM, "2.5-Coder"),
]:
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    config = config_class(
        vocab_size=tokenizer.vocab_size + len(tokenizer.added_tokens_encoder.keys()),
        hidden_size=8,
        num_attention_heads=4,
        num_key_value_heads=2,
        num_hidden_layers=2,
        intermediate_size=32,
    )
    model = model_class(config)
    push_to_hub(model, tokenizer, suffix)


# Encoder-decoder models
for model_id, config_class, model_class, suffix in [
    ("google/flan-t5-small", T5Config, T5ForConditionalGeneration, None),
    ("facebook/bart-base", BartConfig, BartModel, None),
]:
    tokenizer = AutoTokenizer.from_pretrained(model_id)
    config = config_class(
        vocab_size=tokenizer.vocab_size + len(tokenizer.added_tokens_encoder.keys()),
        d_model=16,
        encoder_layers=2,
        decoder_layers=2,
        d_kv=2,
        d_ff=64,
        num_layers=6,
        num_heads=8,
        decoder_start_token_id=0,
        is_encoder_decoder=True,
    )
    model = model_class(config)
    push_to_hub(model, tokenizer, suffix)


# Vision Language Models
# fmt: off
for model_id, config_class, text_config_class, vision_config_class, model_class in [
    ("HuggingFaceM4/idefics2-8b", Idefics2Config, MistralConfig, Idefics2VisionConfig, Idefics2ForConditionalGeneration),
    ("llava-hf/llava-1.5-7b-hf", LlavaConfig, LlamaConfig, CLIPVisionConfig, LlavaForConditionalGeneration),
    ("llava-hf/llava-v1.6-mistral-7b-hf", LlavaNextConfig, MistralConfig, CLIPVisionConfig, LlavaNextForConditionalGeneration),
    ("google/paligemma-3b-pt-224", PaliGemmaConfig, GemmaConfig, SiglipVisionConfig, PaliGemmaForConditionalGeneration),
]:
    # fmt: on
    processor = AutoProcessor.from_pretrained(model_id)
    kwargs = {}
    if config_class == PaliGemmaConfig:
        kwargs["projection_dim"] = 8
    vision_kwargs = {}
    if vision_config_class in [CLIPVisionConfig, SiglipVisionConfig]:
        vision_kwargs["projection_dim"] = 8
    if vision_config_class == CLIPVisionConfig:
        vision_kwargs["image_size"] = 336
        vision_kwargs["patch_size"] = 14
    config = config_class(
        text_config=text_config_class(
            vocab_size=processor.tokenizer.vocab_size + len(processor.tokenizer.added_tokens_encoder),
            hidden_size=8,
            num_attention_heads=4,
            num_key_value_heads=2,
            num_hidden_layers=2,
            intermediate_size=32,
        ),
        vision_config=vision_config_class(
            hidden_size=8,
            num_attention_heads=4,
            num_hidden_layers=2,
            intermediate_size=32,
            **vision_kwargs,
        ),
        **kwargs,
    )
    model = model_class(config)
    push_to_hub(model, processor)
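The hub ids referenced by the updated tests all come from the naming rule in `push_to_hub` above: `tiny-` plus the model class name plus an optional version suffix, pushed under the `trl-internal-testing` organization. A standalone, illustrative sketch of that rule (the `tiny_repo_id` helper is hypothetical, not part of the script):

```python
# Hypothetical helper, shown only to make the repo-id naming rule explicit.
ORGANIZATION = "trl-internal-testing"


def tiny_repo_id(model_class_name, suffix=None):
    repo_id = f"{ORGANIZATION}/tiny-{model_class_name}"
    if suffix is not None:
        repo_id += f"-{suffix}"
    return repo_id


print(tiny_repo_id("Qwen2ForCausalLM", "2.5"))  # trl-internal-testing/tiny-Qwen2ForCausalLM-2.5
print(tiny_repo_id("BartModel"))                # trl-internal-testing/tiny-BartModel
```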
@@ -14,8 +14,8 @@

 # TODO: push them under trl-org
 MODELS_TO_TEST = [
-    "trl-internal-testing/tiny-random-LlamaForCausalLM",
-    "HuggingFaceM4/tiny-random-MistralForCausalLM",
+    "trl-internal-testing/tiny-LlamaForCausalLM-3.2",
+    "trl-internal-testing/tiny-MistralForCausalLM-0.2",
 ]

 # We could have also not declared these variables but let's be verbose
@@ -30,30 +30,30 @@ from .testing_utils import require_no_wandb, require_sklearn

 class BCOTrainerTester(unittest.TestCase):
     def setUp(self):
-        self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
+        self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
         self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.ref_model = AutoModelForCausalLM.from_pretrained(self.model_id)
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
         self.tokenizer.pad_token = self.tokenizer.eos_token

         # get t5 as seq2seq example:
-        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration-correct-vocab"
+        model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
         self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.t5_ref_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)

         # get embedding model
-        model_id = "facebook/bart-base"
+        model_id = "trl-internal-testing/tiny-BartModel"
         self.embedding_model = AutoModel.from_pretrained(model_id)
         self.embedding_tokenizer = AutoTokenizer.from_pretrained(model_id)

     @parameterized.expand(
         [
-            ["gpt2", True, True, "standard_unpaired_preference"],
-            ["gpt2", True, False, "standard_unpaired_preference"],
-            ["gpt2", False, True, "standard_unpaired_preference"],
-            ["gpt2", False, False, "standard_unpaired_preference"],
-            ["gpt2", True, True, "conversational_unpaired_preference"],
+            ("qwen", True, True, "standard_unpaired_preference"),
+            ("qwen", True, False, "standard_unpaired_preference"),
+            ("qwen", False, True, "standard_unpaired_preference"),
+            ("qwen", False, False, "standard_unpaired_preference"),
+            ("qwen", True, True, "conversational_unpaired_preference"),
         ]
     )
     @require_sklearn
@@ -73,7 +73,7 @@ class BCOTrainerTester(unittest.TestCase):

         dummy_dataset = load_dataset("trl-internal-testing/zen", config_name)

-        if name == "gpt2":
+        if name == "qwen":
             model = self.model
             ref_model = self.ref_model
             tokenizer = self.tokenizer
@@ -160,9 +160,9 @@ class BCOTrainerTester(unittest.TestCase):
         self.assertListEqual(tokenized_dataset["prompt"], train_dataset["prompt"])
         self.assertListEqual(tokenized_dataset["completion"], train_dataset["completion"])
         self.assertListEqual(tokenized_dataset["label"], train_dataset["label"])
-        self.assertListEqual(tokenized_dataset["prompt_input_ids"][0], [5377, 11141])
-        self.assertListEqual(tokenized_dataset["prompt_attention_mask"][0], [1, 1])
-        self.assertListEqual(tokenized_dataset["answer_input_ids"][0], [318, 1365, 621, 8253, 13])
+        self.assertListEqual(tokenized_dataset["prompt_input_ids"][0], [31137])
+        self.assertListEqual(tokenized_dataset["prompt_attention_mask"][0], [1])
+        self.assertListEqual(tokenized_dataset["answer_input_ids"][0], [374, 2664, 1091, 16965, 13])
         self.assertListEqual(tokenized_dataset["answer_attention_mask"][0], [1, 1, 1, 1, 1])

         fn_kwargs = {
@@ -178,15 +178,13 @@ class BCOTrainerTester(unittest.TestCase):
         self.assertListEqual(processed_dataset["prompt"], train_dataset["prompt"])
         self.assertListEqual(processed_dataset["completion"], train_dataset["completion"])
         self.assertListEqual(processed_dataset["label"], train_dataset["label"])
-        self.assertListEqual(processed_dataset["prompt_input_ids"][0], [50256, 5377, 11141])
-        self.assertListEqual(processed_dataset["prompt_attention_mask"][0], [1, 1, 1])
-        self.assertListEqual(
-            processed_dataset["completion_input_ids"][0], [50256, 5377, 11141, 318, 1365, 621, 8253, 13, 50256]
-        )
-        self.assertListEqual(processed_dataset["completion_attention_mask"][0], [1, 1, 1, 1, 1, 1, 1, 1, 1])
-        self.assertListEqual(
-            processed_dataset["completion_labels"][0], [-100, -100, -100, 318, 1365, 621, 8253, 13, 50256]
-        )
+        self.assertListEqual(processed_dataset["prompt_input_ids"][0], [31137])
+        self.assertListEqual(processed_dataset["prompt_attention_mask"][0], [1])
+        self.assertListEqual(
+            processed_dataset["completion_input_ids"][0], [31137, 374, 2664, 1091, 16965, 13, 151645]
+        )
+        self.assertListEqual(processed_dataset["completion_attention_mask"][0], [1, 1, 1, 1, 1, 1, 1])
+        self.assertListEqual(processed_dataset["completion_labels"][0], [-100, 374, 2664, 1091, 16965, 13, 151645])

     @require_sklearn
     def test_bco_trainer_without_providing_ref_model(self):
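The expected token ids in the assertions above change because the tests now tokenize with the tiny Qwen2.5 tokenizer rather than the old GPT-2 vocabulary (for example, the sequence-final id 50256 becomes 151645). An illustrative sketch of how such expectations could be regenerated; the sample strings are placeholders, not the actual `trl-internal-testing/zen` examples:

```python
# Illustrative only: print token ids with the new test tokenizer so the
# hard-coded expectations in the assertions can be updated by hand.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
prompt_ids = tokenizer("Some prompt", add_special_tokens=False)["input_ids"]
answer_ids = tokenizer(" and a completion.", add_special_tokens=False)["input_ids"]
print(prompt_ids, answer_ids)
```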
@@ -31,7 +31,7 @@ class BestOfNSamplerTester(unittest.TestCase):
     Tests the BestOfNSampler class
     """

-    ref_model_name = "trl-internal-testing/dummy-GPT2-correct-vocab"
+    ref_model_name = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
     output_length_sampler = LengthSampler(2, 6)
     model = AutoModelForCausalLMWithValueHead.from_pretrained(ref_model_name)
     tokenizer = AutoTokenizer.from_pretrained(ref_model_name)
@@ -60,9 +60,9 @@ class TrainerWithRefModel(Trainer):

 class WinRateCallbackTester(unittest.TestCase):
     def setUp(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
-        self.ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
+        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        self.ref_model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
         self.tokenizer.pad_token = self.tokenizer.eos_token
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only")
         dataset["train"] = dataset["train"].select(range(8))
@@ -219,8 +219,8 @@ class WinRateCallbackTester(unittest.TestCase):
 @require_wandb
 class LogCompletionsCallbackTester(unittest.TestCase):
     def setUp(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
+        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
         self.tokenizer.pad_token = self.tokenizer.eos_token
         dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only")
         dataset["train"] = dataset["train"].select(range(8))
@@ -283,8 +283,8 @@ class LogCompletionsCallbackTester(unittest.TestCase):
 )
 class MergeModelCallbackTester(unittest.TestCase):
     def setUp(self):
-        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-random-LlamaForCausalLM")
-        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-random-LlamaForCausalLM")
+        self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
+        self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
         self.dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")

     def test_callback(self):
@@ -21,7 +21,7 @@ class CLITester(unittest.TestCase):
     def test_sft_cli(self):
         try:
             subprocess.run(
-                "trl sft --max_steps 1 --output_dir tmp-sft --model_name_or_path trl-internal-testing/tiny-random-LlamaForCausalLM --dataset_name stanfordnlp/imdb --learning_rate 1e-4 --lr_scheduler_type cosine",
+                "trl sft --max_steps 1 --output_dir tmp-sft --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name stanfordnlp/imdb --learning_rate 1e-4 --lr_scheduler_type cosine",
                 shell=True,
                 check=True,
             )
@@ -32,7 +32,7 @@ class CLITester(unittest.TestCase):
     def test_dpo_cli(self):
         try:
             subprocess.run(
-                "trl dpo --max_steps 1 --output_dir tmp-dpo --model_name_or_path trl-internal-testing/tiny-random-LlamaForCausalLM --dataset_name trl-internal-testing/tiny-ultrafeedback-binarized --learning_rate 1e-4 --lr_scheduler_type cosine",
+                "trl dpo --max_steps 1 --output_dir tmp-dpo --model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 --dataset_name trl-internal-testing/tiny-ultrafeedback-binarized --learning_rate 1e-4 --lr_scheduler_type cosine",
                 shell=True,
                 check=True,
             )
@ -21,29 +21,31 @@ from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokeni
|
||||
from transformers.testing_utils import require_peft
|
||||
|
||||
from trl import CPOConfig, CPOTrainer
|
||||
from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE
|
||||
|
||||
|
||||
class CPOTrainerTester(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
|
||||
self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
|
||||
self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
|
||||
self.tokenizer.pad_token = self.tokenizer.eos_token
|
||||
|
||||
# get t5 as seq2seq example:
|
||||
model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration-correct-vocab"
|
||||
model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
|
||||
self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
|
||||
self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
self.t5_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
|
||||
|
||||
@parameterized.expand(
|
||||
[
|
||||
["gpt2", "sigmoid", "standard_preference"],
|
||||
["t5", "hinge", "standard_implicit_prompt_preference"],
|
||||
["gpt2", "ipo", "conversational_preference"],
|
||||
["t5", "ipo", "conversational_implicit_prompt_preference"],
|
||||
["gpt2", "simpo", "standard_preference"],
|
||||
["t5", "simpo", "standard_implicit_prompt_preference"],
|
||||
["gpt2", "hinge", "conversational_preference"],
|
||||
("qwen", "sigmoid", "standard_preference"),
|
||||
("t5", "hinge", "standard_implicit_prompt_preference"),
|
||||
("qwen", "ipo", "conversational_preference"),
|
||||
("t5", "ipo", "conversational_implicit_prompt_preference"),
|
||||
("qwen", "simpo", "standard_preference"),
|
||||
("t5", "simpo", "standard_implicit_prompt_preference"),
|
||||
("qwen", "hinge", "conversational_preference"),
|
||||
]
|
||||
)
|
||||
def test_cpo_trainer(self, name, loss_type, config_name):
|
||||
@ -64,7 +66,7 @@ class CPOTrainerTester(unittest.TestCase):
|
||||
|
||||
dummy_dataset = load_dataset("trl-internal-testing/zen", config_name)
|
||||
|
||||
if name == "gpt2":
|
||||
if name == "qwen":
|
||||
model = self.model
|
||||
tokenizer = self.tokenizer
|
||||
elif name == "t5":
|
||||
|
@ -22,7 +22,7 @@ from trl import DataCollatorForCompletionOnlyLM
|
||||
class DataCollatorForCompletionOnlyLMTester(unittest.TestCase):
|
||||
def test_data_collator_finds_response_template_llama2_tokenizer(self):
|
||||
# this should ideally be tested with meta-llama/Llama-2-7b-hf
|
||||
self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
|
||||
self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
|
||||
self.instruction = """### System: You are a helpful assistant.
|
||||
|
||||
### User: How much is 2+2?
|
||||
@ -83,7 +83,7 @@ class DataCollatorForCompletionOnlyLMTester(unittest.TestCase):
|
||||
self.assertEqual(collator_text, expected_text)
|
||||
|
||||
def test_data_collator_handling_of_long_sequences(self):
|
||||
self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
|
||||
self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
|
||||
self.instruction = """### System: You are a helpful assistant.
|
||||
|
||||
### User: How much is 2+2? I'm asking because I'm not sure. And I'm not sure because I'm not good at math.
|
||||
@ -106,7 +106,7 @@ class DataCollatorForCompletionOnlyLMTester(unittest.TestCase):
|
||||
self.assertTrue(result, "Not all values in the tensor are -100.")
|
||||
|
||||
def test_padding_free(self):
|
||||
tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/dummy-GPT2-correct-vocab")
|
||||
tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
|
||||
if tokenizer.pad_token_id is None:
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
tokenizer.pad_token_id = tokenizer.eos_token_id
|
||||
|
@ -85,14 +85,18 @@ class IsConversationalTester(unittest.TestCase):
|
||||
|
||||
class ApplyChatTemplateTester(unittest.TestCase):
|
||||
tokenizers = [
|
||||
"trl-internal-testing/tiny-random-Qwen2-7B-Instruct",
|
||||
"trl-internal-testing/tiny-random-Meta-Llama-3.1-8B-Instruct",
|
||||
"trl-internal-testing/tiny-random-Meta-Llama-3-8B-Instruct",
|
||||
"trl-internal-testing/tiny-random-DeepSeek-Coder-V2-Instruct",
|
||||
"trl-internal-testing/tiny-random-Phi-3-mini-128k-instruct",
|
||||
"trl-internal-testing/tiny-random-gemma-2-9b-it",
|
||||
"trl-internal-testing/tiny-random-Mistral-7B-Instruct-v0.1",
|
||||
"trl-internal-testing/tiny-random-Mistral-7B-Instruct-v0.2",
|
||||
"trl-internal-testing/tiny-CohereForCausalLM",
|
||||
"trl-internal-testing/tiny-DbrxForCausalLM",
|
||||
"trl-internal-testing/tiny-FalconMambaForCausalLM",
|
||||
"trl-internal-testing/tiny-Gemma2ForCausalLM",
|
||||
"trl-internal-testing/tiny-GemmaForCausalLM",
|
||||
"trl-internal-testing/tiny-LlamaForCausalLM-3.1",
|
||||
"trl-internal-testing/tiny-LlamaForCausalLM-3.2",
|
||||
"trl-internal-testing/tiny-LlamaForCausalLM-3",
|
||||
"trl-internal-testing/tiny-MistralForCausalLM-0.1",
|
||||
"trl-internal-testing/tiny-MistralForCausalLM-0.2",
|
||||
"trl-internal-testing/tiny-Phi3ForCausalLM",
|
||||
"trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
|
||||
]
|
||||
|
||||
conversational_examples = [
|
||||
|
@ -24,8 +24,8 @@ from trl.models.utils import ChatMlSpecialTokens, setup_chat_format
|
||||
|
||||
class DatasetFormattingTestCase(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.llama_tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
|
||||
self.chatml_tokenizer = AutoTokenizer.from_pretrained("philschmid/gpt2-chatml-tokenizer")
|
||||
self.llama_tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-MistralForCausalLM-0.1")
|
||||
self.chatml_tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
|
||||
|
||||
def test_get_formatting_func_from_dataset_with_chatml_messages(self):
|
||||
dataset = Dataset.from_dict(
|
||||
@ -44,7 +44,7 @@ class DatasetFormattingTestCase(unittest.TestCase):
|
||||
formatting_func = get_formatting_func_from_dataset(dataset, self.llama_tokenizer)
|
||||
self.assertIsInstance(formatting_func, Callable)
|
||||
formatted_text = formatting_func(dataset[0])
|
||||
expected = "<s>[INST] <<SYS>>\nYou are helpful\n<</SYS>>\n\nHello [/INST] Hi, how can I help you? </s>"
|
||||
expected = "<s> [INST] You are helpful\n\nHello [/INST] Hi, how can I help you?</s>"
|
||||
self.assertEqual(formatted_text, expected)
|
||||
formatted_text = formatting_func(dataset[0:1])
|
||||
self.assertListEqual(formatted_text, [expected])
|
||||
@ -73,7 +73,7 @@ class DatasetFormattingTestCase(unittest.TestCase):
|
||||
formatting_func = get_formatting_func_from_dataset(dataset, self.llama_tokenizer)
|
||||
self.assertIsInstance(formatting_func, Callable)
|
||||
formatted_text = formatting_func(dataset[0])
|
||||
expected = "<s>[INST] <<SYS>>\nYou are helpful\n<</SYS>>\n\nHello [/INST] Hi, how can I help you? </s>"
|
||||
expected = "<s> [INST] You are helpful\n\nHello [/INST] Hi, how can I help you?</s>"
|
||||
self.assertEqual(formatted_text, expected)
|
||||
formatted_text = formatting_func(dataset[0:1])
|
||||
self.assertListEqual(formatted_text, [expected])
|
||||
@ -94,9 +94,9 @@ class DatasetFormattingTestCase(unittest.TestCase):
|
||||
self.assertIsNotNone(formatting_func)
|
||||
self.assertIsInstance(formatting_func, Callable)
|
||||
formatted_text = formatting_func(dataset[0])
|
||||
self.assertEqual(formatted_text, "<s>[INST] What is 2+2? [/INST] 4 </s>")
|
||||
self.assertEqual(formatted_text, "<s> [INST] What is 2+2? [/INST] 4</s>")
|
||||
formatted_text = formatting_func(dataset[0:1])
|
||||
self.assertListEqual(formatted_text, ["<s>[INST] What is 2+2? [/INST] 4 </s>"])
|
||||
self.assertListEqual(formatted_text, ["<s> [INST] What is 2+2? [/INST] 4</s>"])
|
||||
|
||||
def test_get_formatting_func_from_dataset_from_hub(self):
|
||||
ds_1 = load_dataset("philschmid/trl-test-instruction", split="train")
|
||||
@ -117,13 +117,12 @@ class DatasetFormattingTestCase(unittest.TestCase):
|
||||
|
||||
class SetupChatFormatTestCase(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/llama-tokenizer")
|
||||
self.model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM")
|
||||
self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
|
||||
self.model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
|
||||
# remove built-in chat_template to simulate a model having no chat_template
|
||||
self.tokenizer.chat_template = None
|
||||
|
||||
def test_setup_chat_format(self):
|
||||
original_tokenizer_len = len(self.tokenizer)
|
||||
modified_model, modified_tokenizer = setup_chat_format(
|
||||
self.model, self.tokenizer, format="chatml", resize_to_multiple_of=64
|
||||
)
|
||||
@ -136,9 +135,7 @@ class SetupChatFormatTestCase(unittest.TestCase):
|
||||
self.assertEqual(modified_tokenizer.eos_token, _chatml.eos_token)
|
||||
self.assertEqual(modified_tokenizer.pad_token, _chatml.pad_token)
|
||||
self.assertEqual(modified_tokenizer.bos_token, _chatml.bos_token)
|
||||
self.assertEqual(len(modified_tokenizer), (original_tokenizer_len + 2))
|
||||
self.assertEqual((self.model.get_input_embeddings().weight.shape[0] % 64), 0)
|
||||
self.assertEqual(self.model.get_input_embeddings().weight.shape[0], (original_tokenizer_len + 64))
|
||||
|
||||
def test_example_with_setup_model(self):
|
||||
modified_model, modified_tokenizer = setup_chat_format(
|
||||
|
@ -158,40 +158,40 @@ class TestTokenizeRow(unittest.TestCase):
|
||||
|
||||
class DPOTrainerTester(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
|
||||
self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
|
||||
self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
|
||||
self.ref_model = AutoModelForCausalLM.from_pretrained(self.model_id)
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
|
||||
self.tokenizer.pad_token = self.tokenizer.eos_token
|
||||
|
||||
# get t5 as seq2seq example:
|
||||
model_id = "trl-internal-testing/T5ForConditionalGeneration-correct-vocab-calibrated"
|
||||
model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
|
||||
self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
|
||||
self.t5_ref_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
|
||||
self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
|
||||
@parameterized.expand(
|
||||
[
|
||||
["gpt2", "sigmoid", True],
|
||||
["t5", "hinge", False],
|
||||
["gpt2", "ipo", False],
|
||||
["t5", "ipo", True],
|
||||
["gpt2", "aot_pair", True],
|
||||
["t5", "aot_pair", False],
|
||||
["gpt2", "aot", True],
|
||||
["t5", "aot", False],
|
||||
["gpt2", "bco_pair", False],
|
||||
["t5", "bco_pair", True],
|
||||
["gpt2", "sppo_hard", False],
|
||||
["t5", "sppo_hard", True],
|
||||
["gpt2", "nca_pair", False],
|
||||
["t5", "nca_pair", True],
|
||||
["gpt2", "robust", True],
|
||||
["gpt2", "exo_pair", False],
|
||||
["t5", "exo_pair", True],
|
||||
["gpt2", "apo_zero", True],
|
||||
["t5", "apo_down", False],
|
||||
["gpt2", "discopop", False],
|
||||
("qwen", "sigmoid", True),
|
||||
("t5", "hinge", False),
|
||||
("qwen", "ipo", False),
|
||||
("t5", "ipo", True),
|
||||
("qwen", "aot_pair", True),
|
||||
("t5", "aot_pair", False),
|
||||
("qwen", "aot", True),
|
||||
("t5", "aot", False),
|
||||
("qwen", "bco_pair", False),
|
||||
("t5", "bco_pair", True),
|
||||
("qwen", "sppo_hard", False),
|
||||
("t5", "sppo_hard", True),
|
||||
("qwen", "nca_pair", False),
|
||||
("t5", "nca_pair", True),
|
||||
("qwen", "robust", True),
|
||||
("qwen", "exo_pair", False),
|
||||
("t5", "exo_pair", True),
|
||||
("qwen", "apo_zero", True),
|
||||
("t5", "apo_down", False),
|
||||
("qwen", "discopop", False),
|
||||
]
|
||||
)
|
||||
def test_dpo_trainer(self, name, loss_type, pre_compute):
|
||||
@ -212,7 +212,7 @@ class DPOTrainerTester(unittest.TestCase):
|
||||
|
||||
dummy_dataset = load_dataset("trl-internal-testing/zen", "standard_preference")
|
||||
|
||||
if name == "gpt2":
|
||||
if name == "qwen":
|
||||
model = self.model
|
||||
ref_model = self.ref_model
|
||||
tokenizer = self.tokenizer
|
||||
@ -286,8 +286,8 @@ class DPOTrainerTester(unittest.TestCase):
|
||||
|
||||
@parameterized.expand(
|
||||
[
|
||||
[None, "Test when rpo_alpha is set to None"],
|
||||
[0.5, "Test when rpo_alpha is set to 0.5"],
|
||||
(None, "Test when rpo_alpha is set to None"),
|
||||
(0.5, "Test when rpo_alpha is set to 0.5"),
|
||||
]
|
||||
)
|
||||
def test_dpo_trainer_without_providing_ref_model(self, rpo_alpha, _):
|
||||
@ -609,7 +609,7 @@ class DPOTrainerTester(unittest.TestCase):
|
||||
# Note this test only works on compute capability > 7 GPU devices
|
||||
from peft import LoraConfig
|
||||
|
||||
model_id = "trl-internal-testing/tiny-random-LlamaForCausalLM"
|
||||
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
|
||||
lora_config = LoraConfig(
|
||||
@ -659,36 +659,36 @@ class DPOTrainerTester(unittest.TestCase):
|
||||
|
||||
@parameterized.expand(
|
||||
[
|
||||
["gpt2", "sigmoid", False, False],
|
||||
["gpt2", "sigmoid", False, True],
|
||||
["gpt2", "sigmoid", True, False],
|
||||
["gpt2", "sigmoid", True, True],
|
||||
["gpt2", "ipo", False, False],
|
||||
["gpt2", "ipo", False, True],
|
||||
["gpt2", "ipo", True, False],
|
||||
["gpt2", "ipo", True, True],
|
||||
["gpt2", "aot_pair", False, False],
|
||||
["gpt2", "aot_pair", False, True],
|
||||
["gpt2", "aot_pair", True, False],
|
||||
["gpt2", "aot_pair", True, True],
|
||||
["gpt2", "aot", False, False],
|
||||
["gpt2", "aot", False, True],
|
||||
["gpt2", "aot", True, False],
|
||||
["gpt2", "aot", True, True],
|
||||
["gpt2", "bco_pair", False, False],
|
||||
["gpt2", "bco_pair", False, True],
|
||||
["gpt2", "bco_pair", True, False],
|
||||
["gpt2", "bco_pair", True, True],
|
||||
["gpt2", "robust", False, False],
|
||||
["gpt2", "robust", False, True],
|
||||
["gpt2", "robust", True, False],
|
||||
["gpt2", "robust", True, True],
|
||||
("sigmoid", False, False),
|
||||
("sigmoid", False, True),
|
||||
("sigmoid", True, False),
|
||||
("sigmoid", True, True),
|
||||
("ipo", False, False),
|
||||
("ipo", False, True),
|
||||
("ipo", True, False),
|
||||
("ipo", True, True),
|
||||
("aot_pair", False, False),
|
||||
("aot_pair", False, True),
|
||||
("aot_pair", True, False),
|
||||
("aot_pair", True, True),
|
||||
("aot", False, False),
|
||||
("aot", False, True),
|
||||
("aot", True, False),
|
||||
("aot", True, True),
|
||||
("bco_pair", False, False),
|
||||
("bco_pair", False, True),
|
||||
("bco_pair", True, False),
|
||||
("bco_pair", True, True),
|
||||
("robust", False, False),
|
||||
("robust", False, True),
|
||||
("robust", True, False),
|
||||
("robust", True, True),
|
||||
]
|
||||
)
|
||||
@require_bitsandbytes
|
||||
@require_peft
|
||||
@unittest.skip("You need a GPU with bf16 support in order to run these tests")
|
||||
def test_dpo_lora_bf16_autocast(self, name, loss_type, pre_compute, gen_during_eval):
|
||||
def test_dpo_lora_bf16_autocast(self, loss_type, pre_compute, gen_during_eval):
|
||||
# Note this test only works on compute capability > 7 GPU devices
|
||||
from peft import LoraConfig
|
||||
|
||||
@ -743,7 +743,7 @@ class DPOTrainerTester(unittest.TestCase):
|
||||
def test_dpo_lora_tags(self):
|
||||
from peft import LoraConfig
|
||||
|
||||
model_id = "trl-internal-testing/tiny-random-LlamaForCausalLM"
|
||||
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
|
||||
lora_config = LoraConfig(
|
||||
@ -788,7 +788,7 @@ class DPOTrainerTester(unittest.TestCase):
|
||||
|
||||
@require_peft
|
||||
def test_dpo_tags(self):
|
||||
model_id = "HuggingFaceM4/tiny-random-LlamaForCausalLM"
|
||||
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
|
||||
# lora model
|
||||
@ -964,7 +964,7 @@ class DPOTrainerTester(unittest.TestCase):
|
||||
)
|
||||
|
||||
def test_dpo_loss_alpha_div_f(self):
|
||||
model_id = "trl-internal-testing/tiny-random-LlamaForCausalLM"
|
||||
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
|
||||
# lora model
|
||||
@ -1007,7 +1007,7 @@ class DPOTrainerTester(unittest.TestCase):
|
||||
self.assertTrue(torch.isfinite(losses).cpu().numpy().all())
|
||||
|
||||
def test_dpo_loss_js_div_f(self):
|
||||
model_id = "trl-internal-testing/tiny-random-LlamaForCausalLM"
|
||||
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
|
||||
# lora model
|
||||
@ -1050,8 +1050,9 @@ class DPOTrainerTester(unittest.TestCase):
|
||||
self.assertTrue(torch.isfinite(losses).cpu().numpy().all())
|
||||
|
||||
def test_dpo_trainer_use_num_logits_to_keep(self):
|
||||
model_id = "trl-internal-testing/tiny-random-LlamaForCausalLM"
|
||||
model_id = "trl-internal-testing/tiny-LlamaForCausalLM-3.2"
|
||||
tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
tokenizer.pad_token = tokenizer.eos_token
|
||||
|
||||
model = AutoModelForCausalLM.from_pretrained(model_id)
|
||||
|
||||
@ -1135,9 +1136,10 @@ class DPOTrainerTester(unittest.TestCase):
|
||||
class DPOVisionTrainerTester(unittest.TestCase):
|
||||
@parameterized.expand(
|
||||
[
|
||||
["trl-internal-testing/tiny-random-idefics2"],
|
||||
["trl-internal-testing/tiny-random-paligemma"],
|
||||
["trl-internal-testing/tiny-random-llava-1.5"],
|
||||
("trl-internal-testing/tiny-Idefics2ForConditionalGeneration",),
|
||||
# ("trl-internal-testing/tiny-PaliGemmaForConditionalGeneration",),
|
||||
("trl-internal-testing/tiny-LlavaForConditionalGeneration",),
|
||||
# ("trl-internal-testing/tiny-LlavaNextForConditionalGeneration",),
|
||||
]
|
||||
)
|
||||
def test_vdpo_trainer(self, model_id):
|
||||
@ -1185,9 +1187,8 @@ class DPOVisionTrainerTester(unittest.TestCase):
|
||||
training_args = DPOConfig(
|
||||
output_dir=tmp_dir,
|
||||
per_device_train_batch_size=2,
|
||||
max_length=512,
|
||||
max_prompt_length=512,
|
||||
remove_unused_columns=False,
|
||||
learning_rate=0.01, # increase learning rate to speed up test
|
||||
report_to="none",
|
||||
)
|
||||
trainer = DPOTrainer(
|
||||
@ -1210,8 +1211,8 @@ class DPOVisionTrainerTester(unittest.TestCase):
|
||||
for n, param in previous_trainable_params.items():
|
||||
new_param = trainer.model.get_parameter(n)
|
||||
if param.sum() != 0: # ignore 0 biases
|
||||
if model_id == "trl-internal-testing/tiny-random-llava-1.5" and (
|
||||
n.startswith("vision_tower.vision_model.encoder.layers.3")
|
||||
if model_id == "trl-internal-testing/tiny-LlavaForConditionalGeneration" and (
|
||||
n.startswith("vision_tower.vision_model.encoder.layers.1")
|
||||
or n == "vision_tower.vision_model.post_layernorm.weight"
|
||||
):
|
||||
# For some reason, these params are not updated. This is probably not related to TRL, but to
|
||||
|
@ -96,7 +96,7 @@ class TextHistoryTest(unittest.TestCase):
|
||||
class TextEnvironmentTester(unittest.TestCase):
|
||||
def setUp(self):
|
||||
# model_id
|
||||
self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
|
||||
self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
|
||||
|
||||
# get models and tokenizer
|
||||
self.gpt2_model = AutoModelForCausalLMWithValueHead.from_pretrained(self.model_id)
|
||||
@ -258,7 +258,7 @@ class TextEnvironmentTester(unittest.TestCase):
|
||||
task_2 = "Hello there! General Kenobi!"
|
||||
|
||||
query, response, response_mask, reward, histories = env.run([task_1, task_2])
|
||||
self.assertEqual(len(query[0]), 9)
|
||||
self.assertEqual(len(query[0]), 8)
|
||||
self.assertEqual(len(query[1]), 12)
|
||||
self.assertEqual(len(response[0]), 14)
|
||||
self.assertEqual(len(response[1]), 14)
|
||||
|
@ -27,9 +27,10 @@ from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE
|
||||
class TestGKDTrainer(unittest.TestCase):
|
||||
@classmethod
|
||||
def setUpClass(cls):
|
||||
cls.tokenizer = AutoTokenizer.from_pretrained("gpt2")
|
||||
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
|
||||
cls.tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
cls.tokenizer.pad_token = cls.tokenizer.eos_token
|
||||
cls.model = AutoModelForCausalLM.from_pretrained("gpt2")
|
||||
cls.model = AutoModelForCausalLM.from_pretrained(model_id)
|
||||
cls.generation_config = GenerationConfig(
|
||||
max_new_tokens=20,
|
||||
num_return_sequences=1,
|
||||
@ -201,7 +202,7 @@ class TestGeneralizedJSDLoss(unittest.TestCase):
|
||||
|
||||
class GKDTrainerTester(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
|
||||
self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
|
||||
self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
|
||||
self.teacher_model = AutoModelForCausalLM.from_pretrained(self.model_id)
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
|
||||
|
@ -25,13 +25,13 @@ from trl import IterativeSFTTrainer
|
||||
|
||||
class IterativeTrainerTester(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
|
||||
self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
|
||||
self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
|
||||
self.tokenizer.pad_token = self.tokenizer.eos_token
|
||||
|
||||
# get t5 as seq2seq example:
|
||||
model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration-correct-vocab-calibrated"
|
||||
model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
|
||||
self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
|
||||
self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
|
||||
@ -70,8 +70,8 @@ class IterativeTrainerTester(unittest.TestCase):
|
||||
|
||||
@parameterized.expand(
|
||||
[
|
||||
["gpt2", "tensor"],
|
||||
["gpt2", "text"],
|
||||
["qwen", "tensor"],
|
||||
["qwen", "text"],
|
||||
["t5", "tensor"],
|
||||
["t5", "text"],
|
||||
]
|
||||
@ -93,7 +93,7 @@ class IterativeTrainerTester(unittest.TestCase):
|
||||
"texts_labels": dummy_dataset["texts_labels"],
|
||||
}
|
||||
|
||||
if model_name == "gpt2":
|
||||
if model_name == "qwen":
|
||||
model = self.model
|
||||
tokenizer = self.tokenizer
|
||||
else:
|
||||
|
@ -28,27 +28,27 @@ from .testing_utils import require_no_wandb
|
||||
|
||||
class KTOTrainerTester(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
|
||||
self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
|
||||
self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
|
||||
self.ref_model = AutoModelForCausalLM.from_pretrained(self.model_id)
|
||||
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
|
||||
self.tokenizer.pad_token = self.tokenizer.eos_token
|
||||
|
||||
# get t5 as seq2seq example:
|
||||
model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration-correct-vocab"
|
||||
model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
|
||||
self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
|
||||
self.t5_ref_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
|
||||
self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)
|
||||
|
||||
@parameterized.expand(
|
||||
[
|
||||
("gpt2", "standard_preference", "kto", True, True),
|
||||
("qwen", "standard_preference", "kto", True, True),
|
||||
# ("t5", "standard_implicit_prompt_preference", "kto", True, False), # KTO broken for enc-dec
|
||||
("gpt2", "standard_unpaired_preference", "kto", False, True),
|
||||
("qwen", "standard_unpaired_preference", "kto", False, True),
|
||||
# ("t5", "conversational_preference", "kto", False, False),
|
||||
("gpt2", "conversational_implicit_prompt_preference", "apo_zero_unpaired", True, True),
|
||||
("qwen", "conversational_implicit_prompt_preference", "apo_zero_unpaired", True, True),
|
||||
# ("t5", "conversational_unpaired_preference", "apo_zero_unpaired", True, False),
|
||||
("gpt2", "standard_unpaired_preference", "apo_zero_unpaired", False, True),
|
||||
("qwen", "standard_unpaired_preference", "apo_zero_unpaired", False, True),
|
||||
# ("t5", "conversational_unpaired_preference", "apo_zero_unpaired", False, False),
|
||||
]
|
||||
)
|
||||
@ -70,7 +70,7 @@ class KTOTrainerTester(unittest.TestCase):
|
||||
|
||||
dummy_dataset = load_dataset("trl-internal-testing/zen", config_name)
|
||||
|
||||
if name == "gpt2":
|
||||
if name == "qwen":
|
||||
model = self.model
|
||||
ref_model = self.ref_model
|
||||
tokenizer = self.tokenizer
|
||||
@ -156,9 +156,9 @@ class KTOTrainerTester(unittest.TestCase):
|
||||
self.assertListEqual(tokenized_dataset["prompt"], train_dataset["prompt"])
|
||||
self.assertListEqual(tokenized_dataset["completion"], train_dataset["completion"])
|
||||
self.assertListEqual(tokenized_dataset["label"], train_dataset["label"])
|
||||
self.assertListEqual(tokenized_dataset["prompt_input_ids"][0], [5377, 11141])
|
||||
self.assertListEqual(tokenized_dataset["prompt_attention_mask"][0], [1, 1])
|
||||
self.assertListEqual(tokenized_dataset["answer_input_ids"][0], [318, 1365, 621, 8253, 13])
|
||||
self.assertListEqual(tokenized_dataset["prompt_input_ids"][0], [31137])
|
||||
self.assertListEqual(tokenized_dataset["prompt_attention_mask"][0], [1])
|
||||
self.assertListEqual(tokenized_dataset["answer_input_ids"][0], [374, 2664, 1091, 16965, 13])
|
||||
self.assertListEqual(tokenized_dataset["answer_attention_mask"][0], [1, 1, 1, 1, 1])
|
||||
|
||||
# Test corruption of (prompt, completion) pairs for KL dataset
|
||||
@ -196,15 +196,13 @@ class KTOTrainerTester(unittest.TestCase):
|
||||
self.assertListEqual(processed_dataset["prompt"], train_dataset["prompt"])
|
||||
self.assertListEqual(processed_dataset["completion"], train_dataset["completion"])
|
||||
self.assertListEqual(processed_dataset["label"], train_dataset["label"])
|
||||
self.assertListEqual(processed_dataset["prompt_input_ids"][0], [50256, 5377, 11141])
|
||||
self.assertListEqual(processed_dataset["prompt_attention_mask"][0], [1, 1, 1])
|
||||
self.assertListEqual(processed_dataset["prompt_input_ids"][0], [31137])
|
||||
self.assertListEqual(processed_dataset["prompt_attention_mask"][0], [1])
|
||||
self.assertListEqual(
|
||||
processed_dataset["completion_input_ids"][0], [50256, 5377, 11141, 318, 1365, 621, 8253, 13, 50256]
|
||||
)
|
||||
self.assertListEqual(processed_dataset["completion_attention_mask"][0], [1, 1, 1, 1, 1, 1, 1, 1, 1])
|
||||
self.assertListEqual(
|
||||
processed_dataset["completion_labels"][0], [-100, -100, -100, 318, 1365, 621, 8253, 13, 50256]
|
||||
processed_dataset["completion_input_ids"][0], [31137, 374, 2664, 1091, 16965, 13, 151645]
|
||||
)
|
||||
self.assertListEqual(processed_dataset["completion_attention_mask"][0], [1, 1, 1, 1, 1, 1, 1])
|
||||
self.assertListEqual(processed_dataset["completion_labels"][0], [-100, 374, 2664, 1091, 16965, 13, 151645])
|
||||
|
||||
def test_kto_trainer_without_providing_ref_model(self):
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
|
@ -21,9 +21,10 @@ from trl.models.modeling_base import GeometricMixtureWrapper, create_reference_m
|
||||
|
||||
class TestGeometricMixtureWrapper(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.model = AutoModelForCausalLM.from_pretrained("gpt2")
|
||||
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
|
||||
self.model = AutoModelForCausalLM.from_pretrained(model_id)
|
||||
self.ref_model = create_reference_model(self.model)
|
||||
self.generation_config = GenerationConfig.from_pretrained("gpt2")
|
||||
self.generation_config = GenerationConfig.from_pretrained(model_id)
|
||||
self.mixture_coef = 0.5
|
||||
self.wrapper = GeometricMixtureWrapper(
|
||||
self.model, self.ref_model, self.generation_config, mixture_coef=self.mixture_coef
|
||||
|
@ -24,38 +24,27 @@ from trl import AutoModelForCausalLMWithValueHead, AutoModelForSeq2SeqLMWithValu
|
||||
|
||||
|
||||
ALL_CAUSAL_LM_MODELS = [
|
||||
"trl-internal-testing/tiny-random-CodeGenForCausalLM",
|
||||
"trl-internal-testing/tiny-random-GPTJForCausalLM",
|
||||
"trl-internal-testing/tiny-random-GPTNeoForCausalLM",
|
||||
"trl-internal-testing/tiny-random-GPTNeoXForCausalLM",
|
||||
"trl-internal-testing/tiny-random-OPTForCausalLM",
|
||||
"trl-internal-testing/tiny-random-BloomForCausalLM",
|
||||
"trl-internal-testing/tiny-random-GPT2LMHeadModel",
|
||||
"trl-internal-testing/tiny-random-CodeGenForCausalLM-sharded",
|
||||
"trl-internal-testing/tiny-random-GPTNeoXForCausalLM-safetensors-sharded",
|
||||
"trl-internal-testing/tiny-random-GPTNeoXForCausalLM-safetensors",
|
||||
"trl-internal-testing/tiny-random-LlamaForCausalLM",
|
||||
"trl-internal-testing/tiny-BloomForCausalLM",
|
||||
"trl-internal-testing/tiny-CohereForCausalLM",
|
||||
"trl-internal-testing/tiny-DbrxForCausalLM",
|
||||
"trl-internal-testing/tiny-FalconMambaForCausalLM",
|
||||
"trl-internal-testing/tiny-Gemma2ForCausalLM",
|
||||
"trl-internal-testing/tiny-GemmaForCausalLM",
|
||||
"trl-internal-testing/tiny-GPT2LMHeadModel",
|
||||
"trl-internal-testing/tiny-GPTNeoXForCausalLM",
|
||||
"trl-internal-testing/tiny-LlamaForCausalLM-3.1",
|
||||
"trl-internal-testing/tiny-LlamaForCausalLM-3.2",
|
||||
"trl-internal-testing/tiny-LlamaForCausalLM-3",
|
||||
"trl-internal-testing/tiny-MistralForCausalLM-0.1",
|
||||
"trl-internal-testing/tiny-MistralForCausalLM-0.2",
|
||||
"trl-internal-testing/tiny-OPTForCausalLM",
|
||||
"trl-internal-testing/tiny-Phi3ForCausalLM",
|
||||
"trl-internal-testing/tiny-Qwen2ForCausalLM-2.5",
|
||||
]
|
||||
|
||||
ALL_SEQ2SEQ_MODELS = [
|
||||
"trl-internal-testing/tiny-random-BartForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-random-BigBirdPegasusForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-random-BlenderbotForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-random-BlenderbotSmallForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-random-FSMTForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-random-LEDForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-random-LongT5ForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-random-M2M100ForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-random-MarianMTModel",
|
||||
"trl-internal-testing/tiny-random-MBartForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-random-MT5ForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-random-MvpForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-random-PegasusForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-random-PegasusXForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-random-PLBartForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-random-ProphetNetForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-random-SwitchTransformersForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-random-T5ForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-T5ForConditionalGeneration",
|
||||
"trl-internal-testing/tiny-BartModel",
|
||||
]
|
||||
|
||||
|
||||
@ -278,7 +267,7 @@ class CausalLMValueHeadModelTester(BaseTester.VHeadModelTester, unittest.TestCas
|
||||
|
||||
def test_raise_error_not_causallm(self):
|
||||
# Test with a model without a LM head
|
||||
model_id = "trl-internal-testing/tiny-random-GPT2Model"
|
||||
model_id = "trl-internal-testing/tiny-GPT2LMHeadModel"
|
||||
# This should raise a ValueError
|
||||
with self.assertRaises(ValueError):
|
||||
pretrained_model = AutoModelForCausalLM.from_pretrained(model_id)
|
||||
@ -405,7 +394,7 @@ class Seq2SeqValueHeadModelTester(BaseTester.VHeadModelTester, unittest.TestCase
|
||||
|
||||
def test_raise_error_not_causallm(self):
|
||||
# Test with a model without a LM head
|
||||
model_id = "trl-internal-testing/tiny-random-T5Model"
|
||||
model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
|
||||
# This should raise a ValueError
|
||||
with self.assertRaises(ValueError):
|
||||
pretrained_model = AutoModel.from_pretrained(model_id)
|
||||
@ -442,10 +431,6 @@ class Seq2SeqValueHeadModelTester(BaseTester.VHeadModelTester, unittest.TestCase
|
||||
|
||||
lm_head_namings = self.trl_model_class.lm_head_namings
|
||||
|
||||
if model_name == "trl-internal-testing/tiny-random-FSMTForConditionalGeneration":
|
||||
# skip the test for FSMT as it does not support mixed-prec
|
||||
continue
|
||||
|
||||
self.assertTrue(
|
||||
any(hasattr(trl_model.pretrained_model, lm_head_naming) for lm_head_naming in lm_head_namings)
|
||||
)
|
||||
@ -462,34 +447,32 @@ class Seq2SeqValueHeadModelTester(BaseTester.VHeadModelTester, unittest.TestCase
|
||||
|
||||
class ReferenceModelTest(unittest.TestCase):
|
||||
def setUp(self):
|
||||
self.model = AutoModelForCausalLMWithValueHead.from_pretrained(
|
||||
"trl-internal-testing/tiny-random-GPT2LMHeadModel"
|
||||
)
|
||||
self.model = AutoModelForCausalLMWithValueHead.from_pretrained("trl-internal-testing/tiny-GPT2LMHeadModel")
|
||||
self.test_input = torch.tensor([[0, 1, 2, 3]])
|
||||
self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=1)
|
||||
self.layer_format = "pretrained_model.transformer.h.{layer}.attn.c_attn.weight"
|
||||
|
||||
def test_independent_reference(self):
|
||||
layer_0 = self.layer_format.format(layer=0)
|
||||
layer_5 = self.layer_format.format(layer=4)
|
||||
layer_1 = self.layer_format.format(layer=1)
|
||||
|
||||
ref_model = create_reference_model(self.model)
|
||||
|
||||
first_layer_before = self.model.get_parameter(layer_0).data.clone()
|
||||
last_layer_before = self.model.get_parameter(layer_5).data.clone()
|
||||
last_layer_before = self.model.get_parameter(layer_1).data.clone() # the model only has 2 layers
|
||||
|
||||
first_ref_layer_before = ref_model.get_parameter(layer_0).data.clone()
|
||||
last_ref_layer_before = ref_model.get_parameter(layer_5).data.clone()
|
||||
last_ref_layer_before = ref_model.get_parameter(layer_1).data.clone()
|
||||
|
||||
output = self.model(input_ids=self.test_input, labels=self.test_input)
|
||||
output[1].backward()
|
||||
self.optimizer.step()
|
||||
|
||||
first_layer_after = self.model.get_parameter(layer_0).data.clone()
|
||||
last_layer_after = self.model.get_parameter(layer_5).data.clone()
|
||||
last_layer_after = self.model.get_parameter(layer_1).data.clone()
|
||||
|
||||
first_ref_layer_after = ref_model.get_parameter(layer_0).data.clone()
|
||||
last_ref_layer_after = ref_model.get_parameter(layer_5).data.clone()
|
||||
last_ref_layer_after = ref_model.get_parameter(layer_1).data.clone()
|
||||
|
||||
# before optimization ref and model are identical
|
||||
self.assertTrue((first_layer_before == first_ref_layer_before).all())
|
||||
|
@ -31,10 +31,10 @@ if is_peft_available():

class TestNashMDTrainer(unittest.TestCase):
def setUp(self):
self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
self.ref_model = AutoModelForCausalLM.from_pretrained(self.model_id)
self.reward_model = AutoModelForSequenceClassification.from_pretrained("EleutherAI/pythia-14m", num_labels=1)
self.reward_model = AutoModelForSequenceClassification.from_pretrained(self.model_id, num_labels=1)
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
self.tokenizer.pad_token = self.tokenizer.eos_token

@ -21,7 +21,6 @@ from transformers.testing_utils import require_peft
from transformers.utils import is_peft_available

from trl import OnlineDPOConfig, OnlineDPOTrainer, is_llm_blender_available
from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE

from .testing_utils import RandomPairwiseJudge

@ -32,16 +31,17 @@ if is_peft_available():

class TestOnlineDPOTrainer(unittest.TestCase):
def setUp(self):
self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
self.ref_model = AutoModelForCausalLM.from_pretrained(self.model_id)
self.reward_model = AutoModelForSequenceClassification.from_pretrained("EleutherAI/pythia-14m", num_labels=1)
self.reward_tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-14m")
self.reward_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE
self.reward_tokenizer.pad_token = self.reward_tokenizer.eos_token
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
self.tokenizer.pad_token = self.tokenizer.eos_token

self.reward_model_id = "trl-internal-testing/tiny-LlamaForCausalLM-3.2"
self.reward_model = AutoModelForSequenceClassification.from_pretrained(self.reward_model_id, num_labels=1)
self.reward_tokenizer = AutoTokenizer.from_pretrained(self.reward_model_id)
self.reward_tokenizer.pad_token = self.reward_tokenizer.eos_token

@parameterized.expand([("standard_prompt_only",), ("conversational_prompt_only",)])
def test_training(self, config_name):
with tempfile.TemporaryDirectory() as tmp_dir:
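One detail worth noting about the new setup: Llama-style tokenizers typically ship without a pad token, which is why the reward tokenizer reuses EOS for padding. A small sketch of that guard (checkpoint name from the diff; behaviour of other tokenizers may differ):

```python
from transformers import AutoTokenizer

reward_tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-LlamaForCausalLM-3.2")
if reward_tokenizer.pad_token is None:
    # Reuse EOS as the padding token so batched reward scoring does not fail.
    reward_tokenizer.pad_token = reward_tokenizer.eos_token
```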
@ -21,25 +21,27 @@ from transformers import AutoModelForCausalLM, AutoModelForSeq2SeqLM, AutoTokeni
from transformers.testing_utils import require_peft

from trl import ORPOConfig, ORPOTrainer
from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE


class ORPOTrainerTester(unittest.TestCase):
def setUp(self):
self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
self.tokenizer.pad_token = self.tokenizer.eos_token

# get t5 as seq2seq example:
model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration-correct-vocab"
model_id = "trl-internal-testing/tiny-T5ForConditionalGeneration"
self.t5_model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
self.t5_tokenizer = AutoTokenizer.from_pretrained(model_id)
self.t5_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE

@parameterized.expand(
[
("gpt2", "standard_preference"),
("qwen", "standard_preference"),
("t5", "standard_implicit_prompt_preference"),
("gpt2", "conversational_preference"),
("qwen", "conversational_preference"),
("t5", "conversational_implicit_prompt_preference"),
]
)
@ -59,7 +61,7 @@ class ORPOTrainerTester(unittest.TestCase):

dummy_dataset = load_dataset("trl-internal-testing/zen", config_name)

if name == "gpt2":
if name == "qwen":
model = self.model
tokenizer = self.tokenizer
elif name == "t5":
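The T5 tokenizer has no chat template of its own, so the test attaches TRL's `SIMPLE_CHAT_TEMPLATE` before it sees conversational data. A short sketch of the same pattern outside the test class (the message content is made up for illustration):

```python
from transformers import AutoTokenizer
from trl.trainer.utils import SIMPLE_CHAT_TEMPLATE

t5_tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-T5ForConditionalGeneration")
t5_tokenizer.chat_template = SIMPLE_CHAT_TEMPLATE  # minimal Jinja template shipped with TRL

messages = [{"role": "user", "content": "Hello!"}]
print(t5_tokenizer.apply_chat_template(messages, tokenize=False))
```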
@ -33,7 +33,7 @@ if is_peft_available():
@require_peft
class PeftModelTester(unittest.TestCase):
def setUp(self):
self.causal_lm_model_id = "trl-internal-testing/tiny-random-GPTNeoXForCausalLM"
self.causal_lm_model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
self.lora_config = LoraConfig(
r=16,
lora_alpha=32,
@ -74,12 +74,12 @@ class PeftModelTester(unittest.TestCase):

# Check that the number of trainable parameters is correct
nb_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
self.assertEqual(nb_trainable_params, 10273)
self.assertEqual(nb_trainable_params, 905)

# Check that the number of trainable param for the non-peft model is correct
non_peft_model = AutoModelForCausalLMWithValueHead.from_pretrained(self.causal_lm_model_id)
nb_trainable_params = sum(p.numel() for p in non_peft_model.parameters() if p.requires_grad)
self.assertEqual(nb_trainable_params, 99578)
self.assertEqual(nb_trainable_params, 2428641)

def test_create_peft_model_from_config(self):
r"""
@ -90,13 +90,13 @@ class PeftModelTester(unittest.TestCase):
)
# Check that the number of trainable parameters is correct
nb_trainable_params = sum(p.numel() for p in trl_model.parameters() if p.requires_grad)
self.assertEqual(nb_trainable_params, 10273)
self.assertEqual(nb_trainable_params, 905)

causal_lm_model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id)
trl_model = AutoModelForCausalLMWithValueHead.from_pretrained(causal_lm_model, peft_config=self.lora_config)
# Check that the number of trainable parameters is correct
nb_trainable_params = sum(p.numel() for p in trl_model.parameters() if p.requires_grad)
self.assertEqual(nb_trainable_params, 10273)
self.assertEqual(nb_trainable_params, 905)

@require_torch_gpu_if_bnb_not_multi_backend_enabled
def test_create_bnb_peft_model_from_config(self):
@ -110,7 +110,7 @@ class PeftModelTester(unittest.TestCase):
)
# Check that the number of trainable parameters is correct
nb_trainable_params = sum(p.numel() for p in trl_model.parameters() if p.requires_grad)
self.assertEqual(nb_trainable_params, 10273)
self.assertEqual(nb_trainable_params, 905)
self.assertEqual(trl_model.pretrained_model.model.gpt_neox.layers[0].mlp.dense_h_to_4h.__class__, Linear8bitLt)

causal_lm_model = AutoModelForCausalLM.from_pretrained(
@ -119,7 +119,7 @@ class PeftModelTester(unittest.TestCase):
trl_model = AutoModelForCausalLMWithValueHead.from_pretrained(causal_lm_model, peft_config=self.lora_config)
# Check that the number of trainable parameters is correct
nb_trainable_params = sum(p.numel() for p in trl_model.parameters() if p.requires_grad)
self.assertEqual(nb_trainable_params, 10273)
self.assertEqual(nb_trainable_params, 905)
self.assertEqual(trl_model.pretrained_model.model.gpt_neox.layers[0].mlp.dense_h_to_4h.__class__, Linear8bitLt)

def test_save_pretrained_peft(self):
@ -201,4 +201,4 @@ class PeftModelTester(unittest.TestCase):
model = AutoModelForCausalLMWithValueHead.from_pretrained(tmp_dir, is_trainable=True)
# Check that the number of trainable parameters is correct
nb_trainable_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
self.assertEqual(nb_trainable_params, 10273)
self.assertEqual(nb_trainable_params, 905)
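The hard-coded counts change (10273 to 905, 99578 to 2428641) because the expected numbers depend entirely on the backbone dimensions and the LoRA config. A hedged sketch of how such counts can be recomputed when the test model changes; the `target_modules` choice is an assumption here, and the value head that the TRL wrapper adds means the asserted 905 is not expected to match this number exactly:

```python
from peft import LoraConfig, get_peft_model
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
lora = LoraConfig(r=16, lora_alpha=32, target_modules=["q_proj", "v_proj"])  # assumed modules
peft_model = get_peft_model(base, lora)

trainable = sum(p.numel() for p in peft_model.parameters() if p.requires_grad)
total = sum(p.numel() for p in peft_model.parameters())
print(f"trainable: {trainable} / total: {total}")
```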
@ -27,7 +27,9 @@ python examples/scripts/ppo/ppo.py \
--per_device_train_batch_size 4 \
--gradient_accumulation_steps 1 \
--total_episodes 10 \
--model_name_or_path EleutherAI/pythia-14m \
--model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
--reward_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
--sft_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
--missing_eos_penalty 1.0 \
--save_strategy no \
--stop_token eos
@ -53,7 +55,9 @@ python examples/scripts/ppo/ppo.py \
--per_device_train_batch_size 4 \
--gradient_accumulation_steps 1 \
--num_train_epochs 0.003 \
--model_name_or_path EleutherAI/pythia-14m \
--model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
--reward_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
--sft_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
--missing_eos_penalty 1.0 \
--save_strategy no \
--stop_token eos

@ -31,10 +31,10 @@ if is_peft_available():

class RewardTrainerTester(unittest.TestCase):
def setUp(self):
self.model_id = "hf-internal-testing/tiny-random-LlamaForCausalLM"
self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
self.tokenizer.chat_template = "{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
self.model = AutoModelForSequenceClassification.from_pretrained(self.model_id)
self.model.config.pad_token_id = self.tokenizer.pad_token_id

def test_accuracy_metrics(self):
dummy_eval_predictions = EvalPrediction(torch.FloatTensor([[0.1, 0.9], [0.9, 0.1]]), torch.LongTensor([0, 0]))
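The ChatML template assigned in this setup turns conversational examples into `<|im_start|>`/`<|im_end|>` delimited text before tokenization. A small sketch of what that rendering produces (template copied from the diff above; the example conversation is invented):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
tokenizer.chat_template = (
    "{% for message in messages %}"
    "{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}"
    "{% endfor %}"
    "{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}"
)

example = [{"role": "user", "content": "Hi"}, {"role": "assistant", "content": "Hello!"}]
print(tokenizer.apply_chat_template(example, tokenize=False))
# <|im_start|>user
# Hi<|im_end|>
# <|im_start|>assistant
# Hello!<|im_end|>
```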
@ -33,9 +33,9 @@ python examples/scripts/rloo/rloo.py \
--per_device_train_batch_size 4 \
--gradient_accumulation_steps 1 \
--total_episodes 10 \
--model_name_or_path EleutherAI/pythia-14m \
--sft_model_path EleutherAI/pythia-14m \
--reward_model_path EleutherAI/pythia-14m \
--model_name_or_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
--sft_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
--reward_model_path trl-internal-testing/tiny-Qwen2ForCausalLM-2.5 \
--missing_eos_penalty 1.0 \
--save_strategy no \
--stop_token eos
@ -53,15 +53,13 @@ python examples/scripts/rloo/rloo.py \

class RLOOTrainerTester(unittest.TestCase):
def setUp(self):
self.sft_model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
self.reward_model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"

self.policy_model = AutoModelForCausalLM.from_pretrained(self.sft_model_id)
self.reward_model = AutoModelForSequenceClassification.from_pretrained(self.reward_model_id)
self.policy_ref_model = AutoModelForCausalLM.from_pretrained(self.sft_model_id)
self.policy_model = AutoModelForCausalLM.from_pretrained(self.model_id)
self.reward_model = AutoModelForSequenceClassification.from_pretrained(self.model_id)
self.policy_ref_model = AutoModelForCausalLM.from_pretrained(self.model_id)

self.tokenizer = AutoTokenizer.from_pretrained(self.sft_model_id, padding_side="left")
self.tokenizer.chat_template = "{% for message in messages %}{% if message['role'] == 'user' %}{{ ' ' }}{% endif %}{{ message['content'] }}{% if not loop.last %}{{ ' ' }}{% endif %}{% endfor %}{{ eos_token }}"
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id, padding_side="left")
self.tokenizer.add_special_tokens({"pad_token": "[PAD]"})

def test_rloo_checkpoint(self):
@ -73,7 +71,7 @@ class RLOOTrainerTester(unittest.TestCase):
report_to="none",
)

dummy_text = {"content": "Hello World!", "role": "user"}
dummy_text = [{"content": "Hello World!", "role": "user"}]
dummy_data = self.tokenizer.apply_chat_template(dummy_text)
dummy_dataset = Dataset.from_dict({"input_ids": dummy_data})
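The `dummy_text` change above matters because `apply_chat_template` expects a list of message dicts (one dict per turn), not a bare dict. A minimal sketch of the corrected usage (tokenizer checkpoint reused from the tests above):

```python
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")

# A conversation is a list of {"role", "content"} dicts, even if it has a single turn.
messages = [{"content": "Hello World!", "role": "user"}]
input_ids = tokenizer.apply_chat_template(messages)  # returns a list of token ids by default
print(input_ids[:10])
```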
@ -62,7 +62,7 @@ class SFTTrainerTester(unittest.TestCase):
r""" """

def setUp(self):
self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
self.tokenizer.pad_token = self.tokenizer.eos_token
@ -1146,7 +1146,7 @@ class SFTTrainerTester(unittest.TestCase):
eval_dataset=self.conversational_lm_dataset["test"],
)

self.assertEqual(len(trainer.train_dataset["input_ids"]), 21)
self.assertEqual(len(trainer.train_dataset["input_ids"]), 46)  # w/ this dataset, we end up with 46 seqs
self.assertEqual(len(trainer.eval_dataset["input_ids"]), len(self.conversational_lm_dataset["test"]))

def test_sft_trainer_eval_packing(self):
@ -1171,8 +1171,8 @@ class SFTTrainerTester(unittest.TestCase):
eval_dataset=self.conversational_lm_dataset["test"],
)

self.assertEqual(len(trainer.train_dataset["input_ids"]), 21)
self.assertEqual(len(trainer.eval_dataset["input_ids"]), 2)
self.assertEqual(len(trainer.train_dataset["input_ids"]), 46)  # w/ this dataset, we end up with 46 seqs
self.assertEqual(len(trainer.eval_dataset["input_ids"]), 5)  # w/ this dataset, we end up with 5 seqs

def test_sft_trainer_no_packing(self):
with tempfile.TemporaryDirectory() as tmp_dir:
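The updated counts (21 to 46 train sequences, 2 to 5 eval sequences) follow from how packing works: examples are concatenated and sliced into fixed-length windows, so the number of packed sequences is roughly the total token count divided by `max_seq_length` and changes whenever the dataset or tokenizer changes. A rough, hypothetical helper illustrating that relationship (not part of the test suite):

```python
def expected_packed_sequences(example_token_counts, max_seq_length):
    """Approximate number of constant-length sequences produced by packing."""
    total_tokens = sum(example_token_counts)
    return total_tokens // max_seq_length

# Roughly 9 windows if the tokenized corpus holds ~600 tokens and max_seq_length is 64.
print(expected_packed_sequences([120, 95, 300, 85], max_seq_length=64))
```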
@ -1265,9 +1265,9 @@ class SFTTrainerTester(unittest.TestCase):
report_to="none",
)
tiny_llava = LlavaForConditionalGeneration.from_pretrained(
"trl-internal-testing/tiny-random-LlavaForConditionalGeneration"
"trl-internal-testing/tiny-LlavaForConditionalGeneration"
)
processor = AutoProcessor.from_pretrained("trl-internal-testing/tiny-random-LlavaForConditionalGeneration")
processor = AutoProcessor.from_pretrained("trl-internal-testing/tiny-LlavaForConditionalGeneration")

processor.chat_template = """{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}A chat between a curious user and an artificial intelligence assistant. The assistant gives helpful, detailed, and polite answers to the user's questions. {% for message in messages %}{% if message['role'] == 'user' %}USER: {% else %}ASSISTANT: {% endif %}{% for item in message['content'] %}{% if item['type'] == 'text' %}{{ item['text'] }}{% elif item['type'] == 'image' %}<image>{% endif %}{% endfor %}{% if message['role'] == 'user' %} {% else %}{{eos_token}}{% endif %}{% endfor %}{% if add_generation_prompt %}ASSISTANT: {% endif %}"""

@ -48,7 +48,8 @@ from .testing_utils import require_sklearn
class TrainerArgTester(unittest.TestCase):
@require_sklearn
def test_bco(self):
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
tokenizer = AutoTokenizer.from_pretrained(model_id)
dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train")
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = BCOConfig(
@ -71,7 +72,11 @@ class TrainerArgTester(unittest.TestCase):
max_density_ratio=20.0,
)
trainer = BCOTrainer(
model="gpt2", ref_model="gpt2", args=training_args, train_dataset=dataset, processing_class=tokenizer
model=model_id,
ref_model=model_id,
args=training_args,
train_dataset=dataset,
processing_class=tokenizer,
)
self.assertEqual(trainer.args.max_length, 256)
self.assertEqual(trainer.args.max_prompt_length, 64)
@ -91,7 +96,8 @@ class TrainerArgTester(unittest.TestCase):
self.assertEqual(trainer.args.max_density_ratio, 20.0)

def test_cpo(self):
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
tokenizer = AutoTokenizer.from_pretrained(model_id)
dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = CPOConfig(
@ -113,7 +119,7 @@ class TrainerArgTester(unittest.TestCase):
model_init_kwargs={"trust_remote_code": True},
dataset_num_proc=4,
)
trainer = CPOTrainer(model="gpt2", args=training_args, train_dataset=dataset, processing_class=tokenizer)
trainer = CPOTrainer(model=model_id, args=training_args, train_dataset=dataset, processing_class=tokenizer)
self.assertEqual(trainer.args.max_length, 256)
self.assertEqual(trainer.args.max_prompt_length, 64)
self.assertEqual(trainer.args.max_completion_length, 64)
@ -132,7 +138,8 @@ class TrainerArgTester(unittest.TestCase):
self.assertEqual(trainer.args.dataset_num_proc, 4)

def test_dpo(self):
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
tokenizer = AutoTokenizer.from_pretrained(model_id)
dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = DPOConfig(
@ -166,7 +173,11 @@ class TrainerArgTester(unittest.TestCase):
discopop_tau=0.1,
)
trainer = DPOTrainer(
model="gpt2", ref_model="gpt2", args=training_args, train_dataset=dataset, processing_class=tokenizer
model=model_id,
ref_model=model_id,
args=training_args,
train_dataset=dataset,
processing_class=tokenizer,
)
self.assertEqual(trainer.args.beta, 0.5)
self.assertEqual(trainer.args.label_smoothing, 0.5)
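As the `model=model_id` calls above show, these trainers accept either an instantiated model or a checkpoint id string; when given a string, the model is loaded internally and `model_init_kwargs` from the config is forwarded to `from_pretrained`. A hedged sketch of the two equivalent constructions, using the tiny test checkpoint and dataset from the surrounding tests:

```python
from datasets import load_dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
tokenizer = AutoTokenizer.from_pretrained(model_id)
dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
training_args = DPOConfig(output_dir="tmp_dpo", report_to="none")

# Option 1: pass the checkpoint id and let the trainer instantiate model and ref_model.
trainer = DPOTrainer(
    model=model_id, ref_model=model_id, args=training_args,
    train_dataset=dataset, processing_class=tokenizer,
)

# Option 2: pass already-instantiated models (equivalent for this tiny checkpoint).
model = AutoModelForCausalLM.from_pretrained(model_id)
ref_model = AutoModelForCausalLM.from_pretrained(model_id)
trainer = DPOTrainer(
    model=model, ref_model=ref_model, args=training_args,
    train_dataset=dataset, processing_class=tokenizer,
)
```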
@ -197,7 +208,8 @@ class TrainerArgTester(unittest.TestCase):
self.assertEqual(trainer.args.discopop_tau, 0.1)

def test_kto(self):
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
tokenizer = AutoTokenizer.from_pretrained(model_id)
dataset = load_dataset("trl-internal-testing/zen", "standard_unpaired_preference", split="train")
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = KTOConfig(
@ -219,7 +231,11 @@ class TrainerArgTester(unittest.TestCase):
dataset_num_proc=4,
)
trainer = KTOTrainer(
model="gpt2", ref_model="gpt2", args=training_args, train_dataset=dataset, processing_class=tokenizer
model=model_id,
ref_model=model_id,
args=training_args,
train_dataset=dataset,
processing_class=tokenizer,
)
self.assertEqual(trainer.args.max_length, 256)
self.assertEqual(trainer.args.max_prompt_length, 64)
@ -239,16 +255,17 @@ class TrainerArgTester(unittest.TestCase):

@parameterized.expand([(False,), (True,)])
def test_nash_md(self, mixtures_coef_list):
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
ref_model = AutoModelForCausalLM.from_pretrained(model_id)
reward_model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=1)
dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = NashMDConfig(
tmp_dir,
mixture_coef=0.5 if not mixtures_coef_list else [0.5, 0.6],
)
model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-14m")
ref_model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-14m")
reward_model = AutoModelForSequenceClassification.from_pretrained("EleutherAI/pythia-14m", num_labels=1)
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-14m")
trainer = NashMDTrainer(
args=training_args,
processing_class=tokenizer,
@ -261,6 +278,11 @@ class TrainerArgTester(unittest.TestCase):

@parameterized.expand([(False,), (True,)])
def test_online_dpo(self, beta_list):
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
ref_model = AutoModelForCausalLM.from_pretrained(model_id)
reward_model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=1)
dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = OnlineDPOConfig(
@ -272,10 +294,6 @@ class TrainerArgTester(unittest.TestCase):
loss_type="hinge",
dataset_num_proc=4,
)
model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-14m")
ref_model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-14m")
reward_model = AutoModelForSequenceClassification.from_pretrained("EleutherAI/pythia-14m", num_labels=1)
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-14m")
trainer = OnlineDPOTrainer(
model=model,
ref_model=ref_model,
@ -293,7 +311,8 @@ class TrainerArgTester(unittest.TestCase):
self.assertEqual(trainer.args.dataset_num_proc, 4)

def test_orpo(self):
tokenizer = AutoTokenizer.from_pretrained("gpt2")
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
tokenizer = AutoTokenizer.from_pretrained(model_id)
dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = ORPOConfig(
@ -311,8 +330,9 @@ class TrainerArgTester(unittest.TestCase):
model_init_kwargs={"trust_remote_code": True},
dataset_num_proc=4,
)

trainer = ORPOTrainer(model="gpt2", args=training_args, train_dataset=dataset, processing_class=tokenizer)
trainer = ORPOTrainer(
model=model_id, args=training_args, train_dataset=dataset, processing_class=tokenizer
)
self.assertEqual(trainer.args.max_length, 256)
self.assertEqual(trainer.args.max_prompt_length, 64)
self.assertEqual(trainer.args.max_completion_length, 64)
@ -321,6 +341,9 @@ class TrainerArgTester(unittest.TestCase):
self.assertEqual(trainer.args.label_pad_token_id, -99)

def test_reward(self):
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
dataset = load_dataset("trl-internal-testing/zen", "standard_preference", split="train")
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = RewardConfig(
@ -329,8 +352,6 @@ class TrainerArgTester(unittest.TestCase):
dataset_num_proc=4,
center_rewards_coefficient=0.1,
)
model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-14m")
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-14m")
trainer = RewardTrainer(
model=model,
args=training_args,
@ -342,6 +363,7 @@ class TrainerArgTester(unittest.TestCase):
self.assertEqual(trainer.args.center_rewards_coefficient, 0.1)

def test_sft(self):
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
dataset = load_dataset("trl-internal-testing/zen", "standard_language_modeling", split="train")
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = SFTConfig(
@ -358,7 +380,7 @@ class TrainerArgTester(unittest.TestCase):
num_of_sequences=32,
chars_per_token=4.2,
)
trainer = SFTTrainer("gpt2", args=training_args, train_dataset=dataset)
trainer = SFTTrainer(model_id, args=training_args, train_dataset=dataset)
self.assertEqual(trainer.args.dataset_text_field, "dummy_text_field")
self.assertEqual(trainer.args.packing, True)
self.assertEqual(trainer.args.max_seq_length, 256)
@ -374,16 +396,17 @@ class TrainerArgTester(unittest.TestCase):

@parameterized.expand([(False,), (True,)])
def test_xpo(self, alpha_list):
model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
tokenizer = AutoTokenizer.from_pretrained(model_id)
model = AutoModelForCausalLM.from_pretrained(model_id)
ref_model = AutoModelForCausalLM.from_pretrained(model_id)
reward_model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=1)
dataset = load_dataset("trl-internal-testing/zen", "standard_prompt_only", split="train")
with tempfile.TemporaryDirectory() as tmp_dir:
training_args = XPOConfig(
tmp_dir,
alpha=0.5 if not alpha_list else [0.5, 0.6],
)
model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-14m")
ref_model = AutoModelForCausalLM.from_pretrained("EleutherAI/pythia-14m")
reward_model = AutoModelForSequenceClassification.from_pretrained("EleutherAI/pythia-14m", num_labels=1)
tokenizer = AutoTokenizer.from_pretrained("EleutherAI/pythia-14m")
trainer = XPOTrainer(
args=training_args,
processing_class=tokenizer,

@ -123,7 +123,7 @@ class TestGetPEFTConfig(unittest.TestCase):

class TestDecodeAndStripPadding(unittest.TestCase):
def setUp(self):
self.tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2-0.5B-Instruct")
self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")

def test_example_with_padding(self):
inputs = self.tokenizer(["Hello world", "Hello"], padding=True, return_tensors="pt")
@ -182,7 +182,7 @@ class TestGenerateModelCard(unittest.TestCase):
class TestDataCollatorForChatML(unittest.TestCase):
def setUp(self):
# Initialize the tokenizer
self.tokenizer = AutoTokenizer.from_pretrained("codellama/CodeLlama-7b-Instruct-hf")
self.tokenizer = AutoTokenizer.from_pretrained("trl-internal-testing/tiny-Qwen2ForCausalLM-2.5")
if self.tokenizer.pad_token is None:
self.tokenizer.pad_token = self.tokenizer.eos_token

@ -205,6 +205,8 @@ class TestDataCollatorForChatML(unittest.TestCase):
ignore_index=self.ignore_index,
)

# See https://github.com/huggingface/trl/pull/2287#discussion_r1856594421
@unittest.skip("This test must be updated.")
def test_data_collator_for_chatml(self):
# Process the data
data = self.collator(self.examples)
@ -256,7 +258,7 @@ class TestDataCollatorForChatML(unittest.TestCase):
class TestBatchGeneration(unittest.TestCase):
def setUp(self):
# Initialize the tokenizer
self.model_id = "Qwen/Qwen2-0.5B-Instruct"
self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)

@ -31,10 +31,10 @@ if is_peft_available():

class TestXPOTrainer(unittest.TestCase):
def setUp(self):
self.model_id = "trl-internal-testing/dummy-GPT2-correct-vocab"
self.model_id = "trl-internal-testing/tiny-Qwen2ForCausalLM-2.5"
self.model = AutoModelForCausalLM.from_pretrained(self.model_id)
self.ref_model = AutoModelForCausalLM.from_pretrained(self.model_id)
self.reward_model = AutoModelForSequenceClassification.from_pretrained("EleutherAI/pythia-14m", num_labels=1)
self.reward_model = AutoModelForSequenceClassification.from_pretrained(self.model_id, num_labels=1)
self.tokenizer = AutoTokenizer.from_pretrained(self.model_id)
self.tokenizer.pad_token = self.tokenizer.eos_token