Remove redundant 'None' from docstrings (#4058)

This commit is contained in:
Albert Villanova del Moral
2025-09-11 08:16:34 +02:00
committed by GitHub
parent 7eb7f42372
commit e8b8499f1f
59 changed files with 263 additions and 263 deletions

View File

@ -331,7 +331,7 @@ def replicate_str(string: str, n: int, sep: str = " ") -> str:
for arguments that can be `None` and aren't required:
```python
foo (`Optional[int]`, *optional*, defaults to `None`):
foo (`Optional[int]`, *optional*):
```
* **String Defaults:**

View File

@ -31,7 +31,7 @@ class ScriptArguments:
Whether to push the dataset to the Hugging Face Hub.
repo_id (`str`, *optional*, defaults to `"trl-lib/hh-rlhf-helpful-base"`):
Hugging Face repository ID to push the dataset to.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of workers to use for dataset processing.
"""

View File

@ -31,7 +31,7 @@ class ScriptArguments:
Whether to push the dataset to the Hugging Face Hub.
repo_id (`str`, *optional*, defaults to `"trl-lib/llava-instruct-mix"`):
Hugging Face repository ID to push the dataset to.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of workers to use for dataset processing.
"""

View File

@ -30,7 +30,7 @@ class ScriptArguments:
Whether to push the dataset to the Hugging Face Hub.
repo_id (`str`, *optional*, defaults to `"trl-lib/lm-human-preferences-descriptiveness"`):
Hugging Face repository ID to push the dataset to.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of workers to use for dataset processing.
"""

View File

@ -30,7 +30,7 @@ class ScriptArguments:
Whether to push the dataset to the Hugging Face Hub.
repo_id (`str`, *optional*, defaults to `"trl-lib/lm-human-preferences-sentiment"`):
Hugging Face repository ID to push the dataset to.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of workers to use for dataset processing.
"""

View File

@ -32,7 +32,7 @@ class ScriptArguments:
Whether to push the dataset to the Hugging Face Hub.
repo_id (`str`, *optional*, defaults to `"trl-lib/math_shepherd"`):
Hugging Face repository ID to push the dataset to.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of workers to use for dataset processing.
"""

View File

@ -30,7 +30,7 @@ class ScriptArguments:
Whether to push the dataset to the Hugging Face Hub.
repo_id (`str`, *optional*, defaults to `"trl-lib/prm800k"`):
Hugging Face repository ID to push the dataset to.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of workers to use for dataset processing.
"""

View File

@ -30,7 +30,7 @@ class ScriptArguments:
Whether to push the dataset to the Hugging Face Hub.
repo_id (`str`, *optional*, defaults to `"trl-lib/rlaif-v"`):
Hugging Face repository ID to push the dataset to.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of workers to use for dataset processing.
"""

View File

@ -30,7 +30,7 @@ class ScriptArguments:
Whether to push the dataset to the Hugging Face Hub.
repo_id (`str`, *optional*, defaults to `"trl-lib/tldr"`):
Hugging Face repository ID to push the dataset to.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of workers to use for dataset processing.
"""

View File

@ -30,7 +30,7 @@ class ScriptArguments:
Whether to push the dataset to the Hugging Face Hub.
repo_id (`str`, *optional*, defaults to `"trl-lib/tldr-preference"`):
Hugging Face repository ID to push the dataset to.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of workers to use for dataset processing.
"""

View File

@ -30,7 +30,7 @@ class ScriptArguments:
Whether to push the dataset to the Hugging Face Hub.
repo_id (`str`, *optional*, defaults to `"trl-lib/ultrafeedback-prompt"`):
Hugging Face repository ID to push the dataset to.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of workers to use for dataset processing.
"""

View File

@ -34,7 +34,7 @@ class ScriptArguments:
Whether to push the dataset to the Hugging Face Hub.
repo_id (`str`, *optional*, defaults to `"trl-lib/ultrafeedback-gpt-3.5-turbo-helpfulness"`):
Hugging Face repository ID to push the dataset to.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of workers to use for dataset processing.
"""

View File

@ -63,7 +63,7 @@ class ScriptArguments:
judge_model (`str`, *optional*, defaults to `"meta-llama/Meta-Llama-3-70B-Instruct"`):
Model name or path to the model to use as a judge. E.g., 'gpt-3.5-turbo-0125' or
'meta-llama/Meta-Llama-3-70B-Instruct'.
num_examples (`int` or `None`, *optional*, defaults to `None`):
num_examples (`int`, *optional*):
Number of examples to evaluate.
"""

View File

@ -149,7 +149,7 @@ def ignore_warnings(message: str = None, category: type[Warning] = Warning) -> c
Decorator to ignore warnings with a specific message and/or category.
Args:
message (`str`, *optional*, defaults to `None`):
message (`str`, *optional*):
Regex pattern for the warning message to ignore. If `None`, all messages are ignored.
category (`type[Warning]`, *optional*, defaults to `Warning`):
Warning class to ignore. Defaults to `Warning`, which ignores all warnings.

View File

@ -242,7 +242,7 @@ def maybe_apply_chat_template(
messages, where each message is a dictionary with keys `"role"` and `"content"`.
tokenizer (`PreTrainedTokenizerBase`):
Tokenizer to apply the chat template with.
tools (`list[Union[dict, Callable]]` or `None`, *optional*, defaults to `None`):
tools (`list[Union[dict, Callable]]`, *optional*):
A list of tools (callable functions) that will be accessible to the model. If the template does not support
function calling, this argument will have no effect.
**template_kwargs (`Any`, *optional*):
@ -300,9 +300,9 @@ def unpair_preference_dataset(
dataset (`Dataset` or `DatasetDict`):
Preference dataset to unpair. The dataset must have columns `"chosen"`, `"rejected"` and optionally
`"prompt"`.
num_proc (`int` or `None`, *optional*, defaults to `None`):
num_proc (`int`, *optional*):
Number of processes to use for processing the dataset.
desc (`str` or `None`, *optional*, defaults to `None`):
desc (`str`, *optional*):
Meaningful description to be displayed alongside the progress bar while mapping examples.
Returns:
@ -343,9 +343,9 @@ def maybe_unpair_preference_dataset(
dataset (`Dataset` or `DatasetDict`):
Preference dataset to unpair. The dataset must have columns `"chosen"`, `"rejected"` and optionally
`"prompt"`.
num_proc (`int` or `None`, *optional*, defaults to `None`):
num_proc (`int`, *optional*):
Number of processes to use for processing the dataset.
desc (`str` or `None`, *optional*, defaults to `None`):
desc (`str`, *optional*):
Meaningful description to be displayed alongside the progress bar while mapping examples.
Returns:
@ -644,7 +644,7 @@ def pack_dataset(
middle.
- `"wrapped"`: Faster but more aggressive. Ignores sequence boundaries and will cut sequences in the middle
to completely fill each packed sequence with data.
map_kwargs (`dict` or `None`, *optional*, defaults to `None`):
map_kwargs (`dict`, *optional*):
Additional keyword arguments to pass to the dataset's map method when packing examples.
Returns:
@ -693,7 +693,7 @@ def truncate_dataset(
Dataset to truncate.
max_length (`int`):
Maximum sequence length to truncate to.
map_kwargs (`dict` or `None`, *optional*, defaults to `None`):
map_kwargs (`dict`, *optional*):
Additional keyword arguments to pass to the dataset's map method when truncating examples.
Returns:

View File

@ -51,7 +51,7 @@ class VLLMClient:
weights in a distributed setting. Before using it, start the vLLM server with `trl vllm-serve`.
Args:
base_url (`str` or `None`, *optional*, defaults to `None`):
base_url (`str`, *optional*):
Base URL for the vLLM server (e.g., `"http://localhost:8000"`). If provided, `host` and `server_port` are
ignored.
host (`str`, *optional*, defaults to `"0.0.0.0"`):
@ -185,7 +185,7 @@ class VLLMClient:
Args:
prompts (`list[str]`):
List of text prompts for which the model will generate completions.
images (`list[PIL.Image]` or `None`, *optional*, defaults to `None`):
images (`list[PIL.Image]`, *optional*):
List of PIL Images to send along with the prompts.
n (`int`, *optional*, defaults to `1`):
Number of completions to generate for each prompt.
@ -201,9 +201,9 @@ class VLLMClient:
Minimum probability for sampling.
max_tokens (`int`, *optional*, defaults to `16`):
Maximum number of tokens to generate for each prompt.
guided_decoding_regex (`str` or `None`, *optional*, defaults to `None`):
guided_decoding_regex (`str`, *optional*):
Regular expression to guide the decoding process.
generation_kwargs (`dict` or `None`, *optional*, defaults to `None`):
generation_kwargs (`dict`, *optional*):
Additional generation parameters to pass to the vLLM `SamplingParams`. This can include parameters like
`seed`, `frequency_penalty`, etc. If it contains keys that conflict with the other parameters, they
will override them.

View File

@ -564,7 +564,7 @@ def pipeline_step_with_grad(
Args:
pipeline (`StableDiffusionPipeline`): Pipeline to be used for image generation.
prompt (`str` or `list[str]`, *optional*, defaults to `None`):
prompt (`str` or `list[str]`, *optional*):
The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
instead.
height (`int`, *optional*, defaults to `pipeline.unet.config.sample_size * pipeline.vae_scale_factor`):

View File

@ -62,10 +62,10 @@ class GRPOScriptArguments(ScriptArguments):
Script arguments for the GRPO training script.
Args:
reward_model_name_or_path (`str` or `None`, *optional*, defaults to `None`):
reward_model_name_or_path (`str`, *optional*):
Reward model id of a pretrained model hosted inside a model repo on huggingface.co or local path to a
directory containing model weights saved using [`~transformers.PreTrainedModel.save_pretrained`].
reward_funcs (`list[str]` or `None`, *optional*, defaults to `None`):
reward_funcs (`list[str]`, *optional*):
Reward functions to use. Supported values are:
- `"think_format_reward"`

View File

@ -56,10 +56,10 @@ class RLOOScriptArguments(ScriptArguments):
Script arguments for the RLOO training script.
Args:
reward_model_name_or_path (`str` or `None`, *optional*, defaults to `None`):
reward_model_name_or_path (`str`, *optional*):
Reward model id of a pretrained model hosted inside a model repo on huggingface.co or local path to a
directory containing model weights saved using [`~transformers.PreTrainedModel.save_pretrained`].
reward_funcs (`list[str]` or `None`, *optional*, defaults to `None`):
reward_funcs (`list[str]`, *optional*):
Reward functions to use. It can be either one of `"think_format_reward"`; or a dotted import path (e.g.,
`'my_lib.rewards.custom_reward'`).
"""

View File

@ -45,17 +45,17 @@ class DatasetConfig:
Parameters:
path (`str`):
Path or name of the dataset.
name (`str`, *optional*, defaults to `None`):
name (`str`, *optional*):
Defining the name of the dataset configuration.
data_dir (`str`, *optional*, defaults to `None`):
data_dir (`str`, *optional*):
Defining the `data_dir` of the dataset configuration. If specified for the generic builders(csv, text etc.)
or the Hub datasets and `data_files` is `None`, the behavior is equal to passing `os.path.join(data_dir,
**)` as `data_files` to reference all the files in a directory.
data_files (`str` or `Sequence` or `Mapping`, *optional*, defaults to `None`):
data_files (`str` or `Sequence` or `Mapping`, *optional*):
Path(s) to source data file(s).
split (`str`, *optional*, defaults to `"train"`):
Which split of the data to load.
columns (`list[str]`, *optional*, defaults to `None`):
columns (`list[str]`, *optional*):
List of column names to select from the dataset. If `None`, all columns are selected.
"""
@ -81,7 +81,7 @@ class DatasetMixtureConfig:
List of dataset configurations to include in the mixture.
streaming (`bool`, *optional*, defaults to `False`):
Whether to stream the datasets. If `True`, the datasets will be loaded in streaming mode.
test_split_size (`float` or `None`, *optional*, defaults to `None`):
test_split_size (`float`, *optional*):
Size of the test split. Refer to the `test_size` parameter in the [`~datasets.train_test_split`] function
for more details. If `None`, the dataset will not be split into train and test sets.
@ -137,9 +137,9 @@ class ScriptArguments:
Arguments common to all scripts.
Args:
dataset_name (`str`, or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Path or name of the dataset to load. If `datasets` is provided, this will be ignored.
dataset_config (`str` or `None`, *optional*, defaults to `None`):
dataset_config (`str`, *optional*):
Dataset configuration name. Corresponds to the `name` argument of the [`~datasets.load_dataset`] function.
If `datasets` is provided, this will be ignored.
dataset_train_split (`str`, *optional*, defaults to `"train"`):
@ -230,7 +230,7 @@ class TrlParser(HfArgumentParser):
configurations, while also supporting configuration file loading and environment variable management.
Args:
dataclass_types (`Union[DataClassType, Iterable[DataClassType]]` or `None`, *optional*, defaults to `None`):
dataclass_types (`Union[DataClassType, Iterable[DataClassType]]`, *optional*):
Dataclass types to use for argument parsing.
**kwargs:
Additional keyword arguments passed to the [`transformers.HfArgumentParser`] constructor.

View File

@ -173,7 +173,7 @@ class ScriptArguments:
Args:
model (`str`):
Model name or path to load the model from.
revision (`str` or `None`, *optional*, defaults to `None`):
revision (`str`, *optional*):
Revision to use for the model. If not specified, the default branch will be used.
tensor_parallel_size (`int`, *optional*, defaults to `1`):
Number of tensor parallel workers to use.
@ -191,11 +191,11 @@ class ScriptArguments:
dtype (`str`, *optional*, defaults to `"auto"`):
Data type to use for vLLM generation. If set to `"auto"`, the data type will be automatically determined
based on the model configuration. Find the supported values in the vLLM documentation.
max_model_len (`int` or `None`, *optional*, defaults to `None`):
max_model_len (`int`, *optional*):
If set, the `max_model_len` to use for vLLM. This can be useful when running with reduced
`vllm_gpu_memory_utilization`, leading to a reduced KV cache size. If not set, vLLM will use the model
context size, which might be much larger than the KV cache, leading to inefficiencies.
enable_prefix_caching (`bool` or `None`, *optional*, defaults to `None`):
enable_prefix_caching (`bool`, *optional*):
Whether to enable prefix caching in vLLM. If set to `True`, ensure that the model and the hardware support
this feature.
enforce_eager (`bool`, *optional*, defaults to `False`):

View File

@ -38,7 +38,7 @@ class AlignPropConfig:
Name of this run.
seed (`int`, *optional*, defaults to `0`):
Random seed for reproducibility.
log_with (`str` or `None`, *optional*, defaults to `None`):
log_with (`str`, *optional*):
Log with either `"wandb"` or `"tensorboard"`. Check
[tracking](https://huggingface.co/docs/accelerate/usage_guides/tracking) for more details.
log_image_freq (`int`, *optional*, defaults to `1`):
@ -89,7 +89,7 @@ class AlignPropConfig:
Number of gradient accumulation steps.
train_max_grad_norm (`float`, *optional*, defaults to `1.0`):
Maximum gradient norm for gradient clipping.
negative_prompts (`str` or `None`, *optional*, defaults to `None`):
negative_prompts (`str`, *optional*):
Comma-separated list of prompts to use as negative examples.
truncated_backprop_rand (`bool`, *optional*, defaults to `True`):
If `True`, randomized truncation to different diffusion timesteps is used.

View File

@ -416,11 +416,11 @@ class AlignPropTrainer(PyTorchModelHubMixin):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -37,7 +37,7 @@ class BCOConfig(TrainingArguments):
to use the default data collator.
max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
Maximum length of the prompt. This argument is required if you want to use the default data collator.
max_completion_length (`int` or `None`, *optional*, defaults to `None`):
max_completion_length (`int`, *optional*):
Maximum length of the completion. This argument is required if you want to use the default data collator
and your model is an encoder-decoder.
beta (`float`, *optional*, defaults to `0.1`):
@ -45,7 +45,7 @@ class BCOConfig(TrainingArguments):
reference model.
label_pad_token_id (`int`, *optional*, defaults to `-100`):
Label pad token id. This argument is required if you want to use the default data collator.
padding_value (`int` or `None`, *optional*, defaults to `None`):
padding_value (`int`, *optional*):
Padding value to use. If `None`, the padding value of the tokenizer is used.
truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.
@ -55,19 +55,19 @@ class BCOConfig(TrainingArguments):
generate_during_eval (`bool`, *optional*, defaults to `False`):
If `True`, generates and logs completions from both the model and the reference model to W&B or Comet
during evaluation.
is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`):
is_encoder_decoder (`bool`, *optional*):
When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument,
you need to specify if the model returned by the callable is an encoder-decoder model.
precompute_ref_log_probs (`bool`, *optional*, defaults to `False`):
Whether to precompute reference model log probabilities for training and evaluation datasets. This is
useful when training without the reference model to reduce the total GPU memory needed.
model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
model_init_kwargs (`dict[str, Any]`, *optional*):
Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
string.
ref_model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
ref_model_init_kwargs (`dict[str, Any]`, *optional*):
Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the reference model
from a string.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of processes to use for processing the dataset.
prompt_sample_size (`int`, *optional*, defaults to `1024`):
Number of prompts that are fed to density ratio classifier.

View File

@ -296,11 +296,11 @@ class BCOTrainer(Trainer):
The dataset to use for training.
eval_dataset (`datasets.Dataset`):
The dataset to use for evaluation.
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
Processing class used to process the data. If provided, will be used to automatically process the inputs
for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
reuse the fine-tuned model.
data_collator (`transformers.DataCollator`, *optional*, defaults to `None`):
data_collator (`transformers.DataCollator`, *optional*):
The data collator to use for training. If None is specified, the default data collator
(`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the
sequences in the batch, given a dataset of paired sequences.
@ -1461,7 +1461,7 @@ class BCOTrainer(Trainer):
Args:
logs (`dict[str, float]`):
The values to log.
start_time (`float` or `None`, *optional*, defaults to `None`):
start_time (`float`, *optional*):
Start time of the training.
"""
# logs either has 'loss' or 'eval_loss'
@ -1508,11 +1508,11 @@ class BCOTrainer(Trainer):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -254,7 +254,7 @@ class WinRateCallback(TrainerCallback):
otherwise, it defaults to using the initial model.
generation_config (`GenerationConfig`, *optional*):
The generation config to use for generating completions.
num_prompts (`int` or `None`, *optional*, defaults to `None`):
num_prompts (`int`, *optional*):
The number of prompts to generate completions for. If not provided, defaults to the number of examples in
the evaluation dataset.
shuffle_order (`bool`, *optional*, defaults to `True`):
@ -439,10 +439,10 @@ class LogCompletionsCallback(TrainerCallback):
column containing the prompts for generating completions.
generation_config (`GenerationConfig`, *optional*):
The generation config to use for generating completions.
num_prompts (`int` or `None`, *optional*):
num_prompts (`int`, *optional*):
The number of prompts to generate completions for. If not provided, defaults to the number of examples in
the evaluation dataset.
freq (`int` or `None`, *optional*):
freq (`int`, *optional*):
The frequency at which to log completions. If not provided, defaults to the trainer's `eval_steps`.
"""
@ -520,7 +520,7 @@ class MergeModelCallback(TrainerCallback):
on a merge configuration.
Args:
merge_config ([`MergeConfig`], *optional*, defaults to `None`):
merge_config ([`MergeConfig`], *optional*):
Configuration used for the merging process. If not provided, the default [`MergeConfig`] is used.
merge_at_every_checkpoint (`bool`, *optional*, defaults to `False`):
Whether to merge the model at every checkpoint.

View File

@ -37,7 +37,7 @@ class CPOConfig(TrainingArguments):
to use the default data collator.
max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
Maximum length of the prompt. This argument is required if you want to use the default data collator.
max_completion_length (`int` or `None`, *optional*, defaults to `None`):
max_completion_length (`int`, *optional*):
Maximum length of the completion. This argument is required if you want to use the default data collator
and your model is an encoder-decoder.
beta (`float`, *optional*, defaults to `0.1`):
@ -70,20 +70,20 @@ class CPOConfig(TrainingArguments):
loss types.
label_pad_token_id (`int`, *optional*, defaults to `-100`):
Label pad token id. This argument is required if you want to use the default data collator.
padding_value (`int` or `None`, *optional*, defaults to `None`):
padding_value (`int`, *optional*):
Padding value to use. If `None`, the padding value of the tokenizer is used.
truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.
This argument is required if you want to use the default data collator.
generate_during_eval (`bool`, *optional*, defaults to `False`):
If `True`, generates and logs completions from the model to W&B or Comet during evaluation.
is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`):
is_encoder_decoder (`bool`, *optional*):
When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument,
you need to specify if the model returned by the callable is an encoder-decoder model.
model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
model_init_kwargs (`dict[str, Any]`, *optional*):
Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
string.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of processes to use for processing the dataset.
"""

View File

@ -90,7 +90,7 @@ class CPOTrainer(Trainer):
The dataset to use for training.
eval_dataset (`datasets.Dataset`):
The dataset to use for evaluation.
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
Processing class used to process the data. If provided, will be used to automatically process the inputs
for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
reuse the fine-tuned model.
@ -1027,7 +1027,7 @@ class CPOTrainer(Trainer):
Args:
logs (`dict[str, float]`):
The values to log.
start_time (`float` or `None`, *optional*, defaults to `None`):
start_time (`float`, *optional*):
Start time of the training.
"""
# logs either has 'loss' or 'eval_loss'
@ -1080,11 +1080,11 @@ class CPOTrainer(Trainer):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -38,7 +38,7 @@ class DDPOConfig:
Name of this run.
seed (`int`, *optional*, defaults to `0`):
Random seed.
log_with (`Literal["wandb", "tensorboard"]]` or `None`, *optional*, defaults to `None`):
log_with (`Literal["wandb", "tensorboard"]`, *optional*):
Log with either 'wandb' or 'tensorboard', check
https://huggingface.co/docs/accelerate/usage_guides/tracking for more details.
tracker_kwargs (`Dict`, *optional*, defaults to `{}`):

View File

@ -618,11 +618,11 @@ class DDPOTrainer(PyTorchModelHubMixin):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -46,15 +46,15 @@ class DPOConfig(TrainingArguments):
Parameters:
> Parameters that control the model and reference model
model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
model_init_kwargs (`dict[str, Any]`, *optional*):
Keyword arguments for `AutoModelForCausalLM.from_pretrained`, used when the `model` argument of the
[`DPOTrainer`] is provided as a string.
ref_model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
ref_model_init_kwargs (`dict[str, Any]`, *optional*):
Keyword arguments for `AutoModelForCausalLM.from_pretrained`, used when the `ref_model` argument of the
[`DPOTrainer`] is provided as a string.
model_adapter_name (`str` or `None`, *optional*, defaults to `None`):
model_adapter_name (`str`, *optional*):
Name of the train target PEFT adapter, when using LoRA with multiple adapters.
ref_adapter_name (`str` or `None`, *optional*, defaults to `None`):
ref_adapter_name (`str`, *optional*):
Name of the reference PEFT adapter, when using LoRA with multiple adapters.
force_use_ref_model (`bool`, *optional*, defaults to `False`):
If you provide a PEFT model as the active model and wish to use a different model for the `ref_model`, set
@ -68,15 +68,15 @@ class DPOConfig(TrainingArguments):
> Parameters that control the data preprocessing
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of processes to use for processing the dataset.
padding_value (`int` or `None`, *optional*, defaults to `None`):
padding_value (`int`, *optional*):
Padding value to use. If `None`, the padding value of the tokenizer is used.
label_pad_token_id (`int`, *optional*, defaults to `-100`):
Padding value to use for labels.
max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
Maximum length of the prompt.
max_completion_length (`int` or `None`, *optional*, defaults to `None`):
max_completion_length (`int`, *optional*):
Maximum length of the completion.
max_length (`int` or `None`, *optional*, defaults to `1024`):
Maximum length of the full sequence (prompt + completion).
@ -93,11 +93,11 @@ class DPOConfig(TrainingArguments):
training without needing the reference model during training, which can help reduce GPU memory usage. If
set to `False` (default), the reference model will be used during training to compute log probabilities
on-the-fly.
precompute_ref_batch_size (`int` or `None`, *optional*, defaults to `None`):
precompute_ref_batch_size (`int`, *optional*):
Batch size to use when precomputing reference model log probabilities. This can be set higher than the
training batch size to speed up preprocessing. If `None`, defaults to `per_device_train_batch_size` for
training and `per_device_eval_batch_size` for evaluation.
tools (`Optional[list[Union[dict, Callable]]]`, *optional*, defaults to `None`):
tools (`list[Union[dict, Callable]]`, *optional*):
List of tools (callable functions) that will be accessible to the model. If the template does not support
function calling, this argument will have no effect.
@ -151,11 +151,11 @@ class DPOConfig(TrainingArguments):
DPO](https://huggingface.co/papers/2403.00409) paper that should be between `0.0` and `0.5`.
use_weighting (`bool`, *optional*, defaults to `False`):
Whether to weight the loss as done in the [WPO paper](https://huggingface.co/papers/2406.11827).
rpo_alpha (`float`, *optional*, defaults to `None`):
rpo_alpha (`float`, *optional*):
α parameter from the [RPO paper](https://huggingface.co/papers/2404.19733) (v3), which controls the
weighting of the NLL term in the loss. If `None`, no weighting is applied and the loss is the same as the
DPO loss. The paper recommends `rpo_alpha=1.0`.
ld_alpha (`float` or `None`, *optional*, defaults to `None`):
ld_alpha (`float`, *optional*):
α parameter from the [LD-DPO paper](https://huggingface.co/papers/2409.06411), which controls the weighting
of the verbose token log-probabilities in responses. If `None`, no weighting is applied to the verbose
part, and the loss is equivalent to the standard DPO loss. The paper recommends setting `ld_alpha` between
@ -163,7 +163,7 @@ class DPOConfig(TrainingArguments):
discopop_tau (`float`, *optional*, defaults to `0.05`):
τ/temperature parameter from the [DiscoPOP](https://huggingface.co/papers/2406.08414) paper, which controls
the shape of log ratio modulated loss. The paper recommends the default value `discopop_tau=0.05`.
loss_weights (`list[float]` or `None`, *optional*, defaults to `None`):
loss_weights (`list[float]`, *optional*):
List of loss weights for multi-loss combinations. Used when combining multiple loss types. Example: `[0.8,
0.2, 1.0]` for [MPO](https://huggingface.co/papers/2411.10442). If not provided, defaults to equal weights
(`1.0`) for all loss types.

View File

@ -203,7 +203,7 @@ class DPOTrainer(Trainer):
Hugging Face transformer model with a causal language modelling head. Used for implicit reward computation
and loss. If no reference model is provided, the trainer will create a reference model with the same
architecture as the model to be optimized.
args ([`DPOConfig`], *optional*, defaults to `None`):
args ([`DPOConfig`], *optional*):
Configuration for this trainer. If `None`, a default configuration is used.
data_collator (`DataCollator`, *optional*):
Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`.
@ -217,7 +217,7 @@ class DPOTrainer(Trainer):
and content).
eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
Processing class used to process the data. If `None`, the processing class is loaded from the model's name
with [`~transformers.AutoTokenizer.from_pretrained`].
compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
@ -226,7 +226,7 @@ class DPOTrainer(Trainer):
`True`, your compute_metrics function must take a boolean `compute_result` argument. This will be triggered
after the last eval batch to signal that the function needs to calculate and return the global summary
statistics rather than accumulating the batch-level statistics.
callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`):
callbacks (list of [`~transformers.TrainerCallback`], *optional*):
List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed
in [here](https://huggingface.co/docs/transformers/main_classes/callback).
@ -235,16 +235,16 @@ class DPOTrainer(Trainer):
optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`):
A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on your
model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`.
optimizer_cls_and_kwargs (`Tuple[Type[torch.optim.Optimizer], Dict[str, Any]]`, *optional*, defaults to `None`):
optimizer_cls_and_kwargs (`Tuple[Type[torch.optim.Optimizer], Dict[str, Any]]`, *optional*):
A tuple containing the optimizer class and keyword arguments to use. Overrides `optim` and `optim_args` in
`args`. Incompatible with the `optimizers` argument.
preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*, defaults to `None`):
preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*):
A function that preprocesses the logits right before caching them at each evaluation step. Must take two
tensors, the logits and the labels, and return the logits once processed as desired. The modifications made
by this function will be reflected in the predictions received by `compute_metrics`.
Note that the labels (second parameter) will be `None` if the dataset does not have them.
peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`):
peft_config ([`~peft.PeftConfig`], *optional*):
PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
"""
@ -1929,7 +1929,7 @@ class DPOTrainer(Trainer):
Args:
logs (`dict[str, float]`):
The values to log.
start_time (`float` or `None`, *optional*, defaults to `None`):
start_time (`float`, *optional*):
Start time of the training.
"""
# logs either has 'loss' or 'eval_loss'
@ -1959,11 +1959,11 @@ class DPOTrainer(Trainer):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -39,10 +39,10 @@ class GKDConfig(SFTConfig):
beta is `0.0`, the loss is the KL divergence. When beta is `1.0`, the loss is the Inverse KL Divergence.
max_new_tokens (`int`, *optional*, defaults to `128`):
Maximum number of tokens to generate per completion.
teacher_model_name_or_path (`str` or `None`, *optional*, defaults to `None`):
teacher_model_name_or_path (`str`, *optional*):
Model name or path of the teacher model. If `None`, the teacher model will be the same as the model being
trained.
teacher_model_init_kwargs (`dict[str, Any]]` or `None`, *optional*, defaults to `None`):
teacher_model_init_kwargs (`dict[str, Any]`, *optional*):
Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the teacher model
from a string.
disable_dropout (`bool`, *optional*, defaults to `True`):

View File

@ -435,11 +435,11 @@ class GKDTrainer(SFTTrainer):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -34,7 +34,7 @@ class GRPOConfig(TrainingArguments):
Parameters:
> Parameters that control the model and reference model
model_init_kwargs (`str`, `dict[str, Any]` or `None`, *optional*, defaults to `None`):
model_init_kwargs (`str` or `dict[str, Any]`, *optional*):
Keyword arguments for [`~transformers.AutoModelForCausalLM.from_pretrained`], used when the `model`
argument of the [`GRPOTrainer`] is provided as a string.
disable_dropout (`bool`, *optional*, defaults to `False`):
@ -63,11 +63,11 @@ class GRPOConfig(TrainingArguments):
> Parameters that control generation
generation_batch_size: (`int` or `None`, *optional*, defaults to `None`):
generation_batch_size (`int`, *optional*):
Batch size to use for generation. If `None`, it defaults to the effective training batch size:
`per_device_train_batch_size * num_processes * steps_per_generation`. In other words, there is one
generation batch processed per optimization step. Mutually exclusive with `steps_per_generation`.
steps_per_generation: (`int` or `None`, *optional*, defaults to `None`):
steps_per_generation (`int`, *optional*):
Number of steps per generation. If `None`, it defaults to `gradient_accumulation_steps`. Mutually exclusive
with `generation_batch_size`.
temperature (`float`, defaults to `1.0`):
@ -75,10 +75,10 @@ class GRPOConfig(TrainingArguments):
top_p (`float`, *optional*, defaults to `1.0`):
Float that controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. Set to
`1.0` to consider all tokens.
top_k (`int` or `None`, *optional*, defaults to `None`):
top_k (`int`, *optional*):
Number of highest probability vocabulary tokens to keep for top-k-filtering. If `None`, top-k-filtering is
disabled and all tokens are considered.
min_p (`float` or `None`, *optional*, defaults to `None`):
min_p (`float`, *optional*):
Minimum token probability, which will be scaled by the probability of the most likely token. It must be a
value between `0.0` and `1.0`. Typical values are in the `0.01-0.2` range.
repetition_penalty (`float`, *optional*, defaults to `1.0`):
@ -89,9 +89,9 @@ class GRPOConfig(TrainingArguments):
Whether to use the `transformers` paged implementation for generation. If set to `True`, the `transformers`
paged implementation will be used for generation instead of the default padded implementation. This
parameter is only effective when `use_vllm` is set to `False`.
cache_implementation (`str` or `None`, *optional*, defaults to `None`):
cache_implementation (`str`, *optional*):
Implementation of the cache method for faster generation when `use_vllm` is set to `False`.
generation_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
generation_kwargs (`dict[str, Any]`, *optional*):
Additional keyword arguments to pass to `GenerationConfig` (if using transformers) or `SamplingParams` (if
using vLLM) when sampling completions. This can be used to further customize the generation behavior, such
as setting `suppress_tokens`, `num_beams`, etc. If it contains keys that conflict with the other generation
@ -114,12 +114,12 @@ class GRPOConfig(TrainingArguments):
Model implementation to use for vLLM. Must be one of `"transformers"` or `"vllm"`. `"transformers"`: Use
the `transformers` backend for model implementation. `"vllm"`: Use the `vllm` library for model
implementation.
vllm_guided_decoding_regex (`str` or `None`, *optional*, defaults to `None`):
vllm_guided_decoding_regex (`str`, *optional*):
Regex for vLLM guided decoding. If `None` (default), guided decoding is disabled.
> Parameters that control the vLLM server (only used when `vllm_mode` is `"server"`)
vllm_server_base_url (`str` or `None`, *optional*, defaults to `None`):
vllm_server_base_url (`str`, *optional*):
Base URL for the vLLM server (e.g., `"http://localhost:8000"`). If provided, `vllm_server_host` and
`vllm_server_port` are ignored.
vllm_server_host (`str`, *optional*, defaults to `"0.0.0.0"`):
@ -153,11 +153,11 @@ class GRPOConfig(TrainingArguments):
Number of iterations per batch (denoted as μ in the algorithm).
epsilon (`float`, *optional*, defaults to `0.2`):
Epsilon value for clipping.
delta (`float` or `None`, *optional*, defaults to `None`):
delta (`float`, *optional*):
Enables the upper clipping bound in two-sided GRPO loss when set to a float. If `None` (default), standard
GRPO clipping is used. Recommended to be greater than `1 + ε` when enabled. This method is introduced in
the [INTELLECT-2 tech report](https://huggingface.co/papers/2505.07291).
epsilon_high (`float` or `None`, *optional*, defaults to `None`):
epsilon_high (`float`, *optional*):
Upper-bound epsilon value for clipping. If not specified, it defaults to the same value as the lower-bound
specified in argument `epsilon`. Paper [DAPO](https://huggingface.co/papers/2503.14476) recommends `0.28`.
importance_sampling_level (`str`, *optional*, defaults to `"token"`):
@ -166,7 +166,7 @@ class GRPOConfig(TrainingArguments):
log-probability ratios across valid tokens to produce a single ratio per sequence. The [GSPO
paper](https://huggingface.co/papers/2507.18071) shows that sequence-level sampling often yields more
stable training and better alignment with sequence-level rewards.
reward_weights (`list[float]` or `None`, *optional*, defaults to `None`):
reward_weights (`list[float]`, *optional*):
Weights for each reward function. Must match the number of reward functions. If `None`, all rewards are
weighted equally with weight `1.0`.
scale_rewards (`str` or `bool`, *optional*, defaults to `"group"`):
@ -235,7 +235,7 @@ class GRPOConfig(TrainingArguments):
log_completions (`bool`, *optional*, defaults to `False`):
Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is installed,
it prints the sample. If `wandb` logging is enabled, it logs it to `wandb`.
num_completions_to_print (`int` or `None`, *optional*, defaults to `None`):
num_completions_to_print (`int`, *optional*):
Number of completions to print with `rich`. If `None`, all completions are logged.
wandb_log_unique_prompts (`bool`, *optional*, defaults to `False`):
Whether to log unique prompts in wandb. If `True`, only unique prompts are logged. If `False`, all prompts

View File

@ -162,7 +162,7 @@ class GRPOTrainer(Trainer):
reward function's signature.
- A list of reward functions, where each item can independently be any of the above types. Mixing different
types within the list (e.g., a string model ID and a custom reward function) is allowed.
args ([`GRPOConfig`], *optional*, defaults to `None`):
args ([`GRPOConfig`], *optional*):
Configuration for this trainer. If `None`, a default configuration is used.
train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]):
Dataset to use for training. It must include a column `"prompt"`. Any additional columns in the dataset are
@ -173,12 +173,12 @@ class GRPOTrainer(Trainer):
and content).
eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.ProcessorMixin`] or `None`, *optional*, defaults to `None`):
processing_class ([`~transformers.PreTrainedTokenizerBase`] or [`~transformers.ProcessorMixin`], *optional*):
Processing class used to process the data. The padding side must be set to "left". If `None`, the
processing class is loaded from the model's name with [`~transformers.AutoProcessor.from_pretrained`]. A
padding token, `tokenizer.pad_token`, must be set. If the processing class has not set a padding token,
`tokenizer.eos_token` will be used as the default.
reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*, defaults to `None`):
reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*):
Processing classes corresponding to the reward functions specified in `reward_funcs`. Can be either:
- A single processing class: Used when `reward_funcs` contains only one reward function.
@ -188,7 +188,7 @@ class GRPOTrainer(Trainer):
[`~transformers.AutoTokenizer.from_pretrained`]. For elements in `reward_funcs` that are custom reward
functions (not [`~transformers.PreTrainedModel`]), the corresponding entries in `reward_processing_classes`
are ignored.
callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`):
callbacks (list of [`~transformers.TrainerCallback`], *optional*):
List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed
in [here](https://huggingface.co/docs/transformers/main_classes/callback).
@ -197,7 +197,7 @@ class GRPOTrainer(Trainer):
optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`):
A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on your
model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`.
peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`):
peft_config ([`~peft.PeftConfig`], *optional*):
PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
"""
@ -1825,11 +1825,11 @@ class GRPOTrainer(Trainer):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -40,13 +40,13 @@ class IterativeSFTConfig(TrainingArguments):
Parameters:
> Parameters that control the model
model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
model_init_kwargs (`dict[str, Any]`, *optional*):
Keyword arguments for [`~transformers.AutoModelForCausalLM.from_pretrained`], used when the `model`
argument of the [`IterativeSFTTrainer`] is provided as a string.
> Parameters that control the data preprocessing
max_length (`int` or `None`, *optional*, defaults to `None`):
max_length (`int`, *optional*):
Maximum length of the tokenized sequence. Sequences longer than `max_length` are truncated.
truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
The truncation mode to use, either `"keep_end"` or `"keep_start"`.

View File

@ -74,7 +74,7 @@ class IterativeSFTTrainer(Trainer):
using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in
`args.model_init_kwargs`.
- A [`~transformers.PreTrainedModel`] object. Only causal language models are supported.
args ([`IterativeSFTConfig`], *optional*, defaults to `None`):
args ([`IterativeSFTConfig`], *optional*):
Configuration for this trainer. If `None`, a default configuration is used.
data_collator (`DataCollator`, *optional*):
Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`.
@ -83,7 +83,7 @@ class IterativeSFTTrainer(Trainer):
tokenizer.
eval_dataset (`datasets.Dataset`):
The dataset to use for evaluation.
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
Processing class used to process the data. If `None`, the processing class is loaded from the model's name
with [`~transformers.AutoTokenizer.from_pretrained`].
optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):
@ -457,11 +457,11 @@ class IterativeSFTTrainer(Trainer):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -297,7 +297,7 @@ class HfPairwiseJudge(BasePairwiseJudge):
Model to use for the judge.
token (`str`, *optional*):
Hugging Face API token to use for the [`huggingface_hub.InferenceClient`].
system_prompt (`str` or `None`, *optional*, defaults to `None`):
system_prompt (`str`, *optional*):
The system prompt to be used for the judge. If not provided, a default prompt is used. Note that the system
prompt should contain the following placeholders: `{prompt}`, `{response0}`, and `{response1}`. Also, the
inference is called with `max_tokens=1`, consequently the system prompt should ask for a single token
@ -351,7 +351,7 @@ class OpenAIPairwiseJudge(BasePairwiseJudge):
Args:
model (`str`, *optional*, defaults to `"gpt-4-turbo-preview"`):
Model to use for the judge.
system_prompt (`str` or `None`, *optional*, defaults to `None`):
system_prompt (`str`, *optional*):
System prompt to be used for the judge. If not provided, a default prompt is used. Note that the system
prompt should contain the following placeholders: `{prompt}`, `{response0}`, and `{response1}`. Also, the
inference is called with `max_tokens=1`, consequently the system prompt should ask for a single token

View File

@ -37,7 +37,7 @@ class KTOConfig(TrainingArguments):
to use the default data collator.
max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
Maximum length of the prompt. This argument is required if you want to use the default data collator.
max_completion_length (`int` or `None`, *optional*, defaults to `None`):
max_completion_length (`int`, *optional*):
Maximum length of the completion. This argument is required if you want to use the default data collator
and your model is an encoder-decoder.
beta (`float`, *optional*, defaults to `0.1`):
@ -56,7 +56,7 @@ class KTOConfig(TrainingArguments):
Undesirable losses are weighed by this factor to counter unequal number of desirable and undesirable pairs.
label_pad_token_id (`int`, *optional*, defaults to `-100`):
Label pad token id. This argument is required if you want to use the default data collator.
padding_value (`int` or `None`, *optional*, defaults to `None`):
padding_value (`int`, *optional*):
Padding value to use. If `None`, the padding value of the tokenizer is used.
truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.
@ -64,19 +64,19 @@ class KTOConfig(TrainingArguments):
generate_during_eval (`bool`, *optional*, defaults to `False`):
If `True`, generates and logs completions from both the model and the reference model to W&B or Comet
during evaluation.
is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`):
is_encoder_decoder (`bool`, *optional*):
When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument,
you need to specify if the model returned by the callable is an encoder-decoder model.
precompute_ref_log_probs (`bool`, *optional*, defaults to `False`):
Whether to precompute reference model log probabilities for training and evaluation datasets. This is
useful when training without the reference model to reduce the total GPU memory needed.
model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
model_init_kwargs (`dict[str, Any]`, *optional*):
Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
string.
ref_model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
ref_model_init_kwargs (`dict[str, Any]`, *optional*):
Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the reference model
from a string.
dataset_num_proc: (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of processes to use for processing the dataset.
disable_dropout (`bool`, *optional*, defaults to `True`):
Whether to disable dropout in the model and reference model.

View File

@ -292,11 +292,11 @@ class KTOTrainer(Trainer):
The dataset to use for training.
eval_dataset (`datasets.Dataset`):
The dataset to use for evaluation.
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
Processing class used to process the data. If provided, will be used to automatically process the inputs
for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
reuse the fine-tuned model.
data_collator (`transformers.DataCollator`, *optional*, defaults to `None`):
data_collator (`transformers.DataCollator`, *optional*):
The data collator to use for training. If `None` is specified, the default data collator
(`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the
sequences in the batch, given a dataset of paired sequences.
@ -1641,7 +1641,7 @@ class KTOTrainer(Trainer):
Args:
logs (`dict[str, float]`):
The values to log.
start_time (`float` or `None`, *optional*, defaults to `None`):
start_time (`float`, *optional*):
Start time of the training.
"""
# logs either has 'loss' or 'eval_loss'
@ -1688,11 +1688,11 @@ class KTOTrainer(Trainer):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -27,11 +27,11 @@ class ModelConfig:
command line.
Parameters:
model_name_or_path (`str` or `None`, *optional*, defaults to `None`):
model_name_or_path (`str`, *optional*):
Model checkpoint for weights initialization.
model_revision (`str`, *optional*, defaults to `"main"`):
Specific model version to use. It can be a branch name, a tag name, or a commit id.
dtype (`Literal["auto", "bfloat16", "float16", "float32"]` or `None`, *optional*, defaults to `None`):
dtype (`Literal["auto", "bfloat16", "float16", "float32"]`, *optional*):
Override the default `torch.dtype` and load the model under this dtype. Possible values are
- `"bfloat16"`: `torch.bfloat16`
@ -43,7 +43,7 @@ class ModelConfig:
Whether to allow for custom models defined on the Hub in their own modeling files. This option should only
be set to `True` for repositories you trust and in which you have read the code, as it will execute code
present on the Hub on your local machine.
attn_implementation (`str` or `None`, *optional*, defaults to `None`):
attn_implementation (`str`, *optional*):
Which attention implementation to use. You can run `--attn_implementation=flash_attention_2`, in which case
you must install this manually by running `pip install flash-attn --no-build-isolation`.
use_peft (`bool`, *optional*, defaults to `False`):
@ -54,11 +54,11 @@ class ModelConfig:
LoRA alpha.
lora_dropout (`float`, *optional*, defaults to `0.05`):
LoRA dropout.
lora_target_modules (`Union[str, list[str]]` or `None`, *optional*, defaults to `None`):
lora_target_modules (`Union[str, list[str]]`, *optional*):
LoRA target modules.
lora_target_parameters (`Union[str, list[str]]` or `None`, *optional*, defaults to `None`):
lora_target_parameters (`Union[str, list[str]]`, *optional*):
List of target parameters for LoRA.
lora_modules_to_save (`list[str]` or `None`, *optional*, defaults to `None`):
lora_modules_to_save (`list[str]`, *optional*):
Model layers to unfreeze & train.
lora_task_type (`str`, *optional*, defaults to `"CAUSAL_LM"`):
Task type to pass for LoRA (use `"SEQ_CLS"` for reward modeling).

View File

@ -88,7 +88,7 @@ class NashMDTrainer(OnlineDPOTrainer):
The dataset to use for training.
eval_dataset (`datasets.Dataset`):
The dataset to use for evaluation.
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
Processing class used to process the data. If provided, will be used to automatically process the inputs
for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
reuse the fine-tuned model.
@ -507,11 +507,11 @@ class NashMDTrainer(OnlineDPOTrainer):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -33,9 +33,9 @@ class OnlineDPOConfig(TrainingArguments):
command line.
Parameters:
reward_model_path (`str` or `None`, *optional*, defaults to `None`):
reward_model_path (`str`, *optional*):
Path to the reward model. Either `judge` or `reward_model_path` must be set, but not both.
judge (`str` or `None`, *optional*, defaults to `None`):
judge (`str`, *optional*):
Name of the judge to use. Either `judge` or `reward_model_path` must be set, but not both.
max_new_tokens (`int`, *optional*, defaults to `64`):
Maximum number of tokens to generate per completion.
@ -45,7 +45,7 @@ class OnlineDPOConfig(TrainingArguments):
possible.
temperature (`float`, *optional*, defaults to `0.9`):
Temperature for sampling. The higher the temperature, the more random the completions.
missing_eos_penalty (`float` or `None`, *optional*, defaults to `None`):
missing_eos_penalty (`float`, *optional*):
Penalty applied to the score when the model fails to generate an EOS token. This is useful to encourage the model to
generate completions shorter than the maximum length (`max_new_tokens`). The penalty must be a positive
value. This parameter only works when using `reward_funcs` and not when using `judge`.
@ -60,7 +60,7 @@ class OnlineDPOConfig(TrainingArguments):
- `"sigmoid"`: sigmoid loss from the original [DPO](https://huggingface.co/papers/2305.18290) paper.
- `"ipo"`: IPO loss from the [IPO](https://huggingface.co/papers/2310.12036) paper.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of processes to use for processing the dataset.
disable_dropout (`bool`, *optional*, defaults to `True`):
Whether to disable dropout in the model and reference model.
@ -70,10 +70,10 @@ class OnlineDPOConfig(TrainingArguments):
top_p (`float`, *optional*, defaults to `1.0`):
Float that controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. Set to
`1.0` to consider all tokens.
top_k (`int` or `None`, *optional*, defaults to `None`):
top_k (`int`, *optional*):
Number of highest probability vocabulary tokens to keep for top-k-filtering. If `None`, top-k-filtering is
disabled and all tokens are considered.
min_p (`float` or `None`, *optional*, defaults to `None`):
min_p (`float`, *optional*):
Minimum token probability, which will be scaled by the probability of the most likely token. It must be a
value between `0.0` and `1.0`. Typical values are in the `0.01-0.2` range.
repetition_penalty (`float`, *optional*, defaults to `1.0`):
@ -84,9 +84,9 @@ class OnlineDPOConfig(TrainingArguments):
Whether to use the `transformers` paged implementation for generation. If set to `True`, the `transformers`
paged implementation will be used for generation instead of the default padded implementation. This
parameter is only effective when `use_vllm` is set to `False`.
cache_implementation (`str` or `None`, *optional*, defaults to `None`):
cache_implementation (`str`, *optional*):
Implementation of the cache method for faster generation when `use_vllm` is set to `False`.
generation_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
generation_kwargs (`dict[str, Any]`, *optional*):
Additional keyword arguments to pass to `GenerationConfig` (if using transformers) or `SamplingParams` (if
using vLLM) when sampling completions. This can be used to further customize the generation behavior, such
as setting `suppress_tokens`, `num_beams`, etc. If it contains keys that conflict with the other generation
@ -109,12 +109,12 @@ class OnlineDPOConfig(TrainingArguments):
server is running (start with `trl vllm-serve`).
- `"colocate"`: vLLM will run in the same process and share the training GPUs. This avoids the need for a
separate server but may cause resource contention with training.
vllm_guided_decoding_regex (`str` or `None`, *optional*, defaults to `None`):
vllm_guided_decoding_regex (`str`, *optional*):
Regex for vLLM guided decoding. If `None` (default), guided decoding is disabled.
> Parameters that control the vLLM server (only used when `vllm_mode` is `"server"`)
vllm_server_base_url (`str` or `None`, *optional*, defaults to `None`):
vllm_server_base_url (`str`, *optional*):
Base URL for the vLLM server (e.g., `"http://localhost:8000"`). If provided, `vllm_server_host` and
`vllm_server_port` are ignored.
vllm_server_host (`str`, *optional*, defaults to `"0.0.0.0"`):
@ -143,7 +143,7 @@ class OnlineDPOConfig(TrainingArguments):
improving generation speed. However, disabling this option allows training models that exceed the VRAM
capacity of a single GPU, albeit at the cost of slower generation. Disabling this option is not compatible
with vLLM generation.
model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
model_init_kwargs (`dict[str, Any]`, *optional*):
Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
string.
"""

View File

@ -125,7 +125,7 @@ class OnlineDPOTrainer(Trainer):
model.
judge (`BasePairwiseJudge`):
The judge to use for pairwise comparison of model completions.
reward_funcs (`Union[RewardFunc, list[RewardFunc]]`, *optional*, defaults to `None`):
reward_funcs (`Union[RewardFunc, list[RewardFunc]]`, *optional*):
Reward functions to be used for computing the rewards. To compute the rewards, we call all the reward
functions with the prompts and completions and sum the rewards. Can be either:
@ -144,11 +144,11 @@ class OnlineDPOTrainer(Trainer):
The dataset to use for training.
eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
The dataset to use for evaluation.
processing_class ([`~transformers.PreTrainedTokenizerBase`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
processing_class ([`~transformers.PreTrainedTokenizerBase`] or [`~transformers.ProcessorMixin`], *optional*):
Processing class used to process the data. If provided, will be used to automatically process the inputs
for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
reuse the fine-tuned model.
reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*, defaults to `None`):
reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*):
Processing classes corresponding to the reward functions specified in `reward_funcs`. Can be either:
- A single processing class: Used when `reward_funcs` contains only one reward function.
@ -156,7 +156,7 @@ class OnlineDPOTrainer(Trainer):
If set to `None`, the tokenizer for each model-based reward function is automatically loaded using
[`~transformers.AutoTokenizer.from_pretrained`].
peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`):
peft_config ([`~peft.PeftConfig`], *optional*):
PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
The function to use to compute the metrics. Must take an `EvalPrediction` and return a dictionary string to
@ -1519,11 +1519,11 @@ class OnlineDPOTrainer(Trainer):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -37,7 +37,7 @@ class ORPOConfig(TrainingArguments):
to use the default data collator.
max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
Maximum length of the prompt. This argument is required if you want to use the default data collator.
max_completion_length (`int` or `None`, *optional*, defaults to `None`):
max_completion_length (`int`, *optional*):
Maximum length of the completion. This argument is required if you want to use the default data collator
and your model is an encoder-decoder.
beta (`float`, *optional*, defaults to `0.1`):
@ -48,20 +48,20 @@ class ORPOConfig(TrainingArguments):
Whether to disable dropout in the model.
label_pad_token_id (`int`, *optional*, defaults to `-100`):
Label pad token id. This argument is required if you want to use the default data collator.
padding_value (`int` or `None`, *optional*, defaults to `None`):
padding_value (`int`, *optional*):
Padding value to use. If `None`, the padding value of the tokenizer is used.
truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.
This argument is required if you want to use the default data collator.
generate_during_eval (`bool`, *optional*, defaults to `False`):
If `True`, generates and logs completions from the model to W&B or Comet during evaluation.
is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`):
is_encoder_decoder (`bool`, *optional*):
When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument,
you need to specify if the model returned by the callable is an encoder-decoder model.
model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
model_init_kwargs (`dict[str, Any]`, *optional*):
Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
string.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of processes to use for processing the dataset.
"""

View File

@ -94,7 +94,7 @@ class ORPOTrainer(Trainer):
The dataset to use for training.
eval_dataset (`datasets.Dataset`):
The dataset to use for evaluation.
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
Processing class used to process the data. If provided, will be used to automatically process the inputs
for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
reuse the fine-tuned model.
@ -989,7 +989,7 @@ class ORPOTrainer(Trainer):
Args:
logs (`dict[str, float]`):
The values to log.
start_time (`float` or `None`, *optional*, defaults to `None`):
start_time (`float`, *optional*):
Start time of the training.
"""
# logs either has 'loss' or 'eval_loss'
@ -1042,11 +1042,11 @@ class ORPOTrainer(Trainer):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -37,9 +37,9 @@ class PPOConfig(OnPolicyConfig):
Name of this experiment.
reward_model_path (`str`, *optional*, defaults to `"EleutherAI/pythia-160m"`):
Path to the reward model.
model_adapter_name (`str` or `None`, *optional*, defaults to `None`):
model_adapter_name (`str`, *optional*):
Name of the train target PEFT adapter, when using LoRA with multiple adapters.
ref_adapter_name (`str` or `None`, *optional*, defaults to `None`):
ref_adapter_name (`str`, *optional*):
Name of the reference PEFT adapter, when using LoRA with multiple adapters.
num_ppo_epochs (`int`, *optional*, defaults to `4`):
Number of epochs to train.

View File

@ -804,11 +804,11 @@ class PPOTrainer(Trainer):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -36,7 +36,7 @@ class PRMConfig(TrainingArguments):
Maximum length of the sequences (prompt + completion) used for truncation.
max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
Maximum length of the prompt used for truncation.
max_completion_length (`int` or `None`, *optional*, defaults to `None`):
max_completion_length (`int`, *optional*):
Maximum length of the completion used for truncation. The completion is the concatenation of the steps.
disable_dropout (`bool`, *optional*, defaults to `True`):
Whether to disable dropout in the model.
@ -44,7 +44,7 @@ class PRMConfig(TrainingArguments):
Separator used to separate each step of the reasoning process.
train_on_last_step_only (`bool`, *optional*, defaults to `False`):
Whether to train only on the last step.
dataset_num_proc (`int`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of processes to use for processing the dataset.
"""

View File

@ -66,7 +66,7 @@ class PRMTrainer(Trainer):
The dataset to use for training.
eval_dataset (`datasets.Dataset`):
The dataset to use for evaluation.
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
Processing class used to process the data. If provided, will be used to automatically process the inputs
for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
reuse the fine-tuned model.
@ -299,11 +299,11 @@ class PRMTrainer(Trainer):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -37,9 +37,9 @@ class RewardConfig(TrainingArguments):
limit. This argument is required if you want to use the default data collator.
disable_dropout (`bool`, *optional*, defaults to `True`):
Whether to disable dropout in the model.
dataset_num_proc (`int`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of processes to use for processing the dataset.
center_rewards_coefficient (`float`, *optional*, defaults to `None`):
center_rewards_coefficient (`float`, *optional*):
Coefficient to incentivize the reward model to output mean-zero rewards (proposed by
https://huggingface.co/papers/2312.09244, Eq. 2). Recommended value: `0.01`.
remove_unused_columns (`bool`, *optional*, defaults to `False`):

View File

@ -368,11 +368,11 @@ class RewardTrainer(Trainer):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -35,7 +35,7 @@ class RLOOConfig(TrainingArguments):
Parameters:
> Parameters that control the model and reference model
model_init_kwargs (`str`, `dict[str, Any]` or `None`, *optional*, defaults to `None`):
model_init_kwargs (`str` or `dict[str, Any]`, *optional*):
Keyword arguments for [`~transformers.AutoModelForCausalLM.from_pretrained`], used when the `model`
argument of the [`RLOOTrainer`] is provided as a string.
disable_dropout (`bool`, *optional*, defaults to `False`):
@ -64,11 +64,11 @@ class RLOOConfig(TrainingArguments):
> Parameters that control generation
generation_batch_size: (`int` or `None`, *optional*, defaults to `None`):
generation_batch_size (`int`, *optional*):
Batch size to use for generation. If `None`, it defaults to the effective training batch size:
`per_device_train_batch_size * num_processes * steps_per_generation`. In other words, there is one
generation batch processed per optimization step. Mutually exclusive with `steps_per_generation`.
steps_per_generation: (`int` or `None`, *optional*, defaults to `None`):
steps_per_generation (`int`, *optional*):
Number of steps per generation. If `None`, it defaults to `gradient_accumulation_steps`. Mutually exclusive
with `generation_batch_size`.
temperature (`float`, defaults to `1.0`):
@ -76,10 +76,10 @@ class RLOOConfig(TrainingArguments):
top_p (`float`, *optional*, defaults to `1.0`):
Float that controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. Set to
`1.0` to consider all tokens.
top_k (`int` or `None`, *optional*, defaults to `None`):
top_k (`int`, *optional*):
Number of highest probability vocabulary tokens to keep for top-k-filtering. If `None`, top-k-filtering is
disabled and all tokens are considered.
min_p (`float` or `None`, *optional*, defaults to `None`):
min_p (`float`, *optional*):
Minimum token probability, which will be scaled by the probability of the most likely token. It must be a
value between `0.0` and `1.0`. Typical values are in the `0.01-0.2` range.
repetition_penalty (`float`, *optional*, defaults to `1.0`):
@ -90,9 +90,9 @@ class RLOOConfig(TrainingArguments):
Whether to use the `transformers` paged implementation for generation. If set to `True`, the `transformers`
paged implementation will be used for generation instead of the default padded implementation. This
parameter is only effective when `use_vllm` is set to `False`.
cache_implementation (`str` or `None`, *optional*, defaults to `None`):
cache_implementation (`str`, *optional*):
Implementation of the cache method for faster generation when `use_vllm` is set to `False`.
generation_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
generation_kwargs (`dict[str, Any]`, *optional*):
Additional keyword arguments to pass to `GenerationConfig` (if using transformers) or `SamplingParams` (if
using vLLM) when sampling completions. This can be used to further customize the generation behavior, such
as setting `suppress_tokens`, `num_beams`, etc. If it contains keys that conflict with the other generation
@ -115,12 +115,12 @@ class RLOOConfig(TrainingArguments):
Model implementation to use for vLLM. Must be one of `"transformers"` or `"vllm"`. `"transformers"`: Use
the `transformers` backend for model implementation. `"vllm"`: Use the `vllm` library for model
implementation.
vllm_guided_decoding_regex (`str` or `None`, *optional*, defaults to `None`):
vllm_guided_decoding_regex (`str`, *optional*):
Regex for vLLM guided decoding. If `None` (default), guided decoding is disabled.
> Parameters that control the vLLM server (only used when `vllm_mode` is `"server"`)
vllm_server_base_url (`str` or `None`, *optional*, defaults to `None`):
vllm_server_base_url (`str`, *optional*):
Base URL for the vLLM server (e.g., `"http://localhost:8000"`). If provided, `vllm_server_host` and
`vllm_server_port` are ignored.
vllm_server_host (`str`, *optional*, defaults to `"0.0.0.0"`):
@ -151,16 +151,16 @@ class RLOOConfig(TrainingArguments):
Number of iterations per batch (denoted as μ in the algorithm).
epsilon (`float`, *optional*, defaults to `0.2`):
Epsilon value for clipping.
epsilon_high (`float` or `None`, *optional*, defaults to `None`):
epsilon_high (`float`, *optional*):
Upper-bound epsilon value for clipping. If not specified, it defaults to the same value as the lower-bound
specified in argument `epsilon`. Paper [DAPO](https://huggingface.co/papers/2503.14476) recommends `0.28`.
reward_weights (`list[float]` or `None`, *optional*, defaults to `None`):
reward_weights (`list[float]`, *optional*):
Weights for each reward function. Must match the number of reward functions. If `None`, all rewards are
weighted equally with weight `1.0`.
normalize_advantages (`bool`, *optional*, defaults to `False`):
Whether to normalize advantages. Normalization is done per generation batch to have mean `0.0` and standard
deviation of `1.0`.
reward_clip_range (`tuple[float, float]` or `None`, *optional*, defaults to `None`):
reward_clip_range (`tuple[float, float]`, *optional*):
Clip range for rewards as (min, max). If `None`, no clipping is applied.
mask_truncated_completions (`bool`, *optional*, defaults to `False`):
When enabled, truncated completions are excluded from the loss calculation, preventing them from being
@ -185,7 +185,7 @@ class RLOOConfig(TrainingArguments):
log_completions (`bool`, *optional*, defaults to `False`):
Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is installed,
it prints the sample. If `wandb` logging is enabled, it logs it to `wandb`.
num_completions_to_print (`int` or `None`, *optional*, defaults to `None`):
num_completions_to_print (`int`, *optional*):
Number of completions to print with `rich`. If `None`, all completions are logged.
wandb_log_unique_prompts (`bool`, *optional*, defaults to `False`):
Whether to log unique prompts in wandb. If `True`, only unique prompts are logged. If `False`, all prompts

View File

@ -160,7 +160,7 @@ class RLOOTrainer(Trainer):
reward function's signature.
- A list of reward functions, where each item can independently be any of the above types. Mixing different
types within the list (e.g., a string model ID and a custom reward function) is allowed.
args ([`RLOOConfig`], *optional*, defaults to `None`):
args ([`RLOOConfig`], *optional*):
Configuration for this trainer. If `None`, a default configuration is used.
train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]):
Dataset to use for training. It must include a column `"prompt"`. Any additional columns in the dataset are
@ -171,12 +171,12 @@ class RLOOTrainer(Trainer):
and content).
eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.ProcessorMixin`] or `None`, *optional*, defaults to `None`):
processing_class ([`~transformers.PreTrainedTokenizerBase`] or [`~transformers.ProcessorMixin`], *optional*):
Processing class used to process the data. The padding side must be set to "left". If `None`, the
processing class is loaded from the model's name with [`~transformers.AutoProcessor.from_pretrained`]. A
padding token, `tokenizer.pad_token`, must be set. If the processing class has not set a padding token,
`tokenizer.eos_token` will be used as the default.
reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*, defaults to `None`):
reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*):
Processing classes corresponding to the reward functions specified in `reward_funcs`. Can be either:
- A single processing class: Used when `reward_funcs` contains only one reward function.
@ -186,7 +186,7 @@ class RLOOTrainer(Trainer):
[`~transformers.AutoTokenizer.from_pretrained`]. For elements in `reward_funcs` that are custom reward
functions (not [`~transformers.PreTrainedModel`]), the corresponding entries in `reward_processing_classes`
are ignored.
callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`):
callbacks (list of [`~transformers.TrainerCallback`], *optional*):
List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed
in [here](https://huggingface.co/docs/transformers/main_classes/callback).
@ -195,7 +195,7 @@ class RLOOTrainer(Trainer):
optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`):
A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on your
model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`.
peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`):
peft_config ([`~peft.PeftConfig`], *optional*):
PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
"""
@ -1452,11 +1452,11 @@ class RLOOTrainer(Trainer):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -34,12 +34,12 @@ class SFTConfig(TrainingArguments):
Parameters:
> Parameters that control the model
model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
model_init_kwargs (`dict[str, Any]`, *optional*):
Keyword arguments for [`~transformers.AutoModelForCausalLM.from_pretrained`], used when the `model`
argument of the [`SFTTrainer`] is provided as a string. If you're training a MoE architecture and want to
include the load balancing/auxiliary loss as a part of the final loss, remember to set
`output_router_logits=True` in this dictionary.
chat_template_path (`str` or `None`, *optional*, defaults to `None`):
chat_template_path (`str`, *optional*):
If specified, sets the model's chat template. This can either be the path to a tokenizer (local directory
or Hugging Face Hub model) or a direct path to a Jinja template file. When using a Jinja file, you must
ensure that any special tokens referenced in the template are added to the tokenizer and that the model's
@ -49,16 +49,16 @@ class SFTConfig(TrainingArguments):
dataset_text_field (`str`, *optional*, defaults to `"text"`):
Name of the column that contains text data in the dataset.
dataset_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
dataset_kwargs (`dict[str, Any]`, *optional*):
Dictionary of optional keyword arguments for the dataset preparation. The only supported key is
`skip_prepare_dataset`. When the model is a VLM, `skip_prepare_dataset` is automatically treated as `True`
regardless of the provided value, since preprocessing is done on the fly.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of processes to use for processing the dataset.
eos_token (`str` or `None`, *optional*, defaults to `None`):
eos_token (`str`, *optional*):
Token used to indicate the end of a turn or sequence. If `None`, it defaults to
`processing_class.eos_token`.
pad_token (`int` or `None`, *optional*, defaults to `None`):
pad_token (`int`, *optional*):
Token used for padding. If `None`, it defaults to `processing_class.pad_token`, or if that is also `None`,
it falls back to `processing_class.eos_token`.
max_length (`int` or `None`, *optional*, defaults to `1024`):
@ -75,14 +75,14 @@ class SFTConfig(TrainingArguments):
supported with the FlashAttention 2 or 3, which can efficiently handle the flattened batch structure. When
packing is enabled with strategy `"bfd"`, padding-free is enabled, regardless of the value of this
parameter.
pad_to_multiple_of (`int` or `None`, *optional*, defaults to `None`):
pad_to_multiple_of (`int`, *optional*):
If set, the sequences will be padded to a multiple of this value.
eval_packing (`bool` or `None`, *optional*, defaults to `None`):
eval_packing (`bool`, *optional*):
Whether to pack the eval dataset. If `None`, uses the same value as `packing`.
> Parameters that control the training
completion_only_loss (`bool` or `None`, *optional*, defaults to `None`):
completion_only_loss (`bool`, *optional*):
Whether to compute loss only on the completion part of the sequence. If set to `True`, loss is computed
only on the completion, which is supported only for [prompt-completion](#prompt-completion) datasets. If
`False`, loss is computed on the entire sequence. If `None` (default), the behavior depends on the dataset:

View File

@ -132,7 +132,7 @@ class DataCollatorForLanguageModeling(DataCollatorMixin):
padding_free (`bool`, *optional*, defaults to `False`):
If set to `True`, the sequences will be flattened into a single sequence, and the position IDs will be
generated accordingly.
pad_to_multiple_of (`int` or `None`, *optional*, defaults to `None`):
pad_to_multiple_of (`int`, *optional*):
If set, the sequences will be padded to a multiple of this value.
return_tensors (`str`, *optional*, defaults to `"pt"`):
Type of Tensor to return. Only `"pt"` is currently supported.
@ -524,9 +524,9 @@ class SFTTrainer(Trainer):
- A [`~transformers.PreTrainedModel`] object.
If you're training a model with an MoE architecture and want to include the load balancing/auxiliary loss
as a part of the final loss, remember to set the `output_router_logits` config of the model to `True`.
args ([`SFTConfig`], *optional*, defaults to `None`):
args ([`SFTConfig`], *optional*):
Configuration for this trainer. If `None`, a default configuration is used.
data_collator ([`~transformers.DataCollator`] or `None`, *optional*):
data_collator ([`~transformers.DataCollator`], *optional*):
Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`.
Will default to [`~trainer.sft_trainer.DataCollatorForLanguageModeling`] if the model is a language model
and [`~trainer.sft_trainer.DataCollatorForVisionLanguageModeling`] if the model is a vision-language model.
@ -541,23 +541,23 @@ class SFTTrainer(Trainer):
The trainer also supports processed datasets (tokenized) as long as they contain an `input_ids` field.
eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.ProcessorMixin`] or `None`, *optional*, defaults to `None`):
processing_class ([`~transformers.PreTrainedTokenizerBase`] or [`~transformers.ProcessorMixin`], *optional*):
Processing class used to process the data. If `None`, the processing class is loaded from the model's name
with [`~transformers.AutoProcessor.from_pretrained`]. A padding token, `tokenizer.pad_token`, must be set.
If the processing class has not set a padding token, `tokenizer.eos_token` will be used as the default.
compute_loss_func (`Callable` or `None`, *optional*, defaults to `None`):
compute_loss_func (`Callable`, *optional*):
A function that accepts the raw model outputs, labels, and the number of items in the entire accumulated
batch (batch_size * gradient_accumulation_steps) and returns the loss. For example, see the default [loss
function](https://github.com/huggingface/transformers/blob/052e652d6d53c2b26ffde87e039b723949a53493/src/transformers/trainer.py#L3618)
used by [`Trainer`].
compute_metrics (`Callable[[EvalPrediction], dict]` or `None`, *optional*, defaults to `None`):
compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
The function that will be used to compute metrics at evaluation. Must take a
[`~transformers.EvalPrediction`] and return a dictionary string to metric values. When passing
[`SFTConfig`] with `batch_eval_metrics` set to `True`, your `compute_metrics` function must take a boolean
`compute_result` argument. This will be triggered after the last eval batch to signal that the function
needs to calculate and return the global summary statistics rather than accumulating the batch-level
statistics.
callbacks (list of [`~transformers.TrainerCallback`] or `None`, *optional*, defaults to `None`):
callbacks (list of [`~transformers.TrainerCallback`], *optional*):
List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed
in [here](https://huggingface.co/docs/transformers/main_classes/callback).
@ -566,21 +566,21 @@ class SFTTrainer(Trainer):
optimizers (`tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]]`, *optional*, defaults to `(None, None)`):
A tuple containing the optimizer and the scheduler to use. Will default to an instance of `AdamW` on your
model and a scheduler given by [`~transformers.get_linear_schedule_with_warmup`] controlled by `args`.
optimizer_cls_and_kwargs (`tuple[Type[torch.optim.Optimizer], Dict[str, Any]]`, *optional*, defaults to `None`):
optimizer_cls_and_kwargs (`tuple[Type[torch.optim.Optimizer], Dict[str, Any]]`, *optional*):
A tuple containing the optimizer class and keyword arguments to use. Overrides `optim` and `optim_args` in
`args`. Incompatible with the `optimizers` argument.
Unlike `optimizers`, this argument avoids the need to place model parameters on the correct devices before
initializing the Trainer.
preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*, defaults to `None`):
preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*):
A function that preprocesses the logits right before caching them at each evaluation step. Must take two
tensors, the logits and the labels, and return the logits once processed as desired. The modifications made
by this function will be reflected in the predictions received by `compute_metrics`.
Note that the labels (second parameter) will be `None` if the dataset does not have them.
peft_config ([`~peft.PeftConfig`] or `None`, *optional*, defaults to `None`):
peft_config ([`~peft.PeftConfig`], *optional*):
PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
formatting_func (`Callable` or `None`, *optional*, defaults to `None`):
formatting_func (`Callable`, *optional*):
Formatting function applied to the dataset before tokenization. Applying the formatting function explicitly
converts the dataset into a [language modeling](#language-modeling) type.
"""
@ -1220,11 +1220,11 @@ class SFTTrainer(Trainer):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():

View File

@ -262,7 +262,7 @@ def pad(
Value to use for padding. Default is 0.
padding_side (`str`):
Side on which to add padding. Must be 'left' or 'right'. Default is 'right'.
pad_to_multiple_of (`int`, *optional*, defaults to `None`):
pad_to_multiple_of (`int`, *optional*):
If set, the sequence is padded to a multiple of the provided value.
Returns:
@ -709,13 +709,13 @@ class OnPolicyConfig(TrainingArguments):
command line.
Parameters:
run_name (`str` or `None`, *optional*, defaults to `None`):
run_name (`str`, *optional*):
Name of the run.
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
dataset_num_proc (`int`, *optional*):
Number of processes to use for processing the dataset.
num_mini_batches (`int`, *optional*, defaults to `1`):
Number of minibatches to split a batch into.
total_episodes (`int` or `None`, *optional*, defaults to `None`):
total_episodes (`int`, *optional*):
Total number of episodes in the dataset.
local_rollout_forward_batch_size (`int`, *optional*, defaults to `64`):
Per-rank batch size for the no-grad forward pass in the rollout phase.
@ -723,38 +723,38 @@ class OnPolicyConfig(TrainingArguments):
Number of debugging sample generations (i.e., `generate_completions` calls) throughout training.
response_length (`int`, *optional*, defaults to `53`):
Length of the response.
stop_token (`str` or `None`, *optional*, defaults to `None`):
stop_token (`str`, *optional*):
Specifies the stop token to use for text generation. This parameter is mutually exclusive with
`stop_token_id`.
- `None`: No stop token is applied, unless `stop_token_id` is specified.
- `'eos'`: Uses the tokenizer's `eos_token`.
stop_token_id (`int` or `None`, *optional*, defaults to `None`):
stop_token_id (`int`, *optional*):
Specifies the ID of the stop token to use for text generation. If `None`, no stop token ID is applied,
unless `stop_token` is specified. This parameter is mutually exclusive with `stop_token`.
temperature (`float`, *optional*, defaults to `0.7`):
Sampling temperature.
missing_eos_penalty (`float` or `None`, *optional*, defaults to `None`):
missing_eos_penalty (`float`, *optional*):
Penalty applied to the score when the model fails to generate an EOS token. This is useful for encouraging the
model to generate completions shorter than the maximum length (`max_new_tokens`). The penalty must be a positive
value.
sft_model_path (`str`, *optional*, defaults to `"EleutherAI/pythia-160m"`):
Path to the SFT model.
world_size (`int` or `None`, *optional*, defaults to `None`):
world_size (`int`, *optional*):
Number of processes (GPUs) to use for the training.
num_total_batches (`int` or `None`, *optional*, defaults to `None`):
num_total_batches (`int`, *optional*):
Number of total batches to train.
micro_batch_size (`int` or `None`, *optional*, defaults to `None`):
micro_batch_size (`int`, *optional*):
Micro batch size across devices (HF's `per_device_train_batch_size` * `world_size`).
local_batch_size (`int` or `None`, *optional*, defaults to `None`):
local_batch_size (`int`, *optional*):
Batch size per GPU (HF's `per_device_train_batch_size` * `gradient_accumulation_steps`).
batch_size (`int` or `None`, *optional*, defaults to `None`):
batch_size (`int`, *optional*):
Batch size across devices (HF's `per_device_train_batch_size` * `world_size` *
`gradient_accumulation_steps`).
local_mini_batch_size (`int` or `None`, *optional*, defaults to `None`):
local_mini_batch_size (`int`, *optional*):
Mini batch size per GPU.
mini_batch_size (`int` or `None`, *optional*, defaults to `None`):
mini_batch_size (`int`, *optional*):
Mini batch size across GPUs.
push_to_hub (`bool`, *optional*, defaults to `False`):
Whether to push the model to the Hub after training.
@ -1539,7 +1539,7 @@ def print_prompt_completions_sample(
List of advantages corresponding to the prompts and completions.
step (`int`):
Current training step number, used in the output title.
num_samples (`int` or `None`, *optional*, defaults to `None`):
num_samples (`int`, *optional*):
Number of random samples to display. If `None` (default), all items will be displayed.
Example:
@ -1616,7 +1616,7 @@ class RepeatSampler(Sampler):
Number of times to repeat the full sampling process.
shuffle (`bool`, *optional*, defaults to `True`):
Whether to shuffle the dataset.
seed (`int` or `None`, *optional*, defaults to `None`):
seed (`int`, *optional*):
Random seed for reproducibility (only affects this sampler).
Example:

View File

@ -88,7 +88,7 @@ class XPOTrainer(OnlineDPOTrainer):
The dataset to use for training.
eval_dataset (`datasets.Dataset`):
The dataset to use for evaluation.
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
Processing class used to process the data. If provided, will be used to automatically process the inputs
for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
reuse the fine-tuned model.
@ -555,11 +555,11 @@ class XPOTrainer(OnlineDPOTrainer):
Creates a draft of a model card using the information available to the `Trainer`.
Args:
model_name (`str` or `None`, *optional*, defaults to `None`):
model_name (`str`, *optional*):
Name of the model.
dataset_name (`str` or `None`, *optional*, defaults to `None`):
dataset_name (`str`, *optional*):
Name of the dataset used for training.
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
tags (`str` or `list[str]`, *optional*):
Tags to be associated with the model card.
"""
if not self.is_world_process_zero():