Mirror of https://github.com/huggingface/trl.git, synced 2025-10-20 18:43:52 +08:00

Remove redundant 'None' from docstrings (#4058)

committed by GitHub
parent 7eb7f42372
commit e8b8499f1f
@@ -331,7 +331,7 @@ def replicate_str(string: str, n: int, sep: str = " ") -> str:
 for arguments that can be `None` and aren't required:

 ```python
-foo (`Optional[int]`, *optional*, defaults to `None`):
+foo (`Optional[int]`, *optional*):
 ```

 * **String Defaults:**
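To make the convention concrete, here is a hedged sketch of a full docstring written in the new style; the `prefix` parameter and the function body are illustrative additions and are not taken from the style guide itself:

```python
def replicate_str(string: str, n: int, sep: str = " ", prefix: str | None = None) -> str:
    """
    Replicate a string `n` times.

    Args:
        string (`str`):
            String to replicate.
        n (`int`):
            Number of times to replicate the string.
        sep (`str`, *optional*, defaults to `" "`):
            Separator to use between the replicated strings.
        prefix (`str`, *optional*):
            Prefix to prepend to the result. If `None`, no prefix is added.
    """
    result = sep.join([string] * n)
    return result if prefix is None else prefix + result
```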
@@ -31,7 +31,7 @@ class ScriptArguments:
 Whether to push the dataset to the Hugging Face Hub.
 repo_id (`str`, *optional*, defaults to `"trl-lib/hh-rlhf-helpful-base"`):
 Hugging Face repository ID to push the dataset to.
-dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
+dataset_num_proc (`int`, *optional*):
 Number of workers to use for dataset processing.
 """

@@ -31,7 +31,7 @@ class ScriptArguments:
 Whether to push the dataset to the Hugging Face Hub.
 repo_id (`str`, *optional*, defaults to `"trl-lib/llava-instruct-mix"`):
 Hugging Face repository ID to push the dataset to.
-dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
+dataset_num_proc (`int`, *optional*):
 Number of workers to use for dataset processing.
 """

@@ -30,7 +30,7 @@ class ScriptArguments:
 Whether to push the dataset to the Hugging Face Hub.
 repo_id (`str`, *optional*, defaults to `"trl-lib/lm-human-preferences-descriptiveness"`):
 Hugging Face repository ID to push the dataset to.
-dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
+dataset_num_proc (`int`, *optional*):
 Number of workers to use for dataset processing.
 """

@@ -30,7 +30,7 @@ class ScriptArguments:
 Whether to push the dataset to the Hugging Face Hub.
 repo_id (`str`, *optional*, defaults to `"trl-lib/lm-human-preferences-sentiment"`):
 Hugging Face repository ID to push the dataset to.
-dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
+dataset_num_proc (`int`, *optional*):
 Number of workers to use for dataset processing.
 """

@@ -32,7 +32,7 @@ class ScriptArguments:
 Whether to push the dataset to the Hugging Face Hub.
 repo_id (`str`, *optional*, defaults to `"trl-lib/math_shepherd"`):
 Hugging Face repository ID to push the dataset to.
-dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
+dataset_num_proc (`int`, *optional*):
 Number of workers to use for dataset processing.
 """

@@ -30,7 +30,7 @@ class ScriptArguments:
 Whether to push the dataset to the Hugging Face Hub.
 repo_id (`str`, *optional*, defaults to `"trl-lib/prm800k"`):
 Hugging Face repository ID to push the dataset to.
-dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
+dataset_num_proc (`int`, *optional*):
 Number of workers to use for dataset processing.
 """

@@ -30,7 +30,7 @@ class ScriptArguments:
 Whether to push the dataset to the Hugging Face Hub.
 repo_id (`str`, *optional*, defaults to `"trl-lib/rlaif-v"`):
 Hugging Face repository ID to push the dataset to.
-dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
+dataset_num_proc (`int`, *optional*):
 Number of workers to use for dataset processing.
 """

@@ -30,7 +30,7 @@ class ScriptArguments:
 Whether to push the dataset to the Hugging Face Hub.
 repo_id (`str`, *optional*, defaults to `"trl-lib/tldr"`):
 Hugging Face repository ID to push the dataset to.
-dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
+dataset_num_proc (`int`, *optional*):
 Number of workers to use for dataset processing.
 """

@@ -30,7 +30,7 @@ class ScriptArguments:
 Whether to push the dataset to the Hugging Face Hub.
 repo_id (`str`, *optional*, defaults to `"trl-lib/tldr-preference"`):
 Hugging Face repository ID to push the dataset to.
-dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
+dataset_num_proc (`int`, *optional*):
 Number of workers to use for dataset processing.
 """

@@ -30,7 +30,7 @@ class ScriptArguments:
 Whether to push the dataset to the Hugging Face Hub.
 repo_id (`str`, *optional*, defaults to `"trl-lib/ultrafeedback-prompt"`):
 Hugging Face repository ID to push the dataset to.
-dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
+dataset_num_proc (`int`, *optional*):
 Number of workers to use for dataset processing.
 """

@@ -34,7 +34,7 @@ class ScriptArguments:
 Whether to push the dataset to the Hugging Face Hub.
 repo_id (`str`, *optional*, defaults to `"trl-lib/ultrafeedback-gpt-3.5-turbo-helpfulness"`):
 Hugging Face repository ID to push the dataset to.
-dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
+dataset_num_proc (`int`, *optional*):
 Number of workers to use for dataset processing.
 """
@@ -63,7 +63,7 @@ class ScriptArguments:
 judge_model (`str`, *optional*, defaults to `"meta-llama/Meta-Llama-3-70B-Instruct"`):
 Model name or path to the model to use as a judge. E.g., 'gpt-3.5-turbo-0125' or
 'meta-llama/Meta-Llama-3-70B-Instruct'.
-num_examples (`int` or `None`, *optional*, defaults to `None`):
+num_examples (`int`, *optional*):
 Number of examples to evaluate.
 """
@@ -149,7 +149,7 @@ def ignore_warnings(message: str = None, category: type[Warning] = Warning) -> c
 Decorator to ignore warnings with a specific message and/or category.

 Args:
-message (`str`, *optional*, defaults to `None`):
+message (`str`, *optional*):
 Regex pattern for the warning message to ignore. If `None`, all messages are ignored.
 category (`type[Warning]`, *optional*, defaults to `Warning`):
 Warning class to ignore. Defaults to `Warning`, which ignores all warnings.
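For context, a self-contained sketch of what a decorator with the documented `message`/`category` behavior could look like; this is a minimal re-implementation for illustration, not TRL's actual code:

```python
import functools
import warnings
from typing import Callable


def ignore_warnings(message: str = None, category: type[Warning] = Warning) -> Callable:
    """Sketch only: suppress warnings matching `message` (a regex) and/or `category`
    while the decorated function runs, mirroring the documented arguments."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            with warnings.catch_warnings():
                if message is not None:
                    warnings.filterwarnings("ignore", message=message, category=category)
                else:
                    warnings.filterwarnings("ignore", category=category)
                return func(*args, **kwargs)
        return wrapper
    return decorator


@ignore_warnings(message=r".*deprecated.*", category=FutureWarning)
def noisy():
    # This warning is filtered out by the decorator above.
    warnings.warn("this call is deprecated", FutureWarning)
```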
@@ -242,7 +242,7 @@ def maybe_apply_chat_template(
 messages, where each message is a dictionary with keys `"role"` and `"content"`.
 tokenizer (`PreTrainedTokenizerBase`):
 Tokenizer to apply the chat template with.
-tools (`list[Union[dict, Callable]]` or `None`, *optional*, defaults to `None`):
+tools (`list[Union[dict, Callable]]`, *optional*):
 A list of tools (callable functions) that will be accessible to the model. If the template does not support
 function calling, this argument will have no effect.
 **template_kwargs (`Any`, *optional*):

@@ -300,9 +300,9 @@ def unpair_preference_dataset(
 dataset (`Dataset` or `DatasetDict`):
 Preference dataset to unpair. The dataset must have columns `"chosen"`, `"rejected"` and optionally
 `"prompt"`.
-num_proc (`int` or `None`, *optional*, defaults to `None`):
+num_proc (`int`, *optional*):
 Number of processes to use for processing the dataset.
-desc (`str` or `None`, *optional*, defaults to `None`):
+desc (`str`, *optional*):
 Meaningful description to be displayed alongside with the progress bar while mapping examples.

 Returns:

@@ -343,9 +343,9 @@ def maybe_unpair_preference_dataset(
 dataset (`Dataset` or `DatasetDict`):
 Preference dataset to unpair. The dataset must have columns `"chosen"`, `"rejected"` and optionally
 `"prompt"`.
-num_proc (`int` or `None`, *optional*, defaults to `None`):
+num_proc (`int`, *optional*):
 Number of processes to use for processing the dataset.
-desc (`str` or `None`, *optional*, defaults to `None`):
+desc (`str`, *optional*):
 Meaningful description to be displayed alongside with the progress bar while mapping examples.

 Returns:

@@ -644,7 +644,7 @@ def pack_dataset(
 middle.
 - `"wrapped"`: Faster but more aggressive. Ignores sequence boundaries and will cut sequences in the middle
 to completely fill each packed sequence with data.
-map_kwargs (`dict` or `None`, *optional*, defaults to `None`):
+map_kwargs (`dict`, *optional*):
 Additional keyword arguments to pass to the dataset's map method when packing examples.

 Returns:

@@ -693,7 +693,7 @@ def truncate_dataset(
 Dataset to truncate.
 max_length (`int`):
 Maximum sequence length to truncate to.
-map_kwargs (`dict` or `None`, *optional*, defaults to `None`):
+map_kwargs (`dict`, *optional*):
 Additional keyword arguments to pass to the dataset's map method when truncating examples.

 Returns:
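For reference, a minimal usage sketch of `unpair_preference_dataset` based only on the arguments listed above; the tiny in-memory dataset and the top-level `trl` import are assumptions for illustration:

```python
from datasets import Dataset
from trl import unpair_preference_dataset  # top-level import assumed

# A paired preference dataset with the documented columns.
pairs = Dataset.from_dict({
    "prompt": ["What color is the sky?"],
    "chosen": ["It is blue."],
    "rejected": ["It is green."],
})

# `num_proc` and `desc` are plain *optional* arguments and can simply be omitted.
unpaired = unpair_preference_dataset(pairs, num_proc=2, desc="Unpairing")
```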
@@ -51,7 +51,7 @@ class VLLMClient:
 weights in a distributed setting. Before using it, start the vLLM server with `trl vllm-serve`.

 Args:
-base_url (`str` or `None`, *optional*, defaults to `None`):
+base_url (`str`, *optional*):
 Base URL for the vLLM server (e.g., `"http://localhost:8000"`). If provided, `host` and `server_port` are
 ignored.
 host (`str`, *optional*, defaults to `"0.0.0.0"`):

@@ -185,7 +185,7 @@ class VLLMClient:
 Args:
 prompts (`list[str]`):
 List of text prompts for which the model will generate completions.
-images (`list[PIL.Image]` or `None`, *optional*, defaults to `None`):
+images (`list[PIL.Image]`, *optional*):
 List of PIL Images to send along with the prompts.
 n (`int`, *optional*, defaults to `1`):
 Number of completions to generate for each prompt.

@@ -201,9 +201,9 @@ class VLLMClient:
 Minimum probability for sampling.
 max_tokens (`int`, *optional*, defaults to `16`):
 Maximum number of tokens to generate for each prompt.
-guided_decoding_regex (`str` or `None`, *optional*, defaults to `None`):
+guided_decoding_regex (`str`, *optional*):
 Regular expression to guide the decoding process.
-generation_kwargs (`dict` or `None`, *optional*, defaults to `None`):
+generation_kwargs (`dict`, *optional*):
 Additional generation parameters to pass to the vLLM `SamplingParams`. This can include parameters like
 `seed`, `frequency_penalty`, etc. If it contains keys that conflict with the other parameters, they
 will override them.
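A hedged usage sketch of the `VLLMClient` arguments documented above; it assumes a server already started with `trl vllm-serve` and that the import path below is still current:

```python
from trl.extras.vllm_client import VLLMClient  # import path assumed

# When `base_url` is given, `host` and `server_port` are ignored (per the docstring).
client = VLLMClient(base_url="http://localhost:8000")

output = client.generate(
    prompts=["The capital of France is"],
    n=1,
    max_tokens=16,
    # `guided_decoding_regex` and `generation_kwargs` are plain *optional* and can be omitted.
)
```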
@@ -564,7 +564,7 @@ def pipeline_step_with_grad(

 Args:
 pipeline (`StableDiffusionPipeline`): Pipeline to be used for image generation.
-prompt (`str` or `list[str]`, *optional*, defaults to `None`):
+prompt (`str` or `list[str]`, *optional*):
 The prompt or prompts to guide the image generation. If not defined, one has to pass `prompt_embeds`
 instead.
 height (`int`, *optional*, defaults to `pipeline.unet.config.sample_size * pipeline.vae_scale_factor`):
@@ -62,10 +62,10 @@ class GRPOScriptArguments(ScriptArguments):
 Script arguments for the GRPO training script.

 Args:
-reward_model_name_or_path (`str` or `None`, *optional*, defaults to `None`):
+reward_model_name_or_path (`str`, *optional*):
 Reward model id of a pretrained model hosted inside a model repo on huggingface.co or local path to a
 directory containing model weights saved using [`~transformers.PreTrainedModel.save_pretrained`].
-reward_funcs (`list[str]` or `None`, *optional*, defaults to `None`):
+reward_funcs (`list[str]`, *optional*):
 Reward functions to use. Supported values are:

 - `"think_format_reward"`

@@ -56,10 +56,10 @@ class RLOOScriptArguments(ScriptArguments):
 Script arguments for the RLOO training script.

 Args:
-reward_model_name_or_path (`str` or `None`, *optional*, defaults to `None`):
+reward_model_name_or_path (`str`, *optional*):
 Reward model id of a pretrained model hosted inside a model repo on huggingface.co or local path to a
 directory containing model weights saved using [`~transformers.PreTrainedModel.save_pretrained`].
-reward_funcs (`list[str]` or `None`, *optional*, defaults to `None`):
+reward_funcs (`list[str]`, *optional*):
 Reward functions to use. It can be either one of `"think_format_reward"`; or a dotted import path " (e.g.,
 'my_lib.rewards.custom_reward'`).
 """
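A hedged sketch of what the dotted-path reward function referenced above (`my_lib.rewards.custom_reward`) might look like; the `(prompts, completions, **kwargs) -> list[float]` contract is assumed from TRL's custom-reward convention and is not spelled out in this diff:

```python
# my_lib/rewards.py (hypothetical module matching the dotted path in the docstring)

def custom_reward(prompts, completions, **kwargs):
    """Toy reward that prefers short completions; signature assumed, values illustrative."""
    return [1.0 if len(completion) < 200 else 0.0 for completion in completions]
```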
@@ -45,17 +45,17 @@ class DatasetConfig:
 Parameters:
 path (`str`):
 Path or name of the dataset.
-name (`str`, *optional*, defaults to `None`):
+name (`str`, *optional*):
 Defining the name of the dataset configuration.
-data_dir (`str`, *optional*, defaults to `None`):
+data_dir (`str`, *optional*):
 Defining the `data_dir` of the dataset configuration. If specified for the generic builders(csv, text etc.)
 or the Hub datasets and `data_files` is `None`, the behavior is equal to passing `os.path.join(data_dir,
 **)` as `data_files` to reference all the files in a directory.
-data_files (`str` or `Sequence` or `Mapping`, *optional*, defaults to `None`):
+data_files (`str` or `Sequence` or `Mapping`, *optional*):
 Path(s) to source data file(s).
 split (`str`, *optional*, defaults to `"train"`):
 Which split of the data to load.
-columns (`list[str]`, *optional*, defaults to `None`):
+columns (`list[str]`, *optional*):
 List of column names to select from the dataset. If `None`, all columns are selected.
 """

@@ -81,7 +81,7 @@ class DatasetMixtureConfig:
 List of dataset configurations to include in the mixture.
 streaming (`bool`, *optional*, defaults to `False`):
 Whether to stream the datasets. If `True`, the datasets will be loaded in streaming mode.
-test_split_size (`float` or `None`, *optional*, defaults to `None`):
+test_split_size (`float`, *optional*):
 Size of the test split. Refer to the `test_size` parameter in the [`~datasets.train_test_split`] function
 for more details. If `None`, the dataset will not be split into train and test sets.

@@ -137,9 +137,9 @@ class ScriptArguments:
 Arguments common to all scripts.

 Args:
-dataset_name (`str`, or `None`, *optional*, defaults to `None`):
+dataset_name (`str`,, *optional*):
 Path or name of the dataset to load. If `datasets` is provided, this will be ignored.
-dataset_config (`str` or `None`, *optional*, defaults to `None`):
+dataset_config (`str`, *optional*):
 Dataset configuration name. Corresponds to the `name` argument of the [`~datasets.load_dataset`] function.
 If `datasets` is provided, this will be ignored.
 dataset_train_split (`str`, *optional*, defaults to `"train"`):

@@ -230,7 +230,7 @@ class TrlParser(HfArgumentParser):
 configurations, while also supporting configuration file loading and environment variable management.

 Args:
-dataclass_types (`Union[DataClassType, Iterable[DataClassType]]` or `None`, *optional*, defaults to `None`):
+dataclass_types (`Union[DataClassType, Iterable[DataClassType]]`, *optional*):
 Dataclass types to use for argument parsing.
 **kwargs:
 Additional keyword arguments passed to the [`transformers.HfArgumentParser`] constructor.
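A hedged sketch of `TrlParser` with the `dataclass_types` argument documented above; the example dataclass, the CLI values, and the assumption that `parse_args_and_config` accepts an explicit argument list (as `HfArgumentParser` does) are illustrative:

```python
from dataclasses import dataclass, field

from trl import TrlParser


@dataclass
class MyArguments:
    learning_rate: float = field(default=5e-5)
    dataset_name: str = field(default="trl-lib/tldr")


parser = TrlParser(dataclass_types=[MyArguments])
(my_args,) = parser.parse_args_and_config(args=["--learning_rate", "1e-4"])
print(my_args.learning_rate)  # 0.0001
```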
@@ -173,7 +173,7 @@ class ScriptArguments:
 Args:
 model (`str`):
 Model name or path to load the model from.
-revision (`str` or `None`, *optional*, defaults to `None`):
+revision (`str`, *optional*):
 Revision to use for the model. If not specified, the default branch will be used.
 tensor_parallel_size (`int`, *optional*, defaults to `1`):
 Number of tensor parallel workers to use.

@@ -191,11 +191,11 @@ class ScriptArguments:
 dtype (`str`, *optional*, defaults to `"auto"`):
 Data type to use for vLLM generation. If set to `"auto"`, the data type will be automatically determined
 based on the model configuration. Find the supported values in the vLLM documentation.
-max_model_len (`int` or `None`, *optional*, defaults to `None`):
+max_model_len (`int`, *optional*):
 If set, the `max_model_len` to use for vLLM. This can be useful when running with reduced
 `vllm_gpu_memory_utilization`, leading to a reduced KV cache size. If not set, vLLM will use the model
 context size, which might be much larger than the KV cache, leading to inefficiencies.
-enable_prefix_caching (`bool` or `None`, *optional*, defaults to `None`):
+enable_prefix_caching (`bool`, *optional*):
 Whether to enable prefix caching in vLLM. If set to `True`, ensure that the model and the hardware support
 this feature.
 enforce_eager (`bool`, *optional*, defaults to `False`):
@@ -38,7 +38,7 @@ class AlignPropConfig:
 Name of this run.
 seed (`int`, *optional*, defaults to `0`):
 Random seed for reproducibility.
-log_with (`str` or `None`, *optional*, defaults to `None`):
+log_with (`str`, *optional*):
 Log with either `"wandb"` or `"tensorboard"`. Check
 [tracking](https://huggingface.co/docs/accelerate/usage_guides/tracking) for more details.
 log_image_freq (`int`, *optional*, defaults to `1`):

@@ -89,7 +89,7 @@ class AlignPropConfig:
 Number of gradient accumulation steps.
 train_max_grad_norm (`float`, *optional*, defaults to `1.0`):
 Maximum gradient norm for gradient clipping.
-negative_prompts (`str` or `None`, *optional*, defaults to `None`):
+negative_prompts (`str`, *optional*):
 Comma-separated list of prompts to use as negative examples.
 truncated_backprop_rand (`bool`, *optional*, defaults to `True`):
 If `True`, randomized truncation to different diffusion timesteps is used.

@@ -416,11 +416,11 @@ class AlignPropTrainer(PyTorchModelHubMixin):
 Creates a draft of a model card using the information available to the `Trainer`.

 Args:
-model_name (`str` or `None`, *optional*, defaults to `None`):
+model_name (`str`, *optional*):
 Name of the model.
-dataset_name (`str` or `None`, *optional*, defaults to `None`):
+dataset_name (`str`, *optional*):
 Name of the dataset used for training.
-tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
+tags (`str`, `list[str]`, *optional*):
 Tags to be associated with the model card.
 """
 if not self.is_world_process_zero():
@@ -37,7 +37,7 @@ class BCOConfig(TrainingArguments):
 to use the default data collator.
 max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
 Maximum length of the prompt. This argument is required if you want to use the default data collator.
-max_completion_length (`int` or `None`, *optional*, defaults to `None`):
+max_completion_length (`int`, *optional*):
 Maximum length of the completion. This argument is required if you want to use the default data collator
 and your model is an encoder-decoder.
 beta (`float`, *optional*, defaults to `0.1`):

@@ -45,7 +45,7 @@ class BCOConfig(TrainingArguments):
 reference model.
 label_pad_token_id (`int`, *optional*, defaults to `-100`):
 Label pad token id. This argument is required if you want to use the default data collator.
-padding_value (`int` or `None`, *optional*, defaults to `None`):
+padding_value (`int`, *optional*):
 Padding value to use. If `None`, the padding value of the tokenizer is used.
 truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
 Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.

@@ -55,19 +55,19 @@ class BCOConfig(TrainingArguments):
 generate_during_eval (`bool`, *optional*, defaults to `False`):
 If `True`, generates and logs completions from both the model and the reference model to W&B or Comet
 during evaluation.
-is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`):
+is_encoder_decoder (`bool`, *optional*):
 When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument,
 you need to specify if the model returned by the callable is an encoder-decoder model.
 precompute_ref_log_probs (`bool`, *optional*, defaults to `False`):
 Whether to precompute reference model log probabilities for training and evaluation datasets. This is
 useful when training without the reference model to reduce the total GPU memory needed.
-model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
+model_init_kwargs (`dict[str, Any]`, *optional*):
 Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
 string.
-ref_model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
+ref_model_init_kwargs (`dict[str, Any]`, *optional*):
 Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the reference model
 from a string.
-dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
+dataset_num_proc (`int`, *optional*):
 Number of processes to use for processing the dataset.
 prompt_sample_size (`int`, *optional*, defaults to `1024`):
 Number of prompts that are fed to density ratio classifier.

@@ -296,11 +296,11 @@ class BCOTrainer(Trainer):
 The dataset to use for training.
 eval_dataset (`datasets.Dataset`):
 The dataset to use for evaluation.
-processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
+processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
 Processing class used to process the data. If provided, will be used to automatically process the inputs
 for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
 reuse the fine-tuned model.
-data_collator (`transformers.DataCollator`, *optional*, defaults to `None`):
+data_collator (`transformers.DataCollator`, *optional*):
 The data collator to use for training. If None is specified, the default data collator
 (`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the
 sequences in the batch, given a dataset of paired sequences.

@@ -1461,7 +1461,7 @@ class BCOTrainer(Trainer):
 Args:
 logs (`dict[str, float]`):
 The values to log.
-start_time (`float` or `None`, *optional*, defaults to `None`):
+start_time (`float`, *optional*):
 Start time of the training.
 """
 # logs either has 'loss' or 'eval_loss'

@@ -1508,11 +1508,11 @@ class BCOTrainer(Trainer):
 Creates a draft of a model card using the information available to the `Trainer`.

 Args:
-model_name (`str` or `None`, *optional*, defaults to `None`):
+model_name (`str`, *optional*):
 Name of the model.
-dataset_name (`str` or `None`, *optional*, defaults to `None`):
+dataset_name (`str`, *optional*):
 Name of the dataset used for training.
-tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
+tags (`str`, `list[str]`, *optional*):
 Tags to be associated with the model card.
 """
 if not self.is_world_process_zero():
@@ -254,7 +254,7 @@ class WinRateCallback(TrainerCallback):
 otherwise, it defaults to using the initial model.
 generation_config (`GenerationConfig`, *optional*):
 The generation config to use for generating completions.
-num_prompts (`int` or `None`, *optional*, defaults to `None`):
+num_prompts (`int`, *optional*):
 The number of prompts to generate completions for. If not provided, defaults to the number of examples in
 the evaluation dataset.
 shuffle_order (`bool`, *optional*, defaults to `True`):

@@ -439,10 +439,10 @@ class LogCompletionsCallback(TrainerCallback):
 column containing the prompts for generating completions.
 generation_config (`GenerationConfig`, *optional*):
 The generation config to use for generating completions.
-num_prompts (`int` or `None`, *optional*):
+num_prompts (`int`, *optional*):
 The number of prompts to generate completions for. If not provided, defaults to the number of examples in
 the evaluation dataset.
-freq (`int` or `None`, *optional*):
+freq (`int`, *optional*):
 The frequency at which to log completions. If not provided, defaults to the trainer's `eval_steps`.
 """

@@ -520,7 +520,7 @@ class MergeModelCallback(TrainerCallback):
 on a merge configuration.

 Args:
-merge_config ([`MergeConfig`], *optional*, defaults to `None`):
+merge_config ([`MergeConfig`], *optional*):
 Configuration used for the merging process. If not provided, the default [`MergeConfig`] is used.
 merge_at_every_checkpoint (`bool`, *optional*, defaults to `False`):
 Whether to merge the model at every checkpoint.
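A hedged sketch of attaching `LogCompletionsCallback` with the arguments documented above; `trainer` stands in for any already-constructed TRL trainer with an eval dataset, and the constructor signature beyond the listed arguments is assumed:

```python
from trl import LogCompletionsCallback  # top-level import assumed

# `trainer` is a placeholder for an existing TRL trainer instance.
completions_callback = LogCompletionsCallback(trainer, num_prompts=8)  # `freq` falls back to the trainer's eval_steps
trainer.add_callback(completions_callback)
```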
@@ -37,7 +37,7 @@ class CPOConfig(TrainingArguments):
 to use the default data collator.
 max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
 Maximum length of the prompt. This argument is required if you want to use the default data collator.
-max_completion_length (`int` or `None`, *optional*, defaults to `None`):
+max_completion_length (`int`, *optional*):
 Maximum length of the completion. This argument is required if you want to use the default data collator
 and your model is an encoder-decoder.
 beta (`float`, *optional*, defaults to `0.1`):

@@ -70,20 +70,20 @@ class CPOConfig(TrainingArguments):
 loss types.
 label_pad_token_id (`int`, *optional*, defaults to `-100`):
 Label pad token id. This argument is required if you want to use the default data collator.
-padding_value (`int` or `None`, *optional*, defaults to `None`):
+padding_value (`int`, *optional*):
 Padding value to use. If `None`, the padding value of the tokenizer is used.
 truncation_mode (`str`,*optional*, defaults to `"keep_end"`):
 Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.
 This argument is required if you want to use the default data collator.
 generate_during_eval (`bool`, *optional*, defaults to `False`):
 If `True`, generates and logs completions from the model to W&B or Comet during evaluation.
-is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`):
+is_encoder_decoder (`bool`, *optional*):
 When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument,
 you need to specify if the model returned by the callable is an encoder-decoder model.
-model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
+model_init_kwargs (`dict[str, Any]`, *optional*):
 Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
 string.
-dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
+dataset_num_proc (`int`, *optional*):
 Number of processes to use for processing the dataset.
 """

@@ -90,7 +90,7 @@ class CPOTrainer(Trainer):
 The dataset to use for training.
 eval_dataset (`datasets.Dataset`):
 The dataset to use for evaluation.
-processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
+processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
 Processing class used to process the data. If provided, will be used to automatically process the inputs
 for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
 reuse the fine-tuned model.

@@ -1027,7 +1027,7 @@ class CPOTrainer(Trainer):
 Args:
 logs (`dict[str, float]`):
 The values to log.
-start_time (`float` or `None`, *optional*, defaults to `None`):
+start_time (`float`, *optional*):
 Start time of the training.
 """
 # logs either has 'loss' or 'eval_loss'

@@ -1080,11 +1080,11 @@ class CPOTrainer(Trainer):
 Creates a draft of a model card using the information available to the `Trainer`.

 Args:
-model_name (`str` or `None`, *optional*, defaults to `None`):
+model_name (`str`, *optional*):
 Name of the model.
-dataset_name (`str` or `None`, *optional*, defaults to `None`):
+dataset_name (`str`, *optional*):
 Name of the dataset used for training.
-tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
+tags (`str`, `list[str]`, *optional*):
 Tags to be associated with the model card.
 """
 if not self.is_world_process_zero():
@@ -38,7 +38,7 @@ class DDPOConfig:
 Name of this run.
 seed (`int`, *optional*, defaults to `0`):
 Random seed.
-log_with (`Literal["wandb", "tensorboard"]]` or `None`, *optional*, defaults to `None`):
+log_with (`Literal["wandb", "tensorboard"]]`, *optional*):
 Log with either 'wandb' or 'tensorboard', check
 https://huggingface.co/docs/accelerate/usage_guides/tracking for more details.
 tracker_kwargs (`Dict`, *optional*, defaults to `{}`):

@@ -618,11 +618,11 @@ class DDPOTrainer(PyTorchModelHubMixin):
 Creates a draft of a model card using the information available to the `Trainer`.

 Args:
-model_name (`str` or `None`, *optional*, defaults to `None`):
+model_name (`str`, *optional*):
 Name of the model.
-dataset_name (`str` or `None`, *optional*, defaults to `None`):
+dataset_name (`str`, *optional*):
 Name of the dataset used for training.
-tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
+tags (`str`, `list[str]`, *optional*):
 Tags to be associated with the model card.
 """
 if not self.is_world_process_zero():
@@ -46,15 +46,15 @@ class DPOConfig(TrainingArguments):
 Parameters:
 > Parameters that control the model and reference model

-model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
+model_init_kwargs (`dict[str, Any]`, *optional*):
 Keyword arguments for `AutoModelForCausalLM.from_pretrained`, used when the `model` argument of the
 [`DPOTrainer`] is provided as a string.
-ref_model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
+ref_model_init_kwargs (`dict[str, Any]`, *optional*):
 Keyword arguments for `AutoModelForCausalLM.from_pretrained`, used when the `ref_model` argument of the
 [`DPOTrainer`] is provided as a string.
-model_adapter_name (`str` or `None`, *optional*, defaults to `None`):
+model_adapter_name (`str`, *optional*):
 Name of the train target PEFT adapter, when using LoRA with multiple adapters.
-ref_adapter_name (`str` or `None`, *optional*, defaults to `None`):
+ref_adapter_name (`str`, *optional*):
 Name of the reference PEFT adapter, when using LoRA with multiple adapters.
 force_use_ref_model (`bool`, *optional*, defaults to `False`):
 If you provide a PEFT model as the active model and wish to use a different model for the `ref_model`, set

@@ -68,15 +68,15 @@ class DPOConfig(TrainingArguments):

 > Parameters that control the data preprocessing

-dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
+dataset_num_proc (`int`, *optional*):
 Number of processes to use for processing the dataset.
-padding_value (`int` or `None`, *optional*, defaults to `None`):
+padding_value (`int`, *optional*):
 Padding value to use. If `None`, the padding value of the tokenizer is used.
 label_pad_token_id (`int`, *optional*, defaults to `-100`):
 Padding value to use for labels.
 max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
 Maximum length of the prompt.
-max_completion_length (`int` or `None`, *optional*, defaults to `None`):
+max_completion_length (`int`, *optional*):
 Maximum length of the completion.
 max_length (`int` or `None`, *optional*, defaults to `1024`):
 Maximum length of the full sequence (prompt + completion).

@@ -93,11 +93,11 @@ class DPOConfig(TrainingArguments):
 training without needing the reference model during training, which can help reduce GPU memory usage. If
 set to `False` (default), the reference model will be used during training to compute log probabilities
 on-the-fly.
-precompute_ref_batch_size (`int` or `None`, *optional*, defaults to `None`):
+precompute_ref_batch_size (`int`, *optional*):
 Batch size to use when precomputing reference model log probabilities. This can be set higher than the
 training batch size to speed up preprocessing. If `None`, defaults to `per_device_train_batch_size` for
 training and `per_device_eval_batch_size` for evaluation.
-tools (`Optional[list[Union[dict, Callable]]]`, *optional*, defaults to `None`):
+tools (`Optional[list[Union[dict, Callable]]]`, *optional*):
 List of tools (callable functions) that will be accessible to the model. If the template does not support
 function calling, this argument will have no effect.

@@ -151,11 +151,11 @@ class DPOConfig(TrainingArguments):
 DPO](https://huggingface.co/papers/2403.00409) paper that should be between `0.0` and `0.5`.
 use_weighting (`bool`, *optional*, defaults to `False`):
 Whether to weight the loss as done in the [WPO paper](https://huggingface.co/papers/2406.11827).
-rpo_alpha (`float`, *optional*, defaults to `None`):
+rpo_alpha (`float`, *optional*):
 α parameter from the [RPO paper](https://huggingface.co/papers/2404.19733) (v3), which controls the
 weighting of the NLL term in the loss. If `None`, no weighting is applied and the loss is the same as the
 DPO loss. The paper recommends `rpo_alpha=1.0`.
-ld_alpha (`float` or `None`, *optional*, defaults to `None`):
+ld_alpha (`float`, *optional*):
 α parameter from the [LD-DPO paper](https://huggingface.co/papers/2409.06411), which controls the weighting
 of the verbose token log-probabilities in responses. If `None`, no weighting is applied to the verbose
 part, and the loss is equivalent to the standard DPO loss. The paper recommends setting `ld_alpha` between

@@ -163,7 +163,7 @@ class DPOConfig(TrainingArguments):
 discopop_tau (`float`, *optional*, defaults to `0.05`):
 τ/temperature parameter from the [DiscoPOP](https://huggingface.co/papers/2406.08414) paper, which controls
 the shape of log ratio modulated loss. The paper recommends the default value `discopop_tau=0.05`.
-loss_weights (`list[float]` or `None`, *optional*, defaults to `None`):
+loss_weights (`list[float]`, *optional*):
 List of loss weights for multi-loss combinations. Used when combining multiple loss types. Example: `[0.8,
 0.2, 1.0]` for [MPO](https://huggingface.co/papers/2411.10442). If not provided, defaults to equal weights
 (`1.0`) for all loss types.
@@ -203,7 +203,7 @@ class DPOTrainer(Trainer):
 Hugging Face transformer model with a casual language modelling head. Used for implicit reward computation
 and loss. If no reference model is provided, the trainer will create a reference model with the same
 architecture as the model to be optimized.
-args ([`DPOConfig`], *optional*, defaults to `None`):
+args ([`DPOConfig`], *optional*):
 Configuration for this trainer. If `None`, a default configuration is used.
 data_collator (`DataCollator`, *optional*):
 Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`.

@@ -217,7 +217,7 @@ class DPOTrainer(Trainer):
 and content).
 eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
 Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
-processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
+processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
 Processing class used to process the data. If `None`, the processing class is loaded from the model's name
 with [`~transformers.AutoTokenizer.from_pretrained`].
 compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):

@@ -226,7 +226,7 @@ class DPOTrainer(Trainer):
 `True`, your compute_metrics function must take a boolean `compute_result` argument. This will be triggered
 after the last eval batch to signal that the function needs to calculate and return the global summary
 statistics rather than accumulating the batch-level statistics.
-callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`):
+callbacks (list of [`~transformers.TrainerCallback`], *optional*):
 List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed
 in [here](https://huggingface.co/docs/transformers/main_classes/callback).

@@ -235,16 +235,16 @@ class DPOTrainer(Trainer):
 optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`):
 A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on your
 model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`.
-optimizer_cls_and_kwargs (`Tuple[Type[torch.optim.Optimizer], Dict[str, Any]]`, *optional*, defaults to `None`):
+optimizer_cls_and_kwargs (`Tuple[Type[torch.optim.Optimizer], Dict[str, Any]]`, *optional*):
 A tuple containing the optimizer class and keyword arguments to use. Overrides `optim` and `optim_args` in
 `args`. Incompatible with the `optimizers` argument.
-preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*, defaults to `None`):
+preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*):
 A function that preprocess the logits right before caching them at each evaluation step. Must take two
 tensors, the logits and the labels, and return the logits once processed as desired. The modifications made
 by this function will be reflected in the predictions received by `compute_metrics`.

 Note that the labels (second parameter) will be `None` if the dataset does not have them.
-peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`):
+peft_config ([`~peft.PeftConfig`], *optional*):
 PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
 """

@@ -1929,7 +1929,7 @@ class DPOTrainer(Trainer):
 Args:
 logs (`dict[str, float]`):
 The values to log.
-start_time (`float` or `None`, *optional*, defaults to `None`):
+start_time (`float`, *optional*):
 Start time of the training.
 """
 # logs either has 'loss' or 'eval_loss'

@@ -1959,11 +1959,11 @@ class DPOTrainer(Trainer):
 Creates a draft of a model card using the information available to the `Trainer`.

 Args:
-model_name (`str` or `None`, *optional*, defaults to `None`):
+model_name (`str`, *optional*):
 Name of the model.
-dataset_name (`str` or `None`, *optional*, defaults to `None`):
+dataset_name (`str`, *optional*):
 Name of the dataset used for training.
-tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
+tags (`str`, `list[str]`, *optional*):
 Tags to be associated with the model card.
 """
 if not self.is_world_process_zero():
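A hedged end-to-end sketch using the `DPOConfig`/`DPOTrainer` arguments documented above; the model choice and the tiny in-memory preference dataset are illustrative only:

```python
from datasets import Dataset
from transformers import AutoModelForCausalLM, AutoTokenizer
from trl import DPOConfig, DPOTrainer

model_id = "Qwen/Qwen2.5-0.5B-Instruct"  # illustrative model choice
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)

train_dataset = Dataset.from_dict({
    "prompt": ["What color is the sky?"],
    "chosen": ["It is blue."],
    "rejected": ["It is green."],
})

# Arguments marked *optional* (padding_value, ld_alpha, loss_weights, ...) can simply be omitted.
args = DPOConfig(output_dir="dpo-example", per_device_train_batch_size=1, max_steps=1)
trainer = DPOTrainer(model=model, args=args, train_dataset=train_dataset, processing_class=tokenizer)
trainer.train()
```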
@@ -39,10 +39,10 @@ class GKDConfig(SFTConfig):
 beta is `0.0`, the loss is the KL divergence. When beta is `1.0`, the loss is the Inverse KL Divergence.
 max_new_tokens (`int`, *optional*, defaults to `128`):
 Maximum number of tokens to generate per completion.
-teacher_model_name_or_path (`str` or `None`, *optional*, defaults to `None`):
+teacher_model_name_or_path (`str`, *optional*):
 Model name or path of the teacher model. If `None`, the teacher model will be the same as the model being
 trained.
-teacher_model_init_kwargs (`dict[str, Any]]` or `None`, *optional*, defaults to `None`):
+teacher_model_init_kwargs (`dict[str, Any]]`, *optional*):
 Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the teacher model
 from a string.
 disable_dropout (`bool`, *optional*, defaults to `True`):

@@ -435,11 +435,11 @@ class GKDTrainer(SFTTrainer):
 Creates a draft of a model card using the information available to the `Trainer`.

 Args:
-model_name (`str` or `None`, *optional*, defaults to `None`):
+model_name (`str`, *optional*):
 Name of the model.
-dataset_name (`str` or `None`, *optional*, defaults to `None`):
+dataset_name (`str`, *optional*):
 Name of the dataset used for training.
-tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
+tags (`str`, `list[str]`, *optional*):
 Tags to be associated with the model card.
 """
 if not self.is_world_process_zero():
@@ -34,7 +34,7 @@ class GRPOConfig(TrainingArguments):
 Parameters:
 > Parameters that control the model and reference model

-model_init_kwargs (`str`, `dict[str, Any]` or `None`, *optional*, defaults to `None`):
+model_init_kwargs (`str`, `dict[str, Any]`, *optional*):
 Keyword arguments for [`~transformers.AutoModelForCausalLM.from_pretrained`], used when the `model`
 argument of the [`GRPOTrainer`] is provided as a string.
 disable_dropout (`bool`, *optional*, defaults to `False`):

@@ -63,11 +63,11 @@ class GRPOConfig(TrainingArguments):

 > Parameters that control generation

-generation_batch_size: (`int` or `None`, *optional*, defaults to `None`):
+generation_batch_size: (`int`, *optional*):
 Batch size to use for generation. If `None`, it defaults to the effective training batch size:
 `per_device_train_batch_size * num_processes * steps_per_generation`. In other words, there is one
 generation batch processed per optimization step. Mutually exclusive with `steps_per_generation`.
-steps_per_generation: (`int` or `None`, *optional*, defaults to `None`):
+steps_per_generation: (`int`, *optional*):
 Number of steps per generation. If `None`, it defaults to `gradient_accumulation_steps`. Mutually exclusive
 with `generation_batch_size`.
 temperature (`float`, defaults to `1.0`):

@@ -75,10 +75,10 @@ class GRPOConfig(TrainingArguments):
 top_p (`float`, *optional*, defaults to `1.0`):
 Float that controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. Set to
 `1.0` to consider all tokens.
-top_k (`int` or `None`, *optional*, defaults to `None`):
+top_k (`int`, *optional*):
 Number of highest probability vocabulary tokens to keep for top-k-filtering. If `None`, top-k-filtering is
 disabled and all tokens are considered.
-min_p (`float` or `None`, *optional*, defaults to `None`):
+min_p (`float`, *optional*):
 Minimum token probability, which will be scaled by the probability of the most likely token. It must be a
 value between `0.0` and `1.0`. Typical values are in the `0.01-0.2` range.
 repetition_penalty (`float`, *optional*, defaults to `1.0`):

@@ -89,9 +89,9 @@ class GRPOConfig(TrainingArguments):
 Whether to use the `transformers` paged implementation for generation. If set to `True`, the `transformers`
 paged implementation will be used for generation instead of the default padded implementation. This
 parameter is only effective when `use_vllm` is set to `False`.
-cache_implementation (`str` or `None`, *optional*, defaults to `None`):
+cache_implementation (`str`, *optional*):
 Implementation of the cache method for faster generation when `use_vllm` is set to `False`.
-generation_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
+generation_kwargs (`dict[str, Any]`, *optional*):
 Additional keyword arguments to pass to `GenerationConfig` (if using transformers) or `SamplingParams` (if
 using vLLM) when sampling completions. This can be used to further customize the generation behavior, such
 as setting `suppress_tokens`, `num_beams`, etc. If it contains keys that conflict with the other generation

@@ -114,12 +114,12 @@ class GRPOConfig(TrainingArguments):
 Model implementation to use for vLLM. Must be one of `"transformers"` or `"vllm"`. `"transformers"`: Use
 the `transformers` backend for model implementation. `"vllm"`: Use the `vllm` library for model
 implementation.
-vllm_guided_decoding_regex (`str` or `None`, *optional*, defaults to `None`):
+vllm_guided_decoding_regex (`str`, *optional*):
 Regex for vLLM guided decoding. If `None` (default), guided decoding is disabled.

 > Parameters that control the vLLM server (only used when `vllm_mode` is `"server"`)

-vllm_server_base_url (`str` or `None`, *optional*, defaults to `None`):
+vllm_server_base_url (`str`, *optional*):
 Base URL for the vLLM server (e.g., `"http://localhost:8000"`). If provided, `vllm_server_host` and
 `vllm_server_port` are ignored.
 vllm_server_host (`str`, *optional*, defaults to `"0.0.0.0"`):

@@ -153,11 +153,11 @@ class GRPOConfig(TrainingArguments):
 Number of iterations per batch (denoted as μ in the algorithm).
 epsilon (`float`, *optional*, defaults to `0.2`):
 Epsilon value for clipping.
-delta (`float` or `None`, *optional*, defaults to `None`):
+delta (`float`, *optional*):
 Enables the upper clipping bound in two-sided GRPO loss when set to a float. If `None` (default), standard
 GRPO clipping is used. Recommended to be greater than `1 + ε` when enabled. This method is introduced in
 the [INTELLECT-2 tech report](https://huggingface.co/papers/2505.07291).
-epsilon_high (`float` or `None`, *optional*, defaults to `None`):
+epsilon_high (`float`, *optional*):
 Upper-bound epsilon value for clipping. If not specified, it defaults to the same value as the lower-bound
 specified in argument `epsilon`. Paper [DAPO](https://huggingface.co/papers/2503.14476) recommends `0.28`.
 importance_sampling_level (`str`, *optional*, defaults to `"token"`):

@@ -166,7 +166,7 @@ class GRPOConfig(TrainingArguments):
 log-probability ratios across valid tokens to produce a single ratio per sequence. The [GSPO
 paper](https://huggingface.co/papers/2507.18071) shows that sequence-level sampling often yields more
 stable training and better alignment with sequence-level rewards.
-reward_weights (`list[float]` or `None`, *optional*, defaults to `None`):
+reward_weights (`list[float]`, *optional*):
 Weights for each reward function. Must match the number of reward functions. If `None`, all rewards are
 weighted equally with weight `1.0`.
 scale_rewards (`str` or `bool`, *optional*, defaults to `"group"`):

@@ -235,7 +235,7 @@ class GRPOConfig(TrainingArguments):
 log_completions (`bool`, *optional*, defaults to `False`):
 Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is installed,
 it prints the sample. If `wandb` logging is enabled, it logs it to `wandb`.
-num_completions_to_print (`int` or `None`, *optional*, defaults to `None`):
+num_completions_to_print (`int`, *optional*):
 Number of completions to print with `rich`. If `None`, all completions are logged.
 wandb_log_unique_prompts (`bool`, *optional*, defaults to `False`):
 Whether to log unique prompts in wandb. If `True`, only unique prompts are logged. If `False`, all prompts
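A hedged sketch of a `GRPOConfig` built only from parameters documented above; the values are illustrative, and every argument marked *optional* (top_k, min_p, delta, reward_weights, ...) can simply be omitted to keep its default:

```python
from trl import GRPOConfig

training_args = GRPOConfig(
    output_dir="grpo-example",
    temperature=1.0,
    top_p=1.0,
    epsilon=0.2,
    log_completions=True,
    num_completions_to_print=4,
)
```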
@@ -162,7 +162,7 @@ class GRPOTrainer(Trainer):
 reward function's signature.
 - A list of reward functions, where each item can independently be any of the above types. Mixing different
 types within the list (e.g., a string model ID and a custom reward function) is allowed.
-args ([`GRPOConfig`], *optional*, defaults to `None`):
+args ([`GRPOConfig`], *optional*):
 Configuration for this trainer. If `None`, a default configuration is used.
 train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]):
 Dataset to use for training. It must include a column `"prompt"`. Any additional columns in the dataset is

@@ -173,12 +173,12 @@ class GRPOTrainer(Trainer):
 and content).
 eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
 Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
-processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.ProcessorMixin`] or `None`, *optional*, defaults to `None`):
+processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.ProcessorMixin`], *optional*):
 Processing class used to process the data. The padding side must be set to "left". If `None`, the
 processing class is loaded from the model's name with [`~transformers.AutoProcessor.from_pretrained`]. A
 padding token, `tokenizer.pad_token`, must be set. If the processing class has not set a padding token,
 `tokenizer.eos_token` will be used as the default.
-reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*, defaults to `None`):
+reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*):
 Processing classes corresponding to the reward functions specified in `reward_funcs`. Can be either:

 - A single processing class: Used when `reward_funcs` contains only one reward function.

@@ -188,7 +188,7 @@ class GRPOTrainer(Trainer):
 [`~transformers.AutoTokenizer.from_pretrained`]. For elements in `reward_funcs` that are custom reward
 functions (not [`~transformers.PreTrainedModel`]), the corresponding entries in `reward_processing_classes`
 are ignored.
-callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`):
+callbacks (list of [`~transformers.TrainerCallback`], *optional*):
 List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed
 in [here](https://huggingface.co/docs/transformers/main_classes/callback).

@@ -197,7 +197,7 @@ class GRPOTrainer(Trainer):
 optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`):
 A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on your
 model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`.
-peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`):
+peft_config ([`~peft.PeftConfig`], *optional*):
 PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
 """

@@ -1825,11 +1825,11 @@ class GRPOTrainer(Trainer):
 Creates a draft of a model card using the information available to the `Trainer`.

 Args:
-model_name (`str` or `None`, *optional*, defaults to `None`):
+model_name (`str`, *optional*):
 Name of the model.
-dataset_name (`str` or `None`, *optional*, defaults to `None`):
+dataset_name (`str`, *optional*):
 Name of the dataset used for training.
-tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
+tags (`str`, `list[str]`, *optional*):
 Tags to be associated with the model card.
 """
 if not self.is_world_process_zero():
@@ -40,13 +40,13 @@ class IterativeSFTConfig(TrainingArguments):
 Parameters:
 > Parameters that control the model

-model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
+model_init_kwargs (`dict[str, Any]`, *optional*):
 Keyword arguments for [`~transformers.AutoModelForCausalLM.from_pretrained`], used when the `model`
 argument of the [`IterativeSFTTrainer`] is provided as a string.

 > Parameters that control the data preprocessing

-max_length (`int` or `None`, *optional*, defaults to `None`):
+max_length (`int`, *optional*):
 Maximum length of the tokenized sequence. Sequences longer than `max_length` are truncated.
 truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
 The truncation mode to use, either `"keep_end"` or `"keep_start"`.

@@ -74,7 +74,7 @@ class IterativeSFTTrainer(Trainer):
 using [`~transformers.AutoModelForCausalLM.from_pretrained`] with the keyword arguments in
 `args.model_init_kwargs`.
 - A [`~transformers.PreTrainedModel`] object. Only causal language models are supported.
-args ([`IterativeSFTConfig`], *optional*, defaults to `None`):
+args ([`IterativeSFTConfig`], *optional*):
 Configuration for this trainer. If `None`, a default configuration is used.
 data_collator (`DataCollator`, *optional*):
 Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`.

@@ -83,7 +83,7 @@ class IterativeSFTTrainer(Trainer):
 tokenizer.
 eval_dataset (`datasets.Dataset`):
 The dataset to use for evaluation.
-processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
+processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
 Processing class used to process the data. If `None`, the processing class is loaded from the model's name
 with [`~transformers.AutoTokenizer.from_pretrained`].
 optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`):

@@ -457,11 +457,11 @@ class IterativeSFTTrainer(Trainer):
 Creates a draft of a model card using the information available to the `Trainer`.

 Args:
-model_name (`str` or `None`, *optional*, defaults to `None`):
+model_name (`str`, *optional*):
 Name of the model.
-dataset_name (`str` or `None`, *optional*, defaults to `None`):
+dataset_name (`str`, *optional*):
 Name of the dataset used for training.
-tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
+tags (`str`, `list[str]`, *optional*):
 Tags to be associated with the model card.
 """
 if not self.is_world_process_zero():
@ -297,7 +297,7 @@ class HfPairwiseJudge(BasePairwiseJudge):
|
||||
Model to use for the judge.
|
||||
token (`str`, *optional*):
|
||||
Hugging Face API token to use for the [`huggingface_hub.InferenceClient`].
|
||||
system_prompt (`str` or `None`, *optional*, defaults to `None`):
|
||||
system_prompt (`str`, *optional*):
|
||||
The system prompt to be used for the judge. If not provided, a default prompt is used. Note that the system
|
||||
prompt should contain the following placeholders: `{prompt}`, `{response0}`, and `{response1}`. Also, the
|
||||
inference is called with `max_tokens=1`, consequently the system prompt should ask for a single token
|
||||
@ -351,7 +351,7 @@ class OpenAIPairwiseJudge(BasePairwiseJudge):
Args:
model (`str`, *optional*, defaults to `"gpt-4-turbo-preview"`):
Model to use for the judge.
system_prompt (`str` or `None`, *optional*, defaults to `None`):
system_prompt (`str`, *optional*):
System prompt to be used for the judge. If not provided, a default prompt is used. Note that the system
prompt should contain the following placeholders: `{prompt}`, `{response0}`, and `{response1}`. Also, the
inference is called with `max_tokens=1`, consequently the system prompt should ask for a single token
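To illustrate the placeholder requirement above, here is a sketch of a custom judge prompt. The question and wording are made up; only the `{prompt}`/`{response0}`/`{response1}` contract and the single-token constraint come from the docstring.

```python
from trl import OpenAIPairwiseJudge

system_prompt = (
    "Which response answers the question better?\n"
    "Question: {prompt}\n"
    "Response 0: {response0}\n"
    "Response 1: {response1}\n"
    "Answer with a single token: 0 or 1."  # the judge is called with max_tokens=1
)
judge = OpenAIPairwiseJudge(model="gpt-4-turbo-preview", system_prompt=system_prompt)
ranks = judge.judge(prompts=["What is 2 + 2?"], completions=[["4", "5"]])  # e.g. [0]
```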
@ -37,7 +37,7 @@ class KTOConfig(TrainingArguments):
|
||||
to use the default data collator.
|
||||
max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
|
||||
Maximum length of the prompt. This argument is required if you want to use the default data collator.
|
||||
max_completion_length (`int` or `None`, *optional*, defaults to `None`):
|
||||
max_completion_length (`int`, *optional*):
|
||||
Maximum length of the completion. This argument is required if you want to use the default data collator
|
||||
and your model is an encoder-decoder.
|
||||
beta (`float`, *optional*, defaults to `0.1`):
|
||||
@ -56,7 +56,7 @@ class KTOConfig(TrainingArguments):
|
||||
Undesirable losses are weighed by this factor to counter the unequal number of desirable and undesirable pairs.
|
||||
label_pad_token_id (`int`, *optional*, defaults to `-100`):
|
||||
Label pad token id. This argument is required if you want to use the default data collator.
|
||||
padding_value (`int` or `None`, *optional*, defaults to `None`):
|
||||
padding_value (`int`, *optional*):
|
||||
Padding value to use. If `None`, the padding value of the tokenizer is used.
|
||||
truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
|
||||
Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.
|
||||
@ -64,19 +64,19 @@ class KTOConfig(TrainingArguments):
|
||||
generate_during_eval (`bool`, *optional*, defaults to `False`):
|
||||
If `True`, generates and logs completions from both the model and the reference model to W&B or Comet
|
||||
during evaluation.
|
||||
is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`):
|
||||
is_encoder_decoder (`bool`, *optional*):
|
||||
When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument,
|
||||
you need to specify if the model returned by the callable is an encoder-decoder model.
|
||||
precompute_ref_log_probs (`bool`, *optional*, defaults to `False`):
|
||||
Whether to precompute reference model log probabilities for training and evaluation datasets. This is
|
||||
useful when training without the reference model to reduce the total GPU memory needed.
|
||||
model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
|
||||
model_init_kwargs (`dict[str, Any]`, *optional*):
|
||||
Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
|
||||
string.
|
||||
ref_model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
|
||||
ref_model_init_kwargs (`dict[str, Any]`, *optional*):
|
||||
Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the reference model
|
||||
from a string.
|
||||
dataset_num_proc: (`int` or `None`, *optional*, defaults to `None`):
|
||||
dataset_num_proc: (`int`, *optional*):
|
||||
Number of processes to use for processing the dataset.
|
||||
disable_dropout (`bool`, *optional*, defaults to `True`):
|
||||
Whether to disable dropout in the model and reference model.
|
||||
|
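A sketch of a [`KTOConfig`] built from the parameters documented above; the values are illustrative.

```python
from trl import KTOConfig

config = KTOConfig(
    output_dir="kto-model",
    max_prompt_length=512,
    max_completion_length=None,  # only needed with the default collator on encoder-decoders
    beta=0.1,
    desirable_weight=1.0,
    undesirable_weight=1.0,
    label_pad_token_id=-100,
    padding_value=None,          # falls back to the tokenizer's padding value
    truncation_mode="keep_end",
    dataset_num_proc=4,
)
```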
@ -292,11 +292,11 @@ class KTOTrainer(Trainer):
|
||||
The dataset to use for training.
|
||||
eval_dataset (`datasets.Dataset`):
|
||||
The dataset to use for evaluation.
|
||||
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
|
||||
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
|
||||
Processing class used to process the data. If provided, will be used to automatically process the inputs
|
||||
for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
|
||||
reuse the fine-tuned model.
|
||||
data_collator (`transformers.DataCollator`, *optional*, defaults to `None`):
|
||||
data_collator (`transformers.DataCollator`, *optional*):
|
||||
The data collator to use for training. If None is specified, the default data collator
|
||||
(`DPODataCollatorWithPadding`) will be used which will pad the sequences to the maximum length of the
|
||||
sequences in the batch, given a dataset of paired sequences.
|
||||
@ -1641,7 +1641,7 @@ class KTOTrainer(Trainer):
|
||||
Args:
|
||||
logs (`dict[str, float]`):
|
||||
The values to log.
|
||||
start_time (`float` or `None`, *optional*, defaults to `None`):
|
||||
start_time (`float`, *optional*):
|
||||
Start time of the training.
|
||||
"""
|
||||
# logs either has 'loss' or 'eval_loss'
|
||||
@ -1688,11 +1688,11 @@ class KTOTrainer(Trainer):
|
||||
Creates a draft of a model card using the information available to the `Trainer`.
|
||||
|
||||
Args:
|
||||
model_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
model_name (`str`, *optional*):
|
||||
Name of the model.
|
||||
dataset_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
dataset_name (`str`, *optional*):
|
||||
Name of the dataset used for training.
|
||||
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
|
||||
tags (`str`, `list[str]`, *optional*):
|
||||
Tags to be associated with the model card.
|
||||
"""
|
||||
if not self.is_world_process_zero():
|
||||
|
@ -27,11 +27,11 @@ class ModelConfig:
|
||||
command line.
|
||||
|
||||
Parameters:
|
||||
model_name_or_path (`str` or `None`, *optional*, defaults to `None`):
|
||||
model_name_or_path (`str`, *optional*):
|
||||
Model checkpoint for weights initialization.
|
||||
model_revision (`str`, *optional*, defaults to `"main"`):
|
||||
Specific model version to use. It can be a branch name, a tag name, or a commit id.
|
||||
dtype (`Literal["auto", "bfloat16", "float16", "float32"]` or `None`, *optional*, defaults to `None`):
|
||||
dtype (`Literal["auto", "bfloat16", "float16", "float32"]`, *optional*):
|
||||
Override the default `torch.dtype` and load the model under this dtype. Possible values are
|
||||
|
||||
- `"bfloat16"`: `torch.bfloat16`
|
||||
@ -43,7 +43,7 @@ class ModelConfig:
|
||||
Whether to allow for custom models defined on the Hub in their own modeling files. This option should only
|
||||
be set to `True` for repositories you trust and in which you have read the code, as it will execute code
|
||||
present on the Hub on your local machine.
|
||||
attn_implementation (`str` or `None`, *optional*, defaults to `None`):
|
||||
attn_implementation (`str`, *optional*):
|
||||
Which attention implementation to use. You can run `--attn_implementation=flash_attention_2`, in which case
|
||||
you must install this manually by running `pip install flash-attn --no-build-isolation`.
|
||||
use_peft (`bool`, *optional*, defaults to `False`):
|
||||
@ -54,11 +54,11 @@ class ModelConfig:
|
||||
LoRA alpha.
|
||||
lora_dropout (`float`, *optional*, defaults to `0.05`):
|
||||
LoRA dropout.
|
||||
lora_target_modules (`Union[str, list[str]]` or `None`, *optional*, defaults to `None`):
|
||||
lora_target_modules (`Union[str, list[str]]`, *optional*):
|
||||
LoRA target modules.
|
||||
lora_target_parameters (`Union[str, list[str]]` or `None`, *optional*, defaults to `None`):
|
||||
lora_target_parameters (`Union[str, list[str]]`, *optional*):
|
||||
List of target parameters for LoRA.
|
||||
lora_modules_to_save (`list[str]` or `None`, *optional*, defaults to `None`):
|
||||
lora_modules_to_save (`list[str]`, *optional*):
|
||||
Model layers to unfreeze & train.
|
||||
lora_task_type (`str`, *optional*, defaults to `"CAUSAL_LM"`):
|
||||
Task type to pass for LoRA (use `"SEQ_CLS"` for reward modeling).
|
||||
|
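A sketch of the [`ModelConfig`] fields above; `lora_r` is not shown in this hunk and is assumed from the rest of the class, everything else is named in the diff.

```python
from trl import ModelConfig

model_args = ModelConfig(
    model_name_or_path="Qwen/Qwen2.5-0.5B",
    dtype="bfloat16",                         # or "auto", "float16", "float32"
    attn_implementation="flash_attention_2",  # requires flash-attn to be installed
    use_peft=True,
    lora_r=16,                                # assumed field, not part of this hunk
    lora_alpha=32,
    lora_dropout=0.05,
    lora_target_modules=["q_proj", "v_proj"],
    lora_task_type="CAUSAL_LM",               # use "SEQ_CLS" for reward modeling
)
```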
@ -88,7 +88,7 @@ class NashMDTrainer(OnlineDPOTrainer):
|
||||
The dataset to use for training.
|
||||
eval_dataset (`datasets.Dataset`):
|
||||
The dataset to use for evaluation.
|
||||
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
|
||||
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
|
||||
Processing class used to process the data. If provided, will be used to automatically process the inputs
|
||||
for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
|
||||
reuse the fine-tuned model.
|
||||
@ -507,11 +507,11 @@ class NashMDTrainer(OnlineDPOTrainer):
|
||||
Creates a draft of a model card using the information available to the `Trainer`.
|
||||
|
||||
Args:
|
||||
model_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
model_name (`str`, *optional*):
|
||||
Name of the model.
|
||||
dataset_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
dataset_name (`str`, *optional*):
|
||||
Name of the dataset used for training.
|
||||
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
|
||||
tags (`str`, `list[str]`, *optional*):
|
||||
Tags to be associated with the model card.
|
||||
"""
|
||||
if not self.is_world_process_zero():
|
||||
|
@ -33,9 +33,9 @@ class OnlineDPOConfig(TrainingArguments):
|
||||
command line.
|
||||
|
||||
Parameters:
|
||||
reward_model_path (`str` or `None`, *optional*, defaults to `None`):
|
||||
reward_model_path (`str`, *optional*):
|
||||
Path to the reward model. Either `judge` or `reward_model_path` must be set, but not both.
|
||||
judge (`str` or `None`, *optional*, defaults to `None`):
|
||||
judge (`str`, *optional*):
|
||||
Name of the judge to use. Either `judge` or `reward_model_path` must be set, but not both.
|
||||
max_new_tokens (`int`, *optional*, defaults to `64`):
|
||||
Maximum number of tokens to generate per completion.
|
||||
@ -45,7 +45,7 @@ class OnlineDPOConfig(TrainingArguments):
|
||||
possible.
|
||||
temperature (`float`, *optional*, defaults to `0.9`):
|
||||
Temperature for sampling. The higher the temperature, the more random the completions.
|
||||
missing_eos_penalty (`float` or `None`, *optional*, defaults to `None`):
|
||||
missing_eos_penalty (`float`, *optional*):
|
||||
Penalty applied to the score when the model fails to generate an EOS token. This is useful to encourage the model to
|
||||
generate completions shorter than the maximum length (`max_new_tokens`). The penalty must be a positive
|
||||
value. This parameter only works when using `reward_funcs` and not when using `judge`.
|
||||
@ -60,7 +60,7 @@ class OnlineDPOConfig(TrainingArguments):
|
||||
- `"sigmoid"`: sigmoid loss from the original [DPO](https://huggingface.co/papers/2305.18290) paper.
|
||||
- `"ipo"`: IPO loss from the [IPO](https://huggingface.co/papers/2310.12036) paper.
|
||||
|
||||
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
|
||||
dataset_num_proc (`int`, *optional*):
|
||||
Number of processes to use for processing the dataset.
|
||||
disable_dropout (`bool`, *optional*, defaults to `True`):
|
||||
Whether to disable dropout in the model and reference model.
|
||||
@ -70,10 +70,10 @@ class OnlineDPOConfig(TrainingArguments):
|
||||
top_p (`float`, *optional*, defaults to `1.0`):
|
||||
Float that controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. Set to
|
||||
`1.0` to consider all tokens.
|
||||
top_k (`int` or `None`, *optional*, defaults to `None`):
|
||||
top_k (`int`, *optional*):
|
||||
Number of highest probability vocabulary tokens to keep for top-k-filtering. If `None`, top-k-filtering is
|
||||
disabled and all tokens are considered.
|
||||
min_p (`float` or `None`, *optional*, defaults to `None`):
|
||||
min_p (`float`, *optional*):
|
||||
Minimum token probability, which will be scaled by the probability of the most likely token. It must be a
|
||||
value between `0.0` and `1.0`. Typical values are in the `0.01-0.2` range.
|
||||
repetition_penalty (`float`, *optional*, defaults to `1.0`):
|
||||
@ -84,9 +84,9 @@ class OnlineDPOConfig(TrainingArguments):
|
||||
Whether to use the `transformers` paged implementation for generation. If set to `True`, the `transformers`
|
||||
paged implementation will be used for generation instead of the default padded implementation. This
|
||||
parameter is only effective when `use_vllm` is set to `False`.
|
||||
cache_implementation (`str` or `None`, *optional*, defaults to `None`):
|
||||
cache_implementation (`str`, *optional*):
|
||||
Implementation of the cache method for faster generation when `use_vllm` is set to `False`.
|
||||
generation_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
|
||||
generation_kwargs (`dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments to pass to `GenerationConfig` (if using transformers) or `SamplingParams` (if
|
||||
using vLLM) when sampling completions. This can be used to further customize the generation behavior, such
|
||||
as setting `suppress_tokens`, `num_beams`, etc. If it contains keys that conflict with the other generation
|
||||
@ -109,12 +109,12 @@ class OnlineDPOConfig(TrainingArguments):
|
||||
server is running (start with `trl vllm-serve`).
|
||||
- `"colocate"`: vLLM will run in the same process and share the training GPUs. This avoids the need for a
|
||||
separate server but may cause resource contention with training.
|
||||
vllm_guided_decoding_regex (`str` or `None`, *optional*, defaults to `None`):
|
||||
vllm_guided_decoding_regex (`str`, *optional*):
|
||||
Regex for vLLM guided decoding. If `None` (default), guided decoding is disabled.
|
||||
|
||||
> Parameters that control the vLLM server (only used when `vllm_mode` is `"server"`)
|
||||
|
||||
vllm_server_base_url (`str` or `None`, *optional*, defaults to `None`):
|
||||
vllm_server_base_url (`str`, *optional*):
|
||||
Base URL for the vLLM server (e.g., `"http://localhost:8000"`). If provided, `vllm_server_host` and
|
||||
`vllm_server_port` are ignored.
|
||||
vllm_server_host (`str`, *optional*, defaults to `"0.0.0.0"`):
|
||||
@ -143,7 +143,7 @@ class OnlineDPOConfig(TrainingArguments):
|
||||
improving generation speed. However, disabling this option allows training models that exceed the VRAM
|
||||
capacity of a single GPU, albeit at the cost of slower generation. Disabling this option is not compatible
|
||||
with vLLM generation.
|
||||
model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
|
||||
model_init_kwargs (`dict[str, Any]`, *optional*):
|
||||
Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
|
||||
string.
|
||||
"""
|
||||
|
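A sketch of an [`OnlineDPOConfig`]; note that `judge` and `reward_model_path` are mutually exclusive, so only one is set here. The reward model path and values are illustrative.

```python
from trl import OnlineDPOConfig

config = OnlineDPOConfig(
    output_dir="online-dpo",
    reward_model_path="trl-lib/Qwen2-0.5B-Reward",  # leave judge=None when a reward model is used
    max_new_tokens=64,
    temperature=0.9,
    missing_eos_penalty=1.0,  # positive penalty applied when no EOS token is generated
    top_k=50,                 # None disables top-k filtering
    dataset_num_proc=4,
)
```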
@ -125,7 +125,7 @@ class OnlineDPOTrainer(Trainer):
|
||||
model.
|
||||
judge (`BasePairwiseJudge`):
|
||||
The judge to use for pairwise comparison of model completions.
|
||||
reward_funcs (`Union[RewardFunc, list[RewardFunc]]`, *optional*, defaults to `None`):
|
||||
reward_funcs (`Union[RewardFunc, list[RewardFunc]]`, *optional*):
|
||||
Reward functions to be used for computing the rewards. To compute the rewards, we call all the reward
|
||||
functions with the prompts and completions and sum the rewards. Can be either:
|
||||
|
||||
@ -144,11 +144,11 @@ class OnlineDPOTrainer(Trainer):
|
||||
The dataset to use for training.
|
||||
eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
|
||||
The dataset to use for evaluation.
|
||||
processing_class ([`~transformers.PreTrainedTokenizerBase`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
|
||||
processing_class ([`~transformers.PreTrainedTokenizerBase`] or [`~transformers.ProcessorMixin`], *optional*):
|
||||
Processing class used to process the data. If provided, will be used to automatically process the inputs
|
||||
for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
|
||||
reuse the fine-tuned model.
|
||||
reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*, defaults to `None`):
|
||||
reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*):
|
||||
Processing classes corresponding to the reward functions specified in `reward_funcs`. Can be either:
|
||||
|
||||
- A single processing class: Used when `reward_funcs` contains only one reward function.
|
||||
@ -156,7 +156,7 @@ class OnlineDPOTrainer(Trainer):
|
||||
|
||||
If set to `None`, the tokenizer for each model-based reward function is automatically loaded using
|
||||
[`~transformers.AutoTokenizer.from_pretrained`].
|
||||
peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`):
|
||||
peft_config ([`~peft.PeftConfig`], *optional*):
|
||||
PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
|
||||
compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
|
||||
The function to use to compute the metrics. Must take a `EvalPrediction` and return a dictionary string to
|
||||
@ -1519,11 +1519,11 @@ class OnlineDPOTrainer(Trainer):
|
||||
Creates a draft of a model card using the information available to the `Trainer`.
|
||||
|
||||
Args:
|
||||
model_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
model_name (`str`, *optional*):
|
||||
Name of the model.
|
||||
dataset_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
dataset_name (`str`, *optional*):
|
||||
Name of the dataset used for training.
|
||||
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
|
||||
tags (`str`, `list[str]`, *optional*):
|
||||
Tags to be associated with the model card.
|
||||
"""
|
||||
if not self.is_world_process_zero():
|
||||
|
@ -37,7 +37,7 @@ class ORPOConfig(TrainingArguments):
|
||||
to use the default data collator.
|
||||
max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
|
||||
Maximum length of the prompt. This argument is required if you want to use the default data collator.
|
||||
max_completion_length (`int` or `None`, *optional*, defaults to `None`):
|
||||
max_completion_length (`int`, *optional*):
|
||||
Maximum length of the completion. This argument is required if you want to use the default data collator
|
||||
and your model is an encoder-decoder.
|
||||
beta (`float`, *optional*, defaults to `0.1`):
|
||||
@ -48,20 +48,20 @@ class ORPOConfig(TrainingArguments):
|
||||
Whether to disable dropout in the model.
|
||||
label_pad_token_id (`int`, *optional*, defaults to `-100`):
|
||||
Label pad token id. This argument is required if you want to use the default data collator.
|
||||
padding_value (`int` or `None`, *optional*, defaults to `None`):
|
||||
padding_value (`int`, *optional*):
|
||||
Padding value to use. If `None`, the padding value of the tokenizer is used.
|
||||
truncation_mode (`str`, *optional*, defaults to `"keep_end"`):
|
||||
Truncation mode to use when the prompt is too long. Possible values are `"keep_end"` or `"keep_start"`.
|
||||
This argument is required if you want to use the default data collator.
|
||||
generate_during_eval (`bool`, *optional*, defaults to `False`):
|
||||
If `True`, generates and logs completions from the model to W&B or Comet during evaluation.
|
||||
is_encoder_decoder (`bool` or `None`, *optional*, defaults to `None`):
|
||||
is_encoder_decoder (`bool`, *optional*):
|
||||
When using the `model_init` argument (callable) to instantiate the model instead of the `model` argument,
|
||||
you need to specify if the model returned by the callable is an encoder-decoder model.
|
||||
model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
|
||||
model_init_kwargs (`dict[str, Any]`, *optional*):
|
||||
Keyword arguments to pass to `AutoModelForCausalLM.from_pretrained` when instantiating the model from a
|
||||
string.
|
||||
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
|
||||
dataset_num_proc (`int`, *optional*):
|
||||
Number of processes to use for processing the dataset.
|
||||
"""
|
||||
|
||||
|
@ -94,7 +94,7 @@ class ORPOTrainer(Trainer):
|
||||
The dataset to use for training.
|
||||
eval_dataset (`datasets.Dataset`):
|
||||
The dataset to use for evaluation.
|
||||
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
|
||||
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
|
||||
Processing class used to process the data. If provided, will be used to automatically process the inputs
|
||||
for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
|
||||
reuse the fine-tuned model.
|
||||
@ -989,7 +989,7 @@ class ORPOTrainer(Trainer):
|
||||
Args:
|
||||
logs (`dict[str, float]`):
|
||||
The values to log.
|
||||
start_time (`float` or `None`, *optional*, defaults to `None`):
|
||||
start_time (`float`, *optional*):
|
||||
Start time of the training.
|
||||
"""
|
||||
# logs either has 'loss' or 'eval_loss'
|
||||
@ -1042,11 +1042,11 @@ class ORPOTrainer(Trainer):
|
||||
Creates a draft of a model card using the information available to the `Trainer`.
|
||||
|
||||
Args:
|
||||
model_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
model_name (`str`, *optional*):
|
||||
Name of the model.
|
||||
dataset_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
dataset_name (`str`, *optional*):
|
||||
Name of the dataset used for training.
|
||||
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
|
||||
tags (`str`, `list[str]`, *optional*):
|
||||
Tags to be associated with the model card.
|
||||
"""
|
||||
if not self.is_world_process_zero():
|
||||
|
@ -37,9 +37,9 @@ class PPOConfig(OnPolicyConfig):
|
||||
Name of this experiment.
|
||||
reward_model_path (`str`, *optional*, defaults to `"EleutherAI/pythia-160m"`):
|
||||
Path to the reward model.
|
||||
model_adapter_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
model_adapter_name (`str`, *optional*):
|
||||
Name of the train target PEFT adapter, when using LoRA with multiple adapters.
|
||||
ref_adapter_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
ref_adapter_name (`str`, *optional*):
|
||||
Name of the reference PEFT adapter, when using LoRA with multiple adapters.
|
||||
num_ppo_epochs (`int`, *optional*, defaults to `4`):
|
||||
Number of epochs to train.
|
||||
|
@ -804,11 +804,11 @@ class PPOTrainer(Trainer):
|
||||
Creates a draft of a model card using the information available to the `Trainer`.
|
||||
|
||||
Args:
|
||||
model_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
model_name (`str`, *optional*):
|
||||
Name of the model.
|
||||
dataset_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
dataset_name (`str`, *optional*):
|
||||
Name of the dataset used for training.
|
||||
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
|
||||
tags (`str`, `list[str]`, *optional*):
|
||||
Tags to be associated with the model card.
|
||||
"""
|
||||
if not self.is_world_process_zero():
|
||||
|
@ -36,7 +36,7 @@ class PRMConfig(TrainingArguments):
|
||||
Maximum length of the sequences (prompt + completion) used for truncation.
|
||||
max_prompt_length (`int` or `None`, *optional*, defaults to `512`):
|
||||
Maximum length of the prompt used for truncation.
|
||||
max_completion_length (`int` or `None`, *optional*, defaults to `None`):
|
||||
max_completion_length (`int`, *optional*):
|
||||
Maximum length of the completion used for truncation. The completion is the concatenation of the steps.
|
||||
disable_dropout (`bool`, *optional*, defaults to `True`):
|
||||
Whether to disable dropout in the model.
|
||||
@ -44,7 +44,7 @@ class PRMConfig(TrainingArguments):
|
||||
Separator used to separate each step of the reasoning process.
|
||||
train_on_last_step_only (`bool`, *optional*, defaults to `False`):
|
||||
Whether to train only on the last step.
|
||||
dataset_num_proc (`int`, *optional*, defaults to `None`):
|
||||
dataset_num_proc (`int`, *optional*):
|
||||
Number of processes to use for processing the dataset.
|
||||
"""
|
||||
|
||||
|
@ -66,7 +66,7 @@ class PRMTrainer(Trainer):
|
||||
The dataset to use for training.
|
||||
eval_dataset (`datasets.Dataset`):
|
||||
The dataset to use for evaluation.
|
||||
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
|
||||
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
|
||||
Processing class used to process the data. If provided, will be used to automatically process the inputs
|
||||
for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
|
||||
reuse the fine-tuned model.
|
||||
@ -299,11 +299,11 @@ class PRMTrainer(Trainer):
|
||||
Creates a draft of a model card using the information available to the `Trainer`.
|
||||
|
||||
Args:
|
||||
model_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
model_name (`str`, *optional*):
|
||||
Name of the model.
|
||||
dataset_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
dataset_name (`str`, *optional*):
|
||||
Name of the dataset used for training.
|
||||
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
|
||||
tags (`str`, `list[str]`, *optional*):
|
||||
Tags to be associated with the model card.
|
||||
"""
|
||||
if not self.is_world_process_zero():
|
||||
|
@ -37,9 +37,9 @@ class RewardConfig(TrainingArguments):
|
||||
limit. This argument is required if you want to use the default data collator.
|
||||
disable_dropout (`bool`, *optional*, defaults to `True`):
|
||||
Whether to disable dropout in the model.
|
||||
dataset_num_proc (`int`, *optional*, defaults to `None`):
|
||||
dataset_num_proc (`int`, *optional*):
|
||||
Number of processes to use for processing the dataset.
|
||||
center_rewards_coefficient (`float`, *optional*, defaults to `None`):
|
||||
center_rewards_coefficient (`float`, *optional*):
|
||||
Coefficient to incentivize the reward model to output mean-zero rewards (proposed by
|
||||
https://huggingface.co/papers/2312.09244, Eq. 2). Recommended value: `0.01`.
|
||||
remove_unused_columns (`bool`, *optional*, defaults to `False`):
|
||||
|
@ -368,11 +368,11 @@ class RewardTrainer(Trainer):
|
||||
Creates a draft of a model card using the information available to the `Trainer`.
|
||||
|
||||
Args:
|
||||
model_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
model_name (`str`, *optional*):
|
||||
Name of the model.
|
||||
dataset_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
dataset_name (`str`, *optional*):
|
||||
Name of the dataset used for training.
|
||||
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
|
||||
tags (`str`, `list[str]`, *optional*):
|
||||
Tags to be associated with the model card.
|
||||
"""
|
||||
if not self.is_world_process_zero():
|
||||
|
@ -35,7 +35,7 @@ class RLOOConfig(TrainingArguments):
|
||||
Parameters:
|
||||
> Parameters that control the model and reference model
|
||||
|
||||
model_init_kwargs (`str`, `dict[str, Any]` or `None`, *optional*, defaults to `None`):
|
||||
model_init_kwargs (`str`, `dict[str, Any]`, *optional*):
|
||||
Keyword arguments for [`~transformers.AutoModelForCausalLM.from_pretrained`], used when the `model`
|
||||
argument of the [`GRPOTrainer`] is provided as a string.
|
||||
disable_dropout (`bool`, *optional*, defaults to `False`):
|
||||
@ -64,11 +64,11 @@ class RLOOConfig(TrainingArguments):
|
||||
|
||||
> Parameters that control generation
|
||||
|
||||
generation_batch_size: (`int` or `None`, *optional*, defaults to `None`):
|
||||
generation_batch_size: (`int`, *optional*):
|
||||
Batch size to use for generation. If `None`, it defaults to the effective training batch size:
|
||||
`per_device_train_batch_size * num_processes * steps_per_generation`. In other words, there is one
|
||||
generation batch processed per optimization step. Mutually exclusive with `steps_per_generation`.
|
||||
steps_per_generation: (`int` or `None`, *optional*, defaults to `None`):
|
||||
steps_per_generation: (`int`, *optional*):
|
||||
Number of steps per generation. If `None`, it defaults to `gradient_accumulation_steps`. Mutually exclusive
|
||||
with `generation_batch_size`.
|
||||
temperature (`float`, defaults to `1.0`):
|
||||
@ -76,10 +76,10 @@ class RLOOConfig(TrainingArguments):
|
||||
top_p (`float`, *optional*, defaults to `1.0`):
|
||||
Float that controls the cumulative probability of the top tokens to consider. Must be in (0, 1]. Set to
|
||||
`1.0` to consider all tokens.
|
||||
top_k (`int` or `None`, *optional*, defaults to `None`):
|
||||
top_k (`int`, *optional*):
|
||||
Number of highest probability vocabulary tokens to keep for top-k-filtering. If `None`, top-k-filtering is
|
||||
disabled and all tokens are considered.
|
||||
min_p (`float` or `None`, *optional*, defaults to `None`):
|
||||
min_p (`float`, *optional*):
|
||||
Minimum token probability, which will be scaled by the probability of the most likely token. It must be a
|
||||
value between `0.0` and `1.0`. Typical values are in the `0.01-0.2` range.
|
||||
repetition_penalty (`float`, *optional*, defaults to `1.0`):
|
||||
@ -90,9 +90,9 @@ class RLOOConfig(TrainingArguments):
|
||||
Whether to use the `transformers` paged implementation for generation. If set to `True`, the `transformers`
|
||||
paged implementation will be used for generation instead of the default padded implementation. This
|
||||
parameter is only effective when `use_vllm` is set to `False`.
|
||||
cache_implementation (`str` or `None`, *optional*, defaults to `None`):
|
||||
cache_implementation (`str`, *optional*):
|
||||
Implementation of the cache method for faster generation when `use_vllm` is set to `False`.
|
||||
generation_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
|
||||
generation_kwargs (`dict[str, Any]`, *optional*):
|
||||
Additional keyword arguments to pass to `GenerationConfig` (if using transformers) or `SamplingParams` (if
|
||||
using vLLM) when sampling completions. This can be used to further customize the generation behavior, such
|
||||
as setting `suppress_tokens`, `num_beams`, etc. If it contains keys that conflict with the other generation
|
||||
@ -115,12 +115,12 @@ class RLOOConfig(TrainingArguments):
|
||||
Model implementation to use for vLLM. Must be one of `"transformers"` or `"vllm"`. `"transformers"`: Use
|
||||
the `transformers` backend for model implementation. `"vllm"`: Use the `vllm` library for model
|
||||
implementation.
|
||||
vllm_guided_decoding_regex (`str` or `None`, *optional*, defaults to `None`):
|
||||
vllm_guided_decoding_regex (`str`, *optional*):
|
||||
Regex for vLLM guided decoding. If `None` (default), guided decoding is disabled.
|
||||
|
||||
> Parameters that control the vLLM server (only used when `vllm_mode` is `"server"`)
|
||||
|
||||
vllm_server_base_url (`str` or `None`, *optional*, defaults to `None`):
|
||||
vllm_server_base_url (`str`, *optional*):
|
||||
Base URL for the vLLM server (e.g., `"http://localhost:8000"`). If provided, `vllm_server_host` and
|
||||
`vllm_server_port` are ignored.
|
||||
vllm_server_host (`str`, *optional*, defaults to `"0.0.0.0"`):
|
||||
@ -151,16 +151,16 @@ class RLOOConfig(TrainingArguments):
|
||||
Number of iterations per batch (denoted as μ in the algorithm).
|
||||
epsilon (`float`, *optional*, defaults to `0.2`):
|
||||
Epsilon value for clipping.
|
||||
epsilon_high (`float` or `None`, *optional*, defaults to `None`):
|
||||
epsilon_high (`float`, *optional*):
|
||||
Upper-bound epsilon value for clipping. If not specified, it defaults to the same value as the lower-bound
|
||||
specified in argument `epsilon`. Paper [DAPO](https://huggingface.co/papers/2503.14476) recommends `0.28`.
|
||||
reward_weights (`list[float]` or `None`, *optional*, defaults to `None`):
|
||||
reward_weights (`list[float]`, *optional*):
|
||||
Weights for each reward function. Must match the number of reward functions. If `None`, all rewards are
|
||||
weighted equally with weight `1.0`.
|
||||
normalize_advantages (`bool`, *optional*, defaults to `False`):
|
||||
Whether to normalize advantages. Normalization is done per generation batch to have mean `0.0` and standard
|
||||
deviation of `1.0`.
|
||||
reward_clip_range (`tuple[float, float]` or `None`, *optional*, defaults to `None`):
|
||||
reward_clip_range (`tuple[float, float]`, *optional*):
|
||||
Clip range for rewards as (min, max). If `None`, no clipping is applied.
|
||||
mask_truncated_completions (`bool`, *optional*, defaults to `False`):
|
||||
When enabled, truncated completions are excluded from the loss calculation, preventing them from being
|
||||
@ -185,7 +185,7 @@ class RLOOConfig(TrainingArguments):
|
||||
log_completions (`bool`, *optional*, defaults to `False`):
|
||||
Whether to log a sample of (prompt, completion) pairs every `logging_steps` steps. If `rich` is installed,
|
||||
it prints the sample. If `wandb` logging is enabled, it logs it to `wandb`.
|
||||
num_completions_to_print (`int` or `None`, *optional*, defaults to `None`):
|
||||
num_completions_to_print (`int`, *optional*):
|
||||
Number of completions to print with `rich`. If `None`, all completions are logged.
|
||||
wandb_log_unique_prompts (`bool`, *optional*, defaults to `False`):
|
||||
Whether to log unique prompts in wandb. If `True`, only unique prompts are logged. If `False`, all prompts
|
||||
|
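A sketch of an [`RLOOConfig`] illustrating the mutually exclusive generation parameters and the clipping/reward options documented above; the values are placeholders.

```python
from trl import RLOOConfig

config = RLOOConfig(
    output_dir="rloo-model",
    steps_per_generation=4,     # mutually exclusive with generation_batch_size
    temperature=1.0,
    top_k=None,                 # None disables top-k filtering
    epsilon=0.2,
    epsilon_high=0.28,          # DAPO-recommended upper clipping bound
    reward_weights=[1.0, 0.5],  # one weight per reward function
    normalize_advantages=False,
)
```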
@ -160,7 +160,7 @@ class RLOOTrainer(Trainer):
|
||||
reward function's signature.
|
||||
- A list of reward functions, where each item can independently be any of the above types. Mixing different
|
||||
types within the list (e.g., a string model ID and a custom reward function) is allowed.
|
||||
args ([`RLOOConfig`], *optional*, defaults to `None`):
|
||||
args ([`RLOOConfig`], *optional*):
|
||||
Configuration for this trainer. If `None`, a default configuration is used.
|
||||
train_dataset ([`~datasets.Dataset`] or [`~datasets.IterableDataset`]):
|
||||
Dataset to use for training. It must include a column `"prompt"`. Any additional columns in the dataset is
|
||||
@ -171,12 +171,12 @@ class RLOOTrainer(Trainer):
|
||||
and content).
|
||||
eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
|
||||
Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
|
||||
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.ProcessorMixin`] or `None`, *optional*, defaults to `None`):
|
||||
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.ProcessorMixin`], *optional*):
|
||||
Processing class used to process the data. The padding side must be set to "left". If `None`, the
|
||||
processing class is loaded from the model's name with [`~transformers.AutoProcessor.from_pretrained`]. A
|
||||
padding token, `tokenizer.pad_token`, must be set. If the processing class has not set a padding token,
|
||||
`tokenizer.eos_token` will be used as the default.
|
||||
reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*, defaults to `None`):
|
||||
reward_processing_classes (`Union[PreTrainedTokenizerBase, list[PreTrainedTokenizerBase]]`, *optional*):
|
||||
Processing classes corresponding to the reward functions specified in `reward_funcs`. Can be either:
|
||||
|
||||
- A single processing class: Used when `reward_funcs` contains only one reward function.
|
||||
@ -186,7 +186,7 @@ class RLOOTrainer(Trainer):
|
||||
[`~transformers.AutoTokenizer.from_pretrained`]. For elements in `reward_funcs` that are custom reward
|
||||
functions (not [`~transformers.PreTrainedModel`]), the corresponding entries in `reward_processing_classes`
|
||||
are ignored.
|
||||
callbacks (list of [`~transformers.TrainerCallback`], *optional*, defaults to `None`):
|
||||
callbacks (list of [`~transformers.TrainerCallback`], *optional*):
|
||||
List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed
|
||||
in [here](https://huggingface.co/docs/transformers/main_classes/callback).
|
||||
|
||||
@ -195,7 +195,7 @@ class RLOOTrainer(Trainer):
|
||||
optimizers (`tuple[torch.optim.Optimizer, torch.optim.lr_scheduler.LambdaLR]`, *optional*, defaults to `(None, None)`):
|
||||
A tuple containing the optimizer and the scheduler to use. Will default to an instance of [`AdamW`] on your
|
||||
model and a scheduler given by [`get_linear_schedule_with_warmup`] controlled by `args`.
|
||||
peft_config ([`~peft.PeftConfig`], *optional*, defaults to `None`):
|
||||
peft_config ([`~peft.PeftConfig`], *optional*):
|
||||
PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
|
||||
"""
|
||||
|
||||
@ -1452,11 +1452,11 @@ class RLOOTrainer(Trainer):
|
||||
Creates a draft of a model card using the information available to the `Trainer`.
|
||||
|
||||
Args:
|
||||
model_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
model_name (`str`, *optional*):
|
||||
Name of the model.
|
||||
dataset_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
dataset_name (`str`, *optional*):
|
||||
Name of the dataset used for training.
|
||||
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
|
||||
tags (`str`, `list[str]`, *optional*):
|
||||
Tags to be associated with the model card.
|
||||
"""
|
||||
if not self.is_world_process_zero():
|
||||
|
@ -34,12 +34,12 @@ class SFTConfig(TrainingArguments):
|
||||
Parameters:
|
||||
> Parameters that control the model
|
||||
|
||||
model_init_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
|
||||
model_init_kwargs (`dict[str, Any]`, *optional*):
|
||||
Keyword arguments for [`~transformers.AutoModelForCausalLM.from_pretrained`], used when the `model`
|
||||
argument of the [`SFTTrainer`] is provided as a string. If you're training a MoE architecture and want to
|
||||
include the load balancing/auxiliary loss as a part of the final loss, remember to set
|
||||
`output_router_logits=True` in this dictionary.
|
||||
chat_template_path (`str` or `None`, *optional*, defaults to `None`):
|
||||
chat_template_path (`str`, *optional*):
|
||||
If specified, sets the model's chat template. This can either be the path to a tokenizer (local directory
|
||||
or Hugging Face Hub model) or a direct path to a Jinja template file. When using a Jinja file, you must
|
||||
ensure that any special tokens referenced in the template are added to the tokenizer and that the model's
|
||||
@ -49,16 +49,16 @@ class SFTConfig(TrainingArguments):
|
||||
|
||||
dataset_text_field (`str`, *optional*, defaults to `"text"`):
|
||||
Name of the column that contains text data in the dataset.
|
||||
dataset_kwargs (`dict[str, Any]` or `None`, *optional*, defaults to `None`):
|
||||
dataset_kwargs (`dict[str, Any]`, *optional*):
|
||||
Dictionary of optional keyword arguments for the dataset preparation. The only supported key is
|
||||
`skip_prepare_dataset`. When the model is a VLM, `skip_prepare_dataset` is automatically treated as `True`
|
||||
regardless of the provided value, since preprocessing is done on the fly.
|
||||
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
|
||||
dataset_num_proc (`int`, *optional*):
|
||||
Number of processes to use for processing the dataset.
|
||||
eos_token (`str` or `None`, *optional*, defaults to `None`):
|
||||
eos_token (`str`, *optional*):
|
||||
Token used to indicate the end of a turn or sequence. If `None`, it defaults to
|
||||
`processing_class.eos_token`.
|
||||
pad_token (`int` or `None`, *optional*, defaults to `None`):
|
||||
pad_token (`int`, *optional*):
|
||||
Token used for padding. If `None`, it defaults to `processing_class.pad_token`, or if that is also `None`,
|
||||
it falls back to `processing_class.eos_token`.
|
||||
max_length (`int` or `None`, *optional*, defaults to `1024`):
|
||||
@ -75,14 +75,14 @@ class SFTConfig(TrainingArguments):
|
||||
supported with the FlashAttention 2 or 3, which can efficiently handle the flattened batch structure. When
|
||||
packing is enabled with strategy `"bfd"`, padding-free is enabled, regardless of the value of this
|
||||
parameter.
|
||||
pad_to_multiple_of (`int` or `None`, *optional*, defaults to `None`):
|
||||
pad_to_multiple_of (`int`, *optional*):
|
||||
If set, the sequences will be padded to a multiple of this value.
|
||||
eval_packing (`bool` or `None`, *optional*, defaults to `None`):
|
||||
eval_packing (`bool`, *optional*):
|
||||
Whether to pack the eval dataset. If `None`, uses the same value as `packing`.
|
||||
|
||||
> Parameters that control the training
|
||||
|
||||
completion_only_loss (`bool` or `None`, *optional*, defaults to `None`):
|
||||
completion_only_loss (`bool`, *optional*):
|
||||
Whether to compute loss only on the completion part of the sequence. If set to `True`, loss is computed
|
||||
only on the completion, which is supported only for [prompt-completion](#prompt-completion) datasets. If
|
||||
`False`, loss is computed on the entire sequence. If `None` (default), the behavior depends on the dataset:
|
||||
|
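A sketch of an [`SFTConfig`] built from the fields above; values are illustrative, and `output_router_logits` only matters for MoE models.

```python
from trl import SFTConfig

config = SFTConfig(
    output_dir="sft-model",
    model_init_kwargs={"output_router_logits": True},  # include the MoE load-balancing loss
    dataset_text_field="text",
    dataset_num_proc=4,
    max_length=1024,
    pad_to_multiple_of=8,
    completion_only_loss=None,  # None: inferred from the dataset format
    eval_packing=None,          # None: same as `packing`
)
```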
@ -132,7 +132,7 @@ class DataCollatorForLanguageModeling(DataCollatorMixin):
|
||||
padding_free (`bool`, *optional*, defaults to `False`):
|
||||
If set to `True`, the sequences will be flattened into a single sequence, and the position IDs will be
|
||||
generated accordingly.
|
||||
pad_to_multiple_of (`int` or `None`, *optional*, defaults to `None`):
|
||||
pad_to_multiple_of (`int`, *optional*):
|
||||
If set, the sequences will be padded to a multiple of this value.
|
||||
return_tensors (`str`, *optional*, defaults to `"pt"`):
|
||||
Type of Tensor to return. Only `"pt"` is currently supported.
|
||||
@ -524,9 +524,9 @@ class SFTTrainer(Trainer):
|
||||
- A [`~transformers.PreTrainedModel`] object.
|
||||
If you're training a model with an MoE architecture and want to include the load balancing/auxiliary loss
|
||||
as a part of the final loss, remember to set the `output_router_logits` config of the model to `True`.
|
||||
args ([`SFTConfig`], *optional*, defaults to `None`):
|
||||
args ([`SFTConfig`], *optional*):
|
||||
Configuration for this trainer. If `None`, a default configuration is used.
|
||||
data_collator ([`~transformers.DataCollator`] or `None`, *optional*):
|
||||
data_collator ([`~transformers.DataCollator`], *optional*):
|
||||
Function to use to form a batch from a list of elements of the processed `train_dataset` or `eval_dataset`.
|
||||
Will default to [`~trainer.sft_trainer.DataCollatorForLanguageModeling`] if the model is a language model
|
||||
and [`~trainer.sft_trainer.DataCollatorForVisionLanguageModeling`] if the model is a vision-language model.
|
||||
@ -541,23 +541,23 @@ class SFTTrainer(Trainer):
|
||||
The trainer also supports processed datasets (tokenized) as long as they contain an `input_ids` field.
|
||||
eval_dataset ([`~datasets.Dataset`], [`~datasets.IterableDataset`] or `dict[str, Union[Dataset, IterableDataset]]`):
|
||||
Dataset to use for evaluation. It must meet the same requirements as `train_dataset`.
|
||||
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.ProcessorMixin`] or `None`, *optional*, defaults to `None`):
|
||||
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.ProcessorMixin`], *optional*):
|
||||
Processing class used to process the data. If `None`, the processing class is loaded from the model's name
|
||||
with [`~transformers.AutoProcessor.from_pretrained`]. A padding token, `tokenizer.pad_token`, must be set.
|
||||
If the processing class has not set a padding token, `tokenizer.eos_token` will be used as the default.
|
||||
compute_loss_func (`Callable` or `None`, *optional*, defaults to `None`):
|
||||
compute_loss_func (`Callable`, *optional*):
|
||||
A function that accepts the raw model outputs, labels, and the number of items in the entire accumulated
|
||||
batch (batch_size * gradient_accumulation_steps) and returns the loss. For example, see the default [loss
|
||||
function](https://github.com/huggingface/transformers/blob/052e652d6d53c2b26ffde87e039b723949a53493/src/transformers/trainer.py#L3618)
|
||||
used by [`Trainer`].
|
||||
compute_metrics (`Callable[[EvalPrediction], dict]` or `None`, *optional*, defaults to `None`):
|
||||
compute_metrics (`Callable[[EvalPrediction], dict]`, *optional*):
|
||||
The function that will be used to compute metrics at evaluation. Must take a
|
||||
[`~transformers.EvalPrediction`] and return a dictionary string to metric values. When passing
|
||||
[`SFTConfig`] with `batch_eval_metrics` set to `True`, your `compute_metrics` function must take a boolean
|
||||
`compute_result` argument. This will be triggered after the last eval batch to signal that the function
|
||||
needs to calculate and return the global summary statistics rather than accumulating the batch-level
|
||||
statistics.
|
||||
callbacks (list of [`~transformers.TrainerCallback`] or `None`, *optional*, defaults to `None`):
|
||||
callbacks (list of [`~transformers.TrainerCallback`], *optional*):
|
||||
List of callbacks to customize the training loop. Will add those to the list of default callbacks detailed
|
||||
in [here](https://huggingface.co/docs/transformers/main_classes/callback).
|
||||
|
||||
@ -566,21 +566,21 @@ class SFTTrainer(Trainer):
|
||||
optimizers (`tuple[Optional[torch.optim.Optimizer], Optional[torch.optim.lr_scheduler.LambdaLR]]`, *optional*, defaults to `(None, None)`):
|
||||
A tuple containing the optimizer and the scheduler to use. Will default to an instance of `AdamW` on your
|
||||
model and a scheduler given by [`~transformers.get_linear_schedule_with_warmup`] controlled by `args`.
|
||||
optimizer_cls_and_kwargs (`tuple[Type[torch.optim.Optimizer], Dict[str, Any]]`, *optional*, defaults to `None`):
|
||||
optimizer_cls_and_kwargs (`tuple[Type[torch.optim.Optimizer], Dict[str, Any]]`, *optional*):
|
||||
A tuple containing the optimizer class and keyword arguments to use. Overrides `optim` and `optim_args` in
|
||||
`args`. Incompatible with the `optimizers` argument.
|
||||
|
||||
Unlike `optimizers`, this argument avoids the need to place model parameters on the correct devices before
|
||||
initializing the Trainer.
|
||||
preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*, defaults to `None`):
|
||||
preprocess_logits_for_metrics (`Callable[[torch.Tensor, torch.Tensor], torch.Tensor]`, *optional*):
|
||||
A function that preprocesses the logits right before caching them at each evaluation step. Must take two
|
||||
tensors, the logits and the labels, and return the logits once processed as desired. The modifications made
|
||||
by this function will be reflected in the predictions received by `compute_metrics`.
|
||||
|
||||
Note that the labels (second parameter) will be `None` if the dataset does not have them.
|
||||
peft_config ([`~peft.PeftConfig`] or `None`, *optional*, defaults to `None`):
|
||||
peft_config ([`~peft.PeftConfig`], *optional*):
|
||||
PEFT configuration used to wrap the model. If `None`, the model is not wrapped.
|
||||
formatting_func (`Callable` or `None`, *optional*, defaults to `None`):
|
||||
formatting_func (`Callable`, *optional*):
|
||||
Formatting function applied to the dataset before tokenization. Applying the formatting function explicitly
|
||||
converts the dataset into a [language modeling](#language-modeling) type.
|
||||
"""
|
||||
@ -1220,11 +1220,11 @@ class SFTTrainer(Trainer):
|
||||
Creates a draft of a model card using the information available to the `Trainer`.
|
||||
|
||||
Args:
|
||||
model_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
model_name (`str`, *optional*):
|
||||
Name of the model.
|
||||
dataset_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
dataset_name (`str`, *optional*):
|
||||
Name of the dataset used for training.
|
||||
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
|
||||
tags (`str`, `list[str]`, *optional*):
|
||||
Tags to be associated with the model card.
|
||||
"""
|
||||
if not self.is_world_process_zero():
|
||||
|
@ -262,7 +262,7 @@ def pad(
|
||||
Value to use for padding. Default is 0.
|
||||
padding_side (`str`):
|
||||
Side on which to add padding. Must be 'left' or 'right'. Default is 'right'.
|
||||
pad_to_multiple_of (`int`, *optional*, defaults to `None`):
|
||||
pad_to_multiple_of (`int`, *optional*):
|
||||
If set will pad the sequence to a multiple of the provided value.
|
||||
|
||||
Returns:
|
||||
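A sketch of the `pad` helper documented above, assuming it is the utility exposed in `trl.trainer.utils`:

```python
import torch
from trl.trainer.utils import pad

seqs = [torch.tensor([1, 2, 3]), torch.tensor([4, 5])]
# Right-pad with 0 and round the padded length up to a multiple of 4 -> shape (2, 4).
batch = pad(seqs, padding_value=0, padding_side="right", pad_to_multiple_of=4)
```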
@ -709,13 +709,13 @@ class OnPolicyConfig(TrainingArguments):
|
||||
command line.
|
||||
|
||||
Parameters:
|
||||
run_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
run_name (`str`, *optional*):
|
||||
Name of the run.
|
||||
dataset_num_proc (`int` or `None`, *optional*, defaults to `None`):
|
||||
dataset_num_proc (`int`, *optional*):
|
||||
Number of processes to use for processing the dataset.
|
||||
num_mini_batches (`int`, *optional*, defaults to `1`):
|
||||
Number of minibatches to split a batch into.
|
||||
total_episodes (`int` or `None`, *optional*, defaults to `None`):
|
||||
total_episodes (`int`, *optional*):
|
||||
Total number of episodes in the dataset.
|
||||
local_rollout_forward_batch_size (`int`, *optional*, defaults to `64`):
|
||||
Per-rank batch size for the no-grad forward pass in the rollout phase.
|
||||
@ -723,38 +723,38 @@ class OnPolicyConfig(TrainingArguments):
|
||||
Number of debugging samples generations (i.e., `generate_completions` calls) throughout training.
|
||||
response_length (`int`, *optional*, defaults to `53`):
|
||||
Length of the response.
|
||||
stop_token (`str` or `None`, *optional*, defaults to `None`):
|
||||
stop_token (`str`, *optional*):
|
||||
Specifies the stop token to use for text generation. This parameter is mutually exclusive with
|
||||
`stop_token_id`.
|
||||
|
||||
- `None`: No stop token is applied, unless `stop_token_id` is specified.
|
||||
- `'eos'`: Uses the tokenizer's `eos_token`.
|
||||
|
||||
stop_token_id (`int` or `None`, *optional*, defaults to `None`):
|
||||
stop_token_id (`int`, *optional*):
|
||||
Specifies the ID of the stop token to use for text generation. If `None`, no stop token ID is applied,
|
||||
unless `stop_token` is specified. This parameter is mutually exclusive with `stop_token`.
|
||||
temperature (`float`, *optional*, defaults to `0.7`):
|
||||
Sampling temperature.
|
||||
missing_eos_penalty (`float` or `None`, *optional*, defaults to `None`):
|
||||
missing_eos_penalty (`float`, *optional*):
|
||||
Penalty applied to the score when the model fails to generate an EOS token. This is useful to encourage the model to
|
||||
generate completions shorter than the maximum length (`max_new_tokens`). The penalty must be a positive
|
||||
value.
|
||||
sft_model_path (`str`, *optional*, defaults to `"EleutherAI/pythia-160m"`):
|
||||
Path to the SFT model.
|
||||
world_size (`int` or `None`, *optional*, defaults to `None`):
|
||||
world_size (`int`, *optional*):
|
||||
Number of processes (GPUs) to use for the training.
|
||||
num_total_batches (`int` or `None`, *optional*, defaults to `None`):
|
||||
num_total_batches (`int`, *optional*):
|
||||
Number of total batches to train.
|
||||
micro_batch_size (`int` or `None`, *optional*, defaults to `None`):
|
||||
micro_batch_size (`int`, *optional*):
|
||||
Micro batch size across devices (HF's `per_device_train_batch_size` * `world_size`).
|
||||
local_batch_size (`int` or `None`, *optional*, defaults to `None`):
|
||||
local_batch_size (`int`, *optional*):
|
||||
Batch size per GPU (HF's `per_device_train_batch_size` * `gradient_accumulation_steps`).
|
||||
batch_size (`int` or `None`, *optional*, defaults to `None`):
|
||||
batch_size (`int`, *optional*):
|
||||
Batch size across devices (HF's `per_device_train_batch_size` * `world_size` *
|
||||
`gradient_accumulation_steps`).
|
||||
local_mini_batch_size (`int` or `None`, *optional*, defaults to `None`):
|
||||
local_mini_batch_size (`int`, *optional*):
|
||||
Mini batch size per GPU.
|
||||
mini_batch_size (`int` or `None`, *optional*, defaults to `None`):
|
||||
mini_batch_size (`int`, *optional*):
|
||||
Mini batch size across GPUs.
|
||||
push_to_hub (`bool`, *optional*, defaults to `False`):
|
||||
Whether to push the model to the Hub after training.
|
||||
@ -1539,7 +1539,7 @@ def print_prompt_completions_sample(
|
||||
List of advantages corresponding to the prompts and completions.
|
||||
step (`int`):
|
||||
Current training step number, used in the output title.
|
||||
num_samples (`int` or `None`, *optional*, defaults to `None`):
|
||||
num_samples (`int`, *optional*):
|
||||
Number of random samples to display. If `None` (default), all items will be displayed.
|
||||
|
||||
Example:
|
||||
@ -1616,7 +1616,7 @@ class RepeatSampler(Sampler):
|
||||
Number of times to repeat the full sampling process.
|
||||
shuffle (`bool`, *optional*, defaults to `True`):
|
||||
Whether to shuffle the dataset.
|
||||
seed (`int` or `None`, *optional*, defaults to `None`):
|
||||
seed (`int`, *optional*):
|
||||
Random seed for reproducibility (only affects this sampler).
|
||||
|
||||
Example:
|
||||
|
@ -88,7 +88,7 @@ class XPOTrainer(OnlineDPOTrainer):
|
||||
The dataset to use for training.
|
||||
eval_dataset (`datasets.Dataset`):
|
||||
The dataset to use for evaluation.
|
||||
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*, defaults to `None`):
|
||||
processing_class ([`~transformers.PreTrainedTokenizerBase`], [`~transformers.BaseImageProcessor`], [`~transformers.FeatureExtractionMixin`] or [`~transformers.ProcessorMixin`], *optional*):
|
||||
Processing class used to process the data. If provided, will be used to automatically process the inputs
|
||||
for the model, and it will be saved along the model to make it easier to rerun an interrupted training or
|
||||
reuse the fine-tuned model.
|
||||
@ -555,11 +555,11 @@ class XPOTrainer(OnlineDPOTrainer):
|
||||
Creates a draft of a model card using the information available to the `Trainer`.
|
||||
|
||||
Args:
|
||||
model_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
model_name (`str`, *optional*):
|
||||
Name of the model.
|
||||
dataset_name (`str` or `None`, *optional*, defaults to `None`):
|
||||
dataset_name (`str`, *optional*):
|
||||
Name of the dataset used for training.
|
||||
tags (`str`, `list[str]` or `None`, *optional*, defaults to `None`):
|
||||
tags (`str`, `list[str]`, *optional*):
|
||||
Tags to be associated with the model card.
|
||||
"""
|
||||
if not self.is_world_process_zero():