[Trainer] [Breaking change] use_cache default to False (#41585)

* use_cache default to `False` when training

* style

* Fix comment

* add checks

* style

* set

* switch
This commit is contained in:
Marc Sun
2025-10-16 18:51:36 +02:00
committed by GitHub
parent fe11cbb808
commit bdbc2d037b
2 changed files with 15 additions and 0 deletions

View File

@ -738,6 +738,10 @@ class Trainer:
self._train_batch_size = args.train_batch_size
self._created_lr_scheduler = False
# Set use_cache for the model
if getattr(self.model, "config", None) is not None:
self.model.config.use_cache = self.args.use_cache
# very last
self._memory_tracker.stop_and_update_metrics()

View File

@ -752,6 +752,10 @@ class TrainingArguments:
Whether or not to average tokens across devices. If enabled, will use all_reduce to synchronize
num_tokens_in_batch for precise loss calculation. Reference:
https://github.com/huggingface/transformers/issues/34242
use_cache (`bool`, *optional*, defaults to `False`):
Whether or not to enable cache for the model. For training, this is usually not needed apart from some PEFT methods that use `past_key_values`.
"""
# Sometimes users will pass in a `str` repr of a dict in the CLI
@ -1382,6 +1386,13 @@ class TrainingArguments:
},
)
use_cache: bool = field(
default=False,
metadata={
"help": "Whether or not to use cache for the model. For training, this is usually not needed apart from some PEFT methods that use `past_key_values`."
},
)
def __post_init__(self):
# Set default output_dir if not provided
if self.output_dir is None: