Mirror of https://github.com/huggingface/transformers.git (synced 2025-10-21 01:23:56 +08:00)

Compare commits: single-fil ... v4.6.1

6 Commits:
fb27b276e7
8c8a5d3661
8924a5f3de
c81584a292
265c26e19e
25dee4a423
@@ -379,6 +379,8 @@ jobs:
           keys:
             - v0.4-deploy_doc-{{ checksum "setup.py" }}
             - v0.4-{{ checksum "setup.py" }}
+      - run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev
+      - run: pip install --upgrade pip
       - run: pip install ."[docs]"
       - save_cache:
           key: v0.4-deploy_doc-{{ checksum "setup.py" }}
@@ -27,7 +27,8 @@ author = "huggingface"
 # The short X.Y version
 version = ""
 # The full version, including alpha/beta/rc tags
-release = "4.5.0.dev0"
+release = u'4.6.1'


 # Prefix link to point to master, comment this during version release and uncomment below line
@@ -27,6 +27,7 @@ There are two categories of pipeline abstractions to be aware about:
 - :class:`~transformers.ConversationalPipeline`
 - :class:`~transformers.FeatureExtractionPipeline`
 - :class:`~transformers.FillMaskPipeline`
+- :class:`~transformers.ImageClassificationPipeline`
 - :class:`~transformers.QuestionAnsweringPipeline`
 - :class:`~transformers.SummarizationPipeline`
 - :class:`~transformers.TextClassificationPipeline`
@@ -36,7 +37,6 @@ There are two categories of pipeline abstractions to be aware about:
 - :class:`~transformers.ZeroShotClassificationPipeline`
 - :class:`~transformers.Text2TextGenerationPipeline`
 - :class:`~transformers.TableQuestionAnsweringPipeline`
-- :class:`~transformers.ImageClassificationPipeline`

 The pipeline abstraction
 ~~~~~~~~~~~~~~~~~~~~~~~~
@@ -39,8 +39,9 @@ class QuestionAnsweringTrainer(Trainer):
         # Temporarily disable metric computation, we will do it in the loop here.
         compute_metrics = self.compute_metrics
         self.compute_metrics = None
+        eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
         try:
-            output = self.prediction_loop(
+            output = eval_loop(
                 eval_dataloader,
                 description="Evaluation",
                 # No point gathering the predictions if there are no metrics, otherwise we defer to
@@ -72,8 +73,9 @@ class QuestionAnsweringTrainer(Trainer):
         # Temporarily disable metric computation, we will do it in the loop here.
         compute_metrics = self.compute_metrics
         self.compute_metrics = None
+        eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
         try:
-            output = self.prediction_loop(
+            output = eval_loop(
                 predict_dataloader,
                 description="Prediction",
                 # No point gathering the predictions if there are no metrics, otherwise we defer to
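
Note: both hunks above stop hard-coding `self.prediction_loop` and instead pick whichever loop the `Trainer` is configured to use. Below is a minimal sketch of that dispatch pattern with a stand-in trainer object; only `use_legacy_prediction_loop`, `prediction_loop` and `evaluation_loop` come from the diff, everything else is illustrative.

    # Hedged sketch of the loop selection above, outside of transformers.
    class _FakeArgs:
        use_legacy_prediction_loop = False  # flips back to the old loop when True

    class _FakeTrainer:
        args = _FakeArgs()

        def prediction_loop(self, dataloader, description):
            return f"legacy loop ran {description}"

        def evaluation_loop(self, dataloader, description):
            return f"new loop ran {description}"

        def evaluate(self, dataloader):
            # Same selection logic as the patched QuestionAnsweringTrainer.evaluate.
            eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
            return eval_loop(dataloader, description="Evaluation")

    print(_FakeTrainer().evaluate(dataloader=None))  # -> new loop ran Evaluation
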
setup.py
@@ -320,7 +320,7 @@ install_requires = [

 setup(
     name="transformers",
-    version="4.6.0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    version="4.6.1",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
     author="Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Sam Shleifer, Patrick von Platen, Sylvain Gugger, Suraj Patil, Stas Bekman, Google AI Language Team Authors, Open AI team Authors, Facebook AI Authors, Carnegie Mellon University Authors",
     author_email="thomas@huggingface.co",
     description="State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch",
@@ -22,7 +22,7 @@
 # to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
 # in the namespace without actually importing anything (and especially none of the backends).

-__version__ = "4.6.0"
+__version__ = "4.6.1"

 # Work around to update TensorFlow's absl.logging threshold which alters the
 # default Python logging output behavior when present.
@@ -1037,7 +1037,10 @@ class AlbertForSequenceClassification(AlbertPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
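
Note: the same three-line fix is repeated for every `*ForSequenceClassification` head below. With `problem_type="regression"` and `num_labels == 1`, the logits have shape `(batch_size, 1)` while the labels are usually `(batch_size,)`, so the old `loss_fct(logits.view(-1, self.num_labels), labels)` call broadcasts the two tensors and PyTorch warns about mismatched target/input sizes. A small standalone illustration (plain PyTorch, values made up):

    import torch
    from torch.nn import MSELoss

    logits = torch.randn(4, 1)   # what a num_labels == 1 regression head produces
    labels = torch.randn(4)      # what callers typically pass
    loss_fct = MSELoss()

    # Old behaviour: (4, 1) vs (4,) broadcast to (4, 4) -> UserWarning and a skewed loss.
    broken = loss_fct(logits.view(-1, 1), labels)

    # New behaviour: squeeze both sides so the shapes match exactly.
    fixed = loss_fct(logits.squeeze(), labels.squeeze())
    print(broken.item(), fixed.item())
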
@@ -1528,7 +1528,10 @@ class BertForSequenceClassification(BertPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -2671,7 +2671,10 @@ class BigBirdForSequenceClassification(BigBirdPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -1023,7 +1023,10 @@ class ConvBertForSequenceClassification(ConvBertPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -642,7 +642,10 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -964,7 +964,10 @@ class ElectraForSequenceClassification(ElectraPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -1298,7 +1298,10 @@ class FunnelForSequenceClassification(FunnelPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -1872,7 +1872,10 @@ class LongformerForSequenceClassification(LongformerPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -1279,7 +1279,10 @@ class MobileBertForSequenceClassification(MobileBertPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -2445,7 +2445,10 @@ class ReformerForSequenceClassification(ReformerPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -1178,7 +1178,10 @@ class RobertaForSequenceClassification(RobertaPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -798,7 +798,10 @@ class SqueezeBertForSequenceClassification(SqueezeBertPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -847,7 +847,10 @@ class XLMForSequenceClassification(XLMPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -1562,7 +1562,10 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -1523,10 +1523,6 @@ class Trainer:
         if self.is_world_process_zero():
             self.state.save_to_json(os.path.join(output_dir, "trainer_state.json"))

-        # Maybe delete some older checkpoints.
-        if self.is_world_process_zero():
-            self._rotate_checkpoints(use_mtime=True, output_dir=run_dir)
-
         # Save RNG state in non-distributed training
         rng_states = {
             "python": random.getstate(),
@@ -1552,6 +1548,10 @@ class Trainer:
         else:
             torch.save(rng_states, os.path.join(output_dir, f"rng_state_{local_rank}.pth"))

+        # Maybe delete some older checkpoints.
+        if self.is_world_process_zero():
+            self._rotate_checkpoints(use_mtime=True, output_dir=run_dir)
+
     def _load_optimizer_and_scheduler(self, checkpoint):
         """If optimizer and scheduler states exist, load them."""
         if checkpoint is None:
@@ -1924,7 +1924,7 @@ class Trainer:
                 ordering_and_checkpoint_path.append((os.path.getmtime(path), path))
             else:
                 regex_match = re.match(f".*{checkpoint_prefix}-([0-9]+)", path)
-                if regex_match and regex_match.groups():
+                if regex_match is not None and regex_match.groups() is not None:
                     ordering_and_checkpoint_path.append((int(regex_match.groups()[0]), path))

         checkpoints_sorted = sorted(ordering_and_checkpoint_path)
@@ -1932,10 +1932,8 @@ class Trainer:
         # Make sure we don't delete the best model.
         if self.state.best_model_checkpoint is not None:
             best_model_index = checkpoints_sorted.index(str(Path(self.state.best_model_checkpoint)))
-            checkpoints_sorted[best_model_index], checkpoints_sorted[-1] = (
-                checkpoints_sorted[-1],
-                checkpoints_sorted[best_model_index],
-            )
+            for i in range(best_model_index, len(checkpoints_sorted) - 2):
+                checkpoints_sorted[i], checkpoints_sorted[i + 1] = checkpoints_sorted[i + 1], checkpoints_sorted[i]
         return checkpoints_sorted

     def _rotate_checkpoints(self, use_mtime=False, output_dir=None) -> None:
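
Note: the old code swapped the best checkpoint with the last element, which pushed the most recent checkpoint toward the front of the list where it became a deletion candidate. The new loop bubbles the best checkpoint up to the second-to-last slot while keeping every other checkpoint in order. A list-only illustration (checkpoint names made up):

    # Hedged sketch of both strategies on a toy list.
    checkpoints = ["ckpt-5", "ckpt-10", "ckpt-15", "ckpt-20", "ckpt-25"]
    best_model_index = 0  # pretend "ckpt-5" is the best checkpoint

    # Old behaviour: swap best with last.
    old = checkpoints.copy()
    old[best_model_index], old[-1] = old[-1], old[best_model_index]
    print(old)  # ['ckpt-25', 'ckpt-10', 'ckpt-15', 'ckpt-20', 'ckpt-5']
    # -> the newest checkpoint is now first and may be deleted by the rotation below.

    # New behaviour: bubble the best checkpoint up to the second-to-last slot.
    new = checkpoints.copy()
    for i in range(best_model_index, len(new) - 2):
        new[i], new[i + 1] = new[i + 1], new[i]
    print(new)  # ['ckpt-10', 'ckpt-15', 'ckpt-20', 'ckpt-5', 'ckpt-25']
    # -> older checkpoints stay at the front, the best and the newest stay at the end.
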
@@ -1947,7 +1945,17 @@ class Trainer:
         if len(checkpoints_sorted) <= self.args.save_total_limit:
             return

-        number_of_checkpoints_to_delete = max(0, len(checkpoints_sorted) - self.args.save_total_limit)
+        # If save_total_limit=1 with load_best_mode_at_end=True, we could end up deleting the last checkpoint, which
+        # we don't do to allow resuming.
+        save_total_limit = self.args.save_total_limit
+        if (
+            self.state.best_model_checkpoint is not None
+            and self.args.save_total_limit == 1
+            and checkpoints_sorted[-1] != self.state.best_model_checkpoint
+        ):
+            save_total_limit = 2
+
+        number_of_checkpoints_to_delete = max(0, len(checkpoints_sorted) - save_total_limit)
         checkpoints_to_be_deleted = checkpoints_sorted[:number_of_checkpoints_to_delete]
         for checkpoint in checkpoints_to_be_deleted:
             logger.info(f"Deleting older checkpoint [{checkpoint}] due to args.save_total_limit")
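
Note: the guard above means `save_total_limit=1` is not honored literally when the best checkpoint is not also the most recent one; two checkpoints are kept so training can still resume from the latest checkpoint. A compact sketch of the effective limit, extracted from the branch above (standalone, made-up values):

    # Hedged sketch of the effective-limit logic, outside of the Trainer class.
    def effective_save_total_limit(save_total_limit, best_model_checkpoint, checkpoints_sorted):
        if (
            best_model_checkpoint is not None
            and save_total_limit == 1
            and checkpoints_sorted[-1] != best_model_checkpoint
        ):
            return 2
        return save_total_limit

    print(effective_save_total_limit(1, "checkpoint-5", ["checkpoint-5", "checkpoint-25"]))   # -> 2
    print(effective_save_total_limit(1, "checkpoint-25", ["checkpoint-5", "checkpoint-25"]))  # -> 1
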
@@ -20,6 +20,7 @@ import os.path
 import random
 import tempfile
 import unittest
+import warnings
 from typing import List, Tuple

 from huggingface_hub import HfApi
@@ -1375,7 +1376,14 @@ class ModelTesterMixin:

                     inputs["labels"] = inputs["labels"].to(problem_type["dtype"])

-                    loss = model(**inputs).loss
+                    # This tests that we do not trigger the warning form PyTorch "Using a target size that is different
+                    # to the input size. This will likely lead to incorrect results due to broadcasting. Please ensure
+                    # they have the same size." which is a symptom something in wrong for the regression problem.
+                    # See https://github.com/huggingface/transformers/issues/11780
+                    with warnings.catch_warnings(record=True) as warning_list:
+                        loss = model(**inputs).loss
+                    self.assertListEqual(warning_list, [])
+
                     loss.backward()

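
Note: the test now records any warning emitted during the forward pass and asserts that the recorded list is empty, so the broadcasting warning from issue #11780 fails the test instead of scrolling by. A minimal standalone illustration of the `catch_warnings(record=True)` pattern:

    import warnings

    def noisy():
        warnings.warn("target size mismatch", UserWarning)

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always")  # do not let default filters hide anything
        pass  # a clean forward pass would go here
    print(warning_list)  # [] -> the assertListEqual in the test passes

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always")
        noisy()
    print([str(w.message) for w in warning_list])  # ['target size mismatch']
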
@@ -21,6 +21,7 @@ import random
 import re
 import tempfile
 import unittest
+from pathlib import Path

 import numpy as np

@@ -45,6 +46,7 @@ from transformers.testing_utils import (
     require_torch_multi_gpu,
     slow,
 )
+from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
 from transformers.utils.hp_naming import TrialShortNamer


@@ -1048,6 +1050,35 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
         trainer.train()
         self.assertTrue(isinstance(trainer.state.total_flos, float))

+    def check_checkpoint_deletion(self, trainer, output_dir, expected):
+        # Make fake checkpoints
+        for n in [5, 10, 15, 20, 25]:
+            os.makedirs(os.path.join(output_dir, f"{PREFIX_CHECKPOINT_DIR}-{n}"), exist_ok=True)
+        trainer._rotate_checkpoints(output_dir=output_dir)
+        glob_checkpoints = [str(x) for x in Path(output_dir).glob(f"{PREFIX_CHECKPOINT_DIR}-*")]
+        values = [int(re.match(f".*{PREFIX_CHECKPOINT_DIR}-([0-9]+)", d).groups()[0]) for d in glob_checkpoints]
+        self.assertSetEqual(set(values), set(expected))
+
+    def test_checkpoint_rotation(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            # Without best model at end
+            trainer = get_regression_trainer(output_dir=tmp_dir, save_total_limit=2)
+            self.check_checkpoint_deletion(trainer, tmp_dir, [20, 25])
+
+            # With best model at end
+            trainer = get_regression_trainer(output_dir=tmp_dir, load_best_model_at_end=True, save_total_limit=2)
+            trainer.state.best_model_checkpoint = os.path.join(tmp_dir, "checkpoint-5")
+            self.check_checkpoint_deletion(trainer, tmp_dir, [5, 25])
+
+            # Edge case: we don't always honor save_total_limit=1 if load_best_model_at_end=True to be able to resume
+            # from checkpoint
+            trainer = get_regression_trainer(output_dir=tmp_dir, load_best_model_at_end=True, save_total_limit=1)
+            trainer.state.best_model_checkpoint = os.path.join(tmp_dir, "checkpoint-25")
+            self.check_checkpoint_deletion(trainer, tmp_dir, [25])
+
+            trainer.state.best_model_checkpoint = os.path.join(tmp_dir, "checkpoint-5")
+            self.check_checkpoint_deletion(trainer, tmp_dir, [5, 25])
+
     def check_mem_metrics(self, trainer, check_func):
         metrics = trainer.train().metrics
         check_func("init_mem_cpu_alloc_delta", metrics)
@@ -26,7 +26,7 @@ REPLACE_PATTERNS = {
     "examples": (re.compile(r'^check_min_version\("[^"]+"\)\s*$', re.MULTILINE), 'check_min_version("VERSION")\n'),
     "init": (re.compile(r'^__version__\s+=\s+"([^"]+)"\s*$', re.MULTILINE), '__version__ = "VERSION"\n'),
     "setup": (re.compile(r'^(\s*)version\s*=\s*"[^"]+",', re.MULTILINE), r'\1version="VERSION",'),
-    "doc": (re.compile(r"^(\s*)release\s*=\s*u'[^']+'$", re.MULTILINE), "release = u'VERSION'\n"),
+    "doc": (re.compile(r'^(\s*)release\s*=\s*"[^"]+"$', re.MULTILINE), "release = u'VERSION'\n"),
 }
 REPLACE_FILES = {
     "init": "src/transformers/__init__.py",
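
Note: the old "doc" pattern only matched the single-quoted `release = u'...'` spelling, while docs/source/conf.py (see the hunk near the top) uses double quotes, so the release script presumably skipped that file. The new pattern targets the double-quoted form but still writes back the `u'VERSION'` style. A quick check of the new regex against both spellings (sample strings only):

    import re

    # Pattern and replacement taken from the new line of the diff.
    doc_re = re.compile(r'^(\s*)release\s*=\s*"[^"]+"$', re.MULTILINE)
    replacement = "release = u'VERSION'\n"

    single_quoted = "release = u'4.5.0.dev0'"
    double_quoted = 'release = "4.5.0.dev0"'

    print(bool(doc_re.search(single_quoted)))  # False -- no longer targeted
    print(bool(doc_re.search(double_quoted)))  # True
    print(doc_re.sub(replacement.replace("VERSION", "4.6.1"), double_quoted))
    # -> release = u'4.6.1'
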