Mirror of https://github.com/huggingface/transformers.git (synced 2025-10-21 01:23:56 +08:00)

Compare commits: default-au… ... v4.6.1 (6 commits)

Commits (SHA1):
  fb27b276e7
  8c8a5d3661
  8924a5f3de
  c81584a292
  265c26e19e
  25dee4a423
@@ -379,6 +379,8 @@ jobs:
           keys:
             - v0.4-deploy_doc-{{ checksum "setup.py" }}
             - v0.4-{{ checksum "setup.py" }}
     - run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev
     - run: pip install --upgrade pip
     - run: pip install ."[docs]"
     - save_cache:
         key: v0.4-deploy_doc-{{ checksum "setup.py" }}
@@ -27,7 +27,8 @@ author = "huggingface"
 # The short X.Y version
 version = ""
 # The full version, including alpha/beta/rc tags
-release = "4.5.0.dev0"
+release = u'4.6.1'


 # Prefix link to point to master, comment this during version release and uncomment below line
@@ -27,6 +27,7 @@ There are two categories of pipeline abstractions to be aware about:
     - :class:`~transformers.ConversationalPipeline`
     - :class:`~transformers.FeatureExtractionPipeline`
     - :class:`~transformers.FillMaskPipeline`
+    - :class:`~transformers.ImageClassificationPipeline`
     - :class:`~transformers.QuestionAnsweringPipeline`
     - :class:`~transformers.SummarizationPipeline`
     - :class:`~transformers.TextClassificationPipeline`
@@ -36,7 +37,6 @@ There are two categories of pipeline abstractions to be aware about:
     - :class:`~transformers.ZeroShotClassificationPipeline`
     - :class:`~transformers.Text2TextGenerationPipeline`
     - :class:`~transformers.TableQuestionAnsweringPipeline`
-    - :class:`~transformers.ImageClassificationPipeline`

 The pipeline abstraction
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
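The newly listed ImageClassificationPipeline is reached through the pipeline() factory. A minimal usage sketch, assuming the required vision dependencies (Pillow) are installed and using a placeholder image path; the default checkpoint for the task is downloaded on first use:

    from transformers import pipeline

    # The "image-classification" task string resolves to ImageClassificationPipeline.
    classifier = pipeline("image-classification")
    predictions = classifier("path/to/local_image.png")  # hypothetical image path
    print(predictions)  # list of {"label": ..., "score": ...} dictionaries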
@@ -39,8 +39,9 @@ class QuestionAnsweringTrainer(Trainer):
         # Temporarily disable metric computation, we will do it in the loop here.
         compute_metrics = self.compute_metrics
         self.compute_metrics = None
+        eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
         try:
-            output = self.prediction_loop(
+            output = eval_loop(
                 eval_dataloader,
                 description="Evaluation",
                 # No point gathering the predictions if there are no metrics, otherwise we defer to
@@ -72,8 +73,9 @@ class QuestionAnsweringTrainer(Trainer):
         # Temporarily disable metric computation, we will do it in the loop here.
         compute_metrics = self.compute_metrics
         self.compute_metrics = None
+        eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
         try:
-            output = self.prediction_loop(
+            output = eval_loop(
                 predict_dataloader,
                 description="Prediction",
                 # No point gathering the predictions if there are no metrics, otherwise we defer to
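A minimal sketch of the pattern these two hunks apply, using the Trainer attributes shown in the diff (compute_metrics, args.use_legacy_prediction_loop, prediction_loop, evaluation_loop); error handling is simplified:

    def evaluate_without_metrics(trainer, eval_dataloader):
        # Temporarily disable metric computation while the loop runs.
        compute_metrics = trainer.compute_metrics
        trainer.compute_metrics = None
        # Pick the legacy or the new loop based on the training arguments.
        eval_loop = (
            trainer.prediction_loop
            if trainer.args.use_legacy_prediction_loop
            else trainer.evaluation_loop
        )
        try:
            output = eval_loop(eval_dataloader, description="Evaluation")
        finally:
            # Always restore the metric function, even if the loop raises.
            trainer.compute_metrics = compute_metrics
        return output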
setup.py
@@ -320,7 +320,7 @@ install_requires = [

 setup(
     name="transformers",
-    version="4.6.0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    version="4.6.1",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
     author="Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Sam Shleifer, Patrick von Platen, Sylvain Gugger, Suraj Patil, Stas Bekman, Google AI Language Team Authors, Open AI team Authors, Facebook AI Authors, Carnegie Mellon University Authors",
     author_email="thomas@huggingface.co",
     description="State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch",
@@ -22,7 +22,7 @@
 # to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
 # in the namespace without actually importing anything (and especially none of the backends).

-__version__ = "4.6.0"
+__version__ = "4.6.1"

 # Work around to update TensorFlow's absl.logging threshold which alters the
 # default Python logging output behavior when present.
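After installing the patch release, the bump above can be confirmed directly (a trivial check, not part of the diff):

    import transformers

    print(transformers.__version__)  # expected to print 4.6.1 for this release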
@@ -1037,7 +1037,10 @@ class AlbertForSequenceClassification(AlbertPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -1528,7 +1528,10 @@ class BertForSequenceClassification(BertPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -2671,7 +2671,10 @@ class BigBirdForSequenceClassification(BigBirdPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -1023,7 +1023,10 @@ class ConvBertForSequenceClassification(ConvBertPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -642,7 +642,10 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -964,7 +964,10 @@ class ElectraForSequenceClassification(ElectraPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -1298,7 +1298,10 @@ class FunnelForSequenceClassification(FunnelPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -1872,7 +1872,10 @@ class LongformerForSequenceClassification(LongformerPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -1279,7 +1279,10 @@ class MobileBertForSequenceClassification(MobileBertPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -2445,7 +2445,10 @@ class ReformerForSequenceClassification(ReformerPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -1178,7 +1178,10 @@ class RobertaForSequenceClassification(RobertaPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -798,7 +798,10 @@ class SqueezeBertForSequenceClassification(SqueezeBertPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -847,7 +847,10 @@ class XLMForSequenceClassification(XLMPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
@@ -1562,7 +1562,10 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):

             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
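The same three-line change is applied to every ...ForSequenceClassification head above. A standalone illustration of why the squeeze is needed (not library code; shapes are chosen for the example):

    import torch
    from torch.nn import MSELoss

    # With num_labels == 1, sequence-classification logits have shape (batch_size, 1)
    # while regression labels have shape (batch_size,).
    logits = torch.randn(8, 1)
    labels = torch.randn(8)

    loss_fct = MSELoss()
    # Old code: (8, 1) vs (8,) are broadcast to (8, 8), PyTorch warns about the
    # mismatched target size, and the regression loss is silently wrong.
    # New code: squeeze both tensors so the shapes match exactly.
    loss = loss_fct(logits.squeeze(), labels.squeeze())
    print(loss.shape)  # torch.Size([]) -> a scalar loss, no broadcasting warning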
@@ -1523,10 +1523,6 @@ class Trainer:
         if self.is_world_process_zero():
             self.state.save_to_json(os.path.join(output_dir, "trainer_state.json"))

-        # Maybe delete some older checkpoints.
-        if self.is_world_process_zero():
-            self._rotate_checkpoints(use_mtime=True, output_dir=run_dir)
-
         # Save RNG state in non-distributed training
         rng_states = {
             "python": random.getstate(),
@@ -1552,6 +1548,10 @@ class Trainer:
         else:
             torch.save(rng_states, os.path.join(output_dir, f"rng_state_{local_rank}.pth"))

+        # Maybe delete some older checkpoints.
+        if self.is_world_process_zero():
+            self._rotate_checkpoints(use_mtime=True, output_dir=run_dir)
+
     def _load_optimizer_and_scheduler(self, checkpoint):
         """If optimizer and scheduler states exist, load them."""
         if checkpoint is None:
@@ -1924,7 +1924,7 @@ class Trainer:
                 ordering_and_checkpoint_path.append((os.path.getmtime(path), path))
             else:
                 regex_match = re.match(f".*{checkpoint_prefix}-([0-9]+)", path)
-                if regex_match and regex_match.groups():
+                if regex_match is not None and regex_match.groups() is not None:
                     ordering_and_checkpoint_path.append((int(regex_match.groups()[0]), path))

         checkpoints_sorted = sorted(ordering_and_checkpoint_path)
@@ -1932,10 +1932,8 @@ class Trainer:
         # Make sure we don't delete the best model.
         if self.state.best_model_checkpoint is not None:
             best_model_index = checkpoints_sorted.index(str(Path(self.state.best_model_checkpoint)))
-            checkpoints_sorted[best_model_index], checkpoints_sorted[-1] = (
-                checkpoints_sorted[-1],
-                checkpoints_sorted[best_model_index],
-            )
+            for i in range(best_model_index, len(checkpoints_sorted) - 2):
+                checkpoints_sorted[i], checkpoints_sorted[i + 1] = checkpoints_sorted[i + 1], checkpoints_sorted[i]
         return checkpoints_sorted

     def _rotate_checkpoints(self, use_mtime=False, output_dir=None) -> None:
@@ -1947,7 +1945,17 @@ class Trainer:
         if len(checkpoints_sorted) <= self.args.save_total_limit:
             return

-        number_of_checkpoints_to_delete = max(0, len(checkpoints_sorted) - self.args.save_total_limit)
+        # If save_total_limit=1 with load_best_mode_at_end=True, we could end up deleting the last checkpoint, which
+        # we don't do to allow resuming.
+        save_total_limit = self.args.save_total_limit
+        if (
+            self.state.best_model_checkpoint is not None
+            and self.args.save_total_limit == 1
+            and checkpoints_sorted[-1] != self.state.best_model_checkpoint
+        ):
+            save_total_limit = 2
+
+        number_of_checkpoints_to_delete = max(0, len(checkpoints_sorted) - save_total_limit)
         checkpoints_to_be_deleted = checkpoints_sorted[:number_of_checkpoints_to_delete]
         for checkpoint in checkpoints_to_be_deleted:
             logger.info(f"Deleting older checkpoint [{checkpoint}] due to args.save_total_limit")
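The rule added in the last hunk can be summarized on its own. A minimal sketch (a hypothetical helper, not the Trainer method itself), assuming the checkpoint list is already ordered the way _sorted_checkpoints returns it, oldest first with the best checkpoint pushed toward the end:

    def checkpoints_to_delete(checkpoints_sorted, save_total_limit, best_model_checkpoint=None):
        if save_total_limit is None or save_total_limit <= 0:
            return []
        if len(checkpoints_sorted) <= save_total_limit:
            return []
        # If the best checkpoint is not the most recent one, honoring a limit of 1
        # would delete the latest checkpoint (the resume point), so keep two instead.
        if (
            best_model_checkpoint is not None
            and save_total_limit == 1
            and checkpoints_sorted[-1] != best_model_checkpoint
        ):
            save_total_limit = 2
        number_to_delete = max(0, len(checkpoints_sorted) - save_total_limit)
        return checkpoints_sorted[:number_to_delete]

    # Mirrors the new test's expectation: with a limit of 1 and the best checkpoint
    # older than the latest one, nothing is deleted and both survive.
    print(checkpoints_to_delete(["checkpoint-5", "checkpoint-25"], 1, "checkpoint-5"))  # []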
@@ -20,6 +20,7 @@ import os.path
 import random
 import tempfile
 import unittest
+import warnings
 from typing import List, Tuple

 from huggingface_hub import HfApi
@@ -1375,7 +1376,14 @@ class ModelTesterMixin:

             inputs["labels"] = inputs["labels"].to(problem_type["dtype"])

-            loss = model(**inputs).loss
+            # This tests that we do not trigger the warning form PyTorch "Using a target size that is different
+            # to the input size. This will likely lead to incorrect results due to broadcasting. Please ensure
+            # they have the same size." which is a symptom something in wrong for the regression problem.
+            # See https://github.com/huggingface/transformers/issues/11780
+            with warnings.catch_warnings(record=True) as warning_list:
+                loss = model(**inputs).loss
+            self.assertListEqual(warning_list, [])
+
             loss.backward()
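The assertion pattern used in that test, stripped of the test harness (a sketch; the simplefilter("always") call is added here so that no warning is swallowed by an active filter):

    import warnings

    def run_and_assert_no_warnings(fn, *args, **kwargs):
        # Record every warning emitted while fn runs and fail loudly if any appear.
        with warnings.catch_warnings(record=True) as warning_list:
            warnings.simplefilter("always")
            result = fn(*args, **kwargs)
        assert warning_list == [], [str(w.message) for w in warning_list]
        return result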
@@ -21,6 +21,7 @@ import random
 import re
 import tempfile
 import unittest
 from pathlib import Path

 import numpy as np

@@ -45,6 +46,7 @@ from transformers.testing_utils import (
     require_torch_multi_gpu,
     slow,
 )
 from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
 from transformers.utils.hp_naming import TrialShortNamer


@@ -1048,6 +1050,35 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
         trainer.train()
         self.assertTrue(isinstance(trainer.state.total_flos, float))

+    def check_checkpoint_deletion(self, trainer, output_dir, expected):
+        # Make fake checkpoints
+        for n in [5, 10, 15, 20, 25]:
+            os.makedirs(os.path.join(output_dir, f"{PREFIX_CHECKPOINT_DIR}-{n}"), exist_ok=True)
+        trainer._rotate_checkpoints(output_dir=output_dir)
+        glob_checkpoints = [str(x) for x in Path(output_dir).glob(f"{PREFIX_CHECKPOINT_DIR}-*")]
+        values = [int(re.match(f".*{PREFIX_CHECKPOINT_DIR}-([0-9]+)", d).groups()[0]) for d in glob_checkpoints]
+        self.assertSetEqual(set(values), set(expected))
+
+    def test_checkpoint_rotation(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            # Without best model at end
+            trainer = get_regression_trainer(output_dir=tmp_dir, save_total_limit=2)
+            self.check_checkpoint_deletion(trainer, tmp_dir, [20, 25])
+
+            # With best model at end
+            trainer = get_regression_trainer(output_dir=tmp_dir, load_best_model_at_end=True, save_total_limit=2)
+            trainer.state.best_model_checkpoint = os.path.join(tmp_dir, "checkpoint-5")
+            self.check_checkpoint_deletion(trainer, tmp_dir, [5, 25])
+
+            # Edge case: we don't always honor save_total_limit=1 if load_best_model_at_end=True to be able to resume
+            # from checkpoint
+            trainer = get_regression_trainer(output_dir=tmp_dir, load_best_model_at_end=True, save_total_limit=1)
+            trainer.state.best_model_checkpoint = os.path.join(tmp_dir, "checkpoint-25")
+            self.check_checkpoint_deletion(trainer, tmp_dir, [25])
+
+            trainer.state.best_model_checkpoint = os.path.join(tmp_dir, "checkpoint-5")
+            self.check_checkpoint_deletion(trainer, tmp_dir, [5, 25])
+
     def check_mem_metrics(self, trainer, check_func):
         metrics = trainer.train().metrics
         check_func("init_mem_cpu_alloc_delta", metrics)
@@ -26,7 +26,7 @@ REPLACE_PATTERNS = {
     "examples": (re.compile(r'^check_min_version\("[^"]+"\)\s*$', re.MULTILINE), 'check_min_version("VERSION")\n'),
     "init": (re.compile(r'^__version__\s+=\s+"([^"]+)"\s*$', re.MULTILINE), '__version__ = "VERSION"\n'),
     "setup": (re.compile(r'^(\s*)version\s*=\s*"[^"]+",', re.MULTILINE), r'\1version="VERSION",'),
-    "doc": (re.compile(r"^(\s*)release\s*=\s*u'[^']+'$", re.MULTILINE), "release = u'VERSION'\n"),
+    "doc": (re.compile(r'^(\s*)release\s*=\s*"[^"]+"$', re.MULTILINE), "release = u'VERSION'\n"),
 }
 REPLACE_FILES = {
     "init": "src/transformers/__init__.py",
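A quick, ad-hoc check of what the updated "doc" pattern does to the conf.py line changed earlier in this compare (illustrative only):

    import re

    pattern = re.compile(r'^(\s*)release\s*=\s*"[^"]+"$', re.MULTILINE)
    conf_py_line = 'release = "4.5.0.dev0"'
    print(pattern.sub("release = u'4.6.1'", conf_py_line))
    # prints: release = u'4.6.1'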