Compare commits


6 Commits
gpt2...v4.6.1

SHA1 Message Date
fb27b276e7 Release: v4.6.1 2021-05-20 10:46:15 -04:00
8c8a5d3661 Fix checkpoint deletion (#11748) 2021-05-20 10:45:01 -04:00
8924a5f3de Use new evaluation loop in TrainerQA (#11746) 2021-05-20 10:44:51 -04:00
c81584a292 Fix regression in regression (#11785) 2021-05-20 10:44:40 -04:00
    * Fix regression in regression
    * Add test
265c26e19e Fix pattern in conf.py (#11784) 2021-05-20 10:44:30 -04:00
25dee4a423 Fix doc deployment 2021-05-13 10:44:17 -04:00
24 changed files with 126 additions and 32 deletions

.circleci/config.yml

@@ -379,6 +379,8 @@ jobs:
           keys:
             - v0.4-deploy_doc-{{ checksum "setup.py" }}
             - v0.4-{{ checksum "setup.py" }}
+      - run: sudo apt-get -y update && sudo apt-get install -y libsndfile1-dev
+      - run: pip install --upgrade pip
       - run: pip install ."[docs]"
       - save_cache:
           key: v0.4-deploy_doc-{{ checksum "setup.py" }}

docs/source/conf.py

@@ -27,7 +27,8 @@ author = "huggingface"
 # The short X.Y version
 version = ""
 # The full version, including alpha/beta/rc tags
-release = "4.5.0.dev0"
+release = u'4.6.1'
 
 # Prefix link to point to master, comment this during version release and uncomment below line

docs/source/main_classes/pipelines.rst

@@ -27,6 +27,7 @@ There are two categories of pipeline abstractions to be aware about:
     - :class:`~transformers.ConversationalPipeline`
     - :class:`~transformers.FeatureExtractionPipeline`
     - :class:`~transformers.FillMaskPipeline`
+    - :class:`~transformers.ImageClassificationPipeline`
     - :class:`~transformers.QuestionAnsweringPipeline`
     - :class:`~transformers.SummarizationPipeline`
     - :class:`~transformers.TextClassificationPipeline`
@@ -36,7 +37,6 @@ There are two categories of pipeline abstractions to be aware about:
     - :class:`~transformers.ZeroShotClassificationPipeline`
     - :class:`~transformers.Text2TextGenerationPipeline`
     - :class:`~transformers.TableQuestionAnsweringPipeline`
-    - :class:`~transformers.ImageClassificationPipeline`
 
 The pipeline abstraction
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

examples/pytorch/question-answering/trainer_qa.py

@@ -39,8 +39,9 @@ class QuestionAnsweringTrainer(Trainer):
         # Temporarily disable metric computation, we will do it in the loop here.
         compute_metrics = self.compute_metrics
         self.compute_metrics = None
+        eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
         try:
-            output = self.prediction_loop(
+            output = eval_loop(
                 eval_dataloader,
                 description="Evaluation",
                 # No point gathering the predictions if there are no metrics, otherwise we defer to
@@ -72,8 +73,9 @@ class QuestionAnsweringTrainer(Trainer):
         # Temporarily disable metric computation, we will do it in the loop here.
         compute_metrics = self.compute_metrics
         self.compute_metrics = None
+        eval_loop = self.prediction_loop if self.args.use_legacy_prediction_loop else self.evaluation_loop
         try:
-            output = self.prediction_loop(
+            output = eval_loop(
                 predict_dataloader,
                 description="Prediction",
                 # No point gathering the predictions if there are no metrics, otherwise we defer to
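
For context (an aside, not part of the diff): v4.6.0 introduced a rewritten Trainer.evaluation_loop and kept the previous implementation available as prediction_loop behind a TrainingArguments flag. The added eval_loop lines make QuestionAnsweringTrainer honor that flag instead of always calling the legacy loop. A minimal sketch of the flag, assuming transformers v4.6.x is installed:

    from transformers import TrainingArguments

    args = TrainingArguments(output_dir="out")
    print(args.use_legacy_prediction_loop)  # False by default, so the new evaluation_loop is used

    # Opt back into the old behavior if the new loop causes trouble:
    args = TrainingArguments(output_dir="out", use_legacy_prediction_loop=True)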

setup.py

@@ -320,7 +320,7 @@ install_requires = [
 setup(
     name="transformers",
-    version="4.6.0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    version="4.6.1",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
     author="Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Sam Shleifer, Patrick von Platen, Sylvain Gugger, Suraj Patil, Stas Bekman, Google AI Language Team Authors, Open AI team Authors, Facebook AI Authors, Carnegie Mellon University Authors",
     author_email="thomas@huggingface.co",
     description="State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch",

src/transformers/__init__.py

@@ -22,7 +22,7 @@
 # to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
 # in the namespace without actually importing anything (and especially none of the backends).
 
-__version__ = "4.6.0"
+__version__ = "4.6.1"
 
 # Work around to update TensorFlow's absl.logging threshold which alters the
 # default Python logging output behavior when present.

src/transformers/models/albert/modeling_albert.py

@@ -1037,7 +1037,10 @@ class AlbertForSequenceClassification(AlbertPreTrainedModel):
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))
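
Why this matters (an illustration, not code from the diff): with num_labels == 1, logits.view(-1, self.num_labels) has shape (batch_size, 1) while regression labels have shape (batch_size,). MSELoss broadcasts that pair to (batch_size, batch_size), emits PyTorch's "Using a target size that is different to the input size" warning, and silently computes the wrong loss. A minimal standalone repro, assuming only torch:

    import torch
    from torch.nn import MSELoss

    batch_size, num_labels = 4, 1
    logits = torch.randn(batch_size, num_labels)
    labels = torch.randn(batch_size)

    loss_fct = MSELoss()
    buggy = loss_fct(logits.view(-1, num_labels), labels)  # warns: (4, 1) vs (4,) broadcasts to (4, 4)
    fixed = loss_fct(logits.squeeze(), labels.squeeze())   # shapes align: (4,) vs (4,)

The same one-line-to-four-line change is applied to each of the thirteen other ...ForSequenceClassification diffs below.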

src/transformers/models/bert/modeling_bert.py

@@ -1528,7 +1528,10 @@ class BertForSequenceClassification(BertPreTrainedModel):
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

src/transformers/models/big_bird/modeling_big_bird.py

@@ -2671,7 +2671,10 @@ class BigBirdForSequenceClassification(BigBirdPreTrainedModel):
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

src/transformers/models/convbert/modeling_convbert.py

@@ -1023,7 +1023,10 @@ class ConvBertForSequenceClassification(ConvBertPreTrainedModel):
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

src/transformers/models/distilbert/modeling_distilbert.py

@@ -642,7 +642,10 @@ class DistilBertForSequenceClassification(DistilBertPreTrainedModel):
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

src/transformers/models/electra/modeling_electra.py

@@ -964,7 +964,10 @@ class ElectraForSequenceClassification(ElectraPreTrainedModel):
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

src/transformers/models/funnel/modeling_funnel.py

@@ -1298,7 +1298,10 @@ class FunnelForSequenceClassification(FunnelPreTrainedModel):
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

src/transformers/models/longformer/modeling_longformer.py

@@ -1872,7 +1872,10 @@ class LongformerForSequenceClassification(LongformerPreTrainedModel):
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

src/transformers/models/mobilebert/modeling_mobilebert.py

@@ -1279,7 +1279,10 @@ class MobileBertForSequenceClassification(MobileBertPreTrainedModel):
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

src/transformers/models/reformer/modeling_reformer.py

@@ -2445,7 +2445,10 @@ class ReformerForSequenceClassification(ReformerPreTrainedModel):
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

src/transformers/models/roberta/modeling_roberta.py

@@ -1178,7 +1178,10 @@ class RobertaForSequenceClassification(RobertaPreTrainedModel):
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

src/transformers/models/squeezebert/modeling_squeezebert.py

@@ -798,7 +798,10 @@ class SqueezeBertForSequenceClassification(SqueezeBertPreTrainedModel):
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

src/transformers/models/xlm/modeling_xlm.py

@@ -847,7 +847,10 @@ class XLMForSequenceClassification(XLMPreTrainedModel):
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

src/transformers/models/xlnet/modeling_xlnet.py

@@ -1562,7 +1562,10 @@ class XLNetForSequenceClassification(XLNetPreTrainedModel):
             if self.config.problem_type == "regression":
                 loss_fct = MSELoss()
-                loss = loss_fct(logits.view(-1, self.num_labels), labels)
+                if self.num_labels == 1:
+                    loss = loss_fct(logits.squeeze(), labels.squeeze())
+                else:
+                    loss = loss_fct(logits, labels)
             elif self.config.problem_type == "single_label_classification":
                 loss_fct = CrossEntropyLoss()
                 loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))

src/transformers/trainer.py

@@ -1523,10 +1523,6 @@ class Trainer:
         if self.is_world_process_zero():
             self.state.save_to_json(os.path.join(output_dir, "trainer_state.json"))
 
-        # Maybe delete some older checkpoints.
-        if self.is_world_process_zero():
-            self._rotate_checkpoints(use_mtime=True, output_dir=run_dir)
-
         # Save RNG state in non-distributed training
         rng_states = {
             "python": random.getstate(),
@@ -1552,6 +1548,10 @@ class Trainer:
         else:
             torch.save(rng_states, os.path.join(output_dir, f"rng_state_{local_rank}.pth"))
 
+        # Maybe delete some older checkpoints.
+        if self.is_world_process_zero():
+            self._rotate_checkpoints(use_mtime=True, output_dir=run_dir)
+
     def _load_optimizer_and_scheduler(self, checkpoint):
         """If optimizer and scheduler states exist, load them."""
         if checkpoint is None:
@@ -1924,7 +1924,7 @@ class Trainer:
                 ordering_and_checkpoint_path.append((os.path.getmtime(path), path))
             else:
                 regex_match = re.match(f".*{checkpoint_prefix}-([0-9]+)", path)
-                if regex_match and regex_match.groups():
+                if regex_match is not None and regex_match.groups() is not None:
                     ordering_and_checkpoint_path.append((int(regex_match.groups()[0]), path))
 
         checkpoints_sorted = sorted(ordering_and_checkpoint_path)
@@ -1932,10 +1932,8 @@ class Trainer:
         # Make sure we don't delete the best model.
         if self.state.best_model_checkpoint is not None:
             best_model_index = checkpoints_sorted.index(str(Path(self.state.best_model_checkpoint)))
-            checkpoints_sorted[best_model_index], checkpoints_sorted[-1] = (
-                checkpoints_sorted[-1],
-                checkpoints_sorted[best_model_index],
-            )
+            for i in range(best_model_index, len(checkpoints_sorted) - 2):
+                checkpoints_sorted[i], checkpoints_sorted[i + 1] = checkpoints_sorted[i + 1], checkpoints_sorted[i]
         return checkpoints_sorted
 
     def _rotate_checkpoints(self, use_mtime=False, output_dir=None) -> None:
@@ -1947,7 +1945,17 @@ class Trainer:
         if len(checkpoints_sorted) <= self.args.save_total_limit:
             return
 
-        number_of_checkpoints_to_delete = max(0, len(checkpoints_sorted) - self.args.save_total_limit)
+        # If save_total_limit=1 with load_best_model_at_end=True, we could end up deleting the last checkpoint, which
+        # we don't do to allow resuming.
+        save_total_limit = self.args.save_total_limit
+        if (
+            self.state.best_model_checkpoint is not None
+            and self.args.save_total_limit == 1
+            and checkpoints_sorted[-1] != self.state.best_model_checkpoint
+        ):
+            save_total_limit = 2
+
+        number_of_checkpoints_to_delete = max(0, len(checkpoints_sorted) - save_total_limit)
         checkpoints_to_be_deleted = checkpoints_sorted[:number_of_checkpoints_to_delete]
         for checkpoint in checkpoints_to_be_deleted:
             logger.info(f"Deleting older checkpoint [{checkpoint}] due to args.save_total_limit")
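
Taken together, the hunks above change the deletion policy in two ways, sketched here as a standalone function (illustrative name, not the Trainer API): the best checkpoint is now shifted to the second-to-last slot instead of swapped with the newest one (the swap could push the newest checkpoint into the deletion window), and with save_total_limit=1 an effective limit of 2 is used whenever the best checkpoint differs from the latest, so a checkpoint to resume from always survives:

    def rotate(checkpoints_sorted, save_total_limit, best=None):
        # checkpoints_sorted is ordered oldest -> newest, as in Trainer._sorted_checkpoints.
        checkpoints_sorted = list(checkpoints_sorted)
        if best in checkpoints_sorted:
            # Shift the best checkpoint to the second-to-last slot, preserving the order of the rest.
            i = checkpoints_sorted.index(best)
            for j in range(i, len(checkpoints_sorted) - 2):
                checkpoints_sorted[j], checkpoints_sorted[j + 1] = checkpoints_sorted[j + 1], checkpoints_sorted[j]
        limit = save_total_limit
        if best is not None and save_total_limit == 1 and checkpoints_sorted[-1] != best:
            limit = 2  # keep the best checkpoint *and* the latest one
        deleted = checkpoints_sorted[: max(0, len(checkpoints_sorted) - limit)]
        return [c for c in checkpoints_sorted if c not in deleted]

    checkpoints = [f"checkpoint-{n}" for n in (5, 10, 15, 20, 25)]
    print(rotate(checkpoints, 2))                  # ['checkpoint-20', 'checkpoint-25']
    print(rotate(checkpoints, 2, "checkpoint-5"))  # ['checkpoint-5', 'checkpoint-25']
    print(rotate(checkpoints, 1, "checkpoint-5"))  # ['checkpoint-5', 'checkpoint-25']

These outputs mirror the expectations asserted in tests/test_trainer.py below.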

tests/test_modeling_common.py

@@ -20,6 +20,7 @@ import os.path
 import random
 import tempfile
 import unittest
+import warnings
 from typing import List, Tuple
 
 from huggingface_hub import HfApi
@@ -1375,7 +1376,14 @@ class ModelTesterMixin:
                 inputs["labels"] = inputs["labels"].to(problem_type["dtype"])
 
-                loss = model(**inputs).loss
+                # This tests that we do not trigger the warning from PyTorch "Using a target size that is different
+                # to the input size. This will likely lead to incorrect results due to broadcasting. Please ensure
+                # they have the same size.", which is a symptom that something is wrong for the regression problem.
+                # See https://github.com/huggingface/transformers/issues/11780
+                with warnings.catch_warnings(record=True) as warning_list:
+                    loss = model(**inputs).loss
+                self.assertListEqual(warning_list, [])
+
                 loss.backward()
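
The pattern used here, warnings.catch_warnings(record=True), collects every warning raised inside the block into a list instead of printing it, so an empty list proves the forward pass was warning-free. A tiny self-contained illustration (not repository code):

    import warnings

    with warnings.catch_warnings(record=True) as warning_list:
        warnings.simplefilter("always")  # don't let earlier filters suppress anything
        result = 1 + 1  # stand-in for the code under test; emits no warnings
    assert warning_list == []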

tests/test_trainer.py

@@ -21,6 +21,7 @@ import random
 import re
 import tempfile
 import unittest
+from pathlib import Path
 
 import numpy as np
@@ -45,6 +46,7 @@ from transformers.testing_utils import (
     require_torch_multi_gpu,
     slow,
 )
+from transformers.trainer_utils import PREFIX_CHECKPOINT_DIR
 from transformers.utils.hp_naming import TrialShortNamer
@@ -1048,6 +1050,35 @@ class TrainerIntegrationTest(TestCasePlus, TrainerIntegrationCommon):
         trainer.train()
         self.assertTrue(isinstance(trainer.state.total_flos, float))
 
+    def check_checkpoint_deletion(self, trainer, output_dir, expected):
+        # Make fake checkpoints
+        for n in [5, 10, 15, 20, 25]:
+            os.makedirs(os.path.join(output_dir, f"{PREFIX_CHECKPOINT_DIR}-{n}"), exist_ok=True)
+        trainer._rotate_checkpoints(output_dir=output_dir)
+        glob_checkpoints = [str(x) for x in Path(output_dir).glob(f"{PREFIX_CHECKPOINT_DIR}-*")]
+        values = [int(re.match(f".*{PREFIX_CHECKPOINT_DIR}-([0-9]+)", d).groups()[0]) for d in glob_checkpoints]
+        self.assertSetEqual(set(values), set(expected))
+
+    def test_checkpoint_rotation(self):
+        with tempfile.TemporaryDirectory() as tmp_dir:
+            # Without best model at end
+            trainer = get_regression_trainer(output_dir=tmp_dir, save_total_limit=2)
+            self.check_checkpoint_deletion(trainer, tmp_dir, [20, 25])
+
+            # With best model at end
+            trainer = get_regression_trainer(output_dir=tmp_dir, load_best_model_at_end=True, save_total_limit=2)
+            trainer.state.best_model_checkpoint = os.path.join(tmp_dir, "checkpoint-5")
+            self.check_checkpoint_deletion(trainer, tmp_dir, [5, 25])
+
+            # Edge case: we don't always honor save_total_limit=1 if load_best_model_at_end=True to be able to
+            # resume from checkpoint
+            trainer = get_regression_trainer(output_dir=tmp_dir, load_best_model_at_end=True, save_total_limit=1)
+            trainer.state.best_model_checkpoint = os.path.join(tmp_dir, "checkpoint-25")
+            self.check_checkpoint_deletion(trainer, tmp_dir, [25])
+
+            trainer.state.best_model_checkpoint = os.path.join(tmp_dir, "checkpoint-5")
+            self.check_checkpoint_deletion(trainer, tmp_dir, [5, 25])
+
     def check_mem_metrics(self, trainer, check_func):
         metrics = trainer.train().metrics
         check_func("init_mem_cpu_alloc_delta", metrics)

utils/release.py

@@ -26,7 +26,7 @@ REPLACE_PATTERNS = {
     "examples": (re.compile(r'^check_min_version\("[^"]+"\)\s*$', re.MULTILINE), 'check_min_version("VERSION")\n'),
     "init": (re.compile(r'^__version__\s+=\s+"([^"]+)"\s*$', re.MULTILINE), '__version__ = "VERSION"\n'),
     "setup": (re.compile(r'^(\s*)version\s*=\s*"[^"]+",', re.MULTILINE), r'\1version="VERSION",'),
-    "doc": (re.compile(r"^(\s*)release\s*=\s*u'[^']+'$", re.MULTILINE), "release = u'VERSION'\n"),
+    "doc": (re.compile(r'^(\s*)release\s*=\s*"[^"]+"$', re.MULTILINE), "release = u'VERSION'\n"),
 }
 
 REPLACE_FILES = {
     "init": "src/transformers/__init__.py",