Compare commits

3 Commits

SHA1 Message Date
4bae96ec2b Release: V4.5.1 2021-04-13 11:18:16 -04:00
9c4070bb78 Adds use_auth_token with pipelines (#11123)
* added model_kwargs to infer_framework_from_model

* added model_kwargs to tokenizer

* added use_auth_token as named parameter

* added dynamic get for use_auth_token
2021-04-13 11:17:21 -04:00
cd39c8eb37 Replace error by warning when loading an architecture in another (#11207)
* Replace error by warning when loading an architecture in another

* Style

* Style again

* Add a test

* Adapt old test
2021-04-13 11:15:19 -04:00
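
The use_auth_token parameter added in #11123 above lets a pipeline pull private checkpoints directly from the Hub. A minimal usage sketch (the model id is a hypothetical private repo):

    from transformers import pipeline

    # use_auth_token=True reuses the token stored by `transformers-cli login`
    # (in ~/.huggingface); a token string can be passed instead.
    classifier = pipeline(
        "sentiment-analysis",
        model="my-org/private-sentiment-model",  # hypothetical private checkpoint
        use_auth_token=True,
    )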
7 changed files with 39 additions and 28 deletions

setup.py

@@ -305,7 +305,7 @@ install_requires = [
 
 setup(
     name="transformers",
-    version="4.5.0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    version="4.5.1",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
     author="Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Sam Shleifer, Patrick von Platen, Sylvain Gugger, Google AI Language Team Authors, Open AI team Authors, Facebook AI Authors, Carnegie Mellon University Authors",
     author_email="thomas@huggingface.co",
     description="State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch",

src/transformers/__init__.py

@@ -22,7 +22,7 @@
 # to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
 # in the namespace without actually importing anything (and especially none of the backends).
 
-__version__ = "4.5.0"
+__version__ = "4.5.1"
 
 # Work around to update TensorFlow's absl.logging threshold which alters the
 # default Python logging output behavior when present.

src/transformers/configuration_utils.py

@@ -399,10 +399,11 @@ class PretrainedConfig(object):
         """
         config_dict, kwargs = cls.get_config_dict(pretrained_model_name_or_path, **kwargs)
 
-        if config_dict.get("model_type", False) and hasattr(cls, "model_type"):
-            assert (
-                config_dict["model_type"] == cls.model_type
-            ), f"You tried to initiate a model of type '{cls.model_type}' with a pretrained model of type '{config_dict['model_type']}'"
+        if "model_type" in config_dict and hasattr(cls, "model_type") and config_dict["model_type"] != cls.model_type:
+            logger.warn(
+                f"You are using a model of type {config_dict['model_type']} to instantiate a model of type "
+                f"{cls.model_type}. This is not supported for all configurations of models and can yield errors."
+            )
 
         return cls.from_dict(config_dict, **kwargs)
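
With this change, loading a checkpoint into a mismatched architecture logs a warning instead of raising an AssertionError. A minimal sketch of the new behavior, using the public t5-small checkpoint:

    from transformers import BertModel

    # 4.5.0: AssertionError ("You tried to initiate a model of type ...").
    # 4.5.1: logs "You are using a model of type t5 to instantiate a model of
    # type bert. ..." and then attempts the load; as the warning says, this
    # can still yield errors for incompatible configurations.
    model = BertModel.from_pretrained("t5-small")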

src/transformers/pipelines/__init__.py

@@ -246,6 +246,7 @@ def pipeline(
     framework: Optional[str] = None,
     revision: Optional[str] = None,
     use_fast: bool = True,
+    use_auth_token: Optional[Union[str, bool]] = None,
     model_kwargs: Dict[str, Any] = {},
     **kwargs
 ) -> Pipeline:
@@ -308,6 +309,10 @@ def pipeline(
             artifacts on huggingface.co, so ``revision`` can be any identifier allowed by git.
         use_fast (:obj:`bool`, `optional`, defaults to :obj:`True`):
             Whether or not to use a Fast tokenizer if possible (a :class:`~transformers.PreTrainedTokenizerFast`).
+        use_auth_token (:obj:`str` or `bool`, `optional`):
+            The token to use as HTTP bearer authorization for remote files. If :obj:`True`, will use the token
+            generated when running :obj:`transformers-cli login` (stored in :obj:`~/.huggingface`).
+        revision(:obj:`str`, `optional`, defaults to :obj:`"main"`):
         model_kwargs:
             Additional dictionary of keyword arguments passed along to the model's :obj:`from_pretrained(...,
             **model_kwargs)` function.
@@ -367,6 +372,9 @@ def pipeline(
     task_class, model_class = targeted_task["impl"], targeted_task[framework]
 
+    # Retrieve use_auth_token and add it to model_kwargs to be used in .from_pretrained
+    model_kwargs["use_auth_token"] = model_kwargs.get("use_auth_token", use_auth_token)
+
     # Instantiate tokenizer if needed
     if isinstance(tokenizer, (str, tuple)):
         if isinstance(tokenizer, tuple):
@@ -377,12 +385,12 @@ def pipeline(
             )
         else:
             tokenizer = AutoTokenizer.from_pretrained(
-                tokenizer, revision=revision, use_fast=use_fast, _from_pipeline=task
+                tokenizer, revision=revision, use_fast=use_fast, _from_pipeline=task, **model_kwargs
             )
 
     # Instantiate config if needed
     if isinstance(config, str):
-        config = AutoConfig.from_pretrained(config, revision=revision, _from_pipeline=task)
+        config = AutoConfig.from_pretrained(config, revision=revision, _from_pipeline=task, **model_kwargs)
 
     # Instantiate modelcard if needed
     if isinstance(modelcard, str):
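
Note the fallback above: a "use_auth_token" entry already present in model_kwargs takes precedence over the named parameter, because model_kwargs.get returns the existing value. A sketch of the equivalent call routed through model_kwargs (model id and token are placeholders):

    from transformers import pipeline

    # Same effect as use_auth_token=...; if both are supplied, the
    # model_kwargs entry wins due to the .get(...) fallback above.
    pipe = pipeline(
        "fill-mask",
        model="my-org/private-mlm",                  # hypothetical private checkpoint
        model_kwargs={"use_auth_token": "api_xxx"},  # placeholder token string
    )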

src/transformers/pipelines/base.py

@@ -48,7 +48,7 @@ logger = logging.get_logger(__name__)
 
 
 def infer_framework_from_model(
-    model, model_classes: Optional[Dict[str, type]] = None, revision: Optional[str] = None, task: Optional[str] = None
+    model, model_classes: Optional[Dict[str, type]] = None, task: Optional[str] = None, **model_kwargs
 ):
     """
     Select framework (TensorFlow or PyTorch) to use from the :obj:`model` passed. Returns a tuple (framework, model).
@@ -65,10 +65,11 @@ def infer_framework_from_model(
             from.
         model_classes (dictionary :obj:`str` to :obj:`type`, `optional`):
             A mapping framework to class.
-        revision (:obj:`str`, `optional`):
-            The specific model version to use. It can be a branch name, a tag name, or a commit id, since we use a
-            git-based system for storing models and other artifacts on huggingface.co, so ``revision`` can be any
-            identifier allowed by git.
+        task (:obj:`str`):
+            The task defining which pipeline will be returned.
+        model_kwargs:
+            Additional dictionary of keyword arguments passed along to the model's :obj:`from_pretrained(...,
+            **model_kwargs)` function.
 
     Returns:
         :obj:`Tuple`: A tuple framework, model.
@@ -80,19 +81,20 @@ def infer_framework_from_model(
             "To install PyTorch, read the instructions at https://pytorch.org/."
         )
 
     if isinstance(model, str):
+        model_kwargs["_from_pipeline"] = task
         if is_torch_available() and not is_tf_available():
             model_class = model_classes.get("pt", AutoModel)
-            model = model_class.from_pretrained(model, revision=revision, _from_pipeline=task)
+            model = model_class.from_pretrained(model, **model_kwargs)
         elif is_tf_available() and not is_torch_available():
             model_class = model_classes.get("tf", TFAutoModel)
-            model = model_class.from_pretrained(model, revision=revision, _from_pipeline=task)
+            model = model_class.from_pretrained(model, **model_kwargs)
         else:
             try:
                 model_class = model_classes.get("pt", AutoModel)
-                model = model_class.from_pretrained(model, revision=revision, _from_pipeline=task)
+                model = model_class.from_pretrained(model, **model_kwargs)
             except OSError:
                 model_class = model_classes.get("tf", TFAutoModel)
-                model = model_class.from_pretrained(model, revision=revision, _from_pipeline=task)
+                model = model_class.from_pretrained(model, **model_kwargs)
 
     framework = "tf" if model.__class__.__name__.startswith("TF") else "pt"
     return framework, model
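
Since revision and task now travel inside **model_kwargs, callers forward every loading option in one dictionary and the helper only adds _from_pipeline=task before calling from_pretrained. A sketch of how this internal helper is invoked, assuming the 4.5.x layout shown here for the import path:

    from transformers.pipelines.base import infer_framework_from_model

    # Extra keyword arguments are passed straight through to from_pretrained.
    framework, model = infer_framework_from_model(
        "distilbert-base-uncased",
        task="sentiment-analysis",
        revision="main",
    )
    print(framework)  # "pt" or "tf", depending on the installed backend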

tests/test_modeling_bert_generation.py

@@ -231,13 +231,7 @@ class BertGenerationEncoderTester:
         self.parent.assertEqual(result.logits.shape, (self.batch_size, self.seq_length, self.vocab_size))
 
     def prepare_config_and_inputs_for_common(self):
-        config_and_inputs = self.prepare_config_and_inputs()
-        (
-            config,
-            input_ids,
-            input_mask,
-            token_labels,
-        ) = config_and_inputs
+        config, input_ids, input_mask, token_labels = self.prepare_config_and_inputs()
         inputs_dict = {"input_ids": input_ids, "attention_mask": input_mask}
         return config, inputs_dict
@@ -259,6 +253,11 @@ class BertGenerationEncoderTest(ModelTesterMixin, GenerationTesterMixin, unittes
         config_and_inputs = self.model_tester.prepare_config_and_inputs()
         self.model_tester.create_and_check_model(*config_and_inputs)
 
+    def test_model_as_bert(self):
+        config, input_ids, input_mask, token_labels = self.model_tester.prepare_config_and_inputs()
+        config.model_type = "bert"
+        self.model_tester.create_and_check_model(config, input_ids, input_mask, token_labels)
+
     def test_model_as_decoder(self):
         config_and_inputs = self.model_tester.prepare_config_and_inputs_for_decoder()
         self.model_tester.create_and_check_model_as_decoder(*config_and_inputs)

tests/test_modeling_common.py

@@ -22,9 +22,9 @@ import tempfile
 import unittest
 from typing import List, Tuple
 
-from transformers import is_torch_available
+from transformers import is_torch_available, logging
 from transformers.file_utils import WEIGHTS_NAME
-from transformers.testing_utils import require_torch, require_torch_multi_gpu, slow, torch_device
+from transformers.testing_utils import CaptureLogger, require_torch, require_torch_multi_gpu, slow, torch_device
 
 if is_torch_available():
@@ -1295,6 +1295,7 @@ class ModelUtilsTest(unittest.TestCase):
         model = T5ForConditionalGeneration.from_pretrained(TINY_T5)
         self.assertIsNotNone(model)
 
-        with self.assertRaises(Exception) as context:
+        logger = logging.get_logger("transformers.configuration_utils")
+        with CaptureLogger(logger) as cl:
             BertModel.from_pretrained(TINY_T5)
-        self.assertTrue("You tried to initiate a model of type" in str(context.exception))
+        self.assertTrue("You are using a model of type t5 to instantiate a model of type bert" in cl.out)