Mirror of https://github.com/huggingface/transformers.git (synced 2025-10-21 01:23:56 +08:00)
Compare commits: working-ve ... v4.2.1 (4 commits)
Commits: 236cc365af, 5b05321b56, 412d878c5e, 59fbd64b1c
setup.py
```diff
@@ -248,7 +248,7 @@ install_requires = [
 setup(
     name="transformers",
-    version="4.2.0", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    version="4.2.1", # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
     author="Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Sam Shleifer, Patrick von Platen, Sylvain Gugger, Google AI Language Team Authors, Open AI team Authors, Facebook AI Authors, Carnegie Mellon University Authors",
     author_email="thomas@huggingface.co",
     description="State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch",
```
```diff
@@ -22,7 +22,7 @@
 # to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
 # in the namespace without actually importing anything (and especially none of the backends).
 
-__version__ = "4.2.0"
+__version__ = "4.2.1"
 
 # Work around to update TensorFlow's absl.logging threshold which alters the
 # default Python logging output behavior when present.
```
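Both hunks above just bump the version string for the patch release. A quick way to confirm an environment actually has it (a standalone check, not part of the diff):

```python
# Minimal check that the installed transformers carries the 4.2.1 fixes.
import transformers
from packaging import version

assert version.parse(transformers.__version__) >= version.parse("4.2.1"), (
    f"found transformers {transformers.__version__}; the BatchEncoding.to and "
    "Trainer model-parallel fixes shipped in 4.2.1"
)
```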
```diff
@@ -89,8 +89,20 @@ if USE_TF in ENV_VARS_TRUE_AND_AUTO_VALUES and USE_TORCH not in ENV_VARS_TRUE_VA
             try:
                 _tf_version = importlib_metadata.version("tensorflow-cpu")
             except importlib_metadata.PackageNotFoundError:
-                _tf_version = None
-                _tf_available = False
+                try:
+                    _tf_version = importlib_metadata.version("tensorflow-gpu")
+                except importlib_metadata.PackageNotFoundError:
+                    try:
+                        _tf_version = importlib_metadata.version("tf-nightly")
+                    except importlib_metadata.PackageNotFoundError:
+                        try:
+                            _tf_version = importlib_metadata.version("tf-nightly-cpu")
+                        except importlib_metadata.PackageNotFoundError:
+                            try:
+                                _tf_version = importlib_metadata.version("tf-nightly-gpu")
+                            except importlib_metadata.PackageNotFoundError:
+                                _tf_version = None
+                                _tf_available = False
     if _tf_available:
         if version.parse(_tf_version) < version.parse("2"):
             logger.info(f"TensorFlow found but with version {_tf_version}. Transformers requires version 2 minimum.")
```
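This hunk (presumably src/transformers/file_utils.py) broadens TensorFlow detection: when neither `tensorflow` nor `tensorflow-cpu` is installed as a distribution, the GPU and nightly packages are probed before giving up. The nested try/except chain is equivalent to walking a list of candidate distribution names; a minimal standalone sketch of that idea (the `candidates` tuple is illustrative, not the library's API):

```python
import importlib.metadata as importlib_metadata  # Python 3.8+; older setups use the importlib_metadata backport

# Probe known TensorFlow distribution names until one reports a version.
candidates = (
    "tensorflow",
    "tensorflow-cpu",
    "tensorflow-gpu",
    "tf-nightly",
    "tf-nightly-cpu",
    "tf-nightly-gpu",
)
tf_version = None
for dist in candidates:
    try:
        tf_version = importlib_metadata.version(dist)
        break
    except importlib_metadata.PackageNotFoundError:
        continue

print(f"TensorFlow available: {tf_version is not None}, version: {tf_version}")
```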
```diff
@@ -65,6 +65,12 @@ def _is_torch(x):
     return isinstance(x, torch.Tensor)
 
 
+def _is_torch_device(x):
+    import torch
+
+    return isinstance(x, torch.device)
+
+
 def _is_tensorflow(x):
     import tensorflow as tf
 
```
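The new `_is_torch_device` mirrors `_is_torch` just above it: `torch` is imported inside the function, so the module itself never needs PyTorch at import time and never references `torch` in a scope where it is undefined. A standalone sketch of the failure mode this pattern avoids (illustrative names, not library code):

```python
# Imagine a module that never does `import torch` at the top level.

def broken_check(x):
    # Raises NameError at call time: `torch` is not defined in this scope.
    return isinstance(x, torch.device)


def safe_check(x):
    import torch  # deferred import, resolved only when the check actually runs

    return isinstance(x, torch.device)


print(safe_check("cpu"))  # False: a plain string, not a torch.device
# broken_check("cpu")     # NameError: name 'torch' is not defined
```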
```diff
@@ -801,7 +807,7 @@ class BatchEncoding(UserDict):
         # This check catches things like APEX blindly calling "to" on all inputs to a module
         # Otherwise it passes the casts down and casts the LongTensor containing the token idxs
         # into a HalfTensor
-        if isinstance(device, str) or isinstance(device, torch.device) or isinstance(device, int):
+        if isinstance(device, str) or _is_torch_device(device) or isinstance(device, int):
             self.data = {k: v.to(device=device) for k, v in self.data.items()}
         else:
             logger.warning(
```
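This is the user-visible fix, presumably in src/transformers/tokenization_utils_base.py: the old check referenced `torch.device` directly in a module that defers its torch import, so calling `BatchEncoding.to` with an actual `torch.device` could fail, while `_is_torch_device` performs the same check behind a deferred import. A small CPU-only usage sketch of the fixed path:

```python
import torch

from transformers import BatchEncoding

# Hand-built BatchEncoding, moved with a torch.device the way APEX or user code might do it.
enc = BatchEncoding({"input_ids": torch.tensor([[101, 2023, 102]])})
enc = enc.to(torch.device("cpu"))  # now routed through _is_torch_device
print(enc["input_ids"].device)     # -> cpu
```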
```diff
@@ -16,7 +16,7 @@ import json
 import os
 from dataclasses import asdict, dataclass, field
 from enum import Enum
-from typing import Any, Dict, List, Optional, Tuple
+from typing import Any, Dict, List, Optional
 
 from .file_utils import cached_property, is_torch_available, is_torch_tpu_available, torch_required
 from .trainer_utils import EvaluationStrategy, SchedulerType
```
```diff
@@ -426,7 +426,6 @@ class TrainingArguments:
 
         if is_torch_available() and self.device.type != "cuda" and self.fp16:
             raise ValueError("Mixed precision training with AMP or APEX (`--fp16`) can only be used on CUDA devices.")
-        self._n_gpu = torch.cuda.device_count()
 
     def __repr__(self):
         # We override the default repr to remove deprecated arguments from the repr. This method should be removed once
```
```diff
@@ -467,14 +466,14 @@
 
     @cached_property
     @torch_required
-    def _setup_devices(self) -> Tuple["torch.device", int]:
+    def _setup_devices(self) -> "torch.device":
         logger.info("PyTorch: setting up devices")
         if self.no_cuda:
             device = torch.device("cpu")
-            n_gpu = 0
+            self._n_gpu = 0
         elif is_torch_tpu_available():
             device = xm.xla_device()
-            n_gpu = 0
+            self._n_gpu = 0
         elif self.local_rank == -1:
             # if n_gpu is > 1 we'll use nn.DataParallel.
             # If you only want to use a specific subset of GPUs use `CUDA_VISIBLE_DEVICES=0`
```
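The comment kept in this hunk points at `CUDA_VISIBLE_DEVICES` as the way to restrict which GPUs the process sees; a usage sketch (not part of the diff):

```python
import os

# Must be set before CUDA is initialized (safest: before importing torch at all).
os.environ["CUDA_VISIBLE_DEVICES"] = "0"

import torch

print(torch.cuda.device_count())  # counts only the devices left visible above
```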
```diff
@@ -485,9 +484,7 @@
             device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
             # Sometimes the line in the postinit has not been run before we end up here, so just checking we're not at
             # the default value.
-            if self._n_gpu == -1:
-                self._n_gpu = torch.cuda.device_count()
-            n_gpu = self._n_gpu
+            self._n_gpu = torch.cuda.device_count()
         else:
             # Here, we'll use torch.distributed.
             # Initializes the distributed backend which will take care of synchronizing nodes/GPUs
```
```diff
@@ -507,12 +504,12 @@
         else:
             torch.distributed.init_process_group(backend="nccl")
             device = torch.device("cuda", self.local_rank)
-            n_gpu = 1
+            self._n_gpu = 1
 
         if device.type == "cuda":
             torch.cuda.set_device(device)
 
-        return device, n_gpu
+        return device
 
     @property
     @torch_required
```
```diff
@@ -520,7 +517,7 @@
         """
         The device used by this process.
         """
-        return self._setup_devices[0]
+        return self._setup_devices
 
     @property
     @torch_required
```
```diff
@@ -532,7 +529,9 @@
         This will only be greater than one when you have multiple GPUs available but are not using distributed
         training. For distributed training, it will always be 1.
         """
-        return self._setup_devices[1]
+        # Make sure `self._n_gpu` is properly setup.
+        _ = self._setup_devices
+        return self._n_gpu
 
     @property
     @torch_required
```
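Taken together, the TrainingArguments hunks drop the `(device, n_gpu)` tuple (hence the `Tuple` import removed in the imports hunk). `_setup_devices` now returns only the device and records the GPU count in `self._n_gpu` as a side effect, and the `n_gpu` property forces the cached setup to run before reading that field, which is what lets the Trainer report `n_gpu == 1` for a model-parallel model, as the test below checks. A minimal standalone sketch of the pattern, with illustrative names rather than the library's exact code:

```python
from functools import cached_property

import torch


class DeviceConfig:
    """Toy stand-in for TrainingArguments' device handling."""

    def __init__(self, no_cuda: bool = False):
        self.no_cuda = no_cuda
        self._n_gpu = -1  # sentinel: not determined yet

    @cached_property
    def _setup_devices(self) -> torch.device:
        # Runs at most once per instance; sets self._n_gpu as a side effect.
        if self.no_cuda or not torch.cuda.is_available():
            self._n_gpu = 0
            return torch.device("cpu")
        self._n_gpu = torch.cuda.device_count()
        return torch.device("cuda:0")

    @property
    def device(self) -> torch.device:
        return self._setup_devices

    @property
    def n_gpu(self) -> int:
        # Make sure `self._n_gpu` is determined before reading it.
        _ = self._setup_devices
        return self._n_gpu


cfg = DeviceConfig()
print(cfg.device, cfg.n_gpu)
```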
```diff
@@ -1704,6 +1704,10 @@ class TokenizerTesterMixin:
                 first_ten_tokens = list(tokenizer.get_vocab().keys())[:10]
                 sequence = " ".join(first_ten_tokens)
                 encoded_sequence = tokenizer.encode_plus(sequence, return_tensors="pt")
+
+                # Ensure that the BatchEncoding.to() method works.
+                encoded_sequence.to(model.device)
+
                 batch_encoded_sequence = tokenizer.batch_encode_plus([sequence, sequence], return_tensors="pt")
                 # This should not fail
```
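The new test lines call `BatchEncoding.to(model.device)` on the output of `encode_plus` before running the model. The same pattern in user code looks like this (a sketch; `bert-base-uncased` is just an arbitrary checkpoint for illustration):

```python
from transformers import AutoModel, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("bert-base-uncased")
model = AutoModel.from_pretrained("bert-base-uncased")

inputs = tokenizer("hello world", return_tensors="pt")
inputs = inputs.to(model.device)  # BatchEncoding.to now accepts str, int, and torch.device alike
outputs = model(**inputs)
print(outputs.last_hidden_state.shape)
```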
```diff
@@ -381,9 +381,11 @@ class TrainerIntegrationTest(unittest.TestCase):
         # Make the Trainer believe it's a parallelized model
         model.is_parallelizable = True
         model.model_parallel = True
-        trainer = Trainer(model=model, train_dataset=RegressionDataset(), eval_dataset=RegressionDataset())
+        args = TrainingArguments("./regression", per_device_train_batch_size=16, per_device_eval_batch_size=16)
+        trainer = Trainer(model, args, train_dataset=RegressionDataset(), eval_dataset=RegressionDataset())
         # Check the Trainer was fooled
         self.assertTrue(trainer.is_model_parallel)
+        self.assertEqual(trainer.args.n_gpu, 1)
 
         # The batch size of the training and evaluation dataloaders should be 16, not 16 * n_gpu
         self.assertEqual(trainer.get_train_dataloader().batch_size, 16)
```
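The updated test pins the per-device batch sizes to 16, then checks that a model-parallel model reports `args.n_gpu == 1` and does not get its dataloader batch size multiplied by the GPU count. A self-contained sketch that mirrors the test with toy components (the dataset and config here are illustrative, not the repo's fixtures):

```python
import torch
from torch.utils.data import Dataset

from transformers import BertConfig, BertForSequenceClassification, Trainer, TrainingArguments


class ToyDataset(Dataset):
    """Tiny stand-in for the repo's RegressionDataset."""

    def __len__(self):
        return 64

    def __getitem__(self, i):
        return {"input_ids": torch.tensor([101, 102, 103]), "labels": torch.tensor(0)}


# Any small PyTorch model will do; a one-layer BERT keeps this cheap.
config = BertConfig(hidden_size=32, num_hidden_layers=1, num_attention_heads=2, intermediate_size=64)
model = BertForSequenceClassification(config)

# Pretend the model is model-parallel, exactly as the test does.
model.is_parallelizable = True
model.model_parallel = True

args = TrainingArguments("./regression", per_device_train_batch_size=16, per_device_eval_batch_size=16)
trainer = Trainer(model, args, train_dataset=ToyDataset(), eval_dataset=ToyDataset())

# The test asserts n_gpu == 1 and an un-multiplied dataloader batch size of 16 in this setup.
print(trainer.args.n_gpu, trainer.get_train_dataloader().batch_size)
```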