Mirror of https://github.com/huggingface/transformers.git, synced 2025-10-21 01:23:56 +08:00
Compare commits
9 Commits
96d1cfb13d
7d42ddda89
22bb717c04
2fcc976045
136617224b
c0073b66ec
0b752bf9da
fb711f22d6
055f86fd88
@@ -50,7 +50,8 @@ from transformers.utils.versions import require_version

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.8.0.dev0")
+check_min_version("4.8.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

 logger = logging.getLogger(__name__)
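This and the following example hunks are the same release pin: the development version "4.8.0.dev0" becomes the released "4.8.0". As a rough illustration of what a guard like check_min_version amounts to (a minimal sketch under assumptions, not the actual transformers.utils implementation), the check is just an early version comparison with an actionable error:

from packaging import version


def check_min_version_sketch(min_version: str) -> None:
    # Sketch: compare the installed transformers version against the
    # example's requirement and fail fast. Names here are illustrative.
    import transformers

    if version.parse(transformers.__version__) < version.parse(min_version):
        raise ImportError(
            f"This example requires transformers >= {min_version}, "
            f"but the installed version is {transformers.__version__}."
        )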
@@ -49,7 +49,8 @@ from transformers.utils.versions import require_version

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.8.0.dev0")
+check_min_version("4.8.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

 logger = logging.getLogger(__name__)
@@ -45,7 +45,8 @@ from transformers.utils.versions import require_version

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.8.0.dev0")
+check_min_version("4.8.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/language-modeling/requirements.txt")

 logger = logging.getLogger(__name__)
@@ -46,7 +46,7 @@ from transformers.utils import check_min_version

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.8.0.dev0")
+check_min_version("4.8.0")

 logger = logging.getLogger(__name__)
@@ -47,7 +47,8 @@ from utils_qa import postprocess_qa_predictions

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.8.0.dev0")
+check_min_version("4.8.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

 logger = logging.getLogger(__name__)
@@ -46,7 +46,8 @@ from utils_qa import postprocess_qa_predictions_with_beam_search

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.8.0.dev0")
+check_min_version("4.8.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

 logger = logging.getLogger(__name__)
@@ -51,7 +51,8 @@ from utils_qa import postprocess_qa_predictions_with_beam_search

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.8.0.dev0")
+check_min_version("4.8.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

 logger = logging.getLogger(__name__)
@@ -53,7 +53,8 @@ from utils_qa import postprocess_qa_predictions

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.8.0.dev0")
+check_min_version("4.8.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/question-answering/requirements.txt")

 logger = logging.getLogger(__name__)
@@ -47,7 +47,8 @@ from transformers.utils.versions import require_version

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.8.0.dev0")
+check_min_version("4.8.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/summarization/requirements.txt")

 logger = logging.getLogger(__name__)
@@ -46,7 +46,7 @@ from transformers.utils.versions import require_version

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.8.0.dev0")
+check_min_version("4.8.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")
@@ -46,7 +46,7 @@ from transformers.utils.versions import require_version

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.8.0.dev0")
+check_min_version("4.8.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/text-classification/requirements.txt")
@@ -46,7 +46,8 @@ from transformers.utils.versions import require_version

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.8.0.dev0")
+check_min_version("4.8.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/token-classification/requirements.txt")

 logger = logging.getLogger(__name__)
@@ -51,7 +51,8 @@ from transformers.utils.versions import require_version

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.8.0.dev0")
+check_min_version("4.8.0")

 require_version("datasets>=1.8.0", "To fix: pip install -r examples/pytorch/translation/requirements.txt")

 logger = logging.getLogger(__name__)
@@ -45,7 +45,7 @@ from utils_qa import postprocess_qa_predictions

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.7.0.dev0")
+check_min_version("4.8.0")

 logger = logging.getLogger(__name__)
@@ -100,7 +100,7 @@ class SavePretrainedCallback(tf.keras.callbacks.Callback):

 # Will error if the minimal version of Transformers is not installed. Remove at your own risks.
-check_min_version("4.8.0.dev0")
+check_min_version("4.8.0")

 task_to_keys = {
     "cola": ("sentence", None),
setup.py (6 changed lines)
@@ -125,7 +125,7 @@ _deps = [
     "pytest-sugar",
     "pytest-xdist",
     "python>=3.6.0",
-    "ray",
+    "ray[tune]",
     "recommonmark",
     "regex!=2019.12.17",
     "requests",
@@ -246,7 +246,7 @@ extras["sagemaker"] = deps_list("sagemaker")
 extras["deepspeed"] = deps_list("deepspeed")
 extras["fairscale"] = deps_list("fairscale")
 extras["optuna"] = deps_list("optuna")
-extras["ray"] = deps_list("ray")
+extras["ray"] = deps_list("ray[tune]")

 extras["integrations"] = extras["optuna"] + extras["ray"]
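These two setup.py hunks have to move together: the _deps table is keyed by the full requirement name, extras marker included, so once the entry reads "ray[tune]" the deps_list lookup must use exactly that string. A minimal sketch of the pattern (the helper mirrors setup.py's deps_list, but the name-extraction regex is a simplifying assumption):

import re

# Assumed simplification of setup.py's dependency table: map each specifier
# to its bare name; extras like "[tune]" stay part of the key.
_deps = ["ray[tune]", "recommonmark", "regex!=2019.12.17", "requests"]
deps = {re.match(r"^[^!=<>~;]+", d).group(0): d for d in _deps}


def deps_list(*pkgs):
    # Look up pinned specifiers by name; a stale name ("ray") would now KeyError.
    return [deps[pkg] for pkg in pkgs]


assert deps_list("ray[tune]") == ["ray[tune]"]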
@@ -336,7 +336,7 @@ install_requires = [

 setup(
     name="transformers",
-    version="4.8.0.dev0",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
+    version="4.8.2",  # expected format is one of x.y.z.dev0, or x.y.z.rc1 or x.y.z (no to dashes, yes to dots)
     author="Thomas Wolf, Lysandre Debut, Victor Sanh, Julien Chaumond, Sam Shleifer, Patrick von Platen, Sylvain Gugger, Suraj Patil, Stas Bekman, Google AI Language Team Authors, Open AI team Authors, Facebook AI Authors, Carnegie Mellon University Authors",
     author_email="thomas@huggingface.co",
     description="State-of-the-art Natural Language Processing for TensorFlow 2.0 and PyTorch",
@@ -22,7 +22,7 @@
 # to defer the actual importing for when the objects are requested. This way `import transformers` provides the names
 # in the namespace without actually importing anything (and especially none of the backends).

-__version__ = "4.8.0.dev0"
+__version__ = "4.8.2"

 # Work around to update TensorFlow's absl.logging threshold which alters the
 # default Python logging output behavior when present.
@@ -288,7 +288,7 @@ class SPMTokenizer:
         # <s> 1+1
         # </s> 2+1
         self.vocab = {spm.IdToPiece(i): i for i in range(bpe_vocab_size)}
-        self.id_to_tokens = [spm.IdToPiece(i) for i in range(bpe_vocab_size)]
+        self.ids_to_tokens = [spm.IdToPiece(i) for i in range(bpe_vocab_size)]
         # self.vocab['[PAD]'] = 0
         # self.vocab['[CLS]'] = 1
         # self.vocab['[SEP]'] = 2
@@ -351,7 +351,7 @@ class SPMTokenizer:
             self.special_tokens.append(token)
             if token not in self.vocab:
                 self.vocab[token] = len(self.vocab) - 1
-                self.id_to_tokens.append(token)
+                self.ids_to_tokens.append(token)
             return self.id(token)

     def part_of_whole_word(self, token, is_bos=False):
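Both SPMTokenizer hunks apply one rename, id_to_tokens to ids_to_tokens, so that the list appended to when registering special tokens is the same one the id-to-token lookups read. A hypothetical miniature of that coupling (everything except the ids_to_tokens name is illustrative):

class MiniSPM:
    def __init__(self, pieces):
        self.vocab = {piece: i for i, piece in enumerate(pieces)}
        self.ids_to_tokens = list(pieces)

    def add_special_token(self, token):
        # Appending to a misspelled id_to_tokens list here would leave sym()
        # unable to resolve the new id.
        if token not in self.vocab:
            self.vocab[token] = len(self.vocab)
            self.ids_to_tokens.append(token)
        return self.vocab[token]

    def sym(self, token_id):
        return self.ids_to_tokens[token_id]


t = MiniSPM(["[PAD]", "hello"])
assert t.sym(t.add_special_token("[MASK]")) == "[MASK]"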
@@ -64,11 +64,6 @@ class DetrConfig(PretrainedConfig):
             The dropout ratio for the attention probabilities.
         activation_dropout (:obj:`float`, `optional`, defaults to 0.0):
             The dropout ratio for activations inside the fully connected layer.
-        classifier_dropout (:obj:`float`, `optional`, defaults to 0.0):
-            The dropout ratio for classifier.
-        max_position_embeddings (:obj:`int`, `optional`, defaults to 1024):
-            The maximum sequence length that this model might ever be used with. Typically set this to something large
-            just in case (e.g., 512 or 1024 or 2048).
         init_std (:obj:`float`, `optional`, defaults to 0.02):
             The standard deviation of the truncated_normal_initializer for initializing all weight matrices.
         init_xavier_std (:obj:`float`, `optional`, defaults to 1):
@@ -178,7 +173,6 @@ class DetrConfig(PretrainedConfig):
         self.init_xavier_std = init_xavier_std
         self.encoder_layerdrop = encoder_layerdrop
         self.decoder_layerdrop = decoder_layerdrop
-        self.classifier_dropout = classifier_dropout
         self.num_hidden_layers = encoder_layers
         self.scale_embedding = scale_embedding  # scale factor will be sqrt(d_model) if True
         self.auxiliary_loss = auxiliary_loss
@@ -440,7 +440,8 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
                 annotations.

             return_segmentation_masks (:obj:`Dict`, :obj:`List[Dict]`, `optional`, defaults to :obj:`False`):
-                Whether to also return instance segmentation masks in case :obj:`format = "coco_detection"`.
+                Whether to also include instance segmentation masks as part of the labels in case :obj:`format =
+                "coco_detection"`.

             masks_path (:obj:`pathlib.Path`, `optional`):
                 Path to the directory containing the PNG files that store the class-agnostic image segmentations. Only
@@ -465,6 +466,7 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
             - **pixel_values** -- Pixel values to be fed to a model.
             - **pixel_mask** -- Pixel mask to be fed to a model (when :obj:`pad_and_return_pixel_mask=True` or if
               `"pixel_mask"` is in :obj:`self.model_input_names`).
+            - **labels** -- Optional labels to be fed to a model (when :obj:`annotations` are provided)
         """
         # Input type checking for clearer error
@@ -613,7 +615,7 @@ class DetrFeatureExtractor(FeatureExtractionMixin, ImageFeatureExtractionMixin):
             if not is_torch_available():
                 raise ImportError("Unable to convert output to PyTorch tensors format, PyTorch is not installed.")

-            encoded_inputs["target"] = [
+            encoded_inputs["labels"] = [
                 {k: torch.from_numpy(v) for k, v in target.items()} for target in annotations
             ]
@@ -828,8 +828,8 @@ DETR_INPUTS_DOCSTRING = r"""
         pixel_values (:obj:`torch.FloatTensor` of shape :obj:`(batch_size, num_channels, height, width)`):
             Pixel values. Padding will be ignored by default should you provide it.

-            Pixel values can be obtained using :class:`~transformers.DetrTokenizer`. See
-            :meth:`transformers.DetrTokenizer.__call__` for details.
+            Pixel values can be obtained using :class:`~transformers.DetrFeatureExtractor`. See
+            :meth:`transformers.DetrFeatureExtractor.__call__` for details.

         pixel_mask (:obj:`torch.LongTensor` of shape :obj:`(batch_size, height, width)`, `optional`):
             Mask to avoid performing attention on padding pixel values. Mask values selected in ``[0, 1]``:
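The DETR hunks above consistently rework the preprocessing API surface: pixel values come from DetrFeatureExtractor (there is no DetrTokenizer), and encoded annotations come back under the "labels" key instead of "target". A short usage sketch of the corrected workflow (the checkpoint name is the one used by the tests further down; the printed shape is indicative):

from PIL import Image

from transformers import DetrFeatureExtractor

feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
image = Image.new("RGB", (640, 480))  # stand-in for a real image

# Without annotations, only pixel values (and possibly a pixel mask) come back;
# with annotations, the encoded targets are returned under "labels".
encoding = feature_extractor(images=image, return_tensors="pt")
print(encoding["pixel_values"].shape)  # e.g. torch.Size([1, 3, 800, 1066])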
@@ -990,7 +990,6 @@ class DetrDecoder(DetrPreTrainedModel):
         super().__init__(config)
         self.dropout = config.dropout
         self.layerdrop = config.decoder_layerdrop
-        self.max_target_positions = config.max_position_embeddings

         self.layers = nn.ModuleList([DetrDecoderLayer(config) for _ in range(config.decoder_layers)])
         # in DETR, the decoder uses layernorm after the last decoder layer output
@@ -393,7 +393,7 @@ class Trainer:
         # Create clone of distant repo and output directory if needed
         if self.args.push_to_hub:
             self.init_git_repo()
-        if self.is_world_process_zero():
+        if self.args.should_save:
             os.makedirs(self.args.output_dir, exist_ok=True)

         if not callable(self.data_collator) and callable(getattr(self.data_collator, "collate_batch", None)):
@@ -899,7 +899,7 @@ class Trainer:
             with tune.checkpoint_dir(step=self.state.global_step) as checkpoint_dir:
                 output_dir = os.path.join(checkpoint_dir, f"{PREFIX_CHECKPOINT_DIR}-{self.state.global_step}")
                 self.save_model(output_dir)
-                if self.is_world_process_zero():
+                if self.args.should_save:
                     self.state.save_to_json(os.path.join(output_dir, "trainer_state.json"))
                     torch.save(self.optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt"))
                     torch.save(self.lr_scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))
@@ -1357,10 +1357,18 @@ class Trainer:
             logger.info(
                 f"Loading best model from {self.state.best_model_checkpoint} (score: {self.state.best_metric})."
             )
-            # We load the model state dict on the CPU to avoid an OOM error.
-            state_dict = torch.load(os.path.join(self.state.best_model_checkpoint, WEIGHTS_NAME), map_location="cpu")
-            # If the model is on the GPU, it still works!
-            self._load_state_dict_in_model(state_dict)
+
+            best_model_path = os.path.join(self.state.best_model_checkpoint, WEIGHTS_NAME)
+            if os.path.exists(best_model_path):
+                # We load the model state dict on the CPU to avoid an OOM error.
+                state_dict = torch.load(best_model_path, map_location="cpu")
+                # If the model is on the GPU, it still works!
+                self._load_state_dict_in_model(state_dict)
+            else:
+                logger.warn(
+                    f"Could not locate the best model at {best_model_path}, if you are running a distributed training "
+                    "on multiple nodes, you should activate `--save_on_each_node`."
+                )

             if self.deepspeed:
                 self.deepspeed.load_checkpoint(
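The motivation for the guard above: with load_best_model_at_end on a multi-node setup without shared storage, the best checkpoint may have been written by a different node, so the file can be legitimately absent. The same pattern as a standalone sketch (function and names are illustrative):

import logging
import os

import torch

logger = logging.getLogger(__name__)


def load_best_checkpoint(model, best_model_path):
    # Guarded load: warn instead of crashing when the checkpoint is absent,
    # e.g. because another node wrote it.
    if os.path.exists(best_model_path):
        state_dict = torch.load(best_model_path, map_location="cpu")
        model.load_state_dict(state_dict)
    else:
        logger.warning("Could not locate %s; consider --save_on_each_node.", best_model_path)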
@@ -1500,14 +1508,14 @@ class Trainer:
             # Consolidate the state dict on all processed of dp_rank 0
             opt_state_dict = self.optimizer.state_dict()
             # Save it and the scheduler on the main process
-            if self.is_world_process_zero():
+            if self.args.should_save:
                 torch.save(opt_state_dict, os.path.join(output_dir, "optimizer.pt"))
                 with warnings.catch_warnings(record=True) as caught_warnings:
                     torch.save(self.lr_scheduler.state_dict(), os.path.join(output_dir, "scheduler.pt"))
                 reissue_pt_warnings(caught_warnings)
                 if self.use_amp:
                     torch.save(self.scaler.state_dict(), os.path.join(output_dir, "scaler.pt"))
-        elif self.is_world_process_zero() and not self.deepspeed:
+        elif self.args.should_save and not self.deepspeed:
             # deepspeed.save_checkpoint above saves model/optim/sched
             torch.save(self.optimizer.state_dict(), os.path.join(output_dir, "optimizer.pt"))
             with warnings.catch_warnings(record=True) as caught_warnings:
|
||||
self.state.best_model_checkpoint = output_dir
|
||||
|
||||
# Save the Trainer state
|
||||
if self.is_world_process_zero():
|
||||
if self.args.should_save:
|
||||
self.state.save_to_json(os.path.join(output_dir, "trainer_state.json"))
|
||||
|
||||
# Save RNG state in non-distributed training
|
||||
@@ -1562,7 +1570,7 @@ class Trainer:
             torch.save(rng_states, os.path.join(output_dir, f"rng_state_{local_rank}.pth"))

         # Maybe delete some older checkpoints.
-        if self.is_world_process_zero():
+        if self.args.should_save:
             self._rotate_checkpoints(use_mtime=True, output_dir=run_dir)

     def _load_optimizer_and_scheduler(self, checkpoint):
@@ -1831,19 +1839,19 @@ class Trainer:
         elif is_sagemaker_mp_enabled():
             # Calling the state_dict needs to be done on the wrapped model and on all processes.
             state_dict = self.model_wrapped.state_dict()
-            if self.is_world_process_zero():
+            if self.args.should_save:
                 self._save(output_dir, state_dict=state_dict)
         elif (
             ShardedDDPOption.ZERO_DP_2 in self.args.sharded_ddp or ShardedDDPOption.ZERO_DP_3 in self.args.sharded_ddp
         ):
             state_dict = self.model.state_dict()

-            if self.is_world_process_zero():
+            if self.args.should_save:
                 self._save(output_dir, state_dict=state_dict)
         elif self.deepspeed:

             # this takes care of everything as long as we aren't under zero3
-            if self.is_world_process_zero():
+            if self.args.should_save:
                 self._save(output_dir)

             if is_deepspeed_zero3_enabled():
@@ -1851,7 +1859,7 @@ class Trainer:
             # saved, so since under zero3 the file is bogus, simply delete it. The user should
             # either user deepspeed checkpoint to resume or to recover full weights use
             # zero_to_fp32.py stored in the checkpoint.
-            if self.is_world_process_zero():
+            if self.args.should_save:
                 file = os.path.join(output_dir, WEIGHTS_NAME)
                 if os.path.isfile(file):
                     # logger.info(f"deepspeed zero3: removing {file}, see zero_to_fp32.py to recover weights")
@@ -1862,7 +1870,7 @@ class Trainer:
             # This must be called on all ranks
             self.deepspeed.save_fp16_model(output_dir, WEIGHTS_NAME)

-        elif self.is_world_process_zero():
+        elif self.args.should_save:
             self._save(output_dir)

     def _save_tpu(self, output_dir: Optional[str] = None):
|
||||
if isinstance(unwrap_model(self.model), PreTrainedModel):
|
||||
unwrap_model(self.model).save_pretrained(
|
||||
output_dir,
|
||||
save_config=self.is_world_process_zero(),
|
||||
save_config=self.args.should_save,
|
||||
state_dict=self.model.state_dict(),
|
||||
save_function=xm.save,
|
||||
)
|
||||
@ -1889,8 +1897,8 @@ class Trainer:
|
||||
state_dict = self.model.state_dict()
|
||||
xm.save(state_dict, os.path.join(output_dir, WEIGHTS_NAME))
|
||||
else:
|
||||
self.model.save_pretrained(output_dir, save_config=self.is_world_process_zero(), save_function=xm.save)
|
||||
if self.tokenizer is not None and self.is_world_process_zero():
|
||||
self.model.save_pretrained(output_dir, save_config=self.args.should_save, save_function=xm.save)
|
||||
if self.tokenizer is not None and self.args.should_save:
|
||||
self.tokenizer.save_pretrained(output_dir)
|
||||
|
||||
def _save(self, output_dir: Optional[str] = None, state_dict=None):
|
||||
@@ -1960,7 +1968,7 @@ class Trainer:
         if len(checkpoints_sorted) <= self.args.save_total_limit:
             return

-        # If save_total_limit=1 with load_best_mode_at_end=True, we could end up deleting the last checkpoint, which
+        # If save_total_limit=1 with load_best_model_at_end=True, we could end up deleting the last checkpoint, which
         # we don't do to allow resuming.
         save_total_limit = self.args.save_total_limit
         if (
@@ -2436,7 +2444,7 @@ class Trainer:
         """
         Initializes a git repo in :obj:`self.args.push_to_hub_model_id`.
         """
-        if not self.is_world_process_zero():
+        if not self.args.should_save:
             return
         use_auth_token = True if self.args.push_to_hub_token is None else self.args.push_to_hub_token
         repo_url = PushToHubMixin._get_repo_url_from_name(
@@ -2494,11 +2502,16 @@ class Trainer:
         Returns:
             The url of the commit of your model in the given repository.
         """
-        if not self.is_world_process_zero():
+        if not self.args.should_save:
             return

         self.create_model_card(model_name=self.args.push_to_hub_model_id, **kwargs)
         self.save_model()

+        # Only push from one node.
+        if not self.is_world_process_zero():
+            return
+
         return self.repo.push_to_hub(commit_message=commit_message)

 #
@@ -182,6 +182,12 @@ class TrainingArguments:
         save_total_limit (:obj:`int`, `optional`):
             If a value is passed, will limit the total amount of checkpoints. Deletes the older checkpoints in
             :obj:`output_dir`.
+        save_on_each_node (:obj:`bool`, `optional`, defaults to :obj:`False`):
+            When doing multi-node distributed training, whether to save models and checkpoints on each node, or only on
+            the main one.
+
+            This should not be activated when the different nodes use the same storage as the files will be saved with
+            the same names for each node.
         no_cuda (:obj:`bool`, `optional`, defaults to :obj:`False`):
             Whether to not use CUDA even when it is available or not.
         seed (:obj:`int`, `optional`, defaults to 42):
@@ -434,7 +440,7 @@ class TrainingArguments:
             "help": "When doing a multinode distributed training, whether to log once per node or just once on the main node."
         },
     )
-    logging_dir: Optional[str] = field(default_factory=default_logdir, metadata={"help": "Tensorboard log dir."})
+    logging_dir: Optional[str] = field(default=None, metadata={"help": "Tensorboard log dir."})
     logging_strategy: IntervalStrategy = field(
         default="steps",
         metadata={"help": "The logging strategy to use."},
|
||||
)
|
||||
},
|
||||
)
|
||||
save_on_each_node: bool = field(
|
||||
default=False,
|
||||
metadata={
|
||||
"help": "When doing multi-node distributed training, whether to save models and checkpoints on each node, or only on the main one"
|
||||
},
|
||||
)
|
||||
no_cuda: bool = field(default=False, metadata={"help": "Do not use CUDA even when it is available"})
|
||||
seed: int = field(default=42, metadata={"help": "Random seed that will be set at the beginning of training."})
|
||||
|
||||
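A usage sketch for the new flag (requires this patched version; the output_dir value is arbitrary):

from transformers import TrainingArguments

# Appropriate when nodes do not share a filesystem: every node keeps its own
# copy of models and checkpoints.
args = TrainingArguments(output_dir="output", save_on_each_node=True)
print(args.save_on_each_node)  # True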
@@ -936,6 +948,19 @@ class TrainingArguments:
         else:
             return self.process_index == 0

+    @property
+    def should_save(self):
+        """
+        Whether or not the current process should write to disk, e.g., to save models and checkpoints.
+        """
+        if self.save_on_each_node:
+            return self.local_process_index == 0
+        else:
+            if is_sagemaker_mp_enabled():
+                return smp.rank() == 0
+            else:
+                return self.process_index == 0
+
     def get_process_log_level(self):
         """
         Returns the log level to be used depending on whether this process is the main process of node 0, main process
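The decision table the new should_save property encodes, as a self-contained sketch (rank values are hypothetical and the SageMaker model-parallel branch is omitted):

def should_save(save_on_each_node: bool, process_index: int, local_process_index: int) -> bool:
    # save_on_each_node=True: the main process of every node writes.
    # save_on_each_node=False: only the globally-main process writes.
    if save_on_each_node:
        return local_process_index == 0
    return process_index == 0


# Two nodes with two processes each: (global rank, local rank).
ranks = [(0, 0), (1, 1), (2, 0), (3, 1)]
assert [should_save(True, g, l) for g, l in ranks] == [True, False, True, False]
assert [should_save(False, g, l) for g, l in ranks] == [True, False, False, False]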
@@ -253,8 +253,7 @@ class DetrFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):
         target = {"image_id": 39769, "annotations": target}

         # encode them
-        # TODO replace by facebook/detr-resnet-50
-        feature_extractor = DetrFeatureExtractor.from_pretrained("nielsr/detr-resnet-50")
+        feature_extractor = DetrFeatureExtractor.from_pretrained("facebook/detr-resnet-50")
         encoding = feature_extractor(images=image, annotations=target, return_tensors="pt")

         # verify pixel values
@@ -266,27 +265,27 @@ class DetrFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):

         # verify area
         expected_area = torch.tensor([5887.9600, 11250.2061, 489353.8438, 837122.7500, 147967.5156, 165732.3438])
-        assert torch.allclose(encoding["target"][0]["area"], expected_area)
+        assert torch.allclose(encoding["labels"][0]["area"], expected_area)
         # verify boxes
         expected_boxes_shape = torch.Size([6, 4])
-        self.assertEqual(encoding["target"][0]["boxes"].shape, expected_boxes_shape)
+        self.assertEqual(encoding["labels"][0]["boxes"].shape, expected_boxes_shape)
         expected_boxes_slice = torch.tensor([0.5503, 0.2765, 0.0604, 0.2215])
-        assert torch.allclose(encoding["target"][0]["boxes"][0], expected_boxes_slice, atol=1e-3)
+        assert torch.allclose(encoding["labels"][0]["boxes"][0], expected_boxes_slice, atol=1e-3)
         # verify image_id
         expected_image_id = torch.tensor([39769])
-        assert torch.allclose(encoding["target"][0]["image_id"], expected_image_id)
+        assert torch.allclose(encoding["labels"][0]["image_id"], expected_image_id)
         # verify is_crowd
         expected_is_crowd = torch.tensor([0, 0, 0, 0, 0, 0])
-        assert torch.allclose(encoding["target"][0]["iscrowd"], expected_is_crowd)
+        assert torch.allclose(encoding["labels"][0]["iscrowd"], expected_is_crowd)
         # verify class_labels
         expected_class_labels = torch.tensor([75, 75, 63, 65, 17, 17])
-        assert torch.allclose(encoding["target"][0]["class_labels"], expected_class_labels)
+        assert torch.allclose(encoding["labels"][0]["class_labels"], expected_class_labels)
         # verify orig_size
         expected_orig_size = torch.tensor([480, 640])
-        assert torch.allclose(encoding["target"][0]["orig_size"], expected_orig_size)
+        assert torch.allclose(encoding["labels"][0]["orig_size"], expected_orig_size)
         # verify size
         expected_size = torch.tensor([800, 1066])
-        assert torch.allclose(encoding["target"][0]["size"], expected_size)
+        assert torch.allclose(encoding["labels"][0]["size"], expected_size)

     @slow
     def test_call_pytorch_with_coco_panoptic_annotations(self):
@@ -313,27 +312,27 @@ class DetrFeatureExtractionTest(FeatureExtractionSavingTestMixin, unittest.TestCase):

         # verify area
         expected_area = torch.tensor([147979.6875, 165527.0469, 484638.5938, 11292.9375, 5879.6562, 7634.1147])
-        assert torch.allclose(encoding["target"][0]["area"], expected_area)
+        assert torch.allclose(encoding["labels"][0]["area"], expected_area)
         # verify boxes
         expected_boxes_shape = torch.Size([6, 4])
-        self.assertEqual(encoding["target"][0]["boxes"].shape, expected_boxes_shape)
+        self.assertEqual(encoding["labels"][0]["boxes"].shape, expected_boxes_shape)
         expected_boxes_slice = torch.tensor([0.2625, 0.5437, 0.4688, 0.8625])
-        assert torch.allclose(encoding["target"][0]["boxes"][0], expected_boxes_slice, atol=1e-3)
+        assert torch.allclose(encoding["labels"][0]["boxes"][0], expected_boxes_slice, atol=1e-3)
         # verify image_id
         expected_image_id = torch.tensor([39769])
-        assert torch.allclose(encoding["target"][0]["image_id"], expected_image_id)
+        assert torch.allclose(encoding["labels"][0]["image_id"], expected_image_id)
         # verify is_crowd
         expected_is_crowd = torch.tensor([0, 0, 0, 0, 0, 0])
-        assert torch.allclose(encoding["target"][0]["iscrowd"], expected_is_crowd)
+        assert torch.allclose(encoding["labels"][0]["iscrowd"], expected_is_crowd)
         # verify class_labels
         expected_class_labels = torch.tensor([17, 17, 63, 75, 75, 93])
-        assert torch.allclose(encoding["target"][0]["class_labels"], expected_class_labels)
+        assert torch.allclose(encoding["labels"][0]["class_labels"], expected_class_labels)
         # verify masks
         expected_masks_sum = 822338
-        self.assertEqual(encoding["target"][0]["masks"].sum().item(), expected_masks_sum)
+        self.assertEqual(encoding["labels"][0]["masks"].sum().item(), expected_masks_sum)
         # verify orig_size
         expected_orig_size = torch.tensor([480, 640])
-        assert torch.allclose(encoding["target"][0]["orig_size"], expected_orig_size)
+        assert torch.allclose(encoding["labels"][0]["orig_size"], expected_orig_size)
         # verify size
         expected_size = torch.tensor([800, 1066])
-        assert torch.allclose(encoding["target"][0]["size"], expected_size)
+        assert torch.allclose(encoding["labels"][0]["size"], expected_size)
@@ -564,13 +564,34 @@ class ModelTesterMixin:
             model_state_dict = model.state_dict()
             loaded_model_state_dict = loaded_model.state_dict()

+            non_persistent_buffers = {}
+            for key in loaded_model_state_dict.keys():
+                if key not in model_state_dict.keys():
+                    non_persistent_buffers[key] = loaded_model_state_dict[key]
+
+            loaded_model_state_dict = {
+                key: value for key, value in loaded_model_state_dict.items() if key not in non_persistent_buffers
+            }
+
+            self.assertEqual(set(model_state_dict.keys()), set(loaded_model_state_dict.keys()))
+
+            model_buffers = list(model.buffers())
+            for non_persistent_buffer in non_persistent_buffers.values():
+                found_buffer = False
+                for i, model_buffer in enumerate(model_buffers):
+                    if torch.equal(non_persistent_buffer, model_buffer):
+                        found_buffer = True
+                        break
+
+                self.assertTrue(found_buffer)
+                model_buffers.pop(i)
+
             models_equal = True
             for layer_name, p1 in model_state_dict.items():
-                p2 = loaded_model_state_dict[layer_name]
-                if p1.data.ne(p2.data).sum() > 0:
-                    models_equal = False
+                if layer_name in loaded_model_state_dict:
+                    p2 = loaded_model_state_dict[layer_name]
+                    if p1.data.ne(p2.data).sum() > 0:
+                        models_equal = False

             self.assertTrue(models_equal)
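Context for the test change above: buffers registered with persistent=False belong to the module but are excluded from state_dict(), so a saved/reloaded pair can disagree on state-dict keys; the updated test collects the extra keys and checks them against the module's buffers instead of failing on the key lookup. A minimal demonstration of the underlying PyTorch behavior (class name is illustrative):

import torch
import torch.nn as nn


class WithScratchBuffer(nn.Module):
    def __init__(self):
        super().__init__()
        # Persistent buffers are serialized; non-persistent ones are not.
        self.register_buffer("running", torch.zeros(2))
        self.register_buffer("scratch", torch.zeros(2), persistent=False)


module = WithScratchBuffer()
assert "running" in module.state_dict()
assert "scratch" not in module.state_dict()
assert len(list(module.buffers())) == 2  # both still live on the module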