diff --git a/docs/source/de/testing.md b/docs/source/de/testing.md
index 100151e58c3..07be15f31ec 100644
--- a/docs/source/de/testing.md
+++ b/docs/source/de/testing.md
@@ -473,13 +473,6 @@ Hier ist zum Beispiel ein Test, der nur ausgeführt werden muss, wenn 2 oder meh
 def test_example_with_multi_gpu():
 ```
 
-Wenn ein Test `tensorflow` benötigt, verwenden Sie den Dekorator `require_tf`. Zum Beispiel:
-
-```python no-style
-@require_tf
-def test_tf_thing_with_tensorflow():
-```
-
 Diese Dekoratoren können gestapelt werden. Wenn zum Beispiel ein Test langsam ist und mindestens eine GPU unter pytorch benötigt,
 können Sie ihn wie folgt einrichten:
 
@@ -1204,9 +1197,6 @@ if torch.cuda.is_available():
 
 import numpy as np
 np.random.seed(seed)
-
-# tf RNG
-tf.random.set_seed(seed)
 ```
 
 ### Tests debuggen
diff --git a/docs/source/en/testing.md b/docs/source/en/testing.md
index dd0b9cbb426..ddcb363f8cb 100644
--- a/docs/source/en/testing.md
+++ b/docs/source/en/testing.md
@@ -474,13 +474,6 @@ For example, here is a test that must be run only when there are 2 or more GPUs
 def test_example_with_multi_gpu():
 ```
 
-If a test requires `tensorflow` use the `require_tf` decorator. For example:
-
-```python no-style
-@require_tf
-def test_tf_thing_with_tensorflow():
-```
-
 These decorators can be stacked. For example, if a test is slow and requires at least one GPU under pytorch, here is how
 to set it up:
 
@@ -1226,11 +1219,6 @@ if torch.cuda.is_available():
 
 import numpy as np
 np.random.seed(seed)
-
-# tf RNG
-import tensorflow as tf
-
-tf.random.set_seed(seed)
 ```
 
 ### Debugging tests
diff --git a/docs/source/ja/testing.md b/docs/source/ja/testing.md
index 8831d48a3bd..5425861a1d1 100644
--- a/docs/source/ja/testing.md
+++ b/docs/source/ja/testing.md
@@ -445,13 +445,6 @@ CUDA_VISIBLE_DEVICES="1" pytest tests/utils/test_logging.py
 def test_example_with_multi_gpu():
 ```
 
-テストに `tensorflow` が必要な場合は、`require_tf` デコレータを使用します。例えば:
-
-```python no-style
-@require_tf
-def test_tf_thing_with_tensorflow():
-```
-
 これらのデコレータは積み重ねることができます。たとえば、テストが遅く、pytorch で少なくとも 1 つの GPU が必要な場合は、次のように
 設定します:
 
@@ -1135,9 +1128,6 @@ if torch.cuda.is_available():
 
 import numpy as np
 np.random.seed(seed)
-
-# tf RNG
-tf.random.set_seed(seed)
 ```
 
diff --git a/docs/source/ko/testing.md b/docs/source/ko/testing.md
index fd3f548eeb8..0a9e8ee47ac 100644
--- a/docs/source/ko/testing.md
+++ b/docs/source/ko/testing.md
@@ -473,13 +473,6 @@ GPU 요구 사항을 표로 정리하면 아래와 같습니다:
 def test_example_with_multi_gpu():
 ```
 
-`tensorflow`가 필요한 경우 `require_tf` 데코레이터를 사용합니다. 예를 들어 다음과 같습니다:
-
-```python no-style
-@require_tf
-def test_tf_thing_with_tensorflow():
-```
-
 이러한 데코레이터는 중첩될 수 있습니다. 예를 들어, 느린 테스트로 진행되고 pytorch에서 적어도 하나의 GPU가 필요한 경우
 다음과 같이 설정할 수 있습니다:
 
diff --git a/src/transformers/testing_utils.py b/src/transformers/testing_utils.py
index 2ddbd51d414..10f31b81c8f 100644
--- a/src/transformers/testing_utils.py
+++ b/src/transformers/testing_utils.py
@@ -705,6 +705,9 @@ def require_tf(test_case):
     """
     Decorator marking a test that requires TensorFlow. These tests are skipped when TensorFlow isn't installed.
""" + logger.warning_once( + "TensorFlow test-related code, including `require_tf`, is deprecated and will be removed in Transformers v4.55" + ) return unittest.skipUnless(is_tf_available(), "test requires TensorFlow")(test_case) diff --git a/tests/models/bert/test_tokenization_bert_tf.py b/tests/models/bert/test_tokenization_bert_tf.py deleted file mode 100644 index 0539613a10f..00000000000 --- a/tests/models/bert/test_tokenization_bert_tf.py +++ /dev/null @@ -1,106 +0,0 @@ -import unittest -from pathlib import Path -from tempfile import TemporaryDirectory - -from transformers import AutoConfig, TFAutoModel, is_tensorflow_text_available, is_tf_available -from transformers.models.bert.tokenization_bert import BertTokenizer -from transformers.testing_utils import require_tensorflow_text, require_tf, slow - - -if is_tf_available(): - import tensorflow as tf - - from transformers.modeling_tf_utils import keras - -if is_tensorflow_text_available(): - from transformers.models.bert import TFBertTokenizer - - -TOKENIZER_CHECKPOINTS = ["google-bert/bert-base-uncased", "google-bert/bert-base-cased"] -TINY_MODEL_CHECKPOINT = "hf-internal-testing/tiny-bert-tf-only" - -if is_tf_available(): - from transformers.modeling_tf_utils import keras - - class ModelToSave(keras.Model): - def __init__(self, tokenizer): - super().__init__() - self.tokenizer = tokenizer - config = AutoConfig.from_pretrained(TINY_MODEL_CHECKPOINT) - self.bert = TFAutoModel.from_config(config) - - def call(self, inputs): - tokenized = self.tokenizer(inputs) - out = self.bert(tokenized) - return out["pooler_output"] - - -@require_tf -@require_tensorflow_text -class BertTokenizationTest(unittest.TestCase): - # The TF tokenizers are usually going to be used as pretrained tokenizers from existing model checkpoints, - # so that's what we focus on here. 
- - def setUp(self): - super().setUp() - - self.tokenizers = [BertTokenizer.from_pretrained(checkpoint) for checkpoint in TOKENIZER_CHECKPOINTS] - self.tf_tokenizers = [TFBertTokenizer.from_pretrained(checkpoint) for checkpoint in TOKENIZER_CHECKPOINTS] - assert len(self.tokenizers) == len(self.tf_tokenizers) - - self.test_sentences = [ - "This is a straightforward English test sentence.", - "This one has some weird characters\rto\nsee\r\nif those\u00e9break things.", - "Now we're going to add some Chinese: 一 二 三 一二三", - "And some much more rare Chinese: 齉 堃 齉堃", - "Je vais aussi écrire en français pour tester les accents", - "Classical Irish also has some unusual characters, so in they go: Gaelaċ, ꝼ", - ] - self.paired_sentences = list(zip(self.test_sentences, self.test_sentences[::-1])) - - def test_output_equivalence(self): - for tokenizer, tf_tokenizer in zip(self.tokenizers, self.tf_tokenizers): - for test_inputs in (self.test_sentences, self.paired_sentences): - python_outputs = tokenizer(test_inputs, return_tensors="tf", padding="longest") - tf_outputs = tf_tokenizer(test_inputs) - - for key in python_outputs.keys(): - self.assertTrue(tf.reduce_all(python_outputs[key].shape == tf_outputs[key].shape)) - self.assertTrue(tf.reduce_all(tf.cast(python_outputs[key], tf.int64) == tf_outputs[key])) - - @slow - def test_different_pairing_styles(self): - for tf_tokenizer in self.tf_tokenizers: - merged_outputs = tf_tokenizer(self.paired_sentences) - separated_outputs = tf_tokenizer( - text=[sentence[0] for sentence in self.paired_sentences], - text_pair=[sentence[1] for sentence in self.paired_sentences], - ) - for key in merged_outputs.keys(): - self.assertTrue(tf.reduce_all(tf.cast(merged_outputs[key], tf.int64) == separated_outputs[key])) - - @slow - def test_graph_mode(self): - for tf_tokenizer in self.tf_tokenizers: - compiled_tokenizer = tf.function(tf_tokenizer) - for test_inputs in (self.test_sentences, self.paired_sentences): - test_inputs = tf.constant(test_inputs) - compiled_outputs = compiled_tokenizer(test_inputs) - eager_outputs = tf_tokenizer(test_inputs) - - for key in eager_outputs.keys(): - self.assertTrue(tf.reduce_all(eager_outputs[key] == compiled_outputs[key])) - - @slow - def test_export_for_inference(self): - for tf_tokenizer in self.tf_tokenizers: - model = ModelToSave(tokenizer=tf_tokenizer) - test_inputs = tf.convert_to_tensor(self.test_sentences) - out = model(test_inputs) # Build model with some sample inputs - with TemporaryDirectory() as tempdir: - save_path = Path(tempdir) / "saved.model" - model.export(save_path) - loaded_model = tf.saved_model.load(save_path) - loaded_output = loaded_model.serve(test_inputs) - # We may see small differences because the loaded model is compiled, so we need an epsilon for the test - self.assertLessEqual(tf.reduce_max(tf.abs(out - loaded_output)), 1e-5) diff --git a/tests/models/gpt2/test_tokenization_gpt2_tf.py b/tests/models/gpt2/test_tokenization_gpt2_tf.py deleted file mode 100644 index 06f16c36e31..00000000000 --- a/tests/models/gpt2/test_tokenization_gpt2_tf.py +++ /dev/null @@ -1,131 +0,0 @@ -import unittest -from pathlib import Path -from tempfile import TemporaryDirectory - -from transformers import AutoConfig, TFGPT2LMHeadModel, is_keras_nlp_available, is_tf_available -from transformers.models.gpt2.tokenization_gpt2 import GPT2Tokenizer -from transformers.testing_utils import require_keras_nlp, require_tf, slow - - -if is_tf_available(): - import tensorflow as tf - - -if is_keras_nlp_available(): - from 
transformers.models.gpt2 import TFGPT2Tokenizer - - -TOKENIZER_CHECKPOINTS = ["openai-community/gpt2"] -TINY_MODEL_CHECKPOINT = "openai-community/gpt2" - -if is_tf_available(): - - class ModelToSave(tf.Module): - def __init__(self, tokenizer): - super().__init__() - self.tokenizer = tokenizer - config = AutoConfig.from_pretrained(TINY_MODEL_CHECKPOINT) - self.model = TFGPT2LMHeadModel.from_config(config) - - @tf.function(input_signature=(tf.TensorSpec((None,), tf.string, name="text"),)) - def serving(self, text): - tokenized = self.tokenizer(text) - input_ids_dense = tokenized["input_ids"].to_tensor() - - input_mask = tf.cast(input_ids_dense > 0, tf.int32) - # input_mask = tf.reshape(input_mask, [-1, MAX_SEQ_LEN]) - - outputs = self.model(input_ids=input_ids_dense, attention_mask=input_mask)["logits"] - - return outputs - - -@require_tf -@require_keras_nlp -class GPTTokenizationTest(unittest.TestCase): - # The TF tokenizers are usually going to be used as pretrained tokenizers from existing model checkpoints, - # so that's what we focus on here. - - def setUp(self): - super().setUp() - - self.tokenizers = [GPT2Tokenizer.from_pretrained(checkpoint) for checkpoint in (TOKENIZER_CHECKPOINTS)] - self.tf_tokenizers = [TFGPT2Tokenizer.from_pretrained(checkpoint) for checkpoint in TOKENIZER_CHECKPOINTS] - assert len(self.tokenizers) == len(self.tf_tokenizers) - - self.test_sentences = [ - "This is a straightforward English test sentence.", - "This one has some weird characters\rto\nsee\r\nif those\u00e9break things.", - "Now we're going to add some Chinese: 一 二 三 一二三", - "And some much more rare Chinese: 齉 堃 齉堃", - "Je vais aussi écrire en français pour tester les accents", - "Classical Irish also has some unusual characters, so in they go: Gaelaċ, ꝼ", - ] - self.paired_sentences = list(zip(self.test_sentences, self.test_sentences[::-1])) - - def test_output_equivalence(self): - for tokenizer, tf_tokenizer in zip(self.tokenizers, self.tf_tokenizers): - for test_inputs in self.test_sentences: - python_outputs = tokenizer([test_inputs], return_tensors="tf") - tf_outputs = tf_tokenizer([test_inputs]) - - for key in python_outputs.keys(): - # convert them to numpy to avoid messing with ragged tensors - python_outputs_values = python_outputs[key].numpy() - tf_outputs_values = tf_outputs[key].numpy() - - self.assertTrue(tf.reduce_all(python_outputs_values.shape == tf_outputs_values.shape)) - self.assertTrue(tf.reduce_all(tf.cast(python_outputs_values, tf.int64) == tf_outputs_values)) - - @slow - def test_graph_mode(self): - for tf_tokenizer in self.tf_tokenizers: - compiled_tokenizer = tf.function(tf_tokenizer) - for test_inputs in self.test_sentences: - test_inputs = tf.constant(test_inputs) - compiled_outputs = compiled_tokenizer(test_inputs) - eager_outputs = tf_tokenizer(test_inputs) - - for key in eager_outputs.keys(): - self.assertTrue(tf.reduce_all(eager_outputs[key] == compiled_outputs[key])) - - @slow - def test_saved_model(self): - for tf_tokenizer in self.tf_tokenizers: - model = ModelToSave(tokenizer=tf_tokenizer) - test_inputs = tf.convert_to_tensor([self.test_sentences[0]]) - out = model.serving(test_inputs) # Build model with some sample inputs - with TemporaryDirectory() as tempdir: - save_path = Path(tempdir) / "saved.model" - tf.saved_model.save(model, save_path, signatures={"serving_default": model.serving}) - loaded_model = tf.saved_model.load(save_path) - loaded_output = loaded_model.signatures["serving_default"](test_inputs)["output_0"] - # We may see small differences because the 
loaded model is compiled, so we need an epsilon for the test - self.assertTrue(tf.reduce_all(out == loaded_output)) - - @slow - def test_from_config(self): - for tf_tokenizer in self.tf_tokenizers: - test_inputs = tf.convert_to_tensor([self.test_sentences[0]]) - out = tf_tokenizer(test_inputs) # Build model with some sample inputs - - config = tf_tokenizer.get_config() - model_from_config = TFGPT2Tokenizer.from_config(config) - from_config_output = model_from_config(test_inputs) - - for key in from_config_output.keys(): - self.assertTrue(tf.reduce_all(from_config_output[key] == out[key])) - - @slow - def test_padding(self): - for tf_tokenizer in self.tf_tokenizers: - # for the test to run - tf_tokenizer.pad_token_id = 123123 - - for max_length in [3, 5, 1024]: - test_inputs = tf.convert_to_tensor([self.test_sentences[0]]) - out = tf_tokenizer(test_inputs, max_length=max_length) - - out_length = out["input_ids"].numpy().shape[1] - - assert out_length == max_length diff --git a/tests/models/layoutlmv3/test_tokenization_layoutlmv3.py b/tests/models/layoutlmv3/test_tokenization_layoutlmv3.py index 6e5f1ee11a7..deee8d31d24 100644 --- a/tests/models/layoutlmv3/test_tokenization_layoutlmv3.py +++ b/tests/models/layoutlmv3/test_tokenization_layoutlmv3.py @@ -34,7 +34,6 @@ from transformers import ( from transformers.models.layoutlmv3.tokenization_layoutlmv3 import VOCAB_FILES_NAMES, LayoutLMv3Tokenizer from transformers.testing_utils import ( require_pandas, - require_tf, require_tokenizers, require_torch, slow, @@ -2306,42 +2305,6 @@ class LayoutLMv3TokenizationTest(TokenizerTesterMixin, unittest.TestCase): def test_np_encode_plus_sent_to_model(self): pass - @require_tf - @slow - def test_tf_encode_plus_sent_to_model(self): - from transformers import TF_MODEL_MAPPING, TOKENIZER_MAPPING - - MODEL_TOKENIZER_MAPPING = merge_model_tokenizer_mappings(TF_MODEL_MAPPING, TOKENIZER_MAPPING) - - tokenizers = self.get_tokenizers(do_lower_case=False) - for tokenizer in tokenizers: - with self.subTest(f"{tokenizer.__class__.__name__}"): - if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING: - self.skipTest(f"{tokenizer.__class__} is not in the MODEL_TOKENIZER_MAPPING") - - config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__] - config = config_class() - - if config.is_encoder_decoder or config.pad_token_id is None: - self.skipTest(reason="Model is an encoder-decoder or has no pad token id set.") - - model = model_class(config) - - # Make sure the model contains at least the full vocabulary size in its embedding matrix - self.assertGreaterEqual(model.config.vocab_size, len(tokenizer)) - - # Build sequence - first_ten_tokens = list(tokenizer.get_vocab().keys())[:10] - boxes = [[1000, 1000, 1000, 1000] for _ in range(len(first_ten_tokens))] - encoded_sequence = tokenizer.encode_plus(first_ten_tokens, boxes=boxes, return_tensors="tf") - batch_encoded_sequence = tokenizer.batch_encode_plus( - [first_ten_tokens, first_ten_tokens], boxes=[boxes, boxes], return_tensors="tf" - ) - - # This should not fail - model(encoded_sequence) - model(batch_encoded_sequence) - @unittest.skip(reason="Chat is not supported") def test_chat_template(self): pass diff --git a/tests/models/pop2piano/test_feature_extraction_pop2piano.py b/tests/models/pop2piano/test_feature_extraction_pop2piano.py index 5684cbf6f5f..7a744a68e3b 100644 --- a/tests/models/pop2piano/test_feature_extraction_pop2piano.py +++ b/tests/models/pop2piano/test_feature_extraction_pop2piano.py @@ -24,7 +24,6 @@ from transformers.testing_utils 
import ( require_essentia, require_librosa, require_scipy, - require_tf, require_torch, ) from transformers.utils.import_utils import ( @@ -231,28 +230,6 @@ class Pop2PianoFeatureExtractionTest(SequenceFeatureExtractionTestMixin, unittes # check shape self.assertEqual(len(input_features["input_features"].shape), 3) - @require_tf - def test_batch_feature_tf(self): - import tensorflow as tf - - feature_extractor = self.feature_extraction_class(**self.feat_extract_tester.prepare_feat_extract_dict()) - speech_input1 = np.zeros([1_000_000], dtype=np.float32) - speech_input2 = np.ones([2_000_000], dtype=np.float32) - speech_input3 = np.random.randint(low=0, high=10, size=500_000).astype(np.float32) - - input_features = feature_extractor( - [speech_input1, speech_input2, speech_input3], - sampling_rate=[44_100, 16_000, 48_000], - return_tensors="tf", - return_attention_mask=True, - ) - - # check tf tensor or not - self.assertTrue(tf.is_tensor(input_features["input_features"])) - - # check shape - self.assertEqual(len(input_features["input_features"].shape), 3) - @unittest.skip( "Pop2PianoFeatureExtractor does not supports padding externally (while processing audios in batches padding is automatically applied to max_length)" ) diff --git a/tests/models/sam/test_processor_sam.py b/tests/models/sam/test_processor_sam.py index 2275c7dc4b0..15cd2b0297c 100644 --- a/tests/models/sam/test_processor_sam.py +++ b/tests/models/sam/test_processor_sam.py @@ -17,15 +17,10 @@ import unittest import numpy as np -from transformers.testing_utils import ( - require_tf, - require_torch, - require_torchvision, - require_vision, -) -from transformers.utils import is_tf_available, is_torch_available, is_vision_available +from transformers.testing_utils import require_torch, require_torchvision, require_vision +from transformers.utils import is_torch_available, is_vision_available -from ...test_processing_common import ProcessorTesterMixin, prepare_image_inputs +from ...test_processing_common import ProcessorTesterMixin if is_vision_available(): @@ -38,11 +33,6 @@ if is_torch_available(): from transformers.models.sam.image_processing_sam import _mask_to_rle_pytorch -if is_tf_available(): - import tensorflow as tf - - from transformers.models.sam.image_processing_sam import _mask_to_rle_tf - @require_vision @require_torchvision @@ -202,143 +192,3 @@ class SamProcessorTest(ProcessorTesterMixin, unittest.TestCase): self.assertEqual(len(rle), 1) self.assertEqual(rle[0]["size"], [2, 2]) self.assertEqual(rle[0]["counts"], [1, 3]) # 1 zero, followed by 3 ones - - -@require_vision -@require_tf -class TFSamProcessorTest(unittest.TestCase): - def setUp(self): - self.tmpdirname = tempfile.mkdtemp() - image_processor = SamImageProcessor() - processor = SamProcessor(image_processor) - processor.save_pretrained(self.tmpdirname) - - def get_image_processor(self, **kwargs): - return AutoProcessor.from_pretrained(self.tmpdirname, **kwargs).image_processor - - def tearDown(self): - shutil.rmtree(self.tmpdirname) - - # This is to avoid repeating the skipping of the common tests - def prepare_image_inputs(self): - """This function prepares a list of PIL images.""" - return prepare_image_inputs() - - def test_save_load_pretrained_additional_features(self): - processor = SamProcessor(image_processor=self.get_image_processor()) - processor.save_pretrained(self.tmpdirname) - - image_processor_add_kwargs = self.get_image_processor(do_normalize=False, padding_value=1.0) - - processor = SamProcessor.from_pretrained(self.tmpdirname, 
do_normalize=False, padding_value=1.0) - - self.assertEqual(processor.image_processor.to_json_string(), image_processor_add_kwargs.to_json_string()) - self.assertIsInstance(processor.image_processor, SamImageProcessor) - - def test_image_processor(self): - image_processor = self.get_image_processor() - - processor = SamProcessor(image_processor=image_processor) - - image_input = self.prepare_image_inputs() - - input_feat_extract = image_processor(image_input, return_tensors="np") - input_processor = processor(images=image_input, return_tensors="np") - - input_feat_extract.pop("original_sizes") # pop original_sizes as it is popped in the processor - input_feat_extract.pop("reshaped_input_sizes") # pop reshaped_input_sizes as it is popped in the processor - - for key in input_feat_extract.keys(): - self.assertAlmostEqual(input_feat_extract[key].sum(), input_processor[key].sum(), delta=1e-2) - - @require_tf - def test_post_process_masks(self): - image_processor = self.get_image_processor() - - processor = SamProcessor(image_processor=image_processor) - dummy_masks = [tf.ones((1, 3, 5, 5))] - - original_sizes = [[1764, 2646]] - - reshaped_input_size = [[683, 1024]] - masks = processor.post_process_masks(dummy_masks, original_sizes, reshaped_input_size, return_tensors="tf") - self.assertEqual(masks[0].shape, (1, 3, 1764, 2646)) - - masks = processor.post_process_masks( - dummy_masks, - tf.convert_to_tensor(original_sizes), - tf.convert_to_tensor(reshaped_input_size), - return_tensors="tf", - ) - self.assertEqual(masks[0].shape, (1, 3, 1764, 2646)) - - # should also work with np - dummy_masks = [np.ones((1, 3, 5, 5))] - masks = processor.post_process_masks( - dummy_masks, np.array(original_sizes), np.array(reshaped_input_size), return_tensors="tf" - ) - - self.assertEqual(masks[0].shape, (1, 3, 1764, 2646)) - - dummy_masks = [[1, 0], [0, 1]] - with self.assertRaises(tf.errors.InvalidArgumentError): - masks = processor.post_process_masks( - dummy_masks, np.array(original_sizes), np.array(reshaped_input_size), return_tensors="tf" - ) - - def test_rle_encoding(self): - """ - Test the run-length encoding function. - """ - # Test that a mask of all zeros returns a single run [height * width]. - input_mask = tf.zeros((1, 2, 2), dtype=tf.int64) # shape: 1 x 2 x 2 - rle = _mask_to_rle_tf(input_mask) - - self.assertEqual(len(rle), 1) - self.assertEqual(rle[0]["size"], [2, 2]) - # For a 2x2 all-zero mask, we expect a single run of length 4: - self.assertEqual(rle[0]["counts"], [4]) - - # Test that a mask of all ones returns [0, height * width]. - input_mask = tf.ones((1, 2, 2), dtype=tf.int64) # shape: 1 x 2 x 2 - rle = _mask_to_rle_tf(input_mask) - - self.assertEqual(len(rle), 1) - self.assertEqual(rle[0]["size"], [2, 2]) - # For a 2x2 all-one mask, we expect two runs: [0, 4]. - self.assertEqual(rle[0]["counts"], [0, 4]) - - # Test a mask with mixed 0s and 1s to ensure the run-length encoding is correct. - # Example mask: - # Row 0: [0, 1] - # Row 1: [1, 1] - # This is shape (1, 2, 2). - # Flattened in Fortran order -> [0, 1, 1, 1]. - # The RLE for [0,1,1,1] is [1, 3]. 
- input_mask = tf.constant([[[0, 1], [1, 1]]], dtype=tf.int64) - rle = _mask_to_rle_tf(input_mask) - - self.assertEqual(len(rle), 1) - self.assertEqual(rle[0]["size"], [2, 2]) - self.assertEqual(rle[0]["counts"], [1, 3]) # 1 zero, followed by 3 ones - - -@require_vision -@require_torchvision -class SamProcessorEquivalenceTest(unittest.TestCase): - def setUp(self): - self.tmpdirname = tempfile.mkdtemp() - image_processor = SamImageProcessor() - processor = SamProcessor(image_processor) - processor.save_pretrained(self.tmpdirname) - - def get_image_processor(self, **kwargs): - return AutoProcessor.from_pretrained(self.tmpdirname, **kwargs).image_processor - - def tearDown(self): - shutil.rmtree(self.tmpdirname) - - # This is to avoid repeating the skipping of the common tests - def prepare_image_inputs(self): - """This function prepares a list of PIL images.""" - return prepare_image_inputs() diff --git a/tests/models/whisper/test_tokenization_whisper.py b/tests/models/whisper/test_tokenization_whisper.py index 61a34c165d8..45ba9c401b8 100644 --- a/tests/models/whisper/test_tokenization_whisper.py +++ b/tests/models/whisper/test_tokenization_whisper.py @@ -18,7 +18,7 @@ import numpy as np from transformers.models.whisper import WhisperTokenizer, WhisperTokenizerFast from transformers.models.whisper.tokenization_whisper import _combine_tokens_into_words, _find_longest_common_sequence -from transformers.testing_utils import require_flax, require_tf, require_torch, slow +from transformers.testing_utils import require_flax, require_torch, slow from ...test_tokenization_common import TokenizerTesterMixin @@ -588,15 +588,6 @@ class SpeechToTextTokenizerMultilinguialTest(unittest.TestCase): self.assertListEqual(WhisperTokenizer._convert_to_list(np_array), test_list) self.assertListEqual(WhisperTokenizerFast._convert_to_list(np_array), test_list) - @require_tf - def test_convert_to_list_tf(self): - import tensorflow as tf - - test_list = [[1, 2, 3], [4, 5, 6]] - tf_tensor = tf.constant(test_list) - self.assertListEqual(WhisperTokenizer._convert_to_list(tf_tensor), test_list) - self.assertListEqual(WhisperTokenizerFast._convert_to_list(tf_tensor), test_list) - @require_flax def test_convert_to_list_jax(self): import jax.numpy as jnp diff --git a/tests/optimization/test_optimization_tf.py b/tests/optimization/test_optimization_tf.py deleted file mode 100644 index d3a948c938d..00000000000 --- a/tests/optimization/test_optimization_tf.py +++ /dev/null @@ -1,100 +0,0 @@ -# Copyright 2020 The HuggingFace Team. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -import unittest - -from transformers import is_tf_available -from transformers.testing_utils import require_tf - - -if is_tf_available(): - import tensorflow as tf - from tensorflow.python.eager import context - from tensorflow.python.framework import ops - - from transformers import GradientAccumulator, create_optimizer - - -@require_tf -class OptimizationFTest(unittest.TestCase): - def assertListAlmostEqual(self, list1, list2, tol): - self.assertEqual(len(list1), len(list2)) - for a, b in zip(list1, list2): - self.assertAlmostEqual(a, b, delta=tol) - - def testGradientAccumulator(self): - accumulator = GradientAccumulator() - accumulator([tf.constant([1.0, 2.0])]) - accumulator([tf.constant([-2.0, 1.0])]) - accumulator([tf.constant([-1.0, 2.0])]) - with self.assertRaises(ValueError): - accumulator([tf.constant([1.0, 1.0]), tf.constant([2.0, 2.0])]) - self.assertEqual(accumulator.step, 3) - self.assertEqual(len(accumulator.gradients), 1) - self.assertListAlmostEqual(accumulator.gradients[0].numpy().tolist(), [-2.0, 5.0], tol=1e-2) - accumulator.reset() - self.assertEqual(accumulator.step, 0) - self.assertListAlmostEqual(accumulator.gradients[0].numpy().tolist(), [0.0, 0.0], tol=1e-2) - - def testGradientAccumulatorDistributionStrategy(self): - context._context = None - ops.enable_eager_execution_internal() - physical_devices = tf.config.list_physical_devices("CPU") - if len(physical_devices) == 1: - tf.config.set_logical_device_configuration( - physical_devices[0], [tf.config.LogicalDeviceConfiguration(), tf.config.LogicalDeviceConfiguration()] - ) - devices = tf.config.list_logical_devices(device_type="CPU") - strategy = tf.distribute.MirroredStrategy(devices=devices[:2]) - - with strategy.scope(): - accumulator = GradientAccumulator() - variable = tf.Variable([4.0, 3.0]) - optimizer, _ = create_optimizer(5e-5, 10, 5) - gradient_placeholder = tf.Variable([0.0, 0.0], trainable=False) - - def accumulate_on_replica(gradient): - accumulator([gradient]) - - def apply_on_replica(): - optimizer.apply_gradients(list(zip(accumulator.gradients, [variable]))) - - @tf.function - def accumulate(grad1, grad2): - with strategy.scope(): - local_variables = strategy.experimental_local_results(gradient_placeholder) - local_variables[0].assign(grad1) - local_variables[1].assign(grad2) - strategy.run(accumulate_on_replica, args=(gradient_placeholder,)) - - @tf.function - def apply_grad(): - with strategy.scope(): - strategy.run(apply_on_replica) - - def _check_local_values(grad1, grad2): - values = strategy.experimental_local_results(accumulator._gradients[0]) - self.assertListAlmostEqual(values[0].value(), grad1, tol=1e-2) - self.assertListAlmostEqual(values[1].value(), grad2, tol=1e-2) - - accumulate([1.0, 2.0], [-1.0, 1.0]) - accumulate([3.0, -1.0], [-1.0, -1.0]) - accumulate([-2.0, 2.0], [3.0, -2.0]) - self.assertEqual(accumulator.step, 3) - _check_local_values([2.0, 3.0], [1.0, -2.0]) - apply_grad() - self.assertListAlmostEqual(variable.value(), [4.0, 3.0], tol=1e-2) - accumulator.reset() - self.assertEqual(accumulator.step, 0) - _check_local_values([0.0, 0.0], [0.0, 0.0]) diff --git a/tests/pipelines/test_pipelines_audio_classification.py b/tests/pipelines/test_pipelines_audio_classification.py index bbad033d138..2871467ac90 100644 --- a/tests/pipelines/test_pipelines_audio_classification.py +++ b/tests/pipelines/test_pipelines_audio_classification.py @@ -28,7 +28,6 @@ from transformers.testing_utils import ( compare_pipeline_output_to_hub_spec, is_pipeline_test, nested_simplify, - require_tf, 
require_torch, require_torchaudio, slow, @@ -193,11 +192,6 @@ class AudioClassificationPipelineTests(unittest.TestCase): ], ) - @require_tf - @unittest.skip(reason="Audio classification is not implemented for TF") - def test_small_model_tf(self): - pass - @require_torch @slow def test_top_k_none_returns_all_labels(self): diff --git a/tests/pipelines/test_pipelines_automatic_speech_recognition.py b/tests/pipelines/test_pipelines_automatic_speech_recognition.py index d48caf16137..a9977d912c5 100644 --- a/tests/pipelines/test_pipelines_automatic_speech_recognition.py +++ b/tests/pipelines/test_pipelines_automatic_speech_recognition.py @@ -40,7 +40,6 @@ from transformers.testing_utils import ( is_torch_available, nested_simplify, require_pyctcdecode, - require_tf, require_torch, require_torch_accelerator, require_torchaudio, @@ -326,10 +325,6 @@ class AutomaticSpeechRecognitionPipelineTests(unittest.TestCase): ): _ = speech_recognizer(filename, return_timestamps="char") - @require_tf - def test_small_model_tf(self): - self.skipTest(reason="Tensorflow not supported yet.") - @require_torch @unittest.skip("TODO (joao, eustache): this test is failing, find the breaking PR and fix the cause or the test") def test_torch_small_no_tokenizer_files(self): diff --git a/tests/pipelines/test_pipelines_common.py b/tests/pipelines/test_pipelines_common.py index 5dde697d1cc..bc85f0749b1 100644 --- a/tests/pipelines/test_pipelines_common.py +++ b/tests/pipelines/test_pipelines_common.py @@ -48,8 +48,6 @@ from transformers.testing_utils import ( is_pipeline_test, is_staging_test, nested_simplify, - require_tensorflow_probability, - require_tf, require_torch, require_torch_accelerator, require_torch_multi_accelerator, @@ -177,20 +175,6 @@ class CommonPipelineTest(unittest.TestCase): results.append(out) self.assertEqual(len(results), 10) - @require_tf - def test_iterator_data_tf(self): - def data(n: int): - for _ in range(n): - yield "This is a test" - - pipe = pipeline(model="hf-internal-testing/tiny-random-distilbert", framework="tf") - out = pipe("This is a test") - results = [] - for out in pipe(data(10)): - self.assertEqual(nested_simplify(out), {"label": "LABEL_0", "score": 0.504}) - results.append(out) - self.assertEqual(len(results), 10) - @require_torch def test_unbatch_attentions_hidden_states(self): model = DistilBertForSequenceClassification.from_pretrained( @@ -262,9 +246,9 @@ class CommonPipelineTest(unittest.TestCase): @is_pipeline_test +@require_torch class PipelineScikitCompatTest(unittest.TestCase): - @require_torch - def test_pipeline_predict_pt(self): + def test_pipeline_predict(self): data = ["This is a test"] text_classifier = pipeline( @@ -275,20 +259,7 @@ class PipelineScikitCompatTest(unittest.TestCase): actual_output = text_classifier.predict(data) self.assertEqual(expected_output, actual_output) - @require_tf - def test_pipeline_predict_tf(self): - data = ["This is a test"] - - text_classifier = pipeline( - task="text-classification", model="hf-internal-testing/tiny-random-distilbert", framework="tf" - ) - - expected_output = [{"label": ANY(str), "score": ANY(float)}] - actual_output = text_classifier.predict(data) - self.assertEqual(expected_output, actual_output) - - @require_torch - def test_pipeline_transform_pt(self): + def test_pipeline_transform(self): data = ["This is a test"] text_classifier = pipeline( @@ -299,18 +270,6 @@ class PipelineScikitCompatTest(unittest.TestCase): actual_output = text_classifier.transform(data) self.assertEqual(expected_output, actual_output) - 
@require_tf - def test_pipeline_transform_tf(self): - data = ["This is a test"] - - text_classifier = pipeline( - task="text-classification", model="hf-internal-testing/tiny-random-distilbert", framework="tf" - ) - - expected_output = [{"label": ANY(str), "score": ANY(float)}] - actual_output = text_classifier.transform(data) - self.assertEqual(expected_output, actual_output) - @is_pipeline_test class PipelinePadTest(unittest.TestCase): @@ -620,23 +579,6 @@ class PipelineUtilsTest(unittest.TestCase): gc.collect() backend_empty_cache(torch_device) - @slow - @require_tf - def test_load_default_pipelines_tf(self): - from transformers.modeling_tf_utils import keras - from transformers.pipelines import SUPPORTED_TASKS - - set_seed_fn = lambda: keras.utils.set_random_seed(0) # noqa: E731 - for task in SUPPORTED_TASKS.keys(): - if task == "table-question-answering": - # test table in separate test due to more dependencies - continue - - self.check_default_pipeline(task, "tf", set_seed_fn, self.check_models_equal_tf) - - # clean-up as much as possible GPU memory occupied by TF - gc.collect() - @slow @require_torch def test_load_default_pipelines_pt_table_qa(self): @@ -663,18 +605,6 @@ class PipelineUtilsTest(unittest.TestCase): pipe = pipeline("text-generation", device=torch_device) _ = pipe("Hello") - @slow - @require_tf - @require_tensorflow_probability - def test_load_default_pipelines_tf_table_qa(self): - import tensorflow as tf - - set_seed_fn = lambda: tf.random.set_seed(0) # noqa: E731 - self.check_default_pipeline("table-question-answering", "tf", set_seed_fn, self.check_models_equal_tf) - - # clean-up as much as possible GPU memory occupied by PyTorch - gc.collect() - def check_default_pipeline(self, task, framework, set_seed_fn, check_models_equal_fn): from transformers.pipelines import SUPPORTED_TASKS, pipeline diff --git a/tests/pipelines/test_pipelines_depth_estimation.py b/tests/pipelines/test_pipelines_depth_estimation.py index a5dcb3ef249..130d386abe2 100644 --- a/tests/pipelines/test_pipelines_depth_estimation.py +++ b/tests/pipelines/test_pipelines_depth_estimation.py @@ -24,7 +24,6 @@ from transformers.testing_utils import ( compare_pipeline_output_to_hub_spec, is_pipeline_test, nested_simplify, - require_tf, require_timm, require_torch, require_vision, @@ -123,11 +122,6 @@ class DepthEstimationPipelineTests(unittest.TestCase): for single_output in outputs: compare_pipeline_output_to_hub_spec(single_output, DepthEstimationOutput) - @require_tf - @unittest.skip(reason="Depth estimation is not implemented in TF") - def test_small_model_tf(self): - pass - @slow @require_torch def test_large_model_pt(self): diff --git a/tests/pipelines/test_pipelines_document_question_answering.py b/tests/pipelines/test_pipelines_document_question_answering.py index 7a1b319096b..0900b1e1030 100644 --- a/tests/pipelines/test_pipelines_document_question_answering.py +++ b/tests/pipelines/test_pipelines_document_question_answering.py @@ -27,7 +27,6 @@ from transformers.testing_utils import ( nested_simplify, require_detectron2, require_pytesseract, - require_tf, require_torch, require_torch_bf16, require_vision, @@ -423,8 +422,3 @@ class DocumentQuestionAnsweringPipelineTests(unittest.TestCase): question = "What is the invoice number?" 
outputs = dqa_pipeline(image=image, question=question, top_k=2) self.assertEqual(nested_simplify(outputs, decimals=4), [{"answer": "us-001"}]) - - @require_tf - @unittest.skip(reason="Document question answering not implemented in TF") - def test_small_model_tf(self): - pass diff --git a/tests/pipelines/test_pipelines_feature_extraction.py b/tests/pipelines/test_pipelines_feature_extraction.py index 12bc3dc655b..ff6669e19b3 100644 --- a/tests/pipelines/test_pipelines_feature_extraction.py +++ b/tests/pipelines/test_pipelines_feature_extraction.py @@ -23,19 +23,15 @@ from transformers import ( TF_MODEL_MAPPING, FeatureExtractionPipeline, LxmertConfig, - is_tf_available, is_torch_available, pipeline, ) -from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch +from transformers.testing_utils import is_pipeline_test, nested_simplify, require_torch if is_torch_available(): import torch -if is_tf_available(): - import tensorflow as tf - @is_pipeline_test class FeatureExtractionPipelineTests(unittest.TestCase): @@ -52,16 +48,6 @@ class FeatureExtractionPipelineTests(unittest.TestCase): nested_simplify(outputs), [[[2.287, 1.234, 0.042, 1.53, 1.306, 0.879, -0.526, -1.71, -1.276, 0.756, -0.775, -1.048, -0.25, -0.595, -0.137, -0.598, 2.022, -0.812, 0.284, -0.488, -0.391, -0.403, -0.525, -0.061, -0.228, 1.086, 0.378, -0.14, 0.599, -0.087, -2.259, -0.098], [1.676, 0.232, -1.508, -0.145, 1.798, -1.388, 1.331, -0.37, -0.939, 0.043, 0.06, -0.414, -1.408, 0.24, 0.622, -0.55, -0.569, 1.873, -0.706, 1.924, -0.254, 1.927, -0.423, 0.152, -0.952, 0.509, -0.496, -0.968, 0.093, -1.049, -0.65, 0.312], [0.207, -0.775, -1.822, 0.321, -0.71, -0.201, 0.3, 1.146, -0.233, -0.753, -0.305, 1.309, -1.47, -0.21, 1.802, -1.555, -1.175, 1.323, -0.303, 0.722, -0.076, 0.103, -1.406, 1.931, 0.091, 0.237, 1.172, 1.607, 0.253, -0.9, -1.068, 0.438], [0.615, 1.077, 0.171, -0.175, 1.3, 0.901, -0.653, -0.138, 0.341, -0.654, -0.184, -0.441, -0.424, 0.356, -0.075, 0.26, -1.023, 0.814, 0.524, -0.904, -0.204, -0.623, 1.234, -1.03, 2.594, 0.56, 1.831, -0.199, -1.508, -0.492, -1.687, -2.165], [0.129, 0.008, -1.279, -0.412, -0.004, 1.663, 0.196, 0.104, 0.123, 0.119, 0.635, 1.757, 2.334, -0.799, -1.626, -1.26, 0.595, -0.316, -1.399, 0.232, 0.264, 1.386, -1.171, -0.256, -0.256, -1.944, 1.168, -0.368, -0.714, -0.51, 0.454, 1.148], [-0.32, 0.29, -1.309, -0.177, 0.453, 0.636, -0.024, 0.509, 0.931, -1.754, -1.575, 0.786, 0.046, -1.165, -1.416, 1.373, 1.293, -0.285, -1.541, -1.186, -0.106, -0.994, 2.001, 0.972, -0.02, 1.654, -0.236, 0.643, 1.02, 0.572, -0.914, -0.154], [0.7, -0.937, 0.441, 0.25, 0.78, -0.022, 0.282, -0.095, 1.558, -0.336, 1.706, 0.884, 1.28, 0.198, -0.796, 1.218, -1.769, 1.197, -0.342, -0.177, -0.645, 1.364, 0.008, -0.597, -0.484, -2.772, -0.696, -0.632, -0.34, -1.527, -0.562, 0.862], [2.504, 0.831, -1.271, -0.033, 0.298, -0.735, 1.339, 1.74, 0.233, -1.424, -0.819, -0.761, 0.291, 0.853, -0.092, -0.885, 0.164, 1.025, 0.907, 0.749, -1.515, -0.545, -1.365, 0.271, 0.034, -2.005, 0.031, 0.244, 0.621, 0.176, 0.336, -1.196], [-0.711, 0.591, -1.001, -0.946, 0.784, -1.66, 1.545, 0.799, -0.857, 1.148, 0.213, -0.285, 0.464, -0.139, 0.79, -1.663, -1.121, 0.575, -0.178, -0.508, 1.565, -0.242, -0.346, 1.024, -1.135, -0.158, -2.101, 0.275, 2.009, -0.425, 0.716, 0.981], [0.912, -1.186, -0.846, -0.421, -1.315, -0.827, 0.309, 0.533, 1.029, -2.343, 1.513, -1.238, 1.487, -0.849, 0.896, -0.927, -0.459, 0.159, 0.177, 0.873, 0.935, 1.433, -0.485, 0.737, 1.327, -0.338, 1.608, -0.47, -0.445, -1.118, -0.213, 
-0.446], [-0.434, -1.362, -1.098, -1.068, 1.507, 0.003, 0.413, -0.395, 0.897, -0.237, 1.405, -0.344, 1.693, 0.677, 0.097, -0.257, -0.602, 1.026, -1.229, 0.855, -0.713, 1.014, 0.443, 0.238, 0.425, -2.184, 1.933, -1.157, -1.132, -0.597, -0.785, 0.967], [0.58, -0.971, 0.789, -0.468, -0.576, 1.779, 1.747, 1.715, -1.939, 0.125, 0.656, -0.042, -1.024, -1.767, 0.107, -0.408, -0.866, -1.774, 1.248, 0.939, -0.033, 1.523, 1.168, -0.744, 0.209, -0.168, -0.316, 0.207, -0.432, 0.047, -0.646, -0.664], [-0.185, -0.613, -1.695, 1.602, -0.32, -0.277, 0.967, 0.728, -0.965, -0.234, 1.069, -0.63, -1.631, 0.711, 0.426, 1.298, -0.191, -0.467, -0.771, 0.971, -0.118, -1.577, -2.064, -0.055, -0.59, 0.642, -0.997, 1.251, 0.538, 1.367, 0.106, 1.704]]]) # fmt: skip - @require_tf - def test_small_model_tf(self): - feature_extractor = pipeline( - task="feature-extraction", model="hf-internal-testing/tiny-random-distilbert", framework="tf" - ) - outputs = feature_extractor("This is a test") - self.assertEqual( - nested_simplify(outputs), - [[[2.287, 1.234, 0.042, 1.53, 1.306, 0.879, -0.526, -1.71, -1.276, 0.756, -0.775, -1.048, -0.25, -0.595, -0.137, -0.598, 2.022, -0.812, 0.284, -0.488, -0.391, -0.403, -0.525, -0.061, -0.228, 1.086, 0.378, -0.14, 0.599, -0.087, -2.259, -0.098], [1.676, 0.232, -1.508, -0.145, 1.798, -1.388, 1.331, -0.37, -0.939, 0.043, 0.06, -0.414, -1.408, 0.24, 0.622, -0.55, -0.569, 1.873, -0.706, 1.924, -0.254, 1.927, -0.423, 0.152, -0.952, 0.509, -0.496, -0.968, 0.093, -1.049, -0.65, 0.312], [0.207, -0.775, -1.822, 0.321, -0.71, -0.201, 0.3, 1.146, -0.233, -0.753, -0.305, 1.309, -1.47, -0.21, 1.802, -1.555, -1.175, 1.323, -0.303, 0.722, -0.076, 0.103, -1.406, 1.931, 0.091, 0.237, 1.172, 1.607, 0.253, -0.9, -1.068, 0.438], [0.615, 1.077, 0.171, -0.175, 1.3, 0.901, -0.653, -0.138, 0.341, -0.654, -0.184, -0.441, -0.424, 0.356, -0.075, 0.26, -1.023, 0.814, 0.524, -0.904, -0.204, -0.623, 1.234, -1.03, 2.594, 0.56, 1.831, -0.199, -1.508, -0.492, -1.687, -2.165], [0.129, 0.008, -1.279, -0.412, -0.004, 1.663, 0.196, 0.104, 0.123, 0.119, 0.635, 1.757, 2.334, -0.799, -1.626, -1.26, 0.595, -0.316, -1.399, 0.232, 0.264, 1.386, -1.171, -0.256, -0.256, -1.944, 1.168, -0.368, -0.714, -0.51, 0.454, 1.148], [-0.32, 0.29, -1.309, -0.177, 0.453, 0.636, -0.024, 0.509, 0.931, -1.754, -1.575, 0.786, 0.046, -1.165, -1.416, 1.373, 1.293, -0.285, -1.541, -1.186, -0.106, -0.994, 2.001, 0.972, -0.02, 1.654, -0.236, 0.643, 1.02, 0.572, -0.914, -0.154], [0.7, -0.937, 0.441, 0.25, 0.78, -0.022, 0.282, -0.095, 1.558, -0.336, 1.706, 0.884, 1.28, 0.198, -0.796, 1.218, -1.769, 1.197, -0.342, -0.177, -0.645, 1.364, 0.008, -0.597, -0.484, -2.772, -0.696, -0.632, -0.34, -1.527, -0.562, 0.862], [2.504, 0.831, -1.271, -0.033, 0.298, -0.735, 1.339, 1.74, 0.233, -1.424, -0.819, -0.761, 0.291, 0.853, -0.092, -0.885, 0.164, 1.025, 0.907, 0.749, -1.515, -0.545, -1.365, 0.271, 0.034, -2.005, 0.031, 0.244, 0.621, 0.176, 0.336, -1.196], [-0.711, 0.591, -1.001, -0.946, 0.784, -1.66, 1.545, 0.799, -0.857, 1.148, 0.213, -0.285, 0.464, -0.139, 0.79, -1.663, -1.121, 0.575, -0.178, -0.508, 1.565, -0.242, -0.346, 1.024, -1.135, -0.158, -2.101, 0.275, 2.009, -0.425, 0.716, 0.981], [0.912, -1.186, -0.846, -0.421, -1.315, -0.827, 0.309, 0.533, 1.029, -2.343, 1.513, -1.238, 1.487, -0.849, 0.896, -0.927, -0.459, 0.159, 0.177, 0.873, 0.935, 1.433, -0.485, 0.737, 1.327, -0.338, 1.608, -0.47, -0.445, -1.118, -0.213, -0.446], [-0.434, -1.362, -1.098, -1.068, 1.507, 0.003, 0.413, -0.395, 0.897, -0.237, 1.405, -0.344, 1.693, 0.677, 0.097, -0.257, -0.602, 1.026, 
-1.229, 0.855, -0.713, 1.014, 0.443, 0.238, 0.425, -2.184, 1.933, -1.157, -1.132, -0.597, -0.785, 0.967], [0.58, -0.971, 0.789, -0.468, -0.576, 1.779, 1.747, 1.715, -1.939, 0.125, 0.656, -0.042, -1.024, -1.767, 0.107, -0.408, -0.866, -1.774, 1.248, 0.939, -0.033, 1.523, 1.168, -0.744, 0.209, -0.168, -0.316, 0.207, -0.432, 0.047, -0.646, -0.664], [-0.185, -0.613, -1.695, 1.602, -0.32, -0.277, 0.967, 0.728, -0.965, -0.234, 1.069, -0.63, -1.631, 0.711, 0.426, 1.298, -0.191, -0.467, -0.771, 0.971, -0.118, -1.577, -2.064, -0.055, -0.59, 0.642, -0.997, 1.251, 0.538, 1.367, 0.106, 1.704]]]) # fmt: skip - @require_torch def test_tokenization_small_model_pt(self): feature_extractor = pipeline( @@ -102,46 +88,6 @@ class FeatureExtractionPipelineTests(unittest.TestCase): tokenize_kwargs=tokenize_kwargs, ) - @require_tf - def test_tokenization_small_model_tf(self): - feature_extractor = pipeline( - task="feature-extraction", model="hf-internal-testing/tiny-random-distilbert", framework="tf" - ) - # test with empty parameters - outputs = feature_extractor("This is a test") - self.assertEqual( - nested_simplify(outputs), - [[[2.287, 1.234, 0.042, 1.53, 1.306, 0.879, -0.526, -1.71, -1.276, 0.756, -0.775, -1.048, -0.25, -0.595, -0.137, -0.598, 2.022, -0.812, 0.284, -0.488, -0.391, -0.403, -0.525, -0.061, -0.228, 1.086, 0.378, -0.14, 0.599, -0.087, -2.259, -0.098], [1.676, 0.232, -1.508, -0.145, 1.798, -1.388, 1.331, -0.37, -0.939, 0.043, 0.06, -0.414, -1.408, 0.24, 0.622, -0.55, -0.569, 1.873, -0.706, 1.924, -0.254, 1.927, -0.423, 0.152, -0.952, 0.509, -0.496, -0.968, 0.093, -1.049, -0.65, 0.312], [0.207, -0.775, -1.822, 0.321, -0.71, -0.201, 0.3, 1.146, -0.233, -0.753, -0.305, 1.309, -1.47, -0.21, 1.802, -1.555, -1.175, 1.323, -0.303, 0.722, -0.076, 0.103, -1.406, 1.931, 0.091, 0.237, 1.172, 1.607, 0.253, -0.9, -1.068, 0.438], [0.615, 1.077, 0.171, -0.175, 1.3, 0.901, -0.653, -0.138, 0.341, -0.654, -0.184, -0.441, -0.424, 0.356, -0.075, 0.26, -1.023, 0.814, 0.524, -0.904, -0.204, -0.623, 1.234, -1.03, 2.594, 0.56, 1.831, -0.199, -1.508, -0.492, -1.687, -2.165], [0.129, 0.008, -1.279, -0.412, -0.004, 1.663, 0.196, 0.104, 0.123, 0.119, 0.635, 1.757, 2.334, -0.799, -1.626, -1.26, 0.595, -0.316, -1.399, 0.232, 0.264, 1.386, -1.171, -0.256, -0.256, -1.944, 1.168, -0.368, -0.714, -0.51, 0.454, 1.148], [-0.32, 0.29, -1.309, -0.177, 0.453, 0.636, -0.024, 0.509, 0.931, -1.754, -1.575, 0.786, 0.046, -1.165, -1.416, 1.373, 1.293, -0.285, -1.541, -1.186, -0.106, -0.994, 2.001, 0.972, -0.02, 1.654, -0.236, 0.643, 1.02, 0.572, -0.914, -0.154], [0.7, -0.937, 0.441, 0.25, 0.78, -0.022, 0.282, -0.095, 1.558, -0.336, 1.706, 0.884, 1.28, 0.198, -0.796, 1.218, -1.769, 1.197, -0.342, -0.177, -0.645, 1.364, 0.008, -0.597, -0.484, -2.772, -0.696, -0.632, -0.34, -1.527, -0.562, 0.862], [2.504, 0.831, -1.271, -0.033, 0.298, -0.735, 1.339, 1.74, 0.233, -1.424, -0.819, -0.761, 0.291, 0.853, -0.092, -0.885, 0.164, 1.025, 0.907, 0.749, -1.515, -0.545, -1.365, 0.271, 0.034, -2.005, 0.031, 0.244, 0.621, 0.176, 0.336, -1.196], [-0.711, 0.591, -1.001, -0.946, 0.784, -1.66, 1.545, 0.799, -0.857, 1.148, 0.213, -0.285, 0.464, -0.139, 0.79, -1.663, -1.121, 0.575, -0.178, -0.508, 1.565, -0.242, -0.346, 1.024, -1.135, -0.158, -2.101, 0.275, 2.009, -0.425, 0.716, 0.981], [0.912, -1.186, -0.846, -0.421, -1.315, -0.827, 0.309, 0.533, 1.029, -2.343, 1.513, -1.238, 1.487, -0.849, 0.896, -0.927, -0.459, 0.159, 0.177, 0.873, 0.935, 1.433, -0.485, 0.737, 1.327, -0.338, 1.608, -0.47, -0.445, -1.118, -0.213, -0.446], [-0.434, -1.362, -1.098, -1.068, 
1.507, 0.003, 0.413, -0.395, 0.897, -0.237, 1.405, -0.344, 1.693, 0.677, 0.097, -0.257, -0.602, 1.026, -1.229, 0.855, -0.713, 1.014, 0.443, 0.238, 0.425, -2.184, 1.933, -1.157, -1.132, -0.597, -0.785, 0.967], [0.58, -0.971, 0.789, -0.468, -0.576, 1.779, 1.747, 1.715, -1.939, 0.125, 0.656, -0.042, -1.024, -1.767, 0.107, -0.408, -0.866, -1.774, 1.248, 0.939, -0.033, 1.523, 1.168, -0.744, 0.209, -0.168, -0.316, 0.207, -0.432, 0.047, -0.646, -0.664], [-0.185, -0.613, -1.695, 1.602, -0.32, -0.277, 0.967, 0.728, -0.965, -0.234, 1.069, -0.63, -1.631, 0.711, 0.426, 1.298, -0.191, -0.467, -0.771, 0.971, -0.118, -1.577, -2.064, -0.055, -0.59, 0.642, -0.997, 1.251, 0.538, 1.367, 0.106, 1.704]]]) # fmt: skip - - # test with various tokenizer parameters - tokenize_kwargs = {"max_length": 3} - outputs = feature_extractor("This is a test", tokenize_kwargs=tokenize_kwargs) - self.assertEqual(np.squeeze(outputs).shape, (3, 32)) - - tokenize_kwargs = {"truncation": True, "padding": True, "max_length": 4} - outputs = feature_extractor( - ["This is a test", "This", "This is", "This is a", "This is a test test test test"], - tokenize_kwargs=tokenize_kwargs, - ) - self.assertEqual(np.squeeze(outputs).shape, (5, 4, 32)) - - tokenize_kwargs = {"padding": True, "max_length": 4} - outputs = feature_extractor( - ["This is a test", "This", "This is", "This is a", "This is a test test test test"], - truncation=True, - tokenize_kwargs=tokenize_kwargs, - ) - self.assertEqual(np.squeeze(outputs).shape, (5, 4, 32)) - - # raise value error if truncation parameter given for two places - tokenize_kwargs = {"truncation": True} - with self.assertRaises(ValueError): - _ = feature_extractor( - ["This is a test", "This", "This is", "This is a", "This is a test test test test"], - truncation=True, - tokenize_kwargs=tokenize_kwargs, - ) - @require_torch def test_return_tensors_pt(self): feature_extractor = pipeline( @@ -150,14 +96,6 @@ class FeatureExtractionPipelineTests(unittest.TestCase): outputs = feature_extractor("This is a test", return_tensors=True) self.assertTrue(torch.is_tensor(outputs)) - @require_tf - def test_return_tensors_tf(self): - feature_extractor = pipeline( - task="feature-extraction", model="hf-internal-testing/tiny-random-distilbert", framework="tf" - ) - outputs = feature_extractor("This is a test", return_tensors=True) - self.assertTrue(tf.is_tensor(outputs)) - def get_shape(self, input_, shape=None): if shape is None: shape = [] diff --git a/tests/pipelines/test_pipelines_fill_mask.py b/tests/pipelines/test_pipelines_fill_mask.py index 14061eaef7c..fc563f9edd4 100644 --- a/tests/pipelines/test_pipelines_fill_mask.py +++ b/tests/pipelines/test_pipelines_fill_mask.py @@ -22,7 +22,6 @@ from transformers.testing_utils import ( is_pipeline_test, is_torch_available, nested_simplify, - require_tf, require_torch, require_torch_accelerator, slow, @@ -44,47 +43,6 @@ class FillMaskPipelineTests(unittest.TestCase): if is_torch_available(): backend_empty_cache(torch_device) - @require_tf - def test_small_model_tf(self): - unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base", top_k=2, framework="tf") - outputs = unmasker("My name is ") - self.assertEqual( - nested_simplify(outputs, decimals=6), - [ - {"sequence": "My name is grouped", "score": 2.1e-05, "token": 38015, "token_str": " grouped"}, - {"sequence": "My name is accuser", "score": 2.1e-05, "token": 25506, "token_str": " accuser"}, - ], - ) - - outputs = unmasker("The largest city in France is ") - self.assertEqual( - 
nested_simplify(outputs, decimals=6), - [ - { - "sequence": "The largest city in France is grouped", - "score": 2.1e-05, - "token": 38015, - "token_str": " grouped", - }, - { - "sequence": "The largest city in France is accuser", - "score": 2.1e-05, - "token": 25506, - "token_str": " accuser", - }, - ], - ) - - outputs = unmasker("My name is ", targets=[" Patrick", " Clara", " Teven"], top_k=3) - self.assertEqual( - nested_simplify(outputs, decimals=6), - [ - {"sequence": "My name is Clara", "score": 2e-05, "token": 13606, "token_str": " Clara"}, - {"sequence": "My name is Patrick", "score": 2e-05, "token": 3499, "token_str": " Patrick"}, - {"sequence": "My name is Te", "score": 1.9e-05, "token": 2941, "token_str": " Te"}, - ], - ) - @require_torch def test_small_model_pt(self): unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base", top_k=2, framework="pt") @@ -172,12 +130,6 @@ class FillMaskPipelineTests(unittest.TestCase): unmasker = pipeline(task="fill-mask", model="distilbert/distilroberta-base", top_k=2, framework="pt") self.run_large_test(unmasker) - @slow - @require_tf - def test_large_model_tf(self): - unmasker = pipeline(task="fill-mask", model="distilbert/distilroberta-base", top_k=2, framework="tf") - self.run_large_test(unmasker) - def run_large_test(self, unmasker): outputs = unmasker("My name is ") self.assertEqual( @@ -244,13 +196,6 @@ class FillMaskPipelineTests(unittest.TestCase): unmasker.tokenizer.pad_token = None self.run_pipeline_test(unmasker, []) - @require_tf - def test_model_no_pad_tf(self): - unmasker = pipeline(task="fill-mask", model="sshleifer/tiny-distilroberta-base", framework="tf") - unmasker.tokenizer.pad_token_id = None - unmasker.tokenizer.pad_token = None - self.run_pipeline_test(unmasker, []) - def get_test_pipeline( self, model, diff --git a/tests/pipelines/test_pipelines_image_classification.py b/tests/pipelines/test_pipelines_image_classification.py index 17aec8bf35b..a57774211ec 100644 --- a/tests/pipelines/test_pipelines_image_classification.py +++ b/tests/pipelines/test_pipelines_image_classification.py @@ -29,7 +29,6 @@ from transformers.testing_utils import ( compare_pipeline_output_to_hub_spec, is_pipeline_test, nested_simplify, - require_tf, require_torch, require_torch_or_tf, require_vision, @@ -175,32 +174,6 @@ class ImageClassificationPipelineTests(unittest.TestCase): ], ) - @require_tf - def test_small_model_tf(self): - small_model = "hf-internal-testing/tiny-random-vit" - image_classifier = pipeline("image-classification", model=small_model, framework="tf") - - outputs = image_classifier("http://images.cocodataset.org/val2017/000000039769.jpg") - self.assertEqual( - nested_simplify(outputs, decimals=4), - [{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}], - ) - - outputs = image_classifier( - [ - "http://images.cocodataset.org/val2017/000000039769.jpg", - "http://images.cocodataset.org/val2017/000000039769.jpg", - ], - top_k=2, - ) - self.assertEqual( - nested_simplify(outputs, decimals=4), - [ - [{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}], - [{"label": "LABEL_1", "score": 0.574}, {"label": "LABEL_0", "score": 0.426}], - ], - ) - def test_custom_tokenizer(self): tokenizer = PreTrainedTokenizerBase() diff --git a/tests/pipelines/test_pipelines_image_feature_extraction.py b/tests/pipelines/test_pipelines_image_feature_extraction.py index d5d441bda69..e17d34714c3 100644 --- a/tests/pipelines/test_pipelines_image_feature_extraction.py +++ 
b/tests/pipelines/test_pipelines_image_feature_extraction.py @@ -22,20 +22,16 @@ from transformers import ( TF_MODEL_MAPPING, TOKENIZER_MAPPING, ImageFeatureExtractionPipeline, - is_tf_available, is_torch_available, is_vision_available, pipeline, ) -from transformers.testing_utils import is_pipeline_test, nested_simplify, require_tf, require_torch +from transformers.testing_utils import is_pipeline_test, nested_simplify, require_torch if is_torch_available(): import torch -if is_tf_available(): - import tensorflow as tf - if is_vision_available(): from PIL import Image @@ -73,28 +69,6 @@ class ImageFeatureExtractionPipelineTests(unittest.TestCase): nested_simplify(outputs[0]), [-0.056, 0.083, 0.021, 0.038, 0.242, -0.279, -0.033, -0.003, 0.200, -0.192, 0.045, -0.095, -0.077, 0.017, -0.058, -0.063, -0.029, -0.204, 0.014, 0.042, 0.305, -0.205, -0.099, 0.146, -0.287, 0.020, 0.168, -0.052, 0.046, 0.048, -0.156, 0.093]) # fmt: skip - @require_tf - def test_small_model_tf(self): - feature_extractor = pipeline( - task="image-feature-extraction", model="hf-internal-testing/tiny-random-vit-w-pooler", framework="tf" - ) - img = prepare_img() - outputs = feature_extractor(img) - self.assertEqual( - nested_simplify(outputs[0][0]), - [-1.417, -0.392, -1.264, -1.196, 1.648, 0.885, 0.56, -0.606, -1.175, 0.823, 1.912, 0.081, -0.053, 1.119, -0.062, -1.757, -0.571, 0.075, 0.959, 0.118, 1.201, -0.672, -0.498, 0.364, 0.937, -1.623, 0.228, 0.19, 1.697, -1.115, 0.583, -0.981]) # fmt: skip - - @require_tf - def test_small_model_w_pooler_tf(self): - feature_extractor = pipeline( - task="image-feature-extraction", model="hf-internal-testing/tiny-random-vit-w-pooler", framework="tf" - ) - img = prepare_img() - outputs = feature_extractor(img, pool=True) - self.assertEqual( - nested_simplify(outputs[0]), - [-0.056, 0.083, 0.021, 0.038, 0.242, -0.279, -0.033, -0.003, 0.200, -0.192, 0.045, -0.095, -0.077, 0.017, -0.058, -0.063, -0.029, -0.204, 0.014, 0.042, 0.305, -0.205, -0.099, 0.146, -0.287, 0.020, 0.168, -0.052, 0.046, 0.048, -0.156, 0.093]) # fmt: skip - @require_torch def test_image_processing_small_model_pt(self): feature_extractor = pipeline( @@ -117,28 +91,6 @@ class ImageFeatureExtractionPipelineTests(unittest.TestCase): outputs = feature_extractor(img, pool=True) self.assertEqual(np.squeeze(outputs).shape, (32,)) - @require_tf - def test_image_processing_small_model_tf(self): - feature_extractor = pipeline( - task="image-feature-extraction", model="hf-internal-testing/tiny-random-vit", framework="tf" - ) - - # test with image processor parameters - image_processor_kwargs = {"size": {"height": 300, "width": 300}} - img = prepare_img() - with pytest.raises(ValueError): - # Image doesn't match model input size - feature_extractor(img, image_processor_kwargs=image_processor_kwargs) - - image_processor_kwargs = {"image_mean": [0, 0, 0], "image_std": [1, 1, 1]} - img = prepare_img() - outputs = feature_extractor(img, image_processor_kwargs=image_processor_kwargs) - self.assertEqual(np.squeeze(outputs).shape, (226, 32)) - - # Test pooling option - outputs = feature_extractor(img, pool=True) - self.assertEqual(np.squeeze(outputs).shape, (32,)) - @require_torch def test_return_tensors_pt(self): feature_extractor = pipeline( @@ -148,15 +100,6 @@ class ImageFeatureExtractionPipelineTests(unittest.TestCase): outputs = feature_extractor(img, return_tensors=True) self.assertTrue(torch.is_tensor(outputs)) - @require_tf - def test_return_tensors_tf(self): - feature_extractor = pipeline( - task="image-feature-extraction", 
model="hf-internal-testing/tiny-random-vit", framework="tf" - ) - img = prepare_img() - outputs = feature_extractor(img, return_tensors=True) - self.assertTrue(tf.is_tensor(outputs)) - def get_test_pipeline( self, model, diff --git a/tests/pipelines/test_pipelines_image_segmentation.py b/tests/pipelines/test_pipelines_image_segmentation.py index 215a6180379..3860a39d6e8 100644 --- a/tests/pipelines/test_pipelines_image_segmentation.py +++ b/tests/pipelines/test_pipelines_image_segmentation.py @@ -39,7 +39,6 @@ from transformers.testing_utils import ( compare_pipeline_output_to_hub_spec, is_pipeline_test, nested_simplify, - require_tf, require_timm, require_torch, require_vision, @@ -202,11 +201,6 @@ class ImageSegmentationPipelineTests(unittest.TestCase): for output_element in single_output: compare_pipeline_output_to_hub_spec(output_element, ImageSegmentationOutputElement) - @require_tf - @unittest.skip(reason="Image segmentation not implemented in TF") - def test_small_model_tf(self): - pass - @require_torch def test_small_model_pt_no_panoptic(self): model_id = "hf-internal-testing/tiny-random-mobilevit" diff --git a/tests/pipelines/test_pipelines_mask_generation.py b/tests/pipelines/test_pipelines_mask_generation.py index 96ea5ae870b..d7ce7091583 100644 --- a/tests/pipelines/test_pipelines_mask_generation.py +++ b/tests/pipelines/test_pipelines_mask_generation.py @@ -29,7 +29,6 @@ from transformers.testing_utils import ( Expectations, is_pipeline_test, nested_simplify, - require_tf, require_torch, require_vision, slow, @@ -103,11 +102,6 @@ class MaskGenerationPipelineTests(unittest.TestCase): def run_pipeline_test(self, mask_generator, examples): pass - @require_tf - @unittest.skip(reason="Image segmentation not implemented in TF") - def test_small_model_tf(self): - pass - @slow @require_torch def test_small_model_pt(self): diff --git a/tests/pipelines/test_pipelines_object_detection.py b/tests/pipelines/test_pipelines_object_detection.py index fcc50ca5b2b..6e2e3ee77c3 100644 --- a/tests/pipelines/test_pipelines_object_detection.py +++ b/tests/pipelines/test_pipelines_object_detection.py @@ -30,7 +30,6 @@ from transformers.testing_utils import ( is_pipeline_test, nested_simplify, require_pytesseract, - require_tf, require_timm, require_torch, require_vision, @@ -128,11 +127,6 @@ class ObjectDetectionPipelineTests(unittest.TestCase): ) compare_pipeline_output_to_hub_spec(detected_object, ObjectDetectionOutputElement) - @require_tf - @unittest.skip(reason="Object detection not implemented in TF") - def test_small_model_tf(self): - pass - @require_torch def test_small_model_pt(self): model_id = "hf-internal-testing/tiny-detr-mobilenetsv3" diff --git a/tests/pipelines/test_pipelines_question_answering.py b/tests/pipelines/test_pipelines_question_answering.py index fbd70b2a099..2de1de20d2e 100644 --- a/tests/pipelines/test_pipelines_question_answering.py +++ b/tests/pipelines/test_pipelines_question_answering.py @@ -29,7 +29,6 @@ from transformers.testing_utils import ( is_pipeline_test, is_torch_available, nested_simplify, - require_tf, require_torch, require_torch_or_tf, slow, @@ -296,17 +295,6 @@ class QAPipelineTests(unittest.TestCase): answers = [output["answer"] for output in outputs] self.assertEqual(len(answers), len(set(answers)), "There are duplicate answers in the outputs.") - @require_tf - def test_small_model_tf(self): - question_answerer = pipeline( - "question-answering", model="sshleifer/tiny-distilbert-base-cased-distilled-squad", framework="tf" - ) - outputs = 
question_answerer( - question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris." - ) - - self.assertEqual(nested_simplify(outputs), {"score": 0.011, "start": 0, "end": 11, "answer": "HuggingFace"}) - @slow @require_torch def test_large_model_pt(self): @@ -421,16 +409,6 @@ between them. It's straightforward to train your models with one before loading {"answer": "Jax, PyTorch and TensorFlow", "end": 1919, "score": 0.971, "start": 1892}, ) - @slow - @require_tf - def test_large_model_tf(self): - question_answerer = pipeline("question-answering", framework="tf") - outputs = question_answerer( - question="Where was HuggingFace founded ?", context="HuggingFace was founded in Paris." - ) - - self.assertEqual(nested_simplify(outputs), {"score": 0.979, "start": 27, "end": 32, "answer": "Paris"}) - @require_torch_or_tf class QuestionAnsweringArgumentHandlerTests(unittest.TestCase): diff --git a/tests/pipelines/test_pipelines_table_question_answering.py b/tests/pipelines/test_pipelines_table_question_answering.py index 3a72ea5dbda..1a5f2839e59 100644 --- a/tests/pipelines/test_pipelines_table_question_answering.py +++ b/tests/pipelines/test_pipelines_table_question_answering.py @@ -26,7 +26,6 @@ from transformers.testing_utils import ( is_pipeline_test, require_pandas, require_tensorflow_probability, - require_tf, require_torch, slow, ) @@ -38,111 +37,6 @@ class TQAPipelineTests(unittest.TestCase): # which are needed to generate automatic tests model_mapping = MODEL_FOR_TABLE_QUESTION_ANSWERING_MAPPING - @require_tensorflow_probability - @require_pandas - @require_tf - @require_torch - def test_small_model_tf(self): - model_id = "lysandre/tiny-tapas-random-wtq" - model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id, from_pt=True) - tokenizer = AutoTokenizer.from_pretrained(model_id) - self.assertIsInstance(model.config.aggregation_labels, dict) - self.assertIsInstance(model.config.no_aggregation_label_index, int) - - table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer, max_new_tokens=20) - outputs = table_querier( - table={ - "actors": ["brad pitt", "leonardo di caprio", "george clooney"], - "age": ["56", "45", "59"], - "number of movies": ["87", "53", "69"], - "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], - }, - query="how many movies has george clooney played in?", - ) - self.assertEqual( - outputs, - {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, - ) - outputs = table_querier( - table={ - "actors": ["brad pitt", "leonardo di caprio", "george clooney"], - "age": ["56", "45", "59"], - "number of movies": ["87", "53", "69"], - "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], - }, - query=["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"], - ) - self.assertEqual( - outputs, - [ - {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, - {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, - {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, - ], - ) - outputs = table_querier( - table={ - "Repository": ["Transformers", "Datasets", "Tokenizers"], - "Stars": ["36542", "4512", "3934"], - "Contributors": ["651", "77", "34"], - "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], - }, - query=[ - "What repository has the largest number of stars?", - "Given that the numbers of stars defines if a 
repository is active, what repository is the most" - " active?", - "What is the number of repositories?", - "What is the average number of stars?", - "What is the total amount of stars?", - ], - ) - self.assertEqual( - outputs, - [ - {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, - {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, - {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, - {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, - {"answer": "AVERAGE > ", "coordinates": [], "cells": [], "aggregator": "AVERAGE"}, - ], - ) - - with self.assertRaises(ValueError): - table_querier(query="What does it do with empty context ?", table=None) - with self.assertRaises(ValueError): - table_querier(query="What does it do with empty context ?", table="") - with self.assertRaises(ValueError): - table_querier(query="What does it do with empty context ?", table={}) - with self.assertRaises(ValueError): - table_querier( - table={ - "Repository": ["Transformers", "Datasets", "Tokenizers"], - "Stars": ["36542", "4512", "3934"], - "Contributors": ["651", "77", "34"], - "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], - } - ) - with self.assertRaises(ValueError): - table_querier( - query="", - table={ - "Repository": ["Transformers", "Datasets", "Tokenizers"], - "Stars": ["36542", "4512", "3934"], - "Contributors": ["651", "77", "34"], - "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], - }, - ) - with self.assertRaises(ValueError): - table_querier( - query=None, - table={ - "Repository": ["Transformers", "Datasets", "Tokenizers"], - "Stars": ["36542", "4512", "3934"], - "Contributors": ["651", "77", "34"], - "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], - }, - ) - @require_torch def test_small_model_pt(self, torch_dtype="float32"): model_id = "lysandre/tiny-tapas-random-wtq" @@ -372,128 +266,6 @@ class TQAPipelineTests(unittest.TestCase): def test_slow_tokenizer_sqa_pt_fp16(self): self.test_slow_tokenizer_sqa_pt(torch_dtype="float16") - @require_tf - @require_tensorflow_probability - @require_pandas - @require_torch - def test_slow_tokenizer_sqa_tf(self): - model_id = "lysandre/tiny-tapas-random-sqa" - model = TFAutoModelForTableQuestionAnswering.from_pretrained(model_id, from_pt=True) - tokenizer = AutoTokenizer.from_pretrained(model_id) - table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer, max_new_tokens=20) - - inputs = { - "table": { - "actors": ["brad pitt", "leonardo di caprio", "george clooney"], - "age": ["56", "45", "59"], - "number of movies": ["87", "53", "69"], - "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], - }, - "query": ["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"], - } - sequential_outputs = table_querier(**inputs, sequential=True) - batch_outputs = table_querier(**inputs, sequential=False) - - self.assertEqual(len(sequential_outputs), 3) - self.assertEqual(len(batch_outputs), 3) - self.assertEqual(sequential_outputs[0], batch_outputs[0]) - self.assertNotEqual(sequential_outputs[1], batch_outputs[1]) - # self.assertNotEqual(sequential_outputs[2], batch_outputs[2]) - - table_querier = TableQuestionAnsweringPipeline(model=model, tokenizer=tokenizer, max_new_tokens=20) - outputs = table_querier( - table={ - "actors": ["brad pitt", "leonardo di caprio", "george clooney"], - "age": 
["56", "45", "59"], - "number of movies": ["87", "53", "69"], - "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], - }, - query="how many movies has george clooney played in?", - ) - self.assertEqual( - outputs, - {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]}, - ) - outputs = table_querier( - table={ - "actors": ["brad pitt", "leonardo di caprio", "george clooney"], - "age": ["56", "45", "59"], - "number of movies": ["87", "53", "69"], - "date of birth": ["7 february 1967", "10 june 1996", "28 november 1967"], - }, - query=["how many movies has george clooney played in?", "how old is he?", "what's his date of birth?"], - ) - self.assertEqual( - outputs, - [ - {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]}, - {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]}, - {"answer": "7 february 1967", "coordinates": [(0, 3)], "cells": ["7 february 1967"]}, - ], - ) - outputs = table_querier( - table={ - "Repository": ["Transformers", "Datasets", "Tokenizers"], - "Stars": ["36542", "4512", "3934"], - "Contributors": ["651", "77", "34"], - "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], - }, - query=[ - "What repository has the largest number of stars?", - "Given that the numbers of stars defines if a repository is active, what repository is the most" - " active?", - "What is the number of repositories?", - "What is the average number of stars?", - "What is the total amount of stars?", - ], - ) - self.assertEqual( - outputs, - [ - {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}, - {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}, - {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}, - {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}, - {"answer": "Python, Python", "coordinates": [(0, 3), (1, 3)], "cells": ["Python", "Python"]}, - ], - ) - - with self.assertRaises(ValueError): - table_querier(query="What does it do with empty context ?", table=None) - with self.assertRaises(ValueError): - table_querier(query="What does it do with empty context ?", table="") - with self.assertRaises(ValueError): - table_querier(query="What does it do with empty context ?", table={}) - with self.assertRaises(ValueError): - table_querier( - table={ - "Repository": ["Transformers", "Datasets", "Tokenizers"], - "Stars": ["36542", "4512", "3934"], - "Contributors": ["651", "77", "34"], - "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], - } - ) - with self.assertRaises(ValueError): - table_querier( - query="", - table={ - "Repository": ["Transformers", "Datasets", "Tokenizers"], - "Stars": ["36542", "4512", "3934"], - "Contributors": ["651", "77", "34"], - "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], - }, - ) - with self.assertRaises(ValueError): - table_querier( - query=None, - table={ - "Repository": ["Transformers", "Datasets", "Tokenizers"], - "Stars": ["36542", "4512", "3934"], - "Contributors": ["651", "77", "34"], - "Programming language": ["Python", "Python", "Rust, Python and NodeJS"], - }, - ) - @slow @require_torch def test_integration_wtq_pt(self, torch_dtype="float32"): diff --git a/tests/pipelines/test_pipelines_text_classification.py b/tests/pipelines/test_pipelines_text_classification.py index e059382b823..8f29bde9f8a 100644 --- 
a/tests/pipelines/test_pipelines_text_classification.py +++ b/tests/pipelines/test_pipelines_text_classification.py @@ -24,7 +24,6 @@ from transformers.testing_utils import ( is_pipeline_test, is_torch_available, nested_simplify, - require_tf, require_torch, require_torch_bf16, require_torch_fp16, @@ -152,15 +151,6 @@ class TextClassificationPipelineTests(unittest.TestCase): outputs = text_classifier("This is great !") self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.504}]) - @require_tf - def test_small_model_tf(self): - text_classifier = pipeline( - task="text-classification", model="hf-internal-testing/tiny-random-distilbert", framework="tf" - ) - - outputs = text_classifier("This is great !") - self.assertEqual(nested_simplify(outputs), [{"label": "LABEL_0", "score": 0.504}]) - @slow @require_torch def test_pt_bert(self): @@ -173,18 +163,6 @@ class TextClassificationPipelineTests(unittest.TestCase): outputs = text_classifier("Birds are a type of animal") self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}]) - @slow - @require_tf - def test_tf_bert(self): - text_classifier = pipeline("text-classification", framework="tf") - - outputs = text_classifier("This is great !") - self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 1.0}]) - outputs = text_classifier("This is bad !") - self.assertEqual(nested_simplify(outputs), [{"label": "NEGATIVE", "score": 1.0}]) - outputs = text_classifier("Birds are a type of animal") - self.assertEqual(nested_simplify(outputs), [{"label": "POSITIVE", "score": 0.988}]) - def get_test_pipeline( self, model, diff --git a/tests/pipelines/test_pipelines_token_classification.py b/tests/pipelines/test_pipelines_token_classification.py index 643e4d6675d..16767b342c8 100644 --- a/tests/pipelines/test_pipelines_token_classification.py +++ b/tests/pipelines/test_pipelines_token_classification.py @@ -29,7 +29,6 @@ from transformers.testing_utils import ( is_pipeline_test, is_torch_available, nested_simplify, - require_tf, require_torch, require_torch_accelerator, slow, @@ -823,26 +822,6 @@ class TokenClassificationPipelineTests(unittest.TestCase): [("▁I", False), ("▁play", False), ("▁the", False), ("▁there", False), ("min", True)], ) - @require_tf - def test_tf_only(self): - model_name = "hf-internal-testing/tiny-random-bert-tf-only" # This model only has a TensorFlow version - # We test that if we don't specify framework='tf', it gets detected automatically - token_classifier = pipeline(task="ner", model=model_name) - self.assertEqual(token_classifier.framework, "tf") - - @require_tf - def test_small_model_tf(self): - model_name = "hf-internal-testing/tiny-bert-for-token-classification" - token_classifier = pipeline(task="token-classification", model=model_name, framework="tf") - outputs = token_classifier("This is a test !") - self.assertEqual( - nested_simplify(outputs), - [ - {"entity": "I-MISC", "score": 0.115, "index": 1, "word": "this", "start": 0, "end": 4}, - {"entity": "I-MISC", "score": 0.115, "index": 2, "word": "is", "start": 5, "end": 7}, - ], - ) - @require_torch def test_no_offset_tokenizer(self): model_name = "hf-internal-testing/tiny-bert-for-token-classification" diff --git a/tests/pipelines/test_pipelines_video_classification.py b/tests/pipelines/test_pipelines_video_classification.py index 6dbe324ed3d..5043c1f6b32 100644 --- a/tests/pipelines/test_pipelines_video_classification.py +++ b/tests/pipelines/test_pipelines_video_classification.py @@ -23,7 +23,6 @@ from 
transformers.testing_utils import ( is_pipeline_test, nested_simplify, require_av, - require_tf, require_torch, require_torch_or_tf, require_vision, @@ -124,8 +123,3 @@ class VideoClassificationPipelineTests(unittest.TestCase): for output in outputs: for element in output: compare_pipeline_output_to_hub_spec(element, VideoClassificationOutputElement) - - @require_tf - @unittest.skip - def test_small_model_tf(self): - pass diff --git a/tests/pipelines/test_pipelines_visual_question_answering.py b/tests/pipelines/test_pipelines_visual_question_answering.py index a4b1a9b7957..8066c885bfd 100644 --- a/tests/pipelines/test_pipelines_visual_question_answering.py +++ b/tests/pipelines/test_pipelines_visual_question_answering.py @@ -22,7 +22,6 @@ from transformers.testing_utils import ( is_pipeline_test, is_torch_available, nested_simplify, - require_tf, require_torch, require_torch_accelerator, require_vision, @@ -246,8 +245,3 @@ class VisualQuestionAnsweringPipelineTests(unittest.TestCase): [{"score": ANY(float), "answer": ANY(str)}], ], ) - - @require_tf - @unittest.skip(reason="Visual question answering not implemented in TF") - def test_small_model_tf(self): - pass diff --git a/tests/pipelines/test_pipelines_zero_shot.py b/tests/pipelines/test_pipelines_zero_shot.py index bfd2b1518a3..17553915f43 100644 --- a/tests/pipelines/test_pipelines_zero_shot.py +++ b/tests/pipelines/test_pipelines_zero_shot.py @@ -25,7 +25,6 @@ from transformers.testing_utils import ( is_pipeline_test, is_torch_available, nested_simplify, - require_tf, require_torch, slow, ) @@ -243,26 +242,6 @@ class ZeroShotClassificationPipelineTests(unittest.TestCase): }, ) - @require_tf - def test_small_model_tf(self): - zero_shot_classifier = pipeline( - "zero-shot-classification", - model="sshleifer/tiny-distilbert-base-cased-distilled-squad", - framework="tf", - ) - outputs = zero_shot_classifier( - "Who are you voting for in 2020?", candidate_labels=["politics", "public health", "science"] - ) - - self.assertEqual( - nested_simplify(outputs), - { - "sequence": "Who are you voting for in 2020?", - "labels": ["science", "public health", "politics"], - "scores": [0.333, 0.333, 0.333], - }, - ) - @slow @require_torch def test_large_model_pt(self): @@ -319,60 +298,3 @@ class ZeroShotClassificationPipelineTests(unittest.TestCase): "scores": [0.817, 0.713, 0.018, 0.018], }, ) - - @slow - @require_tf - def test_large_model_tf(self): - zero_shot_classifier = pipeline( - "zero-shot-classification", model="FacebookAI/roberta-large-mnli", framework="tf" - ) - outputs = zero_shot_classifier( - "Who are you voting for in 2020?", candidate_labels=["politics", "public health", "science"] - ) - - self.assertEqual( - nested_simplify(outputs), - { - "sequence": "Who are you voting for in 2020?", - "labels": ["politics", "public health", "science"], - "scores": [0.976, 0.015, 0.009], - }, - ) - outputs = zero_shot_classifier( - "The dominant sequence transduction models are based on complex recurrent or convolutional neural networks" - " in an encoder-decoder configuration. The best performing models also connect the encoder and decoder" - " through an attention mechanism. We propose a new simple network architecture, the Transformer, based" - " solely on attention mechanisms, dispensing with recurrence and convolutions entirely. Experiments on two" - " machine translation tasks show these models to be superior in quality while being more parallelizable" - " and requiring significantly less time to train. 
Our model achieves 28.4 BLEU on the WMT 2014" - " English-to-German translation task, improving over the existing best results, including ensembles by" - " over 2 BLEU. On the WMT 2014 English-to-French translation task, our model establishes a new" - " single-model state-of-the-art BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small" - " fraction of the training costs of the best models from the literature. We show that the Transformer" - " generalizes well to other tasks by applying it successfully to English constituency parsing both with" - " large and limited training data.", - candidate_labels=["machine learning", "statistics", "translation", "vision"], - multi_label=True, - ) - self.assertEqual( - nested_simplify(outputs), - { - "sequence": ( - "The dominant sequence transduction models are based on complex recurrent or convolutional neural" - " networks in an encoder-decoder configuration. The best performing models also connect the" - " encoder and decoder through an attention mechanism. We propose a new simple network" - " architecture, the Transformer, based solely on attention mechanisms, dispensing with recurrence" - " and convolutions entirely. Experiments on two machine translation tasks show these models to be" - " superior in quality while being more parallelizable and requiring significantly less time to" - " train. Our model achieves 28.4 BLEU on the WMT 2014 English-to-German translation task," - " improving over the existing best results, including ensembles by over 2 BLEU. On the WMT 2014" - " English-to-French translation task, our model establishes a new single-model state-of-the-art" - " BLEU score of 41.8 after training for 3.5 days on eight GPUs, a small fraction of the training" - " costs of the best models from the literature. We show that the Transformer generalizes well to" - " other tasks by applying it successfully to English constituency parsing both with large and" - " limited training data." - ), - "labels": ["translation", "machine learning", "vision", "statistics"], - "scores": [0.817, 0.713, 0.018, 0.018], - }, - ) diff --git a/tests/pipelines/test_pipelines_zero_shot_image_classification.py b/tests/pipelines/test_pipelines_zero_shot_image_classification.py index bbeaeff3c17..39cc712ab72 100644 --- a/tests/pipelines/test_pipelines_zero_shot_image_classification.py +++ b/tests/pipelines/test_pipelines_zero_shot_image_classification.py @@ -22,7 +22,6 @@ from transformers.testing_utils import ( compare_pipeline_output_to_hub_spec, is_pipeline_test, nested_simplify, - require_tf, require_torch, require_vision, slow, @@ -137,57 +136,6 @@ class ZeroShotImageClassificationPipelineTests(unittest.TestCase): def test_small_model_pt_fp16(self): self.test_small_model_pt(torch_dtype="float16") - @require_tf - def test_small_model_tf(self): - image_classifier = pipeline( - model="hf-internal-testing/tiny-random-clip-zero-shot-image-classification", framework="tf" - ) - image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png") - output = image_classifier(image, candidate_labels=["a", "b", "c"]) - - self.assertEqual( - nested_simplify(output), - [{"score": 0.333, "label": "a"}, {"score": 0.333, "label": "b"}, {"score": 0.333, "label": "c"}], - ) - - output = image_classifier([image] * 5, candidate_labels=["A", "B", "C"], batch_size=2) - self.assertEqual( - nested_simplify(output), - # Pipeline outputs are supposed to be deterministic and - # So we could in theory have real values "A", "B", "C" instead - # of ANY(str). 
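For reference, the PyTorch path kept by this patch covers the same behavior as the deleted TF test; a minimal sketch, assuming the surviving `test_small_model_pt` pattern in this file (the tiny CLIP checkpoint produces near-tied scores, which is why label order is only stable up to sorting):

```python
from transformers import pipeline

# Same tiny checkpoint the deleted TF test used, on the PyTorch backend.
image_classifier = pipeline(
    model="hf-internal-testing/tiny-random-clip-zero-shot-image-classification",
    framework="pt",
)
output = image_classifier(
    "./tests/fixtures/tests_samples/COCO/000000039769.png",
    candidate_labels=["a", "b", "c"],
)

# Scores are a softmax over the candidate labels, so they sum to ~1; with a
# random tiny model they are near-identical ties (~0.333 each), so only the
# label set is deterministic, not the ordering under batching.
assert sorted(d["label"] for d in output) == ["a", "b", "c"]
assert abs(sum(d["score"] for d in output) - 1.0) < 1e-5
```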
- # However it seems that in this particular case, the floating - # scores are so close, we enter floating error approximation - # and the order is not guaranteed anymore with batching. - [ - [ - {"score": 0.333, "label": ANY(str)}, - {"score": 0.333, "label": ANY(str)}, - {"score": 0.333, "label": ANY(str)}, - ], - [ - {"score": 0.333, "label": ANY(str)}, - {"score": 0.333, "label": ANY(str)}, - {"score": 0.333, "label": ANY(str)}, - ], - [ - {"score": 0.333, "label": ANY(str)}, - {"score": 0.333, "label": ANY(str)}, - {"score": 0.333, "label": ANY(str)}, - ], - [ - {"score": 0.333, "label": ANY(str)}, - {"score": 0.333, "label": ANY(str)}, - {"score": 0.333, "label": ANY(str)}, - ], - [ - {"score": 0.333, "label": ANY(str)}, - {"score": 0.333, "label": ANY(str)}, - {"score": 0.333, "label": ANY(str)}, - ], - ], - ) - @slow @require_torch def test_large_model_pt(self): @@ -221,37 +169,6 @@ class ZeroShotImageClassificationPipelineTests(unittest.TestCase): * 5, ) - @slow - @require_tf - def test_large_model_tf(self): - image_classifier = pipeline( - task="zero-shot-image-classification", model="openai/clip-vit-base-patch32", framework="tf" - ) - # This is an image of 2 cats with remotes and no planes - image = Image.open("./tests/fixtures/tests_samples/COCO/000000039769.png") - output = image_classifier(image, candidate_labels=["cat", "plane", "remote"]) - self.assertEqual( - nested_simplify(output), - [ - {"score": 0.511, "label": "remote"}, - {"score": 0.485, "label": "cat"}, - {"score": 0.004, "label": "plane"}, - ], - ) - - output = image_classifier([image] * 5, candidate_labels=["cat", "plane", "remote"], batch_size=2) - self.assertEqual( - nested_simplify(output), - [ - [ - {"score": 0.511, "label": "remote"}, - {"score": 0.485, "label": "cat"}, - {"score": 0.004, "label": "plane"}, - ], - ] - * 5, - ) - @slow @require_torch def test_siglip_model_pt(self): diff --git a/tests/pipelines/test_pipelines_zero_shot_object_detection.py b/tests/pipelines/test_pipelines_zero_shot_object_detection.py index 5ed48de3610..8d5afbe3ded 100644 --- a/tests/pipelines/test_pipelines_zero_shot_object_detection.py +++ b/tests/pipelines/test_pipelines_zero_shot_object_detection.py @@ -23,7 +23,6 @@ from transformers import ( from transformers.testing_utils import ( is_pipeline_test, nested_simplify, - require_tf, require_torch, require_vision, slow, @@ -90,11 +89,6 @@ class ZeroShotObjectDetectionPipelineTests(unittest.TestCase): ], ) - @require_tf - @unittest.skip(reason="Zero Shot Object Detection not implemented in TF") - def test_small_model_tf(self): - pass - @require_torch def test_small_model_pt(self): object_detector = pipeline( @@ -201,11 +195,6 @@ class ZeroShotObjectDetectionPipelineTests(unittest.TestCase): ], ) - @require_tf - @unittest.skip(reason="Zero Shot Object Detection not implemented in TF") - def test_large_model_tf(self): - pass - @require_torch @slow def test_threshold(self): diff --git a/tests/test_image_transforms.py b/tests/test_image_transforms.py index 3d3b84c7e81..b18d79ec98a 100644 --- a/tests/test_image_transforms.py +++ b/tests/test_image_transforms.py @@ -17,16 +17,13 @@ import unittest import numpy as np from parameterized import parameterized -from transformers.testing_utils import require_flax, require_tf, require_torch, require_vision -from transformers.utils.import_utils import is_flax_available, is_tf_available, is_torch_available, is_vision_available +from transformers.testing_utils import require_flax, require_torch, require_vision +from 
transformers.utils.import_utils import is_flax_available, is_torch_available, is_vision_available if is_torch_available(): import torch -if is_tf_available(): - import tensorflow as tf - if is_flax_available(): import jax @@ -122,20 +119,6 @@ class ImageTransformsTester(unittest.TestCase): self.assertTrue(np_img.min() == 0) self.assertTrue(np_img.max() == 1) - @require_tf - def test_to_pil_image_from_tensorflow(self): - # channels_first - image = tf.random.uniform((3, 4, 5)) - pil_image = to_pil_image(image) - self.assertIsInstance(pil_image, PIL.Image.Image) - self.assertEqual(pil_image.size, (5, 4)) - - # channels_last - image = tf.random.uniform((4, 5, 3)) - pil_image = to_pil_image(image) - self.assertIsInstance(pil_image, PIL.Image.Image) - self.assertEqual(pil_image.size, (5, 4)) - @require_torch def test_to_pil_image_from_torch(self): # channels first diff --git a/tests/test_sequence_feature_extraction_common.py b/tests/test_sequence_feature_extraction_common.py index cde16deb75e..6fd55978e4c 100644 --- a/tests/test_sequence_feature_extraction_common.py +++ b/tests/test_sequence_feature_extraction_common.py @@ -16,7 +16,7 @@ import numpy as np from transformers import BatchFeature -from transformers.testing_utils import require_tf, require_torch +from transformers.testing_utils import require_torch from .test_feature_extraction_common import FeatureExtractionSavingTestMixin @@ -76,24 +76,6 @@ class SequenceFeatureExtractionTestMixin(FeatureExtractionSavingTestMixin): == (self.feat_extract_tester.batch_size, len(speech_inputs[0]), self.feat_extract_tester.feature_size) ) - @require_tf - def test_batch_feature_tf(self): - speech_inputs = self.feat_extract_tester.prepare_inputs_for_common(equal_length=True) - feat_extract = self.feature_extraction_class(**self.feat_extract_dict) - input_name = feat_extract.model_input_names[0] - - processed_features = BatchFeature({input_name: speech_inputs}, tensor_type="tf") - - batch_features_input = processed_features[input_name] - - if len(batch_features_input.shape) < 3: - batch_features_input = batch_features_input[:, :, None] - - self.assertTrue( - batch_features_input.shape - == (self.feat_extract_tester.batch_size, len(speech_inputs[0]), self.feat_extract_tester.feature_size) - ) - def _check_padding(self, numpify=False): def _inputs_have_equal_length(input): length = len(input[0]) @@ -372,19 +354,6 @@ class SequenceFeatureExtractionTestMixin(FeatureExtractionSavingTestMixin): self.assertTrue(abs(input_np.astype(np.float32).sum() - input_pt.numpy().astype(np.float32).sum()) < 1e-2) - @require_tf - def test_padding_accepts_tensors_tf(self): - feat_extract = self.feature_extraction_class(**self.feat_extract_dict) - speech_inputs = self.feat_extract_tester.prepare_inputs_for_common() - input_name = feat_extract.model_input_names[0] - - processed_features = BatchFeature({input_name: speech_inputs}) - - input_np = feat_extract.pad(processed_features, padding="longest", return_tensors="np")[input_name] - input_tf = feat_extract.pad(processed_features, padding="longest", return_tensors="tf")[input_name] - - self.assertTrue(abs(input_np.astype(np.float32).sum() - input_tf.numpy().astype(np.float32).sum()) < 1e-2) - def test_attention_mask(self): feat_dict = self.feat_extract_dict feat_dict["return_attention_mask"] = True diff --git a/tests/test_tokenization_common.py b/tests/test_tokenization_common.py index b1749f281e6..b18fa36f095 100644 --- a/tests/test_tokenization_common.py +++ b/tests/test_tokenization_common.py @@ -53,7 +53,6 @@ from 
transformers.testing_utils import ( get_tests_dir, require_jinja, require_read_token, - require_tf, require_tokenizers, require_torch, run_test_in_subprocess, @@ -3106,40 +3105,6 @@ class TokenizerTesterMixin: # model(**encoded_sequence_fast) # model(**batch_encoded_sequence_fast) - @require_tf - @slow - def test_tf_encode_plus_sent_to_model(self): - from transformers import TF_MODEL_MAPPING, TOKENIZER_MAPPING - - MODEL_TOKENIZER_MAPPING = merge_model_tokenizer_mappings(TF_MODEL_MAPPING, TOKENIZER_MAPPING) - - tokenizers = self.get_tokenizers(do_lower_case=False) - for tokenizer in tokenizers: - with self.subTest(f"{tokenizer.__class__.__name__}"): - if tokenizer.__class__ not in MODEL_TOKENIZER_MAPPING: - self.skipTest(f"{tokenizer.__class__.__name__} is not in the MODEL_TOKENIZER_MAPPING") - - config_class, model_class = MODEL_TOKENIZER_MAPPING[tokenizer.__class__] - config = config_class() - - if config.is_encoder_decoder or config.pad_token_id is None: - self.skipTest(reason="Model is not an encoder-decoder model or has no set pad token id") - - model = model_class(config) - - # Make sure the model contains at least the full vocabulary size in its embedding matrix - self.assertGreaterEqual(model.config.vocab_size, len(tokenizer)) - - # Build sequence - first_ten_tokens = list(tokenizer.get_vocab().keys())[:10] - sequence = " ".join(first_ten_tokens) - encoded_sequence = tokenizer.encode_plus(sequence, return_tensors="tf") - batch_encoded_sequence = tokenizer.batch_encode_plus([sequence, sequence], return_tensors="tf") - - # This should not fail - model(encoded_sequence) - model(batch_encoded_sequence) - # TODO: Check if require_torch is the best to test for numpy here ... Maybe move to require_flax when available @require_torch @slow diff --git a/tests/tokenization/test_tokenization_utils.py b/tests/tokenization/test_tokenization_utils.py index ce70863c345..0a2960672c3 100644 --- a/tests/tokenization/test_tokenization_utils.py +++ b/tests/tokenization/test_tokenization_utils.py @@ -39,7 +39,6 @@ from transformers.testing_utils import ( CaptureStderr, require_flax, require_sentencepiece, - require_tf, require_tokenizers, require_torch, slow, @@ -121,27 +120,6 @@ class TokenizerUtilsTest(unittest.TestCase): tokenizer_r("Small example to encode", return_tensors=TensorType.NUMPY), np.array_equal ) - @require_tf - @require_tokenizers - def test_batch_encoding_pickle_tf(self): - import tensorflow as tf - - def tf_array_equals(t1, t2): - return tf.reduce_all(tf.equal(t1, t2)) - - tokenizer_p = BertTokenizer.from_pretrained("google-bert/bert-base-cased") - tokenizer_r = BertTokenizerFast.from_pretrained("google-bert/bert-base-cased") - - with self.subTest("BatchEncoding (Python, return_tensors=TENSORFLOW)"): - self.assert_dump_and_restore( - tokenizer_p("Small example to encode", return_tensors=TensorType.TENSORFLOW), tf_array_equals - ) - - with self.subTest("BatchEncoding (Rust, return_tensors=TENSORFLOW)"): - self.assert_dump_and_restore( - tokenizer_r("Small example to encode", return_tensors=TensorType.TENSORFLOW), tf_array_equals - ) - @require_torch @require_tokenizers def test_batch_encoding_pickle_pt(self): @@ -211,22 +189,6 @@ class TokenizerUtilsTest(unittest.TestCase): self.assertEqual(tensor_batch["inputs"].shape, (1, 3)) self.assertEqual(tensor_batch["labels"].shape, (1,)) - @require_tf - def test_batch_encoding_with_labels_tf(self): - batch = BatchEncoding({"inputs": [[1, 2, 3], [4, 5, 6]], "labels": [0, 1]}) - tensor_batch = batch.convert_to_tensors(tensor_type="tf") - 
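The `BatchEncoding.convert_to_tensors` contract exercised by the deleted TF test stays covered for the remaining backends; here is a minimal NumPy sketch of the same assertions (shapes only, without the framework-specific warning check):

```python
from transformers import BatchEncoding

# Batched inputs convert to (batch, seq) / (batch,) shaped arrays.
batch = BatchEncoding({"inputs": [[1, 2, 3], [4, 5, 6]], "labels": [0, 1]})
tensor_batch = batch.convert_to_tensors(tensor_type="np")
assert tensor_batch["inputs"].shape == (2, 3)
assert tensor_batch["labels"].shape == (2,)

# A single un-batched example gains a leading batch axis on request.
single = BatchEncoding({"inputs": [1, 2, 3], "labels": 0})
tensor_single = single.convert_to_tensors(tensor_type="np", prepend_batch_axis=True)
assert tensor_single["inputs"].shape == (1, 3)
assert tensor_single["labels"].shape == (1,)
```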
self.assertEqual(tensor_batch["inputs"].shape, (2, 3)) - self.assertEqual(tensor_batch["labels"].shape, (2,)) - # test converting the converted - with CaptureStderr() as cs: - tensor_batch = batch.convert_to_tensors(tensor_type="tf") - self.assertFalse(len(cs.err), msg=f"should have no warning, but got {cs.err}") - - batch = BatchEncoding({"inputs": [1, 2, 3], "labels": 0}) - tensor_batch = batch.convert_to_tensors(tensor_type="tf", prepend_batch_axis=True) - self.assertEqual(tensor_batch["inputs"].shape, (1, 3)) - self.assertEqual(tensor_batch["labels"].shape, (1,)) - @require_flax def test_batch_encoding_with_labels_jax(self): batch = BatchEncoding({"inputs": [[1, 2, 3], [4, 5, 6]], "labels": [0, 1]}) @@ -381,20 +343,6 @@ class TokenizerUtilsTest(unittest.TestCase): self.assertTrue(isinstance(batch["input_ids"], torch.Tensor)) self.assertEqual(batch["input_ids"].tolist(), [[0, 1, 2, tokenizer.pad_token_id], [0, 1, 2, 3]]) - @require_tf - def test_padding_accepts_tensors_tf(self): - import tensorflow as tf - - features = [{"input_ids": tf.constant([0, 1, 2])}, {"input_ids": tf.constant([0, 1, 2, 3])}] - tokenizer = BertTokenizer.from_pretrained("google-bert/bert-base-cased") - - batch = tokenizer.pad(features, padding=True) - self.assertTrue(isinstance(batch["input_ids"], tf.Tensor)) - self.assertEqual(batch["input_ids"].numpy().tolist(), [[0, 1, 2, tokenizer.pad_token_id], [0, 1, 2, 3]]) - batch = tokenizer.pad(features, padding=True, return_tensors="tf") - self.assertTrue(isinstance(batch["input_ids"], tf.Tensor)) - self.assertEqual(batch["input_ids"].numpy().tolist(), [[0, 1, 2, tokenizer.pad_token_id], [0, 1, 2, 3]]) - @require_tokenizers def test_instantiation_from_tokenizers(self): bert_tokenizer = Tokenizer(WordPiece(unk_token="[UNK]")) diff --git a/tests/trainer/test_data_collator.py b/tests/trainer/test_data_collator.py index d4360c32c90..d25aa7ceba9 100644 --- a/tests/trainer/test_data_collator.py +++ b/tests/trainer/test_data_collator.py @@ -29,20 +29,16 @@ from transformers import ( DataCollatorWithFlattening, DataCollatorWithPadding, default_data_collator, - is_tf_available, is_torch_available, set_seed, ) -from transformers.testing_utils import require_tf, require_torch +from transformers.testing_utils import require_torch from transformers.utils import PaddingStrategy if is_torch_available(): import torch -if is_tf_available(): - import tensorflow as tf - @require_torch class DataCollatorIntegrationTest(unittest.TestCase): @@ -1022,795 +1018,6 @@ class DataCollatorImmutabilityTest(unittest.TestCase): ) -@require_tf -class TFDataCollatorIntegrationTest(unittest.TestCase): - def setUp(self): - super().setUp() - self.tmpdirname = tempfile.mkdtemp() - - vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"] - self.vocab_file = os.path.join(self.tmpdirname, "vocab.txt") - with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer: - vocab_writer.write("".join([x + "\n" for x in vocab_tokens])) - - def tearDown(self): - shutil.rmtree(self.tmpdirname) - - def test_default_with_dict(self): - features = [{"label": i, "inputs": [0, 1, 2, 3, 4, 5]} for i in range(8)] - batch = default_data_collator(features, return_tensors="tf") - self.assertEqual(batch["labels"].numpy().tolist(), list(range(8))) - self.assertEqual(batch["labels"].dtype, tf.int64) - self.assertEqual(batch["inputs"].shape.as_list(), [8, 6]) - - # With label_ids - features = [{"label_ids": [0, 1, 2], "inputs": [0, 1, 2, 3, 4, 5]} for i in range(8)] - batch = default_data_collator(features, 
return_tensors="tf") - self.assertEqual(batch["labels"].numpy().tolist(), ([[0, 1, 2]] * 8)) - self.assertEqual(batch["labels"].dtype, tf.int64) - self.assertEqual(batch["inputs"].shape.as_list(), [8, 6]) - - # Features can already be tensors - features = [{"label": i, "inputs": np.random.randint(0, 10, [10])} for i in range(8)] - batch = default_data_collator(features, return_tensors="tf") - self.assertEqual(batch["labels"].numpy().tolist(), (list(range(8)))) - self.assertEqual(batch["labels"].dtype, tf.int64) - self.assertEqual(batch["inputs"].shape.as_list(), [8, 10]) - - # Labels can already be tensors - features = [{"label": np.array(i), "inputs": np.random.randint(0, 10, [10])} for i in range(8)] - batch = default_data_collator(features, return_tensors="tf") - self.assertEqual(batch["labels"].dtype, tf.int64) - self.assertEqual(batch["labels"].numpy().tolist(), list(range(8))) - self.assertEqual(batch["labels"].dtype, tf.int64) - self.assertEqual(batch["inputs"].shape.as_list(), [8, 10]) - - def test_numpy_dtype_preservation(self): - data_collator = default_data_collator - - # Confirms that numpy inputs are handled correctly even when scalars - features = [{"input_ids": np.array([0, 1, 2, 3, 4]), "label": np.int64(i)} for i in range(4)] - batch = data_collator(features, return_tensors="tf") - self.assertEqual(batch["labels"].dtype, tf.int64) - - def test_default_classification_and_regression(self): - data_collator = default_data_collator - - features = [{"input_ids": [0, 1, 2, 3, 4], "label": i} for i in range(4)] - batch = data_collator(features, return_tensors="tf") - self.assertEqual(batch["labels"].dtype, tf.int64) - - features = [{"input_ids": [0, 1, 2, 3, 4], "label": float(i)} for i in range(4)] - batch = data_collator(features, return_tensors="tf") - self.assertEqual(batch["labels"].dtype, tf.float32) - - def test_default_with_no_labels(self): - features = [{"label": None, "inputs": [0, 1, 2, 3, 4, 5]} for i in range(8)] - batch = default_data_collator(features, return_tensors="tf") - self.assertTrue("labels" not in batch) - self.assertEqual(batch["inputs"].shape.as_list(), [8, 6]) - - # With label_ids - features = [{"label_ids": None, "inputs": [0, 1, 2, 3, 4, 5]} for i in range(8)] - batch = default_data_collator(features, return_tensors="tf") - self.assertTrue("labels" not in batch) - self.assertEqual(batch["inputs"].shape.as_list(), [8, 6]) - - def test_data_collator_with_padding(self): - tokenizer = BertTokenizer(self.vocab_file) - features = [{"input_ids": [0, 1, 2]}, {"input_ids": [0, 1, 2, 3, 4, 5]}] - - data_collator = DataCollatorWithPadding(tokenizer, return_tensors="tf") - batch = data_collator(features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 6]) - self.assertEqual(batch["input_ids"][0].numpy().tolist(), [0, 1, 2] + [tokenizer.pad_token_id] * 3) - - data_collator = DataCollatorWithPadding(tokenizer, padding="max_length", max_length=10, return_tensors="tf") - batch = data_collator(features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 10]) - - data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8, return_tensors="tf") - batch = data_collator(features) - self.assertEqual(batch["input_ids"].shape, [2, 8]) - - def test_data_collator_for_token_classification(self): - tokenizer = BertTokenizer(self.vocab_file) - features = [ - {"input_ids": [0, 1, 2], "labels": [0, 1, 2]}, - {"input_ids": [0, 1, 2, 3, 4, 5], "labels": [0, 1, 2, 3, 4, 5]}, - ] - - data_collator = DataCollatorForTokenClassification(tokenizer, 
return_tensors="tf") - batch = data_collator(features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 6]) - self.assertEqual(batch["input_ids"][0].numpy().tolist(), [0, 1, 2] + [tokenizer.pad_token_id] * 3) - self.assertEqual(batch["labels"].shape.as_list(), [2, 6]) - self.assertEqual(batch["labels"][0].numpy().tolist(), [0, 1, 2] + [-100] * 3) - - data_collator = DataCollatorForTokenClassification( - tokenizer, padding="max_length", max_length=10, return_tensors="tf" - ) - batch = data_collator(features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 10]) - self.assertEqual(batch["labels"].shape.as_list(), [2, 10]) - - data_collator = DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=8, return_tensors="tf") - batch = data_collator(features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 8]) - self.assertEqual(batch["labels"].shape.as_list(), [2, 8]) - - data_collator = DataCollatorForTokenClassification(tokenizer, label_pad_token_id=-1, return_tensors="tf") - batch = data_collator(features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 6]) - self.assertEqual(batch["input_ids"][0].numpy().tolist(), [0, 1, 2] + [tokenizer.pad_token_id] * 3) - self.assertEqual(batch["labels"].shape.as_list(), [2, 6]) - self.assertEqual(batch["labels"][0].numpy().tolist(), [0, 1, 2] + [-1] * 3) - - def test_data_collator_for_seq2seq(self): - def create_features(): - return [ - {"input_ids": list(range(3)), "labels": list(range(3))}, - {"input_ids": list(range(6)), "labels": list(range(6))}, - ] - - tokenizer = BertTokenizer(self.vocab_file) - features = create_features() - - data_collator = DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.LONGEST, return_tensors="tf") - batch = data_collator(features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 6]) - self.assertEqual(batch["input_ids"][0].numpy().tolist(), list(range(3)) + [tokenizer.pad_token_id] * 3) - self.assertEqual(batch["input_ids"][1].numpy().tolist(), list(range(6))) - self.assertEqual(batch["labels"].shape.as_list(), [2, 6]) - self.assertEqual(batch["labels"][0].numpy().tolist(), list(range(3)) + [-100] * 3) - self.assertEqual(batch["labels"][1].numpy().tolist(), list(range(6))) - - data_collator = DataCollatorForSeq2Seq( - tokenizer, padding=PaddingStrategy.MAX_LENGTH, max_length=7, return_tensors="tf" - ) - batch = data_collator(features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 7]) - self.assertEqual(batch["input_ids"][0].numpy().tolist(), list(range(3)) + [tokenizer.pad_token_id] * 4) - self.assertEqual(batch["input_ids"][1].numpy().tolist(), list(range(6)) + [tokenizer.pad_token_id] * 1) - self.assertEqual(batch["labels"].shape.as_list(), [2, 7]) - self.assertEqual(batch["labels"][0].numpy().tolist(), list(range(3)) + [-100] * 4) - self.assertEqual(batch["labels"][1].numpy().tolist(), list(range(6)) + [-100] * 1) - - data_collator = DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.DO_NOT_PAD, return_tensors="tf") - with self.assertRaises(ValueError): - # expects an error due to unequal shapes to create tensor - data_collator(features) - batch = data_collator([features[0], features[0]]) - self.assertEqual(batch["input_ids"][0].numpy().tolist(), features[0]["input_ids"]) - self.assertEqual(batch["input_ids"][1].numpy().tolist(), features[0]["input_ids"]) - self.assertEqual(batch["labels"][0].numpy().tolist(), features[0]["labels"]) - self.assertEqual(batch["labels"][1].numpy().tolist(), features[0]["labels"]) - - data_collator = 
DataCollatorForSeq2Seq( - tokenizer, padding=PaddingStrategy.LONGEST, pad_to_multiple_of=8, return_tensors="tf" - ) - batch = data_collator(features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 8]) - self.assertEqual(batch["labels"].shape.as_list(), [2, 8]) - - # side effects on labels cause mismatch on longest strategy - features = create_features() - - data_collator = DataCollatorForSeq2Seq( - tokenizer, padding=PaddingStrategy.LONGEST, label_pad_token_id=-1, return_tensors="tf" - ) - batch = data_collator(features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 6]) - self.assertEqual(batch["input_ids"][0].numpy().tolist(), list(range(3)) + [tokenizer.pad_token_id] * 3) - self.assertEqual(batch["input_ids"][1].numpy().tolist(), list(range(6))) - self.assertEqual(batch["labels"].shape.as_list(), [2, 6]) - self.assertEqual(batch["labels"][0].numpy().tolist(), list(range(3)) + [-1] * 3) - self.assertEqual(batch["labels"][1].numpy().tolist(), list(range(6))) - - for feature in features: - feature.pop("labels") - - batch = data_collator(features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 6]) - self.assertEqual(batch["input_ids"][0].numpy().tolist(), list(range(3)) + [tokenizer.pad_token_id] * 3) - - def _test_no_pad_and_pad(self, no_pad_features, pad_features): - tokenizer = BertTokenizer(self.vocab_file) - data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False, return_tensors="tf") - batch = data_collator(no_pad_features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 10]) - self.assertEqual(batch["labels"].shape.as_list(), [2, 10]) - - batch = data_collator(pad_features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 10]) - self.assertEqual(batch["labels"].shape.as_list(), [2, 10]) - - data_collator = DataCollatorForLanguageModeling( - tokenizer, mlm=False, pad_to_multiple_of=8, return_tensors="tf" - ) - batch = data_collator(no_pad_features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 16]) - self.assertEqual(batch["labels"].shape.as_list(), [2, 16]) - - batch = data_collator(pad_features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 16]) - self.assertEqual(batch["labels"].shape.as_list(), [2, 16]) - - tokenizer.pad_token = None - data_collator = DataCollatorForLanguageModeling(tokenizer, mlm=False, return_tensors="tf") - with self.assertRaises(ValueError): - # Expect error due to padding token missing - data_collator(pad_features) - - set_seed(42) # For reproducibility - tokenizer = BertTokenizer(self.vocab_file) - data_collator = DataCollatorForLanguageModeling(tokenizer, return_tensors="tf") - batch = data_collator(no_pad_features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 10]) - self.assertEqual(batch["labels"].shape.as_list(), [2, 10]) - - masked_tokens = batch["input_ids"] == tokenizer.mask_token_id - self.assertTrue(tf.reduce_any(masked_tokens)) - # self.assertTrue(all(x == -100 for x in batch["labels"].numpy()[~masked_tokens.numpy()].tolist())) - - batch = data_collator(pad_features, return_tensors="tf") - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 10]) - self.assertEqual(batch["labels"].shape.as_list(), [2, 10]) - - masked_tokens = batch["input_ids"] == tokenizer.mask_token_id - self.assertTrue(tf.reduce_any(masked_tokens)) - # self.assertTrue(all(x == -100 for x in batch["labels"].numpy()[~masked_tokens.numpy()].tolist())) - - data_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8, return_tensors="tf") - batch = 
data_collator(no_pad_features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 16]) - self.assertEqual(batch["labels"].shape.as_list(), [2, 16]) - - masked_tokens = batch["input_ids"] == tokenizer.mask_token_id - self.assertTrue(tf.reduce_any(masked_tokens)) - # self.assertTrue(all(x == -100 for x in batch["labels"].numpy()[~masked_tokens.numpy()].tolist())) - - batch = data_collator(pad_features, return_tensors="tf") - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 16]) - self.assertEqual(batch["labels"].shape.as_list(), [2, 16]) - - masked_tokens = batch["input_ids"] == tokenizer.mask_token_id - self.assertTrue(tf.reduce_any(masked_tokens)) - # self.assertTrue(all(x == -100 for x in batch["labels"].numpy()[~masked_tokens.numpy()].tolist())) - - def test_probability_sum_error(self): - """Test that the sum of mask_replace_prob and random_replace_prob exceeding 1 raises an error.""" - tokenizer = BertTokenizer(self.vocab_file) - with self.assertRaises(ValueError): - DataCollatorForLanguageModeling(tokenizer=tokenizer, mask_replace_prob=0.9, random_replace_prob=0.2) - - def test_all_mask_replacement(self): - """Test behavior when mask_replace_prob=1.""" - tokenizer = BertTokenizer(self.vocab_file) - - # pytorch call - collator = DataCollatorForLanguageModeling( - tokenizer=tokenizer, mask_replace_prob=1, random_replace_prob=0, return_tensors="pt" - ) - - inputs = torch.tensor([0, 1, 2, 3, 4, 5]) - features = [{"input_ids": inputs} for _ in range(8)] - batch = collator(features) - - # confirm that every token is either the original token or [MASK] - self.assertTrue(torch.all((batch["input_ids"] == inputs) | (batch["input_ids"] == tokenizer.mask_token_id))) - - # tf call - collator = DataCollatorForLanguageModeling( - tokenizer=tokenizer, mask_replace_prob=1, random_replace_prob=0, return_tensors="tf" - ) - inputs = tf.constant([0, 1, 2, 3, 4, 5]) - features = [{"input_ids": inputs} for _ in range(8)] - batch = collator(features) - - # confirm that every token is either the original token or [MASK] - self.assertTrue( - tf.reduce_all( - (batch["input_ids"] == tf.cast(inputs, tf.int64)) | (batch["input_ids"] == tokenizer.mask_token_id) - ) - ) - - # numpy call - collator = DataCollatorForLanguageModeling( - tokenizer=tokenizer, mask_replace_prob=1, random_replace_prob=0, return_tensors="np" - ) - inputs = np.array([0, 1, 2, 3, 4, 5]) - features = [{"input_ids": inputs} for _ in range(8)] - batch = collator(features) - - # confirm that every token is either the original token or [MASK] - self.assertTrue(np.all((batch["input_ids"] == inputs) | (batch["input_ids"] == tokenizer.mask_token_id))) - - def test_data_collator_for_language_modeling(self): - no_pad_features = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}] - pad_features = [{"input_ids": list(range(5))}, {"input_ids": list(range(10))}] - self._test_no_pad_and_pad(no_pad_features, pad_features) - - no_pad_features = [list(range(10)), list(range(10))] - pad_features = [list(range(5)), list(range(10))] - self._test_no_pad_and_pad(no_pad_features, pad_features) - - def test_data_collator_for_language_modeling_with_seed(self): - tokenizer = BertTokenizer(self.vocab_file) - features = [{"input_ids": list(range(1000))}, {"input_ids": list(range(1000))}] - - # check if seed is respected between two different DataCollatorForLanguageModeling instances - data_collator = DataCollatorForLanguageModeling(tokenizer, seed=42, return_tensors="tf") - batch_1 = data_collator(features) - 
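The seeding behavior the deleted TF variant checks is not TF-specific; a minimal sketch with the NumPy return type, assuming `seed` is honored identically across `return_tensors` values (as the parallel PyTorch tests in this file do):

```python
import numpy as np
from transformers import BertTokenizer, DataCollatorForLanguageModeling

# Any tokenizer with a [MASK] token works here; this checkpoint is just an example.
tokenizer = BertTokenizer.from_pretrained("google-bert/bert-base-cased")
features = [{"input_ids": list(range(1000))}, {"input_ids": list(range(1000))}]

# Two collators built with the same seed must mask the same positions.
collator_a = DataCollatorForLanguageModeling(tokenizer, seed=42, return_tensors="np")
collator_b = DataCollatorForLanguageModeling(tokenizer, seed=42, return_tensors="np")
batch_1 = collator_a(features)
batch_2 = collator_b(features)
assert np.all(batch_1["input_ids"] == batch_2["input_ids"])

# A different seed should produce a different masking pattern.
collator_c = DataCollatorForLanguageModeling(tokenizer, seed=43, return_tensors="np")
batch_3 = collator_c(features)
assert not np.all(batch_1["input_ids"] == batch_3["input_ids"])
```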
self.assertEqual(batch_1["input_ids"].shape.as_list(), [2, 1000]) - self.assertEqual(batch_1["labels"].shape.as_list(), [2, 1000]) - - data_collator = DataCollatorForLanguageModeling(tokenizer, seed=42, return_tensors="tf") - batch_2 = data_collator(features) - self.assertEqual(batch_2["input_ids"].shape.as_list(), [2, 1000]) - self.assertEqual(batch_2["labels"].shape.as_list(), [2, 1000]) - - self.assertTrue(np.all(batch_1["input_ids"] == batch_2["input_ids"])) - self.assertTrue(np.all(batch_1["labels"] == batch_2["labels"])) - - # try with different seed - data_collator = DataCollatorForLanguageModeling(tokenizer, seed=43, return_tensors="tf") - batch_3 = data_collator(features) - self.assertEqual(batch_3["input_ids"].shape.as_list(), [2, 1000]) - self.assertEqual(batch_3["labels"].shape.as_list(), [2, 1000]) - - self.assertFalse(np.all(batch_1["input_ids"] == batch_3["input_ids"])) - self.assertFalse(np.all(batch_1["labels"] == batch_3["labels"])) - - def test_data_collator_for_whole_word_mask(self): - tokenizer = BertTokenizer(self.vocab_file) - data_collator = DataCollatorForWholeWordMask(tokenizer, return_tensors="tf") - - features = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}] - batch = data_collator(features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 10]) - self.assertEqual(batch["labels"].shape.as_list(), [2, 10]) - - # Features can already be tensors - features = [{"input_ids": np.arange(10)}, {"input_ids": np.arange(10)}] - batch = data_collator(features) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 10]) - self.assertEqual(batch["labels"].shape.as_list(), [2, 10]) - - def test_data_collator_for_whole_word_mask_with_seed(self): - tokenizer = BertTokenizer(self.vocab_file) - features = [{"input_ids": list(range(1000))}, {"input_ids": list(range(1000))}] - - # check if seed is respected between two different DataCollatorForWholeWordMask instances - data_collator = DataCollatorForWholeWordMask(tokenizer, seed=42, return_tensors="tf") - batch_1 = data_collator(features) - self.assertEqual(batch_1["input_ids"].shape.as_list(), [2, 1000]) - self.assertEqual(batch_1["labels"].shape.as_list(), [2, 1000]) - - data_collator = DataCollatorForWholeWordMask(tokenizer, seed=42, return_tensors="tf") - batch_2 = data_collator(features) - self.assertEqual(batch_2["input_ids"].shape.as_list(), [2, 1000]) - self.assertEqual(batch_2["labels"].shape.as_list(), [2, 1000]) - - self.assertTrue(np.all(batch_1["input_ids"] == batch_2["input_ids"])) - self.assertTrue(np.all(batch_1["labels"] == batch_2["labels"])) - - # try with different seed - data_collator = DataCollatorForWholeWordMask(tokenizer, seed=43, return_tensors="tf") - batch_3 = data_collator(features) - self.assertEqual(batch_3["input_ids"].shape.as_list(), [2, 1000]) - self.assertEqual(batch_3["labels"].shape.as_list(), [2, 1000]) - - self.assertFalse(np.all(batch_1["input_ids"] == batch_3["input_ids"])) - self.assertFalse(np.all(batch_1["labels"] == batch_3["labels"])) - - def test_plm(self): - tokenizer = BertTokenizer(self.vocab_file) - no_pad_features = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}] - pad_features = [{"input_ids": list(range(5))}, {"input_ids": list(range(10))}] - - data_collator = DataCollatorForPermutationLanguageModeling(tokenizer, return_tensors="tf") - - batch = data_collator(pad_features) - self.assertIsInstance(batch, dict) - self.assertEqual(batch["input_ids"].shape.as_list(), [2, 10]) - self.assertEqual(batch["perm_mask"].shape.as_list(), [2, 
10, 10])
-        self.assertEqual(batch["target_mapping"].shape.as_list(), [2, 10, 10])
-        self.assertEqual(batch["labels"].shape.as_list(), [2, 10])
-
-        batch = data_collator(no_pad_features)
-        self.assertIsInstance(batch, dict)
-        self.assertEqual(batch["input_ids"].shape.as_list(), [2, 10])
-        self.assertEqual(batch["perm_mask"].shape.as_list(), [2, 10, 10])
-        self.assertEqual(batch["target_mapping"].shape.as_list(), [2, 10, 10])
-        self.assertEqual(batch["labels"].shape.as_list(), [2, 10])
-
-        example = [np.random.randint(0, 5, [5])]
-        with self.assertRaises(ValueError):
-            # Expect error due to odd sequence length
-            data_collator(example)
-
-    def test_nsp(self):
-        tokenizer = BertTokenizer(self.vocab_file)
-        features = [
-            {"input_ids": [0, 1, 2, 3, 4], "token_type_ids": [0, 1, 2, 3, 4], "next_sentence_label": i}
-            for i in range(2)
-        ]
-        data_collator = DataCollatorForLanguageModeling(tokenizer, return_tensors="tf")
-        batch = data_collator(features)
-
-        self.assertEqual(batch["input_ids"].shape.as_list(), [2, 5])
-        self.assertEqual(batch["token_type_ids"].shape.as_list(), [2, 5])
-        self.assertEqual(batch["labels"].shape.as_list(), [2, 5])
-        self.assertEqual(batch["next_sentence_label"].shape.as_list(), [2])
-
-        data_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8, return_tensors="tf")
-        batch = data_collator(features)
-
-        self.assertEqual(batch["input_ids"].shape.as_list(), [2, 8])
-        self.assertEqual(batch["token_type_ids"].shape.as_list(), [2, 8])
-        self.assertEqual(batch["labels"].shape.as_list(), [2, 8])
-        self.assertEqual(batch["next_sentence_label"].shape.as_list(), [2])
-
-    def test_sop(self):
-        tokenizer = BertTokenizer(self.vocab_file)
-        features = [
-            {
-                "input_ids": tf.convert_to_tensor([0, 1, 2, 3, 4]),
-                "token_type_ids": tf.convert_to_tensor([0, 1, 2, 3, 4]),
-                "sentence_order_label": i,
-            }
-            for i in range(2)
-        ]
-        data_collator = DataCollatorForLanguageModeling(tokenizer, return_tensors="tf")
-        batch = data_collator(features)
-
-        self.assertEqual(batch["input_ids"].shape.as_list(), [2, 5])
-        self.assertEqual(batch["token_type_ids"].shape.as_list(), [2, 5])
-        self.assertEqual(batch["labels"].shape.as_list(), [2, 5])
-        self.assertEqual(batch["sentence_order_label"].shape.as_list(), [2])
-
-        data_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8, return_tensors="tf")
-        batch = data_collator(features)
-
-        self.assertEqual(batch["input_ids"].shape.as_list(), [2, 8])
-        self.assertEqual(batch["token_type_ids"].shape.as_list(), [2, 8])
-        self.assertEqual(batch["labels"].shape.as_list(), [2, 8])
-        self.assertEqual(batch["sentence_order_label"].shape.as_list(), [2])
-
-
-@require_tf
-class TFDataCollatorImmutabilityTest(unittest.TestCase):
-    def setUp(self):
-        self.tmpdirname = tempfile.mkdtemp()
-
-        vocab_tokens = ["[UNK]", "[CLS]", "[SEP]", "[PAD]", "[MASK]"]
-        self.vocab_file = os.path.join(self.tmpdirname, "vocab.txt")
-        with open(self.vocab_file, "w", encoding="utf-8") as vocab_writer:
-            vocab_writer.write("".join([x + "\n" for x in vocab_tokens]))
-
-    def tearDown(self):
-        shutil.rmtree(self.tmpdirname)
-
-    def _turn_to_none(self, item):
-        """used to convert `item` to `None` type"""
-        return None
-
-    def _validate_original_data_against_collated_data(self, collator, original_data, batch_data):
-        # we only care about side effects, the results are tested elsewhere
-        collator(batch_data)
-
-        # we go through every item and convert to `primitive` datatypes if necessary
-        # then compares for equivalence for the original data and the data that has been passed through the collator
-        for original, batch in zip(original_data, batch_data):
-            for original_val, batch_val in zip(original.values(), batch.values()):
-                if isinstance(original_val, np.ndarray):
-                    self.assertEqual(original_val.tolist(), batch_val.tolist())
-                elif isinstance(original_val, tf.Tensor):
-                    self.assertEqual(original_val.numpy().tolist(), batch_val.numpy().tolist())
-                else:
-                    self.assertEqual(original_val, batch_val)
-
-    def _validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
-        self, collator, base_data, input_key, input_datatype, label_key, label_datatype, ignore_label=False
-    ):
-        # using the arguments to recreate the features with their respective (potentially new) datatypes
-        features_original = [
-            {label_key: label_datatype(sample[label_key]), input_key: input_datatype(sample[input_key])}
-            for sample in base_data
-        ]
-        features_batch = [
-            {label_key: label_datatype(sample[label_key]), input_key: input_datatype(sample[input_key])}
-            for sample in base_data
-        ]
-
-        # some collators do not use labels, or sometimes we want to check if the collator with labels can handle such cases
-        if ignore_label:
-            for original, batch in zip(features_original, features_batch):
-                original.pop(label_key)
-                batch.pop(label_key)
-
-        self._validate_original_data_against_collated_data(
-            collator=collator, original_data=features_original, batch_data=features_batch
-        )
-
-    def test_default_collator_immutability(self):
-        features_base_single_label = [{"label": i, "inputs": (0, 1, 2, 3, 4, 5)} for i in range(4)]
-        features_base_multiple_labels = [{"label": (0, 1, 2), "inputs": (0, 1, 2, 3, 4, 5)} for i in range(4)]
-
-        for datatype_input, datatype_label in [
-            (list, int),
-            (list, float),
-            (np.array, int),
-            (np.array, tf.constant),
-            (list, self._turn_to_none),
-        ]:
-            self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
-                collator=lambda x: default_data_collator(x, return_tensors="tf"),
-                base_data=features_base_single_label,
-                input_key="inputs",
-                input_datatype=datatype_input,
-                label_key="label",
-                label_datatype=datatype_label,
-            )
-
-        for datatype_input, datatype_label in [(list, list), (list, self._turn_to_none)]:
-            self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
-                collator=lambda x: default_data_collator(x, return_tensors="tf"),
-                base_data=features_base_multiple_labels,
-                input_key="inputs",
-                input_datatype=datatype_input,
-                label_key="label",
-                label_datatype=datatype_label,
-            )
-
-        features_base_single_label_alt = [{"input_ids": (0, 1, 2, 3, 4), "label": float(i)} for i in range(4)]
-        self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
-            collator=lambda x: default_data_collator(x, return_tensors="tf"),
-            base_data=features_base_single_label_alt,
-            input_key="input_ids",
-            input_datatype=list,
-            label_key="label",
-            label_datatype=float,
-        )
-
-    def test_with_padding_collator_immutability(self):
-        tokenizer = BertTokenizer(self.vocab_file)
-
-        features_original = [{"input_ids": [0, 1, 2]}, {"input_ids": [0, 1, 2, 3, 4, 5]}]
-        features_batch = [{"input_ids": [0, 1, 2]}, {"input_ids": [0, 1, 2, 3, 4, 5]}]
-
-        data_collator = DataCollatorWithPadding(tokenizer, padding="max_length", max_length=10, return_tensors="tf")
-        self._validate_original_data_against_collated_data(
-            collator=data_collator, original_data=features_original, batch_data=features_batch
-        )
-
-        data_collator = DataCollatorWithPadding(tokenizer, pad_to_multiple_of=8, return_tensors="tf")
-        self._validate_original_data_against_collated_data(
-            collator=data_collator, original_data=features_original, batch_data=features_batch
-        )
-
-    def test_for_token_classification_collator_immutability(self):
-        tokenizer = BertTokenizer(self.vocab_file)
-
-        features_base = [
-            {"input_ids": (0, 1, 2), "labels": (0, 1, 2)},
-            {"input_ids": (0, 1, 2, 3, 4, 5), "labels": (0, 1, 2, 3, 4, 5)},
-        ]
-        token_classification_collators = [
-            DataCollatorForTokenClassification(tokenizer, return_tensors="tf"),
-            DataCollatorForTokenClassification(tokenizer, padding="max_length", max_length=10, return_tensors="tf"),
-            DataCollatorForTokenClassification(tokenizer, pad_to_multiple_of=8, return_tensors="tf"),
-            DataCollatorForTokenClassification(tokenizer, label_pad_token_id=-1, return_tensors="tf"),
-        ]
-
-        for datatype_input, datatype_label in [(list, list)]:
-            for collator in token_classification_collators:
-                self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
-                    collator=collator,
-                    base_data=features_base,
-                    input_key="input_ids",
-                    input_datatype=datatype_input,
-                    label_key="labels",
-                    label_datatype=datatype_label,
-                )
-
-            self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
-                collator=token_classification_collators[-1],
-                base_data=features_base,
-                input_key="input_ids",
-                input_datatype=datatype_input,
-                label_key="labels",
-                label_datatype=datatype_label,
-                ignore_label=True,
-            )
-
-    def test_seq2seq_collator_immutability(self):
-        tokenizer = BertTokenizer(self.vocab_file)
-
-        features_base = [
-            {"input_ids": list(range(3)), "labels": list(range(3))},
-            {"input_ids": list(range(6)), "labels": list(range(6))},
-        ]
-        seq2seq_collators = [
-            DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.LONGEST, return_tensors="tf"),
-            DataCollatorForSeq2Seq(tokenizer, padding=PaddingStrategy.MAX_LENGTH, max_length=7, return_tensors="tf"),
-            DataCollatorForSeq2Seq(
-                tokenizer, padding=PaddingStrategy.LONGEST, pad_to_multiple_of=8, return_tensors="tf"
-            ),
-            DataCollatorForSeq2Seq(
-                tokenizer, padding=PaddingStrategy.LONGEST, label_pad_token_id=-1, return_tensors="tf"
-            ),
-        ]
-
-        for datatype_input, datatype_label in [(list, list)]:
-            for collator in seq2seq_collators:
-                self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
-                    collator=collator,
-                    base_data=features_base,
-                    input_key="input_ids",
-                    input_datatype=datatype_input,
-                    label_key="labels",
-                    label_datatype=datatype_label,
-                )
-
-            self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
-                collator=seq2seq_collators[-1],
-                base_data=features_base,
-                input_key="input_ids",
-                input_datatype=datatype_input,
-                label_key="labels",
-                label_datatype=datatype_label,
-                ignore_label=True,
-            )
-
-        features_base_no_pad = [
-            {"input_ids": list(range(3)), "labels": list(range(3))},
-            {"input_ids": list(range(3)), "labels": list(range(3))},
-        ]
-        seq2seq_no_padding_collator = DataCollatorForSeq2Seq(
-            tokenizer, padding=PaddingStrategy.DO_NOT_PAD, return_tensors="tf"
-        )
-        for datatype_input, datatype_label in [(list, list)]:
-            self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
-                collator=seq2seq_no_padding_collator,
-                base_data=features_base_no_pad,
-                input_key="input_ids",
-                input_datatype=datatype_input,
-                label_key="labels",
-                label_datatype=datatype_label,
-            )
-
-    def test_language_modelling_collator_immutability(self):
-        tokenizer = BertTokenizer(self.vocab_file)
-
-        features_base_no_pad = [
-            {"input_ids": tuple(range(10)), "labels": (1,)},
-            {"input_ids": tuple(range(10)), "labels": (1,)},
-        ]
-        features_base_pad = [
-            {"input_ids": tuple(range(5)), "labels": (1,)},
-            {"input_ids": tuple(range(5)), "labels": (1,)},
-        ]
-        lm_collators = [
-            DataCollatorForLanguageModeling(tokenizer, mlm=False, return_tensors="tf"),
-            DataCollatorForLanguageModeling(tokenizer, mlm=False, pad_to_multiple_of=8, return_tensors="tf"),
-            DataCollatorForLanguageModeling(tokenizer, return_tensors="tf"),
-            DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8, return_tensors="tf"),
-        ]
-
-        for datatype_input, datatype_label in [(list, list)]:
-            for collator in lm_collators:
-                self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
-                    collator=collator,
-                    base_data=features_base_no_pad,
-                    input_key="input_ids",
-                    input_datatype=datatype_input,
-                    label_key="labels",
-                    label_datatype=datatype_label,
-                    ignore_label=True,
-                )
-
-                self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
-                    collator=collator,
-                    base_data=features_base_pad,
-                    input_key="input_ids",
-                    input_datatype=datatype_input,
-                    label_key="labels",
-                    label_datatype=datatype_label,
-                    ignore_label=True,
-                )
-
-    def test_whole_world_masking_collator_immutability(self):
-        tokenizer = BertTokenizer(self.vocab_file)
-
-        features_base = [
-            {"input_ids": list(range(10)), "labels": (1,)},
-            {"input_ids": list(range(10)), "labels": (1,)},
-        ]
-        whole_word_masking_collator = DataCollatorForWholeWordMask(tokenizer, return_tensors="tf")
-
-        for datatype_input, datatype_label in [(list, list), (np.array, np.array)]:
-            self._validate_original_data_against_collated_data_on_specified_keys_and_datatypes(
-                collator=whole_word_masking_collator,
-                base_data=features_base,
-                input_key="input_ids",
-                input_datatype=datatype_input,
-                label_key="labels",
-                label_datatype=datatype_label,
-                ignore_label=True,
-            )
-
-    def test_permutation_language_modelling_collator_immutability(self):
-        tokenizer = BertTokenizer(self.vocab_file)
-
-        plm_collator = DataCollatorForPermutationLanguageModeling(tokenizer, return_tensors="tf")
-
-        no_pad_features_original = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}]
-        no_pad_features_batch = [{"input_ids": list(range(10))}, {"input_ids": list(range(10))}]
-        self._validate_original_data_against_collated_data(
-            collator=plm_collator, original_data=no_pad_features_original, batch_data=no_pad_features_batch
-        )
-
-        pad_features_original = [{"input_ids": list(range(5))}, {"input_ids": list(range(10))}]
-        pad_features_batch = [{"input_ids": list(range(5))}, {"input_ids": list(range(10))}]
-        self._validate_original_data_against_collated_data(
-            collator=plm_collator, original_data=pad_features_original, batch_data=pad_features_batch
-        )
-
-    def test_next_sentence_prediction_collator_immutability(self):
-        tokenizer = BertTokenizer(self.vocab_file)
-
-        features_original = [
-            {"input_ids": [0, 1, 2, 3, 4], "token_type_ids": [0, 1, 2, 3, 4], "next_sentence_label": i}
-            for i in range(2)
-        ]
-        features_batch = [
-            {"input_ids": [0, 1, 2, 3, 4], "token_type_ids": [0, 1, 2, 3, 4], "next_sentence_label": i}
-            for i in range(2)
-        ]
-
-        nsp_collator = DataCollatorForLanguageModeling(tokenizer, return_tensors="tf")
-        self._validate_original_data_against_collated_data(
-            collator=nsp_collator, original_data=features_original, batch_data=features_batch
-        )
-
-        nsp_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8, return_tensors="tf")
-        self._validate_original_data_against_collated_data(
-            collator=nsp_collator, original_data=features_original, batch_data=features_batch
-        )
-
-    def test_sentence_order_prediction_collator_immutability(self):
-        tokenizer = BertTokenizer(self.vocab_file)
-
-        features_original = [
-            {
-                "input_ids": tf.convert_to_tensor([0, 1, 2, 3, 4]),
-                "token_type_ids": tf.convert_to_tensor([0, 1, 2, 3, 4]),
-                "sentence_order_label": i,
-            }
-            for i in range(2)
-        ]
-        features_batch = [
-            {
-                "input_ids": tf.convert_to_tensor([0, 1, 2, 3, 4]),
-                "token_type_ids": tf.convert_to_tensor([0, 1, 2, 3, 4]),
-                "sentence_order_label": i,
-            }
-            for i in range(2)
-        ]
-
-        sop_collator = DataCollatorForLanguageModeling(tokenizer, return_tensors="tf")
-        self._validate_original_data_against_collated_data(
-            collator=sop_collator, original_data=features_original, batch_data=features_batch
-        )
-
-        sop_collator = DataCollatorForLanguageModeling(tokenizer, pad_to_multiple_of=8, return_tensors="tf")
-        self._validate_original_data_against_collated_data(
-            collator=sop_collator, original_data=features_original, batch_data=features_batch
-        )
-
-
 class NumpyDataCollatorIntegrationTest(unittest.TestCase):
     def setUp(self):
         self.tmpdirname = tempfile.mkdtemp()
diff --git a/tests/utils/test_activations_tf.py b/tests/utils/test_activations_tf.py
deleted file mode 100644
index 8d418d7fe3f..00000000000
--- a/tests/utils/test_activations_tf.py
+++ /dev/null
@@ -1,60 +0,0 @@
-# Copyright 2020 The HuggingFace Team. All rights reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-import unittest
-
-import numpy as np
-
-from transformers import is_tf_available
-from transformers.testing_utils import require_tf
-
-
-if is_tf_available():
-    import tensorflow as tf
-
-    from transformers.activations_tf import get_tf_activation
-
-
-@require_tf
-class TestTFActivations(unittest.TestCase):
-    def test_gelu_10(self):
-        x = tf.constant([-100, -1.0, -0.1, 0, 0.1, 1.0, 100.0])
-        gelu = get_tf_activation("gelu")
-        gelu10 = get_tf_activation("gelu_10")
-
-        y_gelu = gelu(x)
-        y_gelu_10 = gelu10(x)
-
-        clipped_mask = tf.where(y_gelu_10 < 10.0, 1.0, 0.0)
-
-        self.assertEqual(tf.math.reduce_max(y_gelu_10).numpy().item(), 10.0)
-        self.assertTrue(np.allclose(y_gelu * clipped_mask, y_gelu_10 * clipped_mask))
-
-    def test_get_activation(self):
-        get_tf_activation("gelu")
-        get_tf_activation("gelu_10")
-        get_tf_activation("gelu_fast")
-        get_tf_activation("gelu_new")
-        get_tf_activation("glu")
-        get_tf_activation("mish")
-        get_tf_activation("quick_gelu")
-        get_tf_activation("relu")
-        get_tf_activation("sigmoid")
-        get_tf_activation("silu")
-        get_tf_activation("swish")
-        get_tf_activation("tanh")
-        with self.assertRaises(KeyError):
-            get_tf_activation("bogus")
-        with self.assertRaises(KeyError):
-            get_tf_activation(None)
diff --git a/tests/utils/test_add_new_model_like.py b/tests/utils/test_add_new_model_like.py
index 875bf769746..725474291ca 100644
--- a/tests/utils/test_add_new_model_like.py
+++ b/tests/utils/test_add_new_model_like.py
@@ -36,7 +36,7 @@ from transformers.commands.add_new_model_like import (
     retrieve_model_classes,
     simplify_replacements,
 )
-from transformers.testing_utils import require_flax, require_tf, require_torch
+from transformers.testing_utils import require_flax, require_torch


 BERT_MODEL_FILES = {
@@ -84,7 +84,6 @@ REPO_PATH = Path(transformers.__path__[0]).parent.parent


 @require_torch
-@require_tf
 @require_flax
 class TestAddNewModelLike(unittest.TestCase):
     def init_file(self, file_name, content):
diff --git a/tests/utils/test_doc_samples.py b/tests/utils/test_doc_samples.py
index 4dd6b2bffe4..7a5150232c1 100644
--- a/tests/utils/test_doc_samples.py
+++ b/tests/utils/test_doc_samples.py
@@ -19,7 +19,7 @@ from pathlib import Path
 from typing import Union

 import transformers
-from transformers.testing_utils import require_tf, require_torch, slow
+from transformers.testing_utils import require_torch, slow


 logger = logging.getLogger()
@@ -27,7 +27,6 @@ logger = logging.getLogger()

 @unittest.skip(reason="Temporarily disable the doc tests.")
 @require_torch
-@require_tf
 @slow
 class TestCodeExamples(unittest.TestCase):
     def analyze_directory(
diff --git a/tests/utils/test_file_utils.py b/tests/utils/test_file_utils.py
index 1cbde0fb18c..162b327197b 100644
--- a/tests/utils/test_file_utils.py
+++ b/tests/utils/test_file_utils.py
@@ -21,16 +21,13 @@ import transformers

 # Try to import everything from transformers to ensure every object can be loaded.
 from transformers import *  # noqa F406
-from transformers.testing_utils import DUMMY_UNKNOWN_IDENTIFIER, require_flax, require_tf, require_torch
-from transformers.utils import ContextManagers, find_labels, is_flax_available, is_tf_available, is_torch_available
+from transformers.testing_utils import DUMMY_UNKNOWN_IDENTIFIER, require_flax, require_torch
+from transformers.utils import ContextManagers, find_labels, is_flax_available, is_torch_available


 if is_torch_available():
     from transformers import BertForPreTraining, BertForQuestionAnswering, BertForSequenceClassification

-if is_tf_available():
-    from transformers import TFBertForPreTraining, TFBertForQuestionAnswering, TFBertForSequenceClassification
-
 if is_flax_available():
     from transformers import FlaxBertForPreTraining, FlaxBertForQuestionAnswering, FlaxBertForSequenceClassification

@@ -107,18 +104,6 @@ class GenericUtilTests(unittest.TestCase):

         self.assertEqual(find_labels(DummyModel), ["labels"])

-    @require_tf
-    def test_find_labels_tf(self):
-        self.assertEqual(find_labels(TFBertForSequenceClassification), ["labels"])
-        self.assertEqual(find_labels(TFBertForPreTraining), ["labels", "next_sentence_label"])
-        self.assertEqual(find_labels(TFBertForQuestionAnswering), ["start_positions", "end_positions"])
-
-        # find_labels works regardless of the class name (it detects the framework through inheritance)
-        class DummyModel(TFBertForSequenceClassification):
-            pass
-
-        self.assertEqual(find_labels(DummyModel), ["labels"])
-
     @require_flax
     def test_find_labels_flax(self):
         # Flax models don't have labels
diff --git a/tests/utils/test_generic.py b/tests/utils/test_generic.py
index 85ac32d224f..a230da5dc33 100644
--- a/tests/utils/test_generic.py
+++ b/tests/utils/test_generic.py
@@ -19,14 +19,13 @@ import numpy as np

 from transformers.configuration_utils import PretrainedConfig
 from transformers.modeling_outputs import BaseModelOutput
-from transformers.testing_utils import require_flax, require_tf, require_torch
+from transformers.testing_utils import require_flax, require_torch
 from transformers.utils import (
     can_return_tuple,
     expand_dims,
     filter_out_non_signature_kwargs,
     flatten_dict,
     is_flax_available,
-    is_tf_available,
     is_torch_available,
     reshape,
     squeeze,
@@ -38,9 +37,6 @@ from transformers.utils import (
 if is_flax_available():
     import jax.numpy as jnp

-if is_tf_available():
-    import tensorflow as tf
-
 if is_torch_available():
     import torch

@@ -88,16 +84,6 @@ class GenericTester(unittest.TestCase):
         t = torch.tensor(x)
         self.assertTrue(np.allclose(transpose(x, axes=(1, 2, 0)), transpose(t, axes=(1, 2, 0)).numpy()))

-    @require_tf
-    def test_transpose_tf(self):
-        x = np.random.randn(3, 4)
-        t = tf.constant(x)
-        self.assertTrue(np.allclose(transpose(x), transpose(t).numpy()))
-
-        x = np.random.randn(3, 4, 5)
-        t = tf.constant(x)
-        self.assertTrue(np.allclose(transpose(x, axes=(1, 2, 0)), transpose(t, axes=(1, 2, 0)).numpy()))
-
     @require_flax
     def test_transpose_flax(self):
         x = np.random.randn(3, 4)
@@ -125,16 +111,6 @@ class GenericTester(unittest.TestCase):
         t = torch.tensor(x)
         self.assertTrue(np.allclose(reshape(x, (12, 5)), reshape(t, (12, 5)).numpy()))

-    @require_tf
-    def test_reshape_tf(self):
-        x = np.random.randn(3, 4)
-        t = tf.constant(x)
-        self.assertTrue(np.allclose(reshape(x, (4, 3)), reshape(t, (4, 3)).numpy()))
-
-        x = np.random.randn(3, 4, 5)
-        t = tf.constant(x)
-        self.assertTrue(np.allclose(reshape(x, (12, 5)), reshape(t, (12, 5)).numpy()))
-
     @require_flax
     def test_reshape_flax(self):
         x = np.random.randn(3, 4)
@@ -162,16 +138,6 @@ class GenericTester(unittest.TestCase):
         t = torch.tensor(x)
         self.assertTrue(np.allclose(squeeze(x, axis=2), squeeze(t, axis=2).numpy()))

-    @require_tf
-    def test_squeeze_tf(self):
-        x = np.random.randn(1, 3, 4)
-        t = tf.constant(x)
-        self.assertTrue(np.allclose(squeeze(x), squeeze(t).numpy()))
-
-        x = np.random.randn(1, 4, 1, 5)
-        t = tf.constant(x)
-        self.assertTrue(np.allclose(squeeze(x, axis=2), squeeze(t, axis=2).numpy()))
-
     @require_flax
     def test_squeeze_flax(self):
         x = np.random.randn(1, 3, 4)
@@ -192,12 +158,6 @@ class GenericTester(unittest.TestCase):
         t = torch.tensor(x)
         self.assertTrue(np.allclose(expand_dims(x, axis=1), expand_dims(t, axis=1).numpy()))

-    @require_tf
-    def test_expand_dims_tf(self):
-        x = np.random.randn(3, 4)
-        t = tf.constant(x)
-        self.assertTrue(np.allclose(expand_dims(x, axis=1), expand_dims(t, axis=1).numpy()))
-
     @require_flax
     def test_expand_dims_flax(self):
         x = np.random.randn(3, 4)
@@ -232,18 +192,6 @@ class GenericTester(unittest.TestCase):

         self.assertTrue(to_py_obj([t1, t2]) == [x1, x2])

-    @require_tf
-    def test_to_py_obj_tf(self):
-        x1 = [[1, 2, 3], [4, 5, 6]]
-        t1 = tf.constant(x1)
-        self.assertTrue(to_py_obj(t1) == x1)
-
-        x2 = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
-        t2 = tf.constant(x2)
-        self.assertTrue(to_py_obj(t2) == x2)
-
-        self.assertTrue(to_py_obj([t1, t2]) == [x1, x2])
-
     @require_flax
     def test_to_py_obj_flax(self):
         x1 = [[1, 2, 3], [4, 5, 6]]
@@ -256,25 +204,6 @@ class GenericTester(unittest.TestCase):

         self.assertTrue(to_py_obj([t1, t2]) == [x1, x2])

-    @require_torch
-    @require_tf
-    @require_flax
-    def test_to_py_obj_mixed(self):
-        x1 = [[1], [2]]
-        t1 = np.array(x1)
-
-        x2 = [[1.0, 2.0, 3.0], [4.0, 5.0, 6.0]]
-        t2 = torch.tensor(x2)
-
-        x3 = [1, 2, 3]
-        t3 = tf.constant(x3)
-
-        x4 = [[[1.0, 2.0]]]
-        t4 = jnp.array(x4)
-
-        mixed = [(t1, t2), (t3, t4)]
-        self.assertTrue(to_py_obj(mixed) == [[x1, x2], [x3, x4]])
-

 class ValidationDecoratorTester(unittest.TestCase):
     def test_cases_no_warning(self):
diff --git a/tests/utils/test_modeling_utils.py b/tests/utils/test_modeling_utils.py
index 7df23e02959..e13fee27283 100644
--- a/tests/utils/test_modeling_utils.py
+++ b/tests/utils/test_modeling_utils.py
@@ -61,7 +61,6 @@ from transformers.testing_utils import (
     require_non_hpu,
     require_read_token,
     require_safetensors,
-    require_tf,
     require_torch,
     require_torch_accelerator,
     require_torch_multi_accelerator,
@@ -79,7 +78,6 @@ from transformers.utils.import_utils import (
     is_flash_attn_2_available,
     is_flash_attn_3_available,
     is_flax_available,
-    is_tf_available,
     is_torch_npu_available,
     is_torch_sdpa_available,
 )
@@ -322,9 +320,6 @@ class TestModelGammaBeta(PreTrainedModel):
 if is_flax_available():
     from transformers import FlaxBertModel

-if is_tf_available():
-    from transformers import TFBertModel
-

 TINY_T5 = "patrickvonplaten/t5-tiny-random"
 TINY_BERT_FOR_TOKEN_CLASSIFICATION = "hf-internal-testing/tiny-bert-for-token-classification"
@@ -1535,27 +1530,6 @@ class ModelUtilsTest(TestCasePlus):
         for p1, p2 in zip(hub_model.parameters(), new_model.parameters()):
             self.assertTrue(torch.equal(p1, p2))

-    @require_tf
-    @require_safetensors
-    def test_safetensors_torch_from_tf(self):
-        hub_model = BertModel.from_pretrained("hf-internal-testing/tiny-bert-pt-only")
-        model = TFBertModel.from_pretrained("hf-internal-testing/tiny-bert-tf-only")
-
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            model.save_pretrained(tmp_dir, safe_serialization=True)
-            new_model = BertModel.from_pretrained(tmp_dir)
-
-        for p1, p2 in zip(hub_model.parameters(), new_model.parameters()):
-            self.assertTrue(torch.equal(p1, p2))
-
-    @require_tf
-    def test_torch_from_tf(self):
-        model = TFBertModel.from_pretrained("hf-internal-testing/tiny-bert-tf-only")
-
-        with tempfile.TemporaryDirectory() as tmp_dir:
-            model.save_pretrained(tmp_dir)
-            _ = BertModel.from_pretrained(tmp_dir, from_tf=True)
-
     @require_safetensors
     def test_safetensors_torch_from_torch_sharded(self):
         model = BertModel.from_pretrained("hf-internal-testing/tiny-bert-pt-only")