[CI] Fix bnb quantization tests with accelerate>=1.2.0 (#35172)
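With accelerate>=1.2.0, loading with `device_map="auto"` may resolve to a placement that differs from the test suite's global `torch_device`, so inputs moved with `.to(torch_device)` can end up on the wrong device. The hunks below switch the tests to `.to(model.device)` so inputs always follow the model's actual placement. A minimal standalone sketch of the pattern, not taken from the diff itself (the checkpoint and prompt are illustrative; assumes `bitsandbytes` and a CUDA device are available):

    from transformers import AutoTokenizer, T5ForConditionalGeneration

    model_name = "google-t5/t5-small"  # illustrative checkpoint
    tokenizer = AutoTokenizer.from_pretrained(model_name)

    # `device_map="auto"` lets accelerate decide placement at load time.
    model = T5ForConditionalGeneration.from_pretrained(
        model_name, load_in_4bit=True, device_map="auto"
    )

    # Move inputs to the model's actual device rather than a globally
    # assumed one; this is what the tests are changed to do.
    encoded_input = tokenizer("hello", return_tensors="pt").to(model.device)
    _ = model.generate(**encoded_input)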
tests/quantization/bnb/test_4bit.py
@@ -385,14 +385,14 @@ class Bnb4BitT5Test(unittest.TestCase):

         # test with `google-t5/t5-small`
         model = T5ForConditionalGeneration.from_pretrained(self.model_name, load_in_4bit=True, device_map="auto")
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)

         # test with `flan-t5-small`
         model = T5ForConditionalGeneration.from_pretrained(
             self.dense_act_model_name, load_in_4bit=True, device_map="auto"
         )
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)
         T5ForConditionalGeneration._keep_in_fp32_modules = modules

@@ -410,14 +410,14 @@ class Bnb4BitT5Test(unittest.TestCase):
         # there was a bug with decoders - this test checks that it is fixed
         self.assertTrue(isinstance(model.decoder.block[0].layer[0].SelfAttention.q, bnb.nn.Linear4bit))

-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)

         # test with `flan-t5-small`
         model = T5ForConditionalGeneration.from_pretrained(
             self.dense_act_model_name, load_in_4bit=True, device_map="auto"
         )
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)

tests/quantization/bnb/test_mixed_int8.py
@@ -514,14 +514,14 @@ class MixedInt8T5Test(unittest.TestCase):

         # test with `google-t5/t5-small`
         model = T5ForConditionalGeneration.from_pretrained(self.model_name, load_in_8bit=True, device_map="auto")
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)

         # test with `flan-t5-small`
         model = T5ForConditionalGeneration.from_pretrained(
             self.dense_act_model_name, load_in_8bit=True, device_map="auto"
         )
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)
         T5ForConditionalGeneration._keep_in_fp32_modules = modules

@@ -540,14 +540,14 @@ class MixedInt8T5Test(unittest.TestCase):
         # there was a bug with decoders - this test checks that it is fixed
         self.assertTrue(isinstance(model.decoder.block[0].layer[0].SelfAttention.q, bnb.nn.Linear8bitLt))

-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)

         # test with `flan-t5-small`
         model = T5ForConditionalGeneration.from_pretrained(
             self.dense_act_model_name, load_in_8bit=True, device_map="auto"
         )
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)

     def test_inference_with_keep_in_fp32_serialized(self):
@@ -571,14 +571,14 @@ class MixedInt8T5Test(unittest.TestCase):
         # there was a bug with decoders - this test checks that it is fixed
         self.assertTrue(isinstance(model.decoder.block[0].layer[0].SelfAttention.q, bnb.nn.Linear8bitLt))

-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)

         # test with `flan-t5-small`
         model = T5ForConditionalGeneration.from_pretrained(
             self.dense_act_model_name, load_in_8bit=True, device_map="auto"
         )
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)