[CI] Fix bnb quantization tests with accelerate>=1.2.0 (#35172)

Author: Matthew Douglas
Date: 2024-12-09 13:55:16 -05:00
Committed by: GitHub
Parent: fa8763ce17
Commit: 34f4080ff5
2 changed files with 10 additions and 10 deletions
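Every hunk in this commit makes the same one-line substitution: the tests now move their encoded inputs to model.device instead of the test suite's global torch_device constant. With device_map="auto", accelerate decides where the quantized model is placed, and (presumably, given the accelerate>=1.2.0 behavior this commit targets) that placement need not match torch_device; pointing the inputs at model.device keeps them on the model's actual device during generate(). A minimal sketch of the resulting pattern outside the test harness (the prompt is a placeholder, not part of the commit; requires bitsandbytes and accelerate):

from transformers import AutoTokenizer, T5ForConditionalGeneration

model_name = "google-t5/t5-small"  # same small checkpoint the tests load
tokenizer = AutoTokenizer.from_pretrained(model_name)

# Load in 4-bit with automatic device placement; accelerate picks the
# device(s), which need not coincide with a fixed torch_device constant.
model = T5ForConditionalGeneration.from_pretrained(
    model_name, load_in_4bit=True, device_map="auto"
)

# Move the inputs to wherever the model actually ended up before generating.
encoded_input = tokenizer("Translate English to German: hello", return_tensors="pt").to(model.device)
_ = model.generate(**encoded_input)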


@@ -385,14 +385,14 @@ class Bnb4BitT5Test(unittest.TestCase):
         # test with `google-t5/t5-small`
         model = T5ForConditionalGeneration.from_pretrained(self.model_name, load_in_4bit=True, device_map="auto")
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)
 
         # test with `flan-t5-small`
         model = T5ForConditionalGeneration.from_pretrained(
             self.dense_act_model_name, load_in_4bit=True, device_map="auto"
         )
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)
         T5ForConditionalGeneration._keep_in_fp32_modules = modules
@@ -410,14 +410,14 @@ class Bnb4BitT5Test(unittest.TestCase):
         # there was a bug with decoders - this test checks that it is fixed
         self.assertTrue(isinstance(model.decoder.block[0].layer[0].SelfAttention.q, bnb.nn.Linear4bit))
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)
 
         # test with `flan-t5-small`
         model = T5ForConditionalGeneration.from_pretrained(
             self.dense_act_model_name, load_in_4bit=True, device_map="auto"
         )
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)


@@ -514,14 +514,14 @@ class MixedInt8T5Test(unittest.TestCase):
         # test with `google-t5/t5-small`
         model = T5ForConditionalGeneration.from_pretrained(self.model_name, load_in_8bit=True, device_map="auto")
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)
 
         # test with `flan-t5-small`
         model = T5ForConditionalGeneration.from_pretrained(
             self.dense_act_model_name, load_in_8bit=True, device_map="auto"
         )
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)
         T5ForConditionalGeneration._keep_in_fp32_modules = modules
@@ -540,14 +540,14 @@ class MixedInt8T5Test(unittest.TestCase):
         # there was a bug with decoders - this test checks that it is fixed
         self.assertTrue(isinstance(model.decoder.block[0].layer[0].SelfAttention.q, bnb.nn.Linear8bitLt))
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)
 
         # test with `flan-t5-small`
         model = T5ForConditionalGeneration.from_pretrained(
             self.dense_act_model_name, load_in_8bit=True, device_map="auto"
         )
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)
 
     def test_inference_with_keep_in_fp32_serialized(self):
@@ -571,14 +571,14 @@ class MixedInt8T5Test(unittest.TestCase):
         # there was a bug with decoders - this test checks that it is fixed
         self.assertTrue(isinstance(model.decoder.block[0].layer[0].SelfAttention.q, bnb.nn.Linear8bitLt))
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)
 
         # test with `flan-t5-small`
         model = T5ForConditionalGeneration.from_pretrained(
             self.dense_act_model_name, load_in_8bit=True, device_map="auto"
         )
-        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(torch_device)
+        encoded_input = self.tokenizer(self.input_text, return_tensors="pt").to(model.device)
         _ = model.generate(**encoded_input)