mirror of
https://github.com/huggingface/peft.git
synced 2025-10-20 15:33:48 +08:00
TST Use plain asserts in tests (#1448)
Use pytest style asserts instead of unittest methods. Use `pytest.raises` and `pytest.warns` where suitable.
This commit is contained in:
@ -15,6 +15,7 @@ extend-select = [
|
||||
"I", # Import sorting
|
||||
"UP", # Pyupgrade upgrades
|
||||
"W", # PEP8 warnings
|
||||
"PT009", # Pytest assertions
|
||||
]
|
||||
ignore = [
|
||||
"C901", # Function too complex
|
||||
|
@ -248,7 +248,7 @@ class RegressionTester(unittest.TestCase):
|
||||
base_model = self.load_base_model()
|
||||
model = PeftModel.from_pretrained(base_model, os.path.join(path, version))
|
||||
output = self.get_output(model)
|
||||
self.assertTrue(torch.allclose(output_loaded, output, atol=self.tol, rtol=self.tol))
|
||||
assert torch.allclose(output_loaded, output, atol=self.tol, rtol=self.tol)
|
||||
|
||||
def get_output(self, model):
|
||||
raise NotImplementedError
|
||||
|
@ -73,9 +73,9 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
|
||||
config = AdaptionPromptConfig(adapter_layers=1, adapter_len=4)
|
||||
model = get_peft_model(model, config)
|
||||
|
||||
self.assertTrue(hasattr(model, "save_pretrained"))
|
||||
self.assertTrue(hasattr(model, "from_pretrained"))
|
||||
self.assertTrue(hasattr(model, "push_to_hub"))
|
||||
assert hasattr(model, "save_pretrained")
|
||||
assert hasattr(model, "from_pretrained")
|
||||
assert hasattr(model, "push_to_hub")
|
||||
|
||||
def test_prepare_for_training(self) -> None:
|
||||
model = LlamaForCausalLM(self._create_test_llama_config())
|
||||
@ -86,7 +86,7 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
|
||||
dummy_input = torch.LongTensor([[1, 1, 1]]).to(self.torch_device)
|
||||
dummy_output = model.get_input_embeddings()(dummy_input)
|
||||
|
||||
self.assertTrue(not dummy_output.requires_grad)
|
||||
assert not dummy_output.requires_grad
|
||||
|
||||
def test_prepare_for_int8_training(self) -> None:
|
||||
model = LlamaForCausalLM(self._create_test_llama_config())
|
||||
@ -94,7 +94,7 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
|
||||
model = model.to(self.torch_device)
|
||||
|
||||
for param in model.parameters():
|
||||
self.assertTrue(not param.requires_grad)
|
||||
assert not param.requires_grad
|
||||
|
||||
config = AdaptionPromptConfig(adapter_layers=1, adapter_len=4, task_type="CAUSAL_LM")
|
||||
model = get_peft_model(model, config)
|
||||
@ -112,7 +112,7 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
|
||||
dummy_input = torch.LongTensor([[1, 1, 1]]).to(self.torch_device)
|
||||
dummy_output = model.get_input_embeddings()(dummy_input)
|
||||
|
||||
self.assertTrue(dummy_output.requires_grad)
|
||||
assert dummy_output.requires_grad
|
||||
|
||||
def test_save_pretrained_regression(self) -> None:
|
||||
seed = 420
|
||||
@ -134,30 +134,28 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
|
||||
state_dict_from_pretrained = get_peft_model_state_dict(model_from_pretrained)
|
||||
|
||||
# check if same keys
|
||||
self.assertEqual(state_dict.keys(), state_dict_from_pretrained.keys())
|
||||
assert state_dict.keys() == state_dict_from_pretrained.keys()
|
||||
|
||||
# Check that the number of saved parameters is 4 -- 2 layers of (tokens and gate).
|
||||
self.assertEqual(len(list(state_dict.keys())), 4)
|
||||
assert len(state_dict) == 4
|
||||
|
||||
# check if tensors equal
|
||||
for key in state_dict.keys():
|
||||
self.assertTrue(
|
||||
torch.allclose(
|
||||
assert torch.allclose(
|
||||
state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
|
||||
)
|
||||
)
|
||||
|
||||
# check if `adapter_model.bin` is present
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_model.bin")))
|
||||
assert os.path.exists(os.path.join(tmp_dirname, "adapter_model.bin"))
|
||||
|
||||
# check if `adapter_config.json` is present
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_config.json")))
|
||||
assert os.path.exists(os.path.join(tmp_dirname, "adapter_config.json"))
|
||||
|
||||
# check if `model.safetensors` is not present
|
||||
self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "model.safetensors")))
|
||||
assert not os.path.exists(os.path.join(tmp_dirname, "model.safetensors"))
|
||||
|
||||
# check if `config.json` is not present
|
||||
self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "config.json")))
|
||||
assert not os.path.exists(os.path.join(tmp_dirname, "config.json"))
|
||||
|
||||
def test_save_pretrained(self) -> None:
|
||||
seed = 420
|
||||
@ -179,30 +177,28 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
|
||||
state_dict_from_pretrained = get_peft_model_state_dict(model_from_pretrained)
|
||||
|
||||
# check if same keys
|
||||
self.assertEqual(state_dict.keys(), state_dict_from_pretrained.keys())
|
||||
assert state_dict.keys() == state_dict_from_pretrained.keys()
|
||||
|
||||
# Check that the number of saved parameters is 4 -- 2 layers of (tokens and gate).
|
||||
self.assertEqual(len(list(state_dict.keys())), 4)
|
||||
assert len(state_dict) == 4
|
||||
|
||||
# check if tensors equal
|
||||
for key in state_dict.keys():
|
||||
self.assertTrue(
|
||||
torch.allclose(
|
||||
assert torch.allclose(
|
||||
state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
|
||||
)
|
||||
)
|
||||
|
||||
# check if `adapter_model.bin` is present
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_model.safetensors")))
|
||||
assert os.path.exists(os.path.join(tmp_dirname, "adapter_model.safetensors"))
|
||||
|
||||
# check if `adapter_config.json` is present
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_config.json")))
|
||||
assert os.path.exists(os.path.join(tmp_dirname, "adapter_config.json"))
|
||||
|
||||
# check if `model.safetensors` is not present
|
||||
self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "model.safetensors")))
|
||||
assert not os.path.exists(os.path.join(tmp_dirname, "model.safetensors"))
|
||||
|
||||
# check if `config.json` is not present
|
||||
self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "config.json")))
|
||||
assert not os.path.exists(os.path.join(tmp_dirname, "config.json"))
|
||||
|
||||
def test_save_pretrained_selected_adapters(self) -> None:
|
||||
seed = 420
|
||||
@ -229,30 +225,28 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
|
||||
state_dict_from_pretrained = get_peft_model_state_dict(model_from_pretrained)
|
||||
|
||||
# check if same keys
|
||||
self.assertEqual(state_dict.keys(), state_dict_from_pretrained.keys())
|
||||
assert state_dict.keys() == state_dict_from_pretrained.keys()
|
||||
|
||||
# Check that the number of saved parameters is 4 -- 2 layers of (tokens and gate).
|
||||
self.assertEqual(len(list(state_dict.keys())), 4)
|
||||
assert len(state_dict) == 4
|
||||
|
||||
# check if tensors equal
|
||||
for key in state_dict.keys():
|
||||
self.assertTrue(
|
||||
torch.allclose(
|
||||
assert torch.allclose(
|
||||
state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
|
||||
)
|
||||
)
|
||||
|
||||
# check if `adapter_model.bin` is present
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_model.safetensors")))
|
||||
assert os.path.exists(os.path.join(tmp_dirname, "adapter_model.safetensors"))
|
||||
|
||||
# check if `adapter_config.json` is present
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_config.json")))
|
||||
assert os.path.exists(os.path.join(tmp_dirname, "adapter_config.json"))
|
||||
|
||||
# check if `model.safetensors` is not present
|
||||
self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "model.safetensors")))
|
||||
assert not os.path.exists(os.path.join(tmp_dirname, "model.safetensors"))
|
||||
|
||||
# check if `config.json` is not present
|
||||
self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "config.json")))
|
||||
assert not os.path.exists(os.path.join(tmp_dirname, "config.json"))
|
||||
|
||||
def test_generate(self) -> None:
|
||||
model = LlamaForCausalLM(self._create_test_llama_config())
|
||||
@ -299,7 +293,7 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
|
||||
|
||||
# Test that the output changed.
|
||||
default_after = adapted(input_ids=input_ids, attention_mask=attention_mask, labels=target_ids)
|
||||
self.assertFalse(torch.allclose(default_before.logits, default_after.logits))
|
||||
assert not torch.allclose(default_before.logits, default_after.logits)
|
||||
|
||||
with adapted.disable_adapter():
|
||||
# Test that the output is the same as the original output.
|
||||
@ -320,9 +314,9 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
|
||||
|
||||
# Test that adapter 1 output changed.
|
||||
adapter_1_after = adapted(input_ids=input_ids, attention_mask=attention_mask, labels=target_ids)
|
||||
self.assertFalse(torch.allclose(adapter_1_before.logits, adapter_1_after.logits))
|
||||
self.assertFalse(torch.allclose(original_before.logits, adapter_1_after.logits))
|
||||
self.assertFalse(torch.allclose(default_after.logits, adapter_1_after.logits))
|
||||
assert not torch.allclose(adapter_1_before.logits, adapter_1_after.logits)
|
||||
assert not torch.allclose(original_before.logits, adapter_1_after.logits)
|
||||
assert not torch.allclose(default_after.logits, adapter_1_after.logits)
|
||||
|
||||
with adapted.disable_adapter():
|
||||
# Test that the output is the same as the original output.
|
||||
@ -335,8 +329,8 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
|
||||
# Test that the output is the same as the default output after training.
|
||||
default_after_set = adapted(input_ids=input_ids, attention_mask=attention_mask, labels=target_ids)
|
||||
assert_close(default_after.logits, default_after_set.logits, rtol=0, atol=0)
|
||||
self.assertFalse(torch.allclose(original_before.logits, default_after_set.logits))
|
||||
self.assertFalse(torch.allclose(adapter_1_after.logits, default_after_set.logits))
|
||||
assert not torch.allclose(original_before.logits, default_after_set.logits)
|
||||
assert not torch.allclose(adapter_1_after.logits, default_after_set.logits)
|
||||
|
||||
def test_add_and_set_while_disabled(self):
|
||||
"""Test that adding and setting adapters while disabled works as intended."""
|
||||
@ -373,7 +367,7 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
|
||||
|
||||
# Test that adapter 1 output changed.
|
||||
adapter_1_after = adapted(input_ids=input_ids, attention_mask=attention_mask, labels=target_ids)
|
||||
self.assertFalse(torch.allclose(original_before.logits, adapter_1_after.logits))
|
||||
assert not torch.allclose(original_before.logits, adapter_1_after.logits)
|
||||
|
||||
adapted.set_adapter("default")
|
||||
with adapted.disable_adapter():
|
||||
@ -434,8 +428,8 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
|
||||
# https://github.com/huggingface/peft/blob/062d95a09eb5d1de35c0e5e23d4387daba99e2db/src/peft/tuners/adaption_prompt.py#L303
|
||||
# This is fine for users but makes it difficult to test if anything happens. In the future, we will have a clean
|
||||
# way to control initialization. Until then, this test is expected to fail.
|
||||
self.assertFalse(torch.allclose(output_before, output_peft))
|
||||
assert not torch.allclose(output_before, output_peft)
|
||||
|
||||
with model.disable_adapter():
|
||||
output_peft_disabled = model(dummy_input).logits
|
||||
self.assertTrue(torch.allclose(output_before, output_peft_disabled))
|
||||
assert torch.allclose(output_before, output_peft_disabled)
|
||||
|
@ -38,18 +38,18 @@ class PeftAutoModelTester(unittest.TestCase):
|
||||
def test_peft_causal_lm(self):
|
||||
model_id = "peft-internal-testing/tiny-OPTForCausalLM-lora"
|
||||
model = AutoPeftModelForCausalLM.from_pretrained(model_id)
|
||||
self.assertTrue(isinstance(model, PeftModelForCausalLM))
|
||||
assert isinstance(model, PeftModelForCausalLM)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dirname:
|
||||
model.save_pretrained(tmp_dirname)
|
||||
|
||||
model = AutoPeftModelForCausalLM.from_pretrained(tmp_dirname)
|
||||
self.assertTrue(isinstance(model, PeftModelForCausalLM))
|
||||
assert isinstance(model, PeftModelForCausalLM)
|
||||
|
||||
# check if kwargs are passed correctly
|
||||
model = AutoPeftModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
|
||||
self.assertTrue(isinstance(model, PeftModelForCausalLM))
|
||||
self.assertTrue(model.base_model.lm_head.weight.dtype == torch.bfloat16)
|
||||
assert isinstance(model, PeftModelForCausalLM)
|
||||
assert model.base_model.lm_head.weight.dtype == torch.bfloat16
|
||||
|
||||
adapter_name = "default"
|
||||
is_trainable = False
|
||||
@ -59,12 +59,12 @@ class PeftAutoModelTester(unittest.TestCase):
|
||||
def test_peft_causal_lm_extended_vocab(self):
|
||||
model_id = "peft-internal-testing/tiny-random-OPTForCausalLM-extended-vocab"
|
||||
model = AutoPeftModelForCausalLM.from_pretrained(model_id)
|
||||
self.assertTrue(isinstance(model, PeftModelForCausalLM))
|
||||
assert isinstance(model, PeftModelForCausalLM)
|
||||
|
||||
# check if kwargs are passed correctly
|
||||
model = AutoPeftModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
|
||||
self.assertTrue(isinstance(model, PeftModelForCausalLM))
|
||||
self.assertTrue(model.base_model.lm_head.weight.dtype == torch.bfloat16)
|
||||
assert isinstance(model, PeftModelForCausalLM)
|
||||
assert model.base_model.lm_head.weight.dtype == torch.bfloat16
|
||||
|
||||
adapter_name = "default"
|
||||
is_trainable = False
|
||||
@ -74,18 +74,18 @@ class PeftAutoModelTester(unittest.TestCase):
|
||||
def test_peft_seq2seq_lm(self):
|
||||
model_id = "peft-internal-testing/tiny_T5ForSeq2SeqLM-lora"
|
||||
model = AutoPeftModelForSeq2SeqLM.from_pretrained(model_id)
|
||||
self.assertTrue(isinstance(model, PeftModelForSeq2SeqLM))
|
||||
assert isinstance(model, PeftModelForSeq2SeqLM)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dirname:
|
||||
model.save_pretrained(tmp_dirname)
|
||||
|
||||
model = AutoPeftModelForSeq2SeqLM.from_pretrained(tmp_dirname)
|
||||
self.assertTrue(isinstance(model, PeftModelForSeq2SeqLM))
|
||||
assert isinstance(model, PeftModelForSeq2SeqLM)
|
||||
|
||||
# check if kwargs are passed correctly
|
||||
model = AutoPeftModelForSeq2SeqLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
|
||||
self.assertTrue(isinstance(model, PeftModelForSeq2SeqLM))
|
||||
self.assertTrue(model.base_model.lm_head.weight.dtype == torch.bfloat16)
|
||||
assert isinstance(model, PeftModelForSeq2SeqLM)
|
||||
assert model.base_model.lm_head.weight.dtype == torch.bfloat16
|
||||
|
||||
adapter_name = "default"
|
||||
is_trainable = False
|
||||
@ -95,18 +95,18 @@ class PeftAutoModelTester(unittest.TestCase):
|
||||
def test_peft_sequence_cls(self):
|
||||
model_id = "peft-internal-testing/tiny_OPTForSequenceClassification-lora"
|
||||
model = AutoPeftModelForSequenceClassification.from_pretrained(model_id)
|
||||
self.assertTrue(isinstance(model, PeftModelForSequenceClassification))
|
||||
assert isinstance(model, PeftModelForSequenceClassification)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dirname:
|
||||
model.save_pretrained(tmp_dirname)
|
||||
|
||||
model = AutoPeftModelForSequenceClassification.from_pretrained(tmp_dirname)
|
||||
self.assertTrue(isinstance(model, PeftModelForSequenceClassification))
|
||||
assert isinstance(model, PeftModelForSequenceClassification)
|
||||
|
||||
# check if kwargs are passed correctly
|
||||
model = AutoPeftModelForSequenceClassification.from_pretrained(model_id, torch_dtype=torch.bfloat16)
|
||||
self.assertTrue(isinstance(model, PeftModelForSequenceClassification))
|
||||
self.assertTrue(model.score.original_module.weight.dtype == torch.bfloat16)
|
||||
assert isinstance(model, PeftModelForSequenceClassification)
|
||||
assert model.score.original_module.weight.dtype == torch.bfloat16
|
||||
|
||||
adapter_name = "default"
|
||||
is_trainable = False
|
||||
@ -118,18 +118,18 @@ class PeftAutoModelTester(unittest.TestCase):
|
||||
def test_peft_token_classification(self):
|
||||
model_id = "peft-internal-testing/tiny_GPT2ForTokenClassification-lora"
|
||||
model = AutoPeftModelForTokenClassification.from_pretrained(model_id)
|
||||
self.assertTrue(isinstance(model, PeftModelForTokenClassification))
|
||||
assert isinstance(model, PeftModelForTokenClassification)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dirname:
|
||||
model.save_pretrained(tmp_dirname)
|
||||
|
||||
model = AutoPeftModelForTokenClassification.from_pretrained(tmp_dirname)
|
||||
self.assertTrue(isinstance(model, PeftModelForTokenClassification))
|
||||
assert isinstance(model, PeftModelForTokenClassification)
|
||||
|
||||
# check if kwargs are passed correctly
|
||||
model = AutoPeftModelForTokenClassification.from_pretrained(model_id, torch_dtype=torch.bfloat16)
|
||||
self.assertTrue(isinstance(model, PeftModelForTokenClassification))
|
||||
self.assertTrue(model.base_model.classifier.original_module.weight.dtype == torch.bfloat16)
|
||||
assert isinstance(model, PeftModelForTokenClassification)
|
||||
assert model.base_model.classifier.original_module.weight.dtype == torch.bfloat16
|
||||
|
||||
adapter_name = "default"
|
||||
is_trainable = False
|
||||
@ -141,18 +141,18 @@ class PeftAutoModelTester(unittest.TestCase):
|
||||
def test_peft_question_answering(self):
|
||||
model_id = "peft-internal-testing/tiny_OPTForQuestionAnswering-lora"
|
||||
model = AutoPeftModelForQuestionAnswering.from_pretrained(model_id)
|
||||
self.assertTrue(isinstance(model, PeftModelForQuestionAnswering))
|
||||
assert isinstance(model, PeftModelForQuestionAnswering)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dirname:
|
||||
model.save_pretrained(tmp_dirname)
|
||||
|
||||
model = AutoPeftModelForQuestionAnswering.from_pretrained(tmp_dirname)
|
||||
self.assertTrue(isinstance(model, PeftModelForQuestionAnswering))
|
||||
assert isinstance(model, PeftModelForQuestionAnswering)
|
||||
|
||||
# check if kwargs are passed correctly
|
||||
model = AutoPeftModelForQuestionAnswering.from_pretrained(model_id, torch_dtype=torch.bfloat16)
|
||||
self.assertTrue(isinstance(model, PeftModelForQuestionAnswering))
|
||||
self.assertTrue(model.base_model.qa_outputs.original_module.weight.dtype == torch.bfloat16)
|
||||
assert isinstance(model, PeftModelForQuestionAnswering)
|
||||
assert model.base_model.qa_outputs.original_module.weight.dtype == torch.bfloat16
|
||||
|
||||
adapter_name = "default"
|
||||
is_trainable = False
|
||||
@ -164,18 +164,18 @@ class PeftAutoModelTester(unittest.TestCase):
|
||||
def test_peft_feature_extraction(self):
|
||||
model_id = "peft-internal-testing/tiny_OPTForFeatureExtraction-lora"
|
||||
model = AutoPeftModelForFeatureExtraction.from_pretrained(model_id)
|
||||
self.assertTrue(isinstance(model, PeftModelForFeatureExtraction))
|
||||
assert isinstance(model, PeftModelForFeatureExtraction)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dirname:
|
||||
model.save_pretrained(tmp_dirname)
|
||||
|
||||
model = AutoPeftModelForFeatureExtraction.from_pretrained(tmp_dirname)
|
||||
self.assertTrue(isinstance(model, PeftModelForFeatureExtraction))
|
||||
assert isinstance(model, PeftModelForFeatureExtraction)
|
||||
|
||||
# check if kwargs are passed correctly
|
||||
model = AutoPeftModelForFeatureExtraction.from_pretrained(model_id, torch_dtype=torch.bfloat16)
|
||||
self.assertTrue(isinstance(model, PeftModelForFeatureExtraction))
|
||||
self.assertTrue(model.base_model.model.decoder.embed_tokens.weight.dtype == torch.bfloat16)
|
||||
assert isinstance(model, PeftModelForFeatureExtraction)
|
||||
assert model.base_model.model.decoder.embed_tokens.weight.dtype == torch.bfloat16
|
||||
|
||||
adapter_name = "default"
|
||||
is_trainable = False
|
||||
@ -187,18 +187,18 @@ class PeftAutoModelTester(unittest.TestCase):
|
||||
def test_peft_whisper(self):
|
||||
model_id = "peft-internal-testing/tiny_WhisperForConditionalGeneration-lora"
|
||||
model = AutoPeftModel.from_pretrained(model_id)
|
||||
self.assertTrue(isinstance(model, PeftModel))
|
||||
assert isinstance(model, PeftModel)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dirname:
|
||||
model.save_pretrained(tmp_dirname)
|
||||
|
||||
model = AutoPeftModel.from_pretrained(tmp_dirname)
|
||||
self.assertTrue(isinstance(model, PeftModel))
|
||||
assert isinstance(model, PeftModel)
|
||||
|
||||
# check if kwargs are passed correctly
|
||||
model = AutoPeftModel.from_pretrained(model_id, torch_dtype=torch.bfloat16)
|
||||
self.assertTrue(isinstance(model, PeftModel))
|
||||
self.assertTrue(model.base_model.model.model.encoder.embed_positions.weight.dtype == torch.bfloat16)
|
||||
assert isinstance(model, PeftModel)
|
||||
assert model.base_model.model.model.encoder.embed_positions.weight.dtype == torch.bfloat16
|
||||
|
||||
adapter_name = "default"
|
||||
is_trainable = False
|
||||
|
@ -120,19 +120,13 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
config = LoraConfig(r=32, lora_alpha=64, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none")
|
||||
|
||||
flan_8bit = get_peft_model(flan_8bit, flan_lora_config)
|
||||
self.assertTrue(
|
||||
isinstance(flan_8bit.base_model.model.encoder.block[0].layer[0].SelfAttention.q, LoraLinear8bitLt)
|
||||
)
|
||||
assert isinstance(flan_8bit.base_model.model.encoder.block[0].layer[0].SelfAttention.q, LoraLinear8bitLt)
|
||||
|
||||
opt_8bit = get_peft_model(opt_8bit, opt_lora_config)
|
||||
self.assertTrue(
|
||||
isinstance(opt_8bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear8bitLt)
|
||||
)
|
||||
assert isinstance(opt_8bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear8bitLt)
|
||||
|
||||
whisper_8bit = get_peft_model(whisper_8bit, config)
|
||||
self.assertTrue(
|
||||
isinstance(whisper_8bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear8bitLt)
|
||||
)
|
||||
assert isinstance(whisper_8bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear8bitLt)
|
||||
|
||||
@require_bitsandbytes
|
||||
@pytest.mark.multi_gpu_tests
|
||||
@ -170,19 +164,13 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
config = IA3Config(target_modules=["q_proj", "v_proj", "fc2"], feedforward_modules=["fc2"])
|
||||
|
||||
flan_8bit = get_peft_model(flan_8bit, flan_ia3_config)
|
||||
self.assertTrue(
|
||||
isinstance(flan_8bit.base_model.model.encoder.block[0].layer[0].SelfAttention.q, IA3Linear8bitLt)
|
||||
)
|
||||
assert isinstance(flan_8bit.base_model.model.encoder.block[0].layer[0].SelfAttention.q, IA3Linear8bitLt)
|
||||
|
||||
opt_8bit = get_peft_model(opt_8bit, opt_ia3_config)
|
||||
self.assertTrue(
|
||||
isinstance(opt_8bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear8bitLt)
|
||||
)
|
||||
assert isinstance(opt_8bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear8bitLt)
|
||||
|
||||
whisper_8bit = get_peft_model(whisper_8bit, config)
|
||||
self.assertTrue(
|
||||
isinstance(whisper_8bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear8bitLt)
|
||||
)
|
||||
assert isinstance(whisper_8bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear8bitLt)
|
||||
|
||||
@require_bitsandbytes
|
||||
@pytest.mark.multi_gpu_tests
|
||||
@ -211,8 +199,8 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
model.generate(input_ids=torch.LongTensor([[0, 2, 3, 1]]).to(0))
|
||||
|
||||
# check that both adapters are in the same layer
|
||||
self.assertIn("default", model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A)
|
||||
self.assertIn("adapter2", model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A)
|
||||
assert "default" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A
|
||||
assert "adapter2" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A
|
||||
|
||||
@require_bitsandbytes
|
||||
@pytest.mark.multi_gpu_tests
|
||||
@ -248,8 +236,8 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
model.generate(input_ids=torch.LongTensor([[0, 2, 3, 1]]).to(0))
|
||||
|
||||
# check that both adapters are in the same layer
|
||||
self.assertIn("default", model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A)
|
||||
self.assertIn("adapter2", model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A)
|
||||
assert "default" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A
|
||||
assert "adapter2" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A
|
||||
|
||||
@require_bitsandbytes
|
||||
@pytest.mark.multi_gpu_tests
|
||||
@ -285,8 +273,8 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
model.generate(input_ids=torch.LongTensor([[0, 2, 3, 1]]).to(0))
|
||||
|
||||
# check that both adapters are in the same layer
|
||||
self.assertIn("default", model.base_model.model.model.decoder.layers[0].self_attn.q_proj.ia3_l)
|
||||
self.assertIn("adapter2", model.base_model.model.model.decoder.layers[0].self_attn.q_proj.ia3_l)
|
||||
assert "default" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.ia3_l
|
||||
assert "adapter2" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.ia3_l
|
||||
|
||||
@pytest.mark.single_gpu_tests
|
||||
def test_lora_gptq_quantization_from_pretrained_safetensors(self):
|
||||
@ -323,8 +311,8 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
model.generate(input_ids=torch.LongTensor([[0, 2, 3, 1]]).to(0))
|
||||
|
||||
# check that both adapters are in the same layer
|
||||
self.assertIn("default", model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A)
|
||||
self.assertIn("adapter2", model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A)
|
||||
assert "default" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A
|
||||
assert "adapter2" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A
|
||||
|
||||
@require_bitsandbytes
|
||||
@pytest.mark.multi_gpu_tests
|
||||
@ -367,17 +355,13 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
config = LoraConfig(r=32, lora_alpha=64, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none")
|
||||
|
||||
flan_4bit = get_peft_model(flan_4bit, flan_lora_config)
|
||||
self.assertTrue(
|
||||
isinstance(flan_4bit.base_model.model.encoder.block[0].layer[0].SelfAttention.q, LoraLinear4bit)
|
||||
)
|
||||
assert isinstance(flan_4bit.base_model.model.encoder.block[0].layer[0].SelfAttention.q, LoraLinear4bit)
|
||||
|
||||
opt_4bit = get_peft_model(opt_4bit, opt_lora_config)
|
||||
self.assertTrue(isinstance(opt_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear4bit))
|
||||
assert isinstance(opt_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear4bit)
|
||||
|
||||
whisper_4bit = get_peft_model(whisper_4bit, config)
|
||||
self.assertTrue(
|
||||
isinstance(whisper_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear4bit)
|
||||
)
|
||||
assert isinstance(whisper_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear4bit)
|
||||
|
||||
@require_bitsandbytes
|
||||
@pytest.mark.multi_gpu_tests
|
||||
@ -415,17 +399,13 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
config = IA3Config(target_modules=["q_proj", "v_proj", "fc2"], feedforward_modules=["fc2"])
|
||||
|
||||
flan_4bit = get_peft_model(flan_4bit, flan_ia3_config)
|
||||
self.assertTrue(
|
||||
isinstance(flan_4bit.base_model.model.encoder.block[0].layer[0].SelfAttention.q, IA3Linear4bit)
|
||||
)
|
||||
assert isinstance(flan_4bit.base_model.model.encoder.block[0].layer[0].SelfAttention.q, IA3Linear4bit)
|
||||
|
||||
opt_4bit = get_peft_model(opt_4bit, opt_ia3_config)
|
||||
self.assertTrue(isinstance(opt_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear4bit))
|
||||
assert isinstance(opt_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear4bit)
|
||||
|
||||
whisper_4bit = get_peft_model(whisper_4bit, config)
|
||||
self.assertTrue(
|
||||
isinstance(whisper_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear4bit)
|
||||
)
|
||||
assert isinstance(whisper_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear4bit)
|
||||
|
||||
@pytest.mark.multi_gpu_tests
|
||||
@require_torch_multi_gpu
|
||||
@ -445,10 +425,10 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, device_map="balanced")
|
||||
tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)
|
||||
|
||||
self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))
|
||||
assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
|
||||
|
||||
model = get_peft_model(model, lora_config)
|
||||
self.assertTrue(isinstance(model, PeftModel))
|
||||
assert isinstance(model, PeftModel)
|
||||
|
||||
dummy_input = "This is a dummy input:"
|
||||
input_ids = tokenizer(dummy_input, return_tensors="pt").input_ids.to(self.device)
|
||||
@ -470,11 +450,11 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
model = AutoModelForSeq2SeqLM.from_pretrained(self.seq2seq_model_id, device_map="balanced", load_in_8bit=True)
|
||||
tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)
|
||||
|
||||
self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))
|
||||
assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
|
||||
|
||||
model = get_peft_model(model, lora_config)
|
||||
self.assertTrue(isinstance(model, PeftModel))
|
||||
self.assertTrue(isinstance(model.base_model.model.encoder.block[0].layer[0].SelfAttention.q, LoraLinear8bitLt))
|
||||
assert isinstance(model, PeftModel)
|
||||
assert isinstance(model.base_model.model.encoder.block[0].layer[0].SelfAttention.q, LoraLinear8bitLt)
|
||||
|
||||
dummy_input = "This is a dummy input:"
|
||||
input_ids = tokenizer(dummy_input, return_tensors="pt").input_ids.to(self.device)
|
||||
@ -546,8 +526,8 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
model = get_peft_model(model, config)
|
||||
trainable_params, all_params = model.get_nb_trainable_parameters()
|
||||
|
||||
self.assertEqual(trainable_params, EXPECTED_TRAINABLE_PARAMS)
|
||||
self.assertEqual(all_params, EXPECTED_ALL_PARAMS)
|
||||
assert trainable_params == EXPECTED_TRAINABLE_PARAMS
|
||||
assert all_params == EXPECTED_ALL_PARAMS
|
||||
|
||||
# test with double quant
|
||||
bnb_config = BitsAndBytesConfig(
|
||||
@ -566,8 +546,8 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
model = get_peft_model(model, config)
|
||||
trainable_params, all_params = model.get_nb_trainable_parameters()
|
||||
|
||||
self.assertEqual(trainable_params, EXPECTED_TRAINABLE_PARAMS)
|
||||
self.assertEqual(all_params, EXPECTED_ALL_PARAMS)
|
||||
assert trainable_params == EXPECTED_TRAINABLE_PARAMS
|
||||
assert all_params == EXPECTED_ALL_PARAMS
|
||||
|
||||
@require_torch_gpu
|
||||
@pytest.mark.single_gpu_tests
|
||||
@ -602,9 +582,9 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
o1 = lm_head(inputs)
|
||||
o1.mean().backward()
|
||||
|
||||
self.assertTrue(modules_to_save.weight.requires_grad is True)
|
||||
self.assertTrue(original_module.weight.grad is None)
|
||||
self.assertTrue(modules_to_save.weight.grad is not None)
|
||||
assert modules_to_save.weight.requires_grad is True
|
||||
assert original_module.weight.grad is None
|
||||
assert modules_to_save.weight.grad is not None
|
||||
|
||||
@require_torch_gpu
|
||||
@pytest.mark.single_gpu_tests
|
||||
@ -633,15 +613,11 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
|
||||
atol = 0.01
|
||||
rtol = 10
|
||||
self.assertFalse(torch.allclose(out_base, out_before_merge, atol=atol, rtol=rtol))
|
||||
self.assertTrue(torch.allclose(out_before_merge, out_after_merge, atol=atol, rtol=rtol))
|
||||
self.assertTrue(isinstance(model, PeftModel))
|
||||
self.assertTrue(
|
||||
isinstance(model.base_model.model.model.decoder.layers[0].self_attn.q_proj, bnb.nn.Linear8bitLt)
|
||||
)
|
||||
self.assertTrue(
|
||||
isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, bnb.nn.Linear8bitLt)
|
||||
)
|
||||
assert not torch.allclose(out_base, out_before_merge, atol=atol, rtol=rtol)
|
||||
assert torch.allclose(out_before_merge, out_after_merge, atol=atol, rtol=rtol)
|
||||
assert isinstance(model, PeftModel)
|
||||
assert isinstance(model.base_model.model.model.decoder.layers[0].self_attn.q_proj, bnb.nn.Linear8bitLt)
|
||||
assert isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, bnb.nn.Linear8bitLt)
|
||||
|
||||
@require_torch_gpu
|
||||
@pytest.mark.single_gpu_tests
|
||||
@ -673,11 +649,11 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
|
||||
atol = 0.01
|
||||
rtol = 10
|
||||
self.assertFalse(torch.allclose(out_base, out_before, atol=atol, rtol=rtol))
|
||||
self.assertTrue(torch.allclose(out_base, out_after, atol=atol, rtol=rtol))
|
||||
self.assertTrue(isinstance(model, PeftModel))
|
||||
self.assertTrue(isinstance(model.base_model.model.model.decoder.layers[0].self_attn.q_proj, LoraLinear8bitLt))
|
||||
self.assertTrue(isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear8bitLt))
|
||||
assert not torch.allclose(out_base, out_before, atol=atol, rtol=rtol)
|
||||
assert torch.allclose(out_base, out_after, atol=atol, rtol=rtol)
|
||||
assert isinstance(model, PeftModel)
|
||||
assert isinstance(model.base_model.model.model.decoder.layers[0].self_attn.q_proj, LoraLinear8bitLt)
|
||||
assert isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear8bitLt)
|
||||
|
||||
@require_torch_gpu
|
||||
@pytest.mark.single_gpu_tests
|
||||
@ -715,11 +691,11 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
# tolerances are pretty high because some deviations are expected with quantization
|
||||
atol = 0.01
|
||||
rtol = 10
|
||||
self.assertFalse(torch.allclose(out_base, out_before_merge, atol=atol, rtol=rtol))
|
||||
self.assertTrue(torch.allclose(out_before_merge, out_after_merge, atol=atol, rtol=rtol))
|
||||
self.assertTrue(isinstance(model, PeftModel))
|
||||
self.assertTrue(isinstance(model.base_model.model.model.decoder.layers[0].self_attn.q_proj, bnb.nn.Linear4bit))
|
||||
self.assertTrue(isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, bnb.nn.Linear4bit))
|
||||
assert not torch.allclose(out_base, out_before_merge, atol=atol, rtol=rtol)
|
||||
assert torch.allclose(out_before_merge, out_after_merge, atol=atol, rtol=rtol)
|
||||
assert isinstance(model, PeftModel)
|
||||
assert isinstance(model.base_model.model.model.decoder.layers[0].self_attn.q_proj, bnb.nn.Linear4bit)
|
||||
assert isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, bnb.nn.Linear4bit)
|
||||
|
||||
@require_torch_gpu
|
||||
@pytest.mark.single_gpu_tests
|
||||
@ -757,11 +733,11 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
|
||||
atol = 0.01
|
||||
rtol = 10
|
||||
self.assertFalse(torch.allclose(out_base, out_before, atol=atol, rtol=rtol))
|
||||
self.assertTrue(torch.allclose(out_base, out_after, atol=atol, rtol=rtol))
|
||||
self.assertTrue(isinstance(model, PeftModel))
|
||||
self.assertTrue(isinstance(model.base_model.model.model.decoder.layers[0].self_attn.q_proj, LoraLinear4bit))
|
||||
self.assertTrue(isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear4bit))
|
||||
assert not torch.allclose(out_base, out_before, atol=atol, rtol=rtol)
|
||||
assert torch.allclose(out_base, out_after, atol=atol, rtol=rtol)
|
||||
assert isinstance(model, PeftModel)
|
||||
assert isinstance(model.base_model.model.model.decoder.layers[0].self_attn.q_proj, LoraLinear4bit)
|
||||
assert isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear4bit)
|
||||
|
||||
@require_torch_gpu
|
||||
@pytest.mark.single_gpu_tests
|
||||
|
@ -68,10 +68,10 @@ class PeftConfigTester(unittest.TestCase):
|
||||
"""
|
||||
# test if all configs have the expected methods
|
||||
config = config_class()
|
||||
self.assertTrue(hasattr(config, "to_dict"))
|
||||
self.assertTrue(hasattr(config, "save_pretrained"))
|
||||
self.assertTrue(hasattr(config, "from_pretrained"))
|
||||
self.assertTrue(hasattr(config, "from_json_file"))
|
||||
assert hasattr(config, "to_dict")
|
||||
assert hasattr(config, "save_pretrained")
|
||||
assert hasattr(config, "from_pretrained")
|
||||
assert hasattr(config, "from_json_file")
|
||||
|
||||
@parameterized.expand(ALL_CONFIG_CLASSES)
|
||||
def test_task_type(self, config_class):
|
||||
@ -110,7 +110,7 @@ class PeftConfigTester(unittest.TestCase):
|
||||
config.save_pretrained(tmp_dirname)
|
||||
|
||||
config_from_pretrained = config_class.from_pretrained(tmp_dirname)
|
||||
self.assertEqual(config.to_dict(), config_from_pretrained.to_dict())
|
||||
assert config.to_dict() == config_from_pretrained.to_dict()
|
||||
|
||||
@parameterized.expand(ALL_CONFIG_CLASSES)
|
||||
def test_from_json_file(self, config_class):
|
||||
@ -119,7 +119,7 @@ class PeftConfigTester(unittest.TestCase):
|
||||
config.save_pretrained(tmp_dirname)
|
||||
|
||||
config_from_json = config_class.from_json_file(os.path.join(tmp_dirname, "adapter_config.json"))
|
||||
self.assertEqual(config.to_dict(), config_from_json)
|
||||
assert config.to_dict() == config_from_json
|
||||
|
||||
@parameterized.expand(ALL_CONFIG_CLASSES)
|
||||
def test_to_dict(self, config_class):
|
||||
@ -128,7 +128,7 @@ class PeftConfigTester(unittest.TestCase):
|
||||
- to_dict
|
||||
"""
|
||||
config = config_class()
|
||||
self.assertTrue(isinstance(config.to_dict(), dict))
|
||||
assert isinstance(config.to_dict(), dict)
|
||||
|
||||
@parameterized.expand(ALL_CONFIG_CLASSES)
|
||||
def test_from_pretrained_cache_dir(self, config_class):
|
||||
@ -146,7 +146,7 @@ class PeftConfigTester(unittest.TestCase):
|
||||
"""
|
||||
with tempfile.TemporaryDirectory() as tmp_dirname:
|
||||
PeftConfig.from_pretrained("ybelkada/test-st-lora", cache_dir=tmp_dirname)
|
||||
self.assertTrue("models--ybelkada--test-st-lora" in os.listdir(tmp_dirname))
|
||||
assert "models--ybelkada--test-st-lora" in os.listdir(tmp_dirname)
|
||||
|
||||
@parameterized.expand(ALL_CONFIG_CLASSES)
|
||||
def test_set_attributes(self, config_class):
|
||||
@ -158,28 +158,28 @@ class PeftConfigTester(unittest.TestCase):
|
||||
config.save_pretrained(tmp_dirname)
|
||||
|
||||
config_from_pretrained = config_class.from_pretrained(tmp_dirname)
|
||||
self.assertEqual(config.to_dict(), config_from_pretrained.to_dict())
|
||||
assert config.to_dict() == config_from_pretrained.to_dict()
|
||||
|
||||
@parameterized.expand(ALL_CONFIG_CLASSES)
|
||||
def test_config_copy(self, config_class):
|
||||
# see https://github.com/huggingface/peft/issues/424
|
||||
config = config_class()
|
||||
copied = copy.copy(config)
|
||||
self.assertEqual(config.to_dict(), copied.to_dict())
|
||||
assert config.to_dict() == copied.to_dict()
|
||||
|
||||
@parameterized.expand(ALL_CONFIG_CLASSES)
|
||||
def test_config_deepcopy(self, config_class):
|
||||
# see https://github.com/huggingface/peft/issues/424
|
||||
config = config_class()
|
||||
copied = copy.deepcopy(config)
|
||||
self.assertEqual(config.to_dict(), copied.to_dict())
|
||||
assert config.to_dict() == copied.to_dict()
|
||||
|
||||
@parameterized.expand(ALL_CONFIG_CLASSES)
|
||||
def test_config_pickle_roundtrip(self, config_class):
|
||||
# see https://github.com/huggingface/peft/issues/424
|
||||
config = config_class()
|
||||
copied = pickle.loads(pickle.dumps(config))
|
||||
self.assertEqual(config.to_dict(), copied.to_dict())
|
||||
assert config.to_dict() == copied.to_dict()
|
||||
|
||||
def test_prompt_encoder_warning_num_layers(self):
|
||||
# This test checks that if a prompt encoder config is created with an argument that is ignored, there should be
|
||||
@ -211,9 +211,9 @@ class PeftConfigTester(unittest.TestCase):
|
||||
config.save_pretrained(tmp_dirname)
|
||||
|
||||
config_from_pretrained = config_class.from_pretrained(tmp_dirname)
|
||||
self.assertEqual(config.to_dict(), config_from_pretrained.to_dict())
|
||||
assert config.to_dict() == config_from_pretrained.to_dict()
|
||||
# explicit test that target_modules should be converted to set
|
||||
self.assertTrue(isinstance(config_from_pretrained.target_modules, set))
|
||||
assert isinstance(config_from_pretrained.target_modules, set)
|
||||
|
||||
def test_regex_with_layer_indexing_lora(self):
|
||||
# This test checks that an error is raised if `target_modules` is a regex expression and `layers_to_transform` or
|
||||
@ -224,15 +224,10 @@ class PeftConfigTester(unittest.TestCase):
|
||||
|
||||
valid_config = {"target_modules": ["foo"], "layers_pattern": ["bar"], "layers_to_transform": [0]}
|
||||
|
||||
with self.assertRaisesRegex(
|
||||
ValueError,
|
||||
expected_regex="`layers_to_transform` cannot be used when `target_modules` is a str.",
|
||||
):
|
||||
with pytest.raises(ValueError, match="`layers_to_transform` cannot be used when `target_modules` is a str."):
|
||||
LoraConfig(**invalid_config1)
|
||||
|
||||
with self.assertRaisesRegex(
|
||||
ValueError, expected_regex="`layers_pattern` cannot be used when `target_modules` is a str."
|
||||
):
|
||||
with pytest.raises(ValueError, match="`layers_pattern` cannot be used when `target_modules` is a str."):
|
||||
LoraConfig(**invalid_config2)
|
||||
|
||||
# should run without errors
|
||||
@ -245,9 +240,7 @@ class PeftConfigTester(unittest.TestCase):
|
||||
# an example invalid config
|
||||
invalid_config = {"target_modules": ["k", "v"], "feedforward_modules": ["q"]}
|
||||
|
||||
with self.assertRaisesRegex(
|
||||
ValueError, expected_regex="^`feedforward_modules` should be a subset of `target_modules`$"
|
||||
):
|
||||
with pytest.raises(ValueError, match="^`feedforward_modules` should be a subset of `target_modules`$"):
|
||||
IA3Config(**invalid_config)
|
||||
|
||||
def test_ia3_is_feedforward_subset_valid_config(self):
|
||||
|
@ -19,6 +19,7 @@ import os
|
||||
import tempfile
|
||||
import unittest
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
from parameterized import parameterized
|
||||
from torch import nn
|
||||
@ -540,7 +541,7 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
||||
model.eval()
|
||||
with torch.no_grad():
|
||||
output = model(**X)
|
||||
self.assertTrue(torch.isfinite(output).all())
|
||||
assert torch.isfinite(output).all()
|
||||
|
||||
@parameterized.expand(TEST_CASES)
|
||||
def test_only_params_are_updated(self, test_name, model_id, config_cls, config_kwargs):
|
||||
@ -569,16 +570,16 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
||||
tol = 1e-4
|
||||
params_before = dict(model_before.named_parameters())
|
||||
params_after = dict(model.named_parameters())
|
||||
self.assertEqual(params_before.keys(), params_after.keys())
|
||||
assert params_before.keys() == params_after.keys()
|
||||
|
||||
prefix = PREFIXES[config_cls]
|
||||
for name, param_before in params_before.items():
|
||||
param_after = params_after[name]
|
||||
if (prefix in name) or ("modules_to_save" in name):
|
||||
# target_modules and modules_to_save _are_ updated
|
||||
self.assertFalse(torch.allclose(param_before, param_after, atol=tol, rtol=tol))
|
||||
assert not torch.allclose(param_before, param_after, atol=tol, rtol=tol)
|
||||
else:
|
||||
self.assertTrue(torch.allclose(param_before, param_after, atol=tol, rtol=tol))
|
||||
assert torch.allclose(param_before, param_after, atol=tol, rtol=tol)
|
||||
|
||||
@parameterized.expand(TEST_CASES)
|
||||
def test_parameters_after_loading_model(self, test_name, model_id, config_cls, config_kwargs):
|
||||
@ -614,10 +615,10 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
||||
model_from_pretrained = PeftModel.from_pretrained(model_from_pretrained, tmp_dirname)
|
||||
params_after = get_state_dict(model_from_pretrained)
|
||||
|
||||
self.assertEqual(params_before.keys(), params_after.keys())
|
||||
assert params_before.keys() == params_after.keys()
|
||||
for name, param_before in params_before.items():
|
||||
param_after = params_after[name]
|
||||
self.assertTrue(torch.allclose(param_before, param_after, atol=tol, rtol=tol))
|
||||
assert torch.allclose(param_before, param_after, atol=tol, rtol=tol)
|
||||
|
||||
@parameterized.expand(TEST_CASES)
|
||||
def test_disable_adapters(self, test_name, model_id, config_cls, config_kwargs):
|
||||
@ -633,7 +634,7 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
||||
model.eval()
|
||||
outputs_before = model(**X)
|
||||
|
||||
self.assertTrue(torch.allclose(outputs_base, outputs_before))
|
||||
assert torch.allclose(outputs_base, outputs_before)
|
||||
|
||||
model.train()
|
||||
# EmbConv1D is slow to learn for some reason
|
||||
@ -659,9 +660,9 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
||||
# check that after leaving the disable_adapter context, everything is enabled again
|
||||
outputs_enabled_after_disable = model(**X)
|
||||
|
||||
self.assertFalse(torch.allclose(outputs_before, outputs_after))
|
||||
self.assertTrue(torch.allclose(outputs_before, outputs_disabled))
|
||||
self.assertTrue(torch.allclose(outputs_after, outputs_enabled_after_disable))
|
||||
assert not torch.allclose(outputs_before, outputs_after)
|
||||
assert torch.allclose(outputs_before, outputs_disabled)
|
||||
assert torch.allclose(outputs_after, outputs_enabled_after_disable)
|
||||
|
||||
@parameterized.expand(TEST_CASES)
|
||||
def test_disable_adapters_with_merging(self, test_name, model_id, config_cls, config_kwargs):
|
||||
@ -707,13 +708,13 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
||||
atol, rtol = 1e-3, 1e-3
|
||||
|
||||
# check that there is a difference in results after training
|
||||
self.assertFalse(torch.allclose(outputs_before, outputs_after, atol=atol, rtol=rtol))
|
||||
assert not torch.allclose(outputs_before, outputs_after, atol=atol, rtol=rtol)
|
||||
|
||||
# check that disabling adapters gives the same results as before training
|
||||
self.assertTrue(torch.allclose(outputs_before, outputs_disabled, atol=atol, rtol=rtol))
|
||||
assert torch.allclose(outputs_before, outputs_disabled, atol=atol, rtol=rtol)
|
||||
|
||||
# check that enabling + disabling adapters does not change the results
|
||||
self.assertTrue(torch.allclose(outputs_after, outputs_enabled_after_disable, atol=atol, rtol=rtol))
|
||||
assert torch.allclose(outputs_after, outputs_enabled_after_disable, atol=atol, rtol=rtol)
|
||||
|
||||
@parameterized.expand(TEST_CASES)
|
||||
def test_disable_adapter_with_bias_warns(self, test_name, model_id, config_cls, config_kwargs):
|
||||
@ -743,9 +744,9 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
||||
|
||||
# check that bias=all and bias=lora_only give a warning with the correct message
|
||||
msg_start = "Careful, disabling adapter layers with bias configured to be"
|
||||
with self.assertWarns(UserWarning, msg=msg_start):
|
||||
with pytest.warns(UserWarning, match=msg_start):
|
||||
run_with_disable(config_kwargs, bias="lora_only")
|
||||
with self.assertWarns(UserWarning, msg=msg_start):
|
||||
with pytest.warns(UserWarning, match=msg_start):
|
||||
run_with_disable(config_kwargs, bias="all")
|
||||
|
||||
# For bias=none, there is no warning. Unfortunately, AFAIK unittest has no option to assert that no warning is
|
||||
@ -793,9 +794,9 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
||||
with open(os.path.join(tmp_dirname, "README.md")) as f:
|
||||
model_card = f.read()
|
||||
|
||||
self.assertIn("library_name: peft", model_card)
|
||||
self.assertIn("meta: hello", model_card)
|
||||
self.assertIn("This is a model card", model_card)
|
||||
assert "library_name: peft" in model_card
|
||||
assert "meta: hello" in model_card
|
||||
assert "This is a model card" in model_card
|
||||
|
||||
def test_non_existing_model_card(self):
|
||||
# ensure that if there is already a model card, it is not overwritten
|
||||
@ -808,9 +809,9 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
||||
with open(os.path.join(tmp_dirname, "README.md")) as f:
|
||||
model_card = f.read()
|
||||
|
||||
self.assertIn("library_name: peft", model_card)
|
||||
assert "library_name: peft" in model_card
|
||||
# rough check that the model card is pre-filled
|
||||
self.assertGreater(len(model_card), 1000)
|
||||
assert len(model_card) > 1000
|
||||
|
||||
@parameterized.expand(["auto", True, False])
|
||||
def test_targeting_lora_to_embedding_layer(self, save_embedding_layers):
|
||||
@ -822,7 +823,7 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
||||
if save_embedding_layers == "auto":
|
||||
# assert warning
|
||||
msg_start = "Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`."
|
||||
with self.assertWarns(UserWarning, msg=msg_start):
|
||||
with pytest.warns(UserWarning, match=msg_start):
|
||||
model.save_pretrained(tmp_dirname, save_embedding_layers=save_embedding_layers)
|
||||
else:
|
||||
model.save_pretrained(tmp_dirname, save_embedding_layers=save_embedding_layers)
|
||||
@ -830,15 +831,13 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
||||
|
||||
state_dict = safe_load_file(os.path.join(tmp_dirname, "adapter_model.safetensors"))
|
||||
if save_embedding_layers in ["auto", True]:
|
||||
self.assertTrue("base_model.model.embed_tokens.base_layer.weight" in state_dict)
|
||||
self.assertTrue(
|
||||
torch.allclose(
|
||||
assert "base_model.model.embed_tokens.base_layer.weight" in state_dict
|
||||
assert torch.allclose(
|
||||
model.base_model.model.embed_tokens.base_layer.weight,
|
||||
state_dict["base_model.model.embed_tokens.base_layer.weight"],
|
||||
)
|
||||
)
|
||||
else:
|
||||
self.assertFalse("base_model.model.embed_tokens.base_layer.weight" in state_dict)
|
||||
assert "base_model.model.embed_tokens.base_layer.weight" not in state_dict
|
||||
del state_dict
|
||||
|
||||
@parameterized.expand(["auto", True, False])
|
||||
@ -849,16 +848,17 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dirname:
|
||||
if save_embedding_layers is True:
|
||||
# assert warning
|
||||
msg_start = "Could not identify embedding layer(s) because the model is not a 🤗 transformers model."
|
||||
with self.assertWarns(UserWarning, msg=msg_start):
|
||||
with pytest.warns(
|
||||
UserWarning,
|
||||
match=r"Could not identify embedding layer\(s\) because the model is not a 🤗 transformers model\.",
|
||||
):
|
||||
model.save_pretrained(tmp_dirname, save_embedding_layers=save_embedding_layers)
|
||||
else:
|
||||
model.save_pretrained(tmp_dirname, save_embedding_layers=save_embedding_layers)
|
||||
from safetensors.torch import load_file as safe_load_file
|
||||
|
||||
state_dict = safe_load_file(os.path.join(tmp_dirname, "adapter_model.safetensors"))
|
||||
self.assertFalse("base_model.model.emb.base_layer.weight" in state_dict)
|
||||
assert "base_model.model.emb.base_layer.weight" not in state_dict
|
||||
del state_dict
|
||||
|
||||
@parameterized.expand(
|
||||
@ -917,11 +917,11 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
||||
assert torch.allclose(sd_default[k0], sd_custom1[k1])
|
||||
assert torch.allclose(sd_default[k0], sd_custom2[k2])
|
||||
|
||||
self.assertFalse(torch.allclose(output_base, output_default))
|
||||
self.assertFalse(torch.allclose(output_base, output_custom1))
|
||||
self.assertFalse(torch.allclose(output_base, output_custom2))
|
||||
self.assertTrue(torch.allclose(output_custom1, output_custom2))
|
||||
self.assertTrue(torch.allclose(output_default, output_custom1))
|
||||
assert not torch.allclose(output_base, output_default)
|
||||
assert not torch.allclose(output_base, output_custom1)
|
||||
assert not torch.allclose(output_base, output_custom2)
|
||||
assert torch.allclose(output_custom1, output_custom2)
|
||||
assert torch.allclose(output_default, output_custom1)
|
||||
|
||||
|
||||
class TestMultiRankAdapter(unittest.TestCase):
|
||||
@ -953,7 +953,7 @@ class TestMultiRankAdapter(unittest.TestCase):
|
||||
rank_current = model.lin0.lora_A["second"].weight.shape[0]
|
||||
rank_expected = config_2.rank_pattern["lin0"]
|
||||
|
||||
self.assertTrue(rank_current == rank_expected, f"Rank {rank_current} is not equal to expected {rank_expected}")
|
||||
assert rank_current == rank_expected, f"Rank {rank_current} is not equal to expected {rank_expected}"
|
||||
|
||||
def test_multirank_2(self):
|
||||
rank_pattern = {}
|
||||
@ -987,9 +987,9 @@ class TestMultiRankAdapter(unittest.TestCase):
|
||||
if isinstance(module, BaseTunerLayer):
|
||||
rank_expected = rank_pattern.get(key, r)
|
||||
rank_current = module.lora_A[adapter].weight.shape[0]
|
||||
self.assertTrue(
|
||||
rank_current == rank_expected, f"Rank {rank_current} is not equal to expected {rank_expected}"
|
||||
)
|
||||
assert (
|
||||
rank_current == rank_expected
|
||||
), f"Rank {rank_current} is not equal to expected {rank_expected}"
|
||||
|
||||
|
||||
class TestRepr(unittest.TestCase):
|
||||
@ -999,45 +999,45 @@ class TestRepr(unittest.TestCase):
config = LoraConfig(target_modules=["lin0"])
model = get_peft_model(MLP(), config)
print_output = repr(model.model.lin0)
self.assertTrue(print_output.startswith("lora.Linear"))
self.assertTrue("in_features=10" in print_output)
self.assertTrue("out_features=20" in print_output)
self.assertTrue("lora_A" in print_output)
self.assertTrue("lora_B" in print_output)
self.assertTrue("default" in print_output)
assert print_output.startswith("lora.Linear")
assert "in_features=10" in print_output
assert "out_features=20" in print_output
assert "lora_A" in print_output
assert "lora_B" in print_output
assert "default" in print_output

def test_repr_lora_embedding(self):
config = LoraConfig(target_modules=["emb"])
model = get_peft_model(ModelEmbConv1D(), config)
print_output = repr(model.model.emb)
self.assertTrue(print_output.startswith("lora.Embedding"))
self.assertTrue("100, 5" in print_output)
self.assertTrue("lora_embedding_A" in print_output)
self.assertTrue("lora_embedding_B" in print_output)
self.assertTrue("default" in print_output)
assert print_output.startswith("lora.Embedding")
assert "100, 5" in print_output
assert "lora_embedding_A" in print_output
assert "lora_embedding_B" in print_output
assert "default" in print_output

def test_repr_lora_conv1d(self):
config = LoraConfig(target_modules=["conv1d"])
model = get_peft_model(ModelEmbConv1D(), config)
print_output = repr(model.model.conv1d)
self.assertTrue(print_output.startswith("lora.Linear"))
self.assertTrue("in_features=5" in print_output)
self.assertTrue("out_features=1" in print_output)
self.assertTrue("lora_A" in print_output)
self.assertTrue("lora_B" in print_output)
self.assertTrue("default" in print_output)
assert print_output.startswith("lora.Linear")
assert "in_features=5" in print_output
assert "out_features=1" in print_output
assert "lora_A" in print_output
assert "lora_B" in print_output
assert "default" in print_output

def test_repr_lora_conv2d(self):
config = LoraConfig(target_modules=["conv2d"])
model = get_peft_model(ModelConv2D(), config)
print_output = repr(model.model.conv2d)
self.assertTrue(print_output.startswith("lora.Conv2d"))
self.assertTrue("5, 10" in print_output)
self.assertTrue("kernel_size=(3, 3)" in print_output)
self.assertTrue("stride=(1, 1)" in print_output)
self.assertTrue("lora_A" in print_output)
self.assertTrue("lora_B" in print_output)
self.assertTrue("default" in print_output)
assert print_output.startswith("lora.Conv2d")
assert "5, 10" in print_output
assert "kernel_size=(3, 3)" in print_output
assert "stride=(1, 1)" in print_output
assert "lora_A" in print_output
assert "lora_B" in print_output
assert "default" in print_output


class MultipleActiveAdaptersTester(unittest.TestCase):
@ -1084,9 +1084,9 @@ class MultipleActiveAdaptersTester(unittest.TestCase):
self.set_multiple_active_adapters(peft_model, ["adapter_1", "adapter_2"])
combined_output = peft_model(**X)

self.assertFalse(torch.allclose(adapter_1_output, adapter_2_output, atol=1e-5))
self.assertFalse(torch.allclose(adapter_1_output, combined_output, atol=1e-5))
self.assertFalse(torch.allclose(adapter_2_output, combined_output, atol=1e-5))
assert not torch.allclose(adapter_1_output, adapter_2_output, atol=1e-5)
assert not torch.allclose(adapter_1_output, combined_output, atol=1e-5)
assert not torch.allclose(adapter_2_output, combined_output, atol=1e-5)

if tuner_method == "lora":
# create a weighted adapter combining both adapters and check that
@ -1096,7 +1096,7 @@ class MultipleActiveAdaptersTester(unittest.TestCase):
)
peft_model.set_adapter("new_combined_adapter")
new_combined_output = peft_model(**X)
self.assertTrue(torch.allclose(new_combined_output, combined_output, atol=1e-5))
assert torch.allclose(new_combined_output, combined_output, atol=1e-5)

@parameterized.expand(MULTIPLE_ACTIVE_ADAPTERS_TEST_CASES)
def test_multiple_active_adapters_merge_and_unmerge(
@ -1120,14 +1120,14 @@ class MultipleActiveAdaptersTester(unittest.TestCase):

peft_model.merge_adapter()
merged_combined_output = peft_model(**X)
self.assertTrue(torch.allclose(merged_combined_output, combined_output, atol=1e-5))
assert torch.allclose(merged_combined_output, combined_output, atol=1e-5)

peft_model.unmerge_adapter()

with peft_model.disable_adapter():
disabled_adapter_output = peft_model(**X)

self.assertTrue(torch.allclose(disabled_adapter_output, base_output, atol=1e-4))
assert torch.allclose(disabled_adapter_output, base_output, atol=1e-4)

@parameterized.expand(MULTIPLE_ACTIVE_ADAPTERS_TEST_CASES)
def test_merge_layers_multi(self, test_name, tuner_method, config_cls, config_kwargs_1, config_kwargs_2):
@ -1153,14 +1153,14 @@ class MultipleActiveAdaptersTester(unittest.TestCase):
with torch.inference_mode():
logits_adapter_2 = model(**dummy_input)[0]

self.assertFalse(torch.allclose(logits_adapter_1, logits_adapter_2, atol=1e-3, rtol=1e-3))
assert not torch.allclose(logits_adapter_1, logits_adapter_2, atol=1e-3, rtol=1e-3)

model.set_adapter("default")

with torch.inference_mode():
logits_adapter_1_after_set = model(**dummy_input)[0]

self.assertTrue(torch.allclose(logits_adapter_1_after_set, logits_adapter_1, atol=1e-3, rtol=1e-3))
assert torch.allclose(logits_adapter_1_after_set, logits_adapter_1, atol=1e-3, rtol=1e-3)

model_copy = copy.deepcopy(model)
model_copy_2 = copy.deepcopy(model)
@ -1169,22 +1169,22 @@ class MultipleActiveAdaptersTester(unittest.TestCase):
with torch.inference_mode():
logits_merged_all = model_merged_all(**dummy_input)[0]

self.assertFalse(torch.allclose(logits_merged_all, logits_adapter_2, atol=1e-3, rtol=1e-3))
self.assertFalse(torch.allclose(logits_merged_all, logits_adapter_1, atol=1e-3, rtol=1e-3))
assert not torch.allclose(logits_merged_all, logits_adapter_2, atol=1e-3, rtol=1e-3)
assert not torch.allclose(logits_merged_all, logits_adapter_1, atol=1e-3, rtol=1e-3)

model_merged_adapter_2 = model_copy.merge_and_unload(adapter_names=["adapter-2"])

with torch.inference_mode():
logits_merged_adapter_2 = model_merged_adapter_2(**dummy_input)[0]

self.assertTrue(torch.allclose(logits_merged_adapter_2, logits_adapter_2, atol=1e-3, rtol=1e-3))
assert torch.allclose(logits_merged_adapter_2, logits_adapter_2, atol=1e-3, rtol=1e-3)

model_merged_adapter_default = model_copy_2.merge_and_unload(adapter_names=["default"])

with torch.inference_mode():
logits_merged_adapter_default = model_merged_adapter_default(**dummy_input)[0]

self.assertTrue(torch.allclose(logits_merged_adapter_default, logits_adapter_1, atol=1e-3, rtol=1e-3))
assert torch.allclose(logits_merged_adapter_default, logits_adapter_1, atol=1e-3, rtol=1e-3)


class RequiresGradTester(unittest.TestCase):
@ -1203,7 +1203,7 @@ class RequiresGradTester(unittest.TestCase):
params_with_requires_grad = [name for name, param in model.named_parameters() if param.requires_grad]
diff = set(params_expected).symmetric_difference(set(params_with_requires_grad))
msg = f"Expected {params_expected} to require gradients, got {params_with_requires_grad}"
self.assertEqual(len(diff), 0, msg=msg)
assert len(diff) == 0, msg

def test_requires_grad_modules_to_save_default(self):
config = LoraConfig(target_modules=["lin0"], modules_to_save=["lin1"])

@ -14,6 +14,7 @@
import unittest
from unittest.mock import Mock, call, patch

import pytest
import torch
from parameterized import parameterized
from transformers import AutoModelForCausalLM, AutoTokenizer
@ -114,14 +115,13 @@ class PeftDecoderModelTester(unittest.TestCase, PeftCommonTester):
model = get_peft_model(model, config)

expected_call = call(model_id, trust_remote_code=True, foo="bar")
self.assertEqual(mock.call_args, expected_call)
assert mock.call_args == expected_call

def test_prompt_tuning_config_invalid_args(self):
# Raise an error when tokenizer_kwargs is used with prompt_tuning_init!='TEXT', because this argument has no
# function in that case
model_id = "hf-internal-testing/tiny-random-OPTForCausalLM"
msg = "tokenizer_kwargs only valid when using prompt_tuning_init='TEXT'."
with self.assertRaisesRegex(ValueError, expected_regex=msg):
with pytest.raises(ValueError, match="tokenizer_kwargs only valid when using prompt_tuning_init='TEXT'."):
PromptTuningConfig(
base_model_name_or_path=model_id,
tokenizer_name_or_path=model_id,

@ -137,7 +137,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
training = model.training
model.eval()
output = model(**batch.to(model.device))
self.assertTrue(torch.isfinite(output.logits).all())
assert torch.isfinite(output.logits).all()
model.train(training)

@pytest.mark.single_gpu_tests
@ -192,11 +192,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):

model.cpu().save_pretrained(tmp_dir)

self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
assert "adapter_config.json" in os.listdir(tmp_dir)
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)

# assert loss is not None
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
assert trainer.state.log_history[-1]["train_loss"] is not None

@pytest.mark.single_gpu_tests
def test_causal_lm_training_4bit(self):
@ -250,11 +250,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):

model.cpu().save_pretrained(tmp_dir)

self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
assert "adapter_config.json" in os.listdir(tmp_dir)
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)

# assert loss is not None
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
assert trainer.state.log_history[-1]["train_loss"] is not None

@pytest.mark.multi_gpu_tests
def test_causal_lm_training_multi_gpu_4bit(self):
@ -270,7 +270,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
load_in_4bit=True,
)

self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))
assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))

model = prepare_model_for_kbit_training(model)

@ -311,11 +311,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):

model.cpu().save_pretrained(tmp_dir)

self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
assert "adapter_config.json" in os.listdir(tmp_dir)
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)

# assert loss is not None
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
assert trainer.state.log_history[-1]["train_loss"] is not None

@pytest.mark.single_gpu_tests
@require_torch_gpu
@ -375,11 +375,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):

model.cpu().save_pretrained(tmp_dir)

self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
assert "adapter_config.json" in os.listdir(tmp_dir)
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)

# assert loss is not None
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
assert trainer.state.log_history[-1]["train_loss"] is not None

@pytest.mark.single_gpu_tests
@require_torch_gpu
@ -438,11 +438,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):

model.cpu().save_pretrained(tmp_dir)

self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
assert "adapter_config.json" in os.listdir(tmp_dir)
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)

# assert loss is not None
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
assert trainer.state.log_history[-1]["train_loss"] is not None

@pytest.mark.multi_gpu_tests
@require_torch_multi_gpu
@ -460,7 +460,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
device_map="auto",
)

self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))
assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))

tokenizer = AutoTokenizer.from_pretrained(self.causal_lm_model_id)
model = prepare_model_for_int8_training(model)
@ -502,11 +502,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):

model.cpu().save_pretrained(tmp_dir)

self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
assert "adapter_config.json" in os.listdir(tmp_dir)
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)

# assert loss is not None
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
assert trainer.state.log_history[-1]["train_loss"] is not None

@pytest.mark.single_gpu_tests
def test_seq2seq_lm_training_single_gpu(self):
@ -523,7 +523,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
device_map={"": 0},
)

self.assertEqual(set(model.hf_device_map.values()), {0})
assert set(model.hf_device_map.values()) == {0}

tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)
model = prepare_model_for_int8_training(model)
@ -562,11 +562,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):

model.cpu().save_pretrained(tmp_dir)

self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
assert "adapter_config.json" in os.listdir(tmp_dir)
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)

# assert loss is not None
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
assert trainer.state.log_history[-1]["train_loss"] is not None

@pytest.mark.multi_gpu_tests
@require_torch_multi_gpu
@ -584,7 +584,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
device_map="balanced",
)

self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))
assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))

tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)
model = prepare_model_for_int8_training(model)
@ -623,11 +623,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):

model.cpu().save_pretrained(tmp_dir)

self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
assert "adapter_config.json" in os.listdir(tmp_dir)
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)

# assert loss is not None
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
assert trainer.state.log_history[-1]["train_loss"] is not None

@pytest.mark.single_gpu_tests
def test_audio_model_training(self):
@ -721,11 +721,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):

model.cpu().save_pretrained(tmp_dir)

self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
assert "adapter_config.json" in os.listdir(tmp_dir)
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)

# assert loss is not None
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
assert trainer.state.log_history[-1]["train_loss"] is not None

@pytest.mark.single_gpu_tests
def test_4bit_non_default_adapter_name(self):
@ -757,9 +757,10 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
model = get_peft_model(model, config, adapter_name="other")
n_trainable_other, n_total_other = model.get_nb_trainable_parameters()

self.assertGreater(n_trainable_other, 0) # sanity check
self.assertEqual(n_trainable_default, n_trainable_other)
self.assertEqual(n_total_default, n_total_other)
assert n_trainable_other > 0
# sanity check
assert n_trainable_default == n_trainable_other
assert n_total_default == n_total_other

@pytest.mark.single_gpu_tests
def test_8bit_non_default_adapter_name(self):
@ -791,9 +792,10 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
model = get_peft_model(model, config, adapter_name="other")
n_trainable_other, n_total_other = model.get_nb_trainable_parameters()

self.assertGreater(n_trainable_other, 0) # sanity check
self.assertEqual(n_trainable_default, n_trainable_other)
self.assertEqual(n_total_default, n_total_other)
assert n_trainable_other > 0
# sanity check
assert n_trainable_default == n_trainable_other
assert n_total_default == n_total_other


@require_torch_gpu
@ -825,7 +827,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
training = model.training
model.eval()
output = model(**batch.to(model.device))
self.assertTrue(torch.isfinite(output.logits).all())
assert torch.isfinite(output.logits).all()
model.train(training)

@pytest.mark.single_gpu_tests
@ -876,11 +878,11 @@ class PeftGPTQGPUTests(unittest.TestCase):

model.cpu().save_pretrained(tmp_dir)

self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
assert "adapter_config.json" in os.listdir(tmp_dir)
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)

# assert loss is not None
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
assert trainer.state.log_history[-1]["train_loss"] is not None

@pytest.mark.single_gpu_tests
def test_adalora_causalLM(self):
@ -941,11 +943,11 @@ class PeftGPTQGPUTests(unittest.TestCase):

model.cpu().save_pretrained(tmp_dir)

self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
assert "adapter_config.json" in os.listdir(tmp_dir)
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)

# assert loss is not None
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
assert trainer.state.log_history[-1]["train_loss"] is not None

@pytest.mark.multi_gpu_tests
@require_torch_multi_gpu
@ -963,7 +965,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
quantization_config=self.quantization_config,
)

self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))
assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))

model = prepare_model_for_kbit_training(model)

@ -1004,11 +1006,11 @@ class PeftGPTQGPUTests(unittest.TestCase):

model.cpu().save_pretrained(tmp_dir)

self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
assert "adapter_config.json" in os.listdir(tmp_dir)
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)

# assert loss is not None
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
assert trainer.state.log_history[-1]["train_loss"] is not None

@pytest.mark.single_gpu_tests
def test_non_default_adapter_name(self):
@ -1041,9 +1043,10 @@ class PeftGPTQGPUTests(unittest.TestCase):
model = get_peft_model(model, config, adapter_name="other")
n_trainable_other, n_total_other = model.get_nb_trainable_parameters()

self.assertGreater(n_trainable_other, 0) # sanity check
self.assertEqual(n_trainable_default, n_trainable_other)
self.assertEqual(n_total_default, n_total_other)
assert n_trainable_other > 0
# sanity check
assert n_trainable_default == n_trainable_other
assert n_total_default == n_total_other


@require_torch_gpu
@ -1072,8 +1075,8 @@ class OffloadSaveTests(unittest.TestCase):
memory_limits = {0: "0.4GIB", "cpu": "5GIB"}
# offloads around half of all transformer modules
device_map = infer_auto_device_map(model, max_memory=memory_limits)
self.assertTrue(0 in device_map.values())
self.assertTrue("cpu" in device_map.values())
assert 0 in device_map.values()
assert "cpu" in device_map.values()

config = LoraConfig(task_type="CAUSAL_LM", init_lora_weights=False, target_modules=["c_attn"])

@ -1082,7 +1085,7 @@ class OffloadSaveTests(unittest.TestCase):
model.save_pretrained(tmp_dir)
# load the model with device_map
model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, device_map=device_map).eval()
self.assertTrue(len({p.device for p in model.parameters()}) == 2)
assert len({p.device for p in model.parameters()}) == 2
model = PeftModel.from_pretrained(model, tmp_dir, max_memory=memory_limits)

input_tokens = tokenizer.encode("Four score and seven years ago", return_tensors="pt")
@ -1092,17 +1095,17 @@ class OffloadSaveTests(unittest.TestCase):
pre_merge_olayer = model(input_tokens)[0]
model.merge_adapter()
post_merge_olayer = model(input_tokens)[0]
self.assertTrue(torch.allclose(post_merge_olayer, pre_merge_olayer))
assert torch.allclose(post_merge_olayer, pre_merge_olayer)

# test peft model adapter unmerge
model.unmerge_adapter()
post_unmerge_olayer = model(input_tokens)[0]
self.assertTrue(torch.allclose(post_unmerge_olayer, pre_merge_olayer))
assert torch.allclose(post_unmerge_olayer, pre_merge_olayer)

# test LoRA merge and unload
model = model.merge_and_unload()
post_unload_merge_olayer = model(input_tokens)[0]
self.assertTrue(torch.allclose(post_unload_merge_olayer, pre_merge_olayer))
assert torch.allclose(post_unload_merge_olayer, pre_merge_olayer)


@require_torch_gpu
@ -1203,15 +1206,15 @@ class LoftQTests(unittest.TestCase):

mae_quantized, mse_quantized, mae_loftq, mse_loftq = self.get_errors(bits=4, device=device)
# first, sanity check that all errors are > 0.0
self.assertTrue(mae_quantized > 0.0)
self.assertTrue(mse_quantized > 0.0)
self.assertTrue(mae_loftq > 0.0)
self.assertTrue(mse_loftq > 0.0)
assert mae_quantized > 0.0
assert mse_quantized > 0.0
assert mae_loftq > 0.0
assert mse_loftq > 0.0

# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
factor = 3
self.assertTrue(mae_loftq < mae_quantized / factor)
self.assertTrue(mse_loftq < mse_quantized / factor)
assert mae_loftq < (mae_quantized / factor)
assert mse_loftq < (mse_quantized / factor)

@parameterized.expand(["cuda", "cpu"])
def test_bloomz_loftq_4bit_iter_5(self, device):
@ -1219,14 +1222,14 @@ class LoftQTests(unittest.TestCase):
# iterations, but in practice the difference is not that large, at least not for this small base model.
mae_quantized, mse_quantized, mae_loftq, mse_loftq = self.get_errors(bits=4, loftq_iter=5, device=device)
# first, sanity check that all errors are > 0.0
self.assertTrue(mae_quantized > 0.0)
self.assertTrue(mse_quantized > 0.0)
self.assertTrue(mae_loftq > 0.0)
self.assertTrue(mse_loftq > 0.0)
assert mae_quantized > 0.0
assert mse_quantized > 0.0
assert mae_loftq > 0.0
assert mse_loftq > 0.0

# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
self.assertTrue(mae_loftq < mae_quantized / self.error_factor)
self.assertTrue(mse_loftq < mse_quantized / self.error_factor)
assert mae_loftq < (mae_quantized / self.error_factor)
assert mse_loftq < (mse_quantized / self.error_factor)

@parameterized.expand(["cuda", "cpu"])
def test_bloomz_loftq_8bit(self, device):
@ -1234,14 +1237,14 @@ class LoftQTests(unittest.TestCase):
mae_quantized, mse_quantized, mae_loftq, mse_loftq = self.get_errors(bits=8, device=device)

# first, sanity check that all errors are > 0.0
self.assertTrue(mae_quantized > 0.0)
self.assertTrue(mse_quantized > 0.0)
self.assertTrue(mae_loftq > 0.0)
self.assertTrue(mse_loftq > 0.0)
assert mae_quantized > 0.0
assert mse_quantized > 0.0
assert mae_loftq > 0.0
assert mse_loftq > 0.0

# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
self.assertTrue(mae_loftq < mae_quantized / self.error_factor)
self.assertTrue(mse_loftq < mse_quantized / self.error_factor)
assert mae_loftq < (mae_quantized / self.error_factor)
assert mse_loftq < (mse_quantized / self.error_factor)

@parameterized.expand(["cuda", "cpu"])
def test_bloomz_loftq_8bit_iter_5(self, device):
@ -1249,14 +1252,14 @@ class LoftQTests(unittest.TestCase):
mae_quantized, mse_quantized, mae_loftq, mse_loftq = self.get_errors(bits=8, loftq_iter=5, device=device)

# first, sanity check that all errors are > 0.0
self.assertTrue(mae_quantized > 0.0)
self.assertTrue(mse_quantized > 0.0)
self.assertTrue(mae_loftq > 0.0)
self.assertTrue(mse_loftq > 0.0)
assert mae_quantized > 0.0
assert mse_quantized > 0.0
assert mae_loftq > 0.0
assert mse_loftq > 0.0

# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
self.assertTrue(mae_loftq < mae_quantized / self.error_factor)
self.assertTrue(mse_loftq < mse_quantized / self.error_factor)
assert mae_loftq < (mae_quantized / self.error_factor)
assert mse_loftq < (mse_quantized / self.error_factor)

@parameterized.expand(["cuda", "cpu"])
def test_t5_loftq_4bit(self, device):
@ -1264,15 +1267,15 @@ class LoftQTests(unittest.TestCase):
bits=4, device=device, model_id="t5-small"
)
# first, sanity check that all errors are > 0.0
self.assertTrue(mae_quantized > 0.0)
self.assertTrue(mse_quantized > 0.0)
self.assertTrue(mae_loftq > 0.0)
self.assertTrue(mse_loftq > 0.0)
assert mae_quantized > 0.0
assert mse_quantized > 0.0
assert mae_loftq > 0.0
assert mse_loftq > 0.0

# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
factor = 3
self.assertTrue(mae_loftq < mae_quantized / factor)
self.assertTrue(mse_loftq < mse_quantized / factor)
assert mae_loftq < (mae_quantized / factor)
assert mse_loftq < (mse_quantized / factor)

@parameterized.expand(["cuda", "cpu"])
def test_t5_loftq_8bit(self, device):
@ -1280,15 +1283,15 @@ class LoftQTests(unittest.TestCase):
bits=8, device=device, model_id="t5-small"
)
# first, sanity check that all errors are > 0.0
self.assertTrue(mae_quantized > 0.0)
self.assertTrue(mse_quantized > 0.0)
self.assertTrue(mae_loftq > 0.0)
self.assertTrue(mse_loftq > 0.0)
assert mae_quantized > 0.0
assert mse_quantized > 0.0
assert mae_loftq > 0.0
assert mse_loftq > 0.0

# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
factor = 3
self.assertTrue(mae_loftq < mae_quantized / factor)
self.assertTrue(mse_loftq < mse_quantized / factor)
assert mae_loftq < (mae_quantized / factor)
assert mse_loftq < (mse_quantized / factor)


@require_bitsandbytes
@ -1347,8 +1350,7 @@ class MixedPrecisionTests(unittest.TestCase):
),
data_collator=DataCollatorForLanguageModeling(self.tokenizer, mlm=False),
)
msg = "Attempting to unscale FP16 gradients."
with self.assertRaisesRegex(ValueError, msg):
with pytest.raises(ValueError, match="Attempting to unscale FP16 gradients."):
trainer.train()

@pytest.mark.single_gpu_tests

@ -34,4 +34,4 @@ class PeftHubFeaturesTester(unittest.TestCase):
)
model = PeftModel.from_pretrained(model, model_id, subfolder=subfolder)

self.assertTrue(isinstance(model, PeftModel))
assert isinstance(model, PeftModel)

@ -64,15 +64,15 @@ class InitializationTest(unittest.TestCase):
# use statistical test to check if weight A is from a uniform distribution
unif = self.get_uniform(weight_A.min().item(), weight_A.max().item())
_, p_value = stats.kstest(weight_A.detach().flatten().cpu().numpy(), unif.flatten().cpu().numpy())
self.assertGreater(p_value, 0.5)
assert p_value > 0.5

# check that weight A is *not* from a normal distribution
normal = self.get_normal(weight_A.mean().item(), weight_A.std().item())
_, p_value = stats.kstest(weight_A.detach().flatten().cpu().numpy(), normal.flatten().cpu().numpy())
self.assertLess(p_value, 0.05)
assert p_value < 0.05

# check that weight B is zero
self.assertTrue((weight_B == 0.0).all())
assert (weight_B == 0.0).all()

def test_lora_linear_init_gaussian(self):
# use gaussian init
@ -92,15 +92,15 @@ class InitializationTest(unittest.TestCase):
# x = weight_A.detach().flatten().cpu().numpy()
# breakpoint()

self.assertGreater(p_value, 0.5)
assert p_value > 0.5

# check that weight A is *not* from a uniform distribution
unif = self.get_uniform(weight_A.min().item(), weight_A.max().item())
_, p_value = stats.kstest(weight_A.detach().flatten().cpu().numpy(), unif.flatten().cpu().numpy())
self.assertLess(p_value, 0.05)
assert p_value < 0.05

# check that weight B is zero
self.assertTrue((weight_B == 0.0).all())
assert (weight_B == 0.0).all()

def test_lora_linear_false(self):
torch.manual_seed(0)
@ -112,7 +112,7 @@ class InitializationTest(unittest.TestCase):

# with init_lora_weights=False, weight B should *not* be zero. We don't care so much about the actual values
# as long as they are not zero, in order to avoid identity transformation.
self.assertFalse(torch.allclose(weight_B, torch.zeros_like(weight_B)))
assert not torch.allclose(weight_B, torch.zeros_like(weight_B))

def test_lora_embedding_default(self):
# embedding is initialized as a normal distribution, not kaiming uniform
@ -127,15 +127,15 @@ class InitializationTest(unittest.TestCase):
# use statistical test to check if weight B is from a normal distribution
normal = self.get_normal(0.0, 1.0)
_, p_value = stats.kstest(weight_B.detach().flatten().cpu().numpy(), normal.flatten().cpu().numpy())
self.assertGreater(p_value, 0.5)
assert p_value > 0.5

# check that weight B is *not* from a uniform distribution
unif = self.get_uniform(weight_B.min().item(), weight_B.max().item())
_, p_value = stats.kstest(weight_B.detach().flatten().cpu().numpy(), unif.flatten().cpu().numpy())
self.assertLess(p_value, 0.05)
assert p_value < 0.05

# check that weight A is zero
self.assertTrue((weight_A == 0.0).all())
assert (weight_A == 0.0).all()

def test_lora_embedding_gaussian(self):
# embedding does not change with init_lora_weights="gaussian" vs True
@ -150,15 +150,15 @@ class InitializationTest(unittest.TestCase):
# use statistical test to check if weight B is from a normal distribution
normal = self.get_normal(0.0, 1.0)
_, p_value = stats.kstest(weight_B.detach().flatten().cpu().numpy(), normal.flatten().cpu().numpy())
self.assertGreater(p_value, 0.5)
assert p_value > 0.5

# check that weight B is *not* from a uniform distribution
unif = self.get_uniform(weight_B.min().item(), weight_B.max().item())
_, p_value = stats.kstest(weight_B.detach().flatten().cpu().numpy(), unif.flatten().cpu().numpy())
self.assertLess(p_value, 0.05)
assert p_value < 0.05

# check that weight A is zero
self.assertTrue((weight_A == 0.0).all())
assert (weight_A == 0.0).all()

def test_lora_embedding_false(self):
torch.manual_seed(0)
@ -170,7 +170,7 @@ class InitializationTest(unittest.TestCase):

# with init_lora_weights=False, weight A should *not* be zero. We don't care so much about the actual values
# as long as they are not zero, in order to avoid identity transformation.
self.assertFalse(torch.allclose(weight_A, torch.zeros_like(weight_A)))
assert not torch.allclose(weight_A, torch.zeros_like(weight_A))

def test_lora_conv2d_default(self):
# default is True
@ -185,15 +185,15 @@ class InitializationTest(unittest.TestCase):
# use statistical test to check if weight A is from a uniform distribution
unif = self.get_uniform(weight_A.min().item(), weight_A.max().item())
_, p_value = stats.kstest(weight_A.detach().flatten().cpu().numpy(), unif.flatten().cpu().numpy())
self.assertGreater(p_value, 0.5)
assert p_value > 0.5

# check that weight A is *not* from a normal distribution
normal = self.get_normal(weight_A.mean().item(), weight_A.std().item())
_, p_value = stats.kstest(weight_A.detach().flatten().cpu().numpy(), normal.flatten().cpu().numpy())
self.assertLess(p_value, 0.05)
assert p_value < 0.05

# check that weight B is zero
self.assertTrue((weight_B == 0.0).all())
assert (weight_B == 0.0).all()

def test_lora_conv2d_init_gaussian(self):
# use gaussian init
@ -208,15 +208,15 @@ class InitializationTest(unittest.TestCase):
# use statistical test to check if weight A is from a normal distribution
normal = self.get_normal(0.0, 1 / config.r)
_, p_value = stats.kstest(weight_A.detach().flatten().cpu().numpy(), normal.flatten().cpu().numpy())
self.assertGreater(p_value, 0.5)
assert p_value > 0.5

# check that weight A is *not* from a uniform distribution
unif = self.get_uniform(weight_A.min().item(), weight_A.max().item())
_, p_value = stats.kstest(weight_A.detach().flatten().cpu().numpy(), unif.flatten().cpu().numpy())
self.assertLess(p_value, 0.05)
assert p_value < 0.05

# check that weight B is zero
self.assertTrue((weight_B == 0.0).all())
assert (weight_B == 0.0).all()

def test_lora_conv2d_false(self):
torch.manual_seed(0)
@ -228,7 +228,7 @@ class InitializationTest(unittest.TestCase):

# with init_lora_weights=False, weight B should *not* be zero. We don't care so much about the actual values
# as long as they are not zero, in order to avoid identity transformation.
self.assertFalse(torch.allclose(weight_B, torch.zeros_like(weight_B)))
assert not torch.allclose(weight_B, torch.zeros_like(weight_B))

def test_lora_scaling_default(self):
# default is True
@ -242,9 +242,9 @@ class InitializationTest(unittest.TestCase):

expected_scaling = config.lora_alpha / config.r

self.assertTrue(model.linear.scaling["default"] == expected_scaling)
self.assertTrue(model.embed.scaling["default"] == expected_scaling)
self.assertTrue(model.conv2d.scaling["default"] == expected_scaling)
assert model.linear.scaling["default"] == expected_scaling
assert model.embed.scaling["default"] == expected_scaling
assert model.conv2d.scaling["default"] == expected_scaling

def test_rslora_scaling(self):
# default is True
@ -258,9 +258,9 @@ class InitializationTest(unittest.TestCase):

expected_scaling = config.lora_alpha / (config.r**0.5)

self.assertTrue(model.linear.scaling["default"] == expected_scaling)
self.assertTrue(model.embed.scaling["default"] == expected_scaling)
self.assertTrue(model.conv2d.scaling["default"] == expected_scaling)
assert model.linear.scaling["default"] == expected_scaling
assert model.embed.scaling["default"] == expected_scaling
assert model.conv2d.scaling["default"] == expected_scaling

def test_lora_default_scaling_pattern(self):
# default is True
@ -285,9 +285,9 @@ class InitializationTest(unittest.TestCase):
"conv2d": config.alpha_pattern["conv2d"] / config.rank_pattern["conv2d"],
}

self.assertTrue(model.linear.scaling["default"] == expected_scaling["linear"])
self.assertTrue(model.embed.scaling["default"] == expected_scaling["embed"])
self.assertTrue(model.conv2d.scaling["default"] == expected_scaling["conv2d"])
assert model.linear.scaling["default"] == expected_scaling["linear"]
assert model.embed.scaling["default"] == expected_scaling["embed"]
assert model.conv2d.scaling["default"] == expected_scaling["conv2d"]

def test_rslora_scaling_pattern(self):
# default is True
@ -312,6 +312,6 @@ class InitializationTest(unittest.TestCase):
"conv2d": config.alpha_pattern["conv2d"] / (config.rank_pattern["conv2d"] ** 0.5),
}

self.assertTrue(model.linear.scaling["default"] == expected_scaling["linear"])
self.assertTrue(model.embed.scaling["default"] == expected_scaling["embed"])
self.assertTrue(model.conv2d.scaling["default"] == expected_scaling["conv2d"])
assert model.linear.scaling["default"] == expected_scaling["linear"]
assert model.embed.scaling["default"] == expected_scaling["embed"]
assert model.conv2d.scaling["default"] == expected_scaling["conv2d"]

@ -123,21 +123,21 @@ if is_megatron_available():

def test_megatron_lora_module(self):
megatron_module = self.megatron_module
self.assertTrue(isinstance(megatron_module, PeftModel))
assert isinstance(megatron_module, PeftModel)

for name, module in megatron_module.named_modules():
if name.endswith("linear"):
self.assertTrue(hasattr(module, "lora_A"))
self.assertTrue(hasattr(module, "lora_B"))
assert hasattr(module, "lora_A")
assert hasattr(module, "lora_B")
if name.endswith("linear.lora_A.default"):
self.assertTrue(isinstance(module, torch.nn.Linear))
assert isinstance(module, torch.nn.Linear)
if name.endswith("linear.lora_B.default"):
self.assertTrue(isinstance(module, tensor_parallel.ColumnParallelLinear))
assert isinstance(module, tensor_parallel.ColumnParallelLinear)

if name.endswith("lm_head.lora_A.default"):
self.assertTrue(isinstance(module, tensor_parallel.RowParallelLinear))
assert isinstance(module, tensor_parallel.RowParallelLinear)
if name.endswith("lm_head.lora_B.default"):
self.assertTrue(isinstance(module, torch.nn.Linear))
assert isinstance(module, torch.nn.Linear)

def test_forward(self):
x = torch.ones((2, 4, 10)).cuda()
@ -145,7 +145,7 @@ if is_megatron_available():
dummt_module_result = self.dummy_module(x)

# Because lora_B is initialized with 0, the forward results of two models should be equal before backward.
self.assertTrue(megatron_module_result.equal(dummt_module_result))
assert megatron_module_result.equal(dummt_module_result)

def test_backward(self):
optimizer = torch.optim.AdamW(self.megatron_module.parameters())
@ -165,4 +165,4 @@ if is_megatron_available():
peft_state_dict = get_peft_model_state_dict(self.megatron_module)

for key in peft_state_dict.keys():
self.assertTrue("lora" in key)
assert "lora" in key

@ -56,14 +56,14 @@ class TestPeft(unittest.TestCase):

for name, module in self.model.named_modules():
if name == "linear":
self.assertTrue(hasattr(module, "lora_A"))
self.assertTrue(hasattr(module, "lora_B"))
assert hasattr(module, "lora_A")
assert hasattr(module, "lora_B")

def test_get_peft_model_state_dict(self):
peft_state_dict = get_peft_model_state_dict(self.model)

for key in peft_state_dict.keys():
self.assertTrue("lora" in key)
assert "lora" in key

def test_modules_to_save(self):
self.model = DummyModel()
@ -81,13 +81,13 @@ class TestPeft(unittest.TestCase):

for name, module in self.model.named_modules():
if name == "linear":
self.assertTrue(hasattr(module, "lora_A"))
self.assertTrue(hasattr(module, "lora_B"))
assert hasattr(module, "lora_A")
assert hasattr(module, "lora_B")
elif name == "embedding":
self.assertTrue(isinstance(module, ModulesToSaveWrapper))
assert isinstance(module, ModulesToSaveWrapper)

state_dict = get_peft_model_state_dict(self.model)

self.assertTrue("embedding.weight" in state_dict.keys())
assert "embedding.weight" in state_dict.keys()

self.assertTrue(hasattr(self.model.embedding, "weight"))
assert hasattr(self.model.embedding, "weight")

@ -19,6 +19,7 @@ import re
import tempfile
import unittest

import pytest
import torch
from parameterized import parameterized
from torch import nn
@ -94,22 +95,22 @@ class TestMixedAdapterTypes(unittest.TestCase):
# base model
base_model = self._get_model(model_cls)
output_base = base_model(input)
self.assertTrue(torch.isfinite(output_base).all())
assert torch.isfinite(output_base).all()

# adapter 0
peft_model_0 = self._get_model(model_cls, config0, "adapter0", seed=seed0)
output_config0 = peft_model_0(input)

self.assertTrue(torch.isfinite(output_config0).all())
self.assertFalse(torch.allclose(output_base, output_config0, atol=atol, rtol=rtol))
assert torch.isfinite(output_config0).all()
assert not torch.allclose(output_base, output_config0, atol=atol, rtol=rtol)

# adapter 1
peft_model_1 = self._get_model(model_cls, config1, "adapter1", seed=seed1)
output_config1 = peft_model_1(input)

self.assertTrue(torch.isfinite(output_config1).all())
self.assertFalse(torch.allclose(output_base, output_config1, atol=atol, rtol=rtol))
self.assertFalse(torch.allclose(output_config0, output_config1, atol=atol, rtol=rtol))
assert torch.isfinite(output_config1).all()
assert not torch.allclose(output_base, output_config1, atol=atol, rtol=rtol)
assert not torch.allclose(output_config0, output_config1, atol=atol, rtol=rtol)

# adapter 0 + 1
peft_model_01 = self._get_model(model_cls, config0, "adapter0", seed=seed0)
@ -122,19 +123,19 @@ class TestMixedAdapterTypes(unittest.TestCase):
tuner_layers = [mod for mod in peft_model_01.modules() if isinstance(mod, BaseTunerLayer)]
tuner_types = {type(tuner_layer) for tuner_layer in tuner_layers}
if type(config0) == type(config1):
self.assertEqual(len(tuner_types), 1)
assert len(tuner_types) == 1
else:
self.assertEqual(len(tuner_types), 2)
assert len(tuner_types) == 2

self.assertEqual(peft_model_01.active_adapters, ["adapter0", "adapter1"])
self.assertTrue(torch.isfinite(output_mixed_01).all())
self.assertFalse(torch.allclose(output_config0, output_mixed_01, atol=atol, rtol=rtol))
self.assertFalse(torch.allclose(output_config1, output_mixed_01, atol=atol, rtol=rtol))
assert peft_model_01.active_adapters == ["adapter0", "adapter1"]
assert torch.isfinite(output_mixed_01).all()
assert not torch.allclose(output_config0, output_mixed_01, atol=atol, rtol=rtol)
assert not torch.allclose(output_config1, output_mixed_01, atol=atol, rtol=rtol)
if is_commutative:
delta0 = output_config0 - output_base
delta1 = output_config1 - output_base
delta_mixed_01 = output_mixed_01 - output_base
self.assertTrue(torch.allclose(delta0 + delta1, delta_mixed_01, atol=atol, rtol=rtol))
assert torch.allclose((delta0 + delta1), delta_mixed_01, atol=atol, rtol=rtol)

# adapter 1 + 0
peft_model_10 = self._get_model(model_cls, config1, "adapter1", seed=seed1)
@ -147,16 +148,16 @@ class TestMixedAdapterTypes(unittest.TestCase):
tuner_layers = [mod for mod in peft_model_10.modules() if isinstance(mod, BaseTunerLayer)]
tuner_types = {type(tuner_layer) for tuner_layer in tuner_layers}
if type(config0) == type(config1):
self.assertEqual(len(tuner_types), 1)
assert len(tuner_types) == 1
else:
self.assertEqual(len(tuner_types), 2)
assert len(tuner_types) == 2

self.assertEqual(peft_model_10.active_adapters, ["adapter1", "adapter0"])
self.assertTrue(torch.isfinite(output_mixed_10).all())
self.assertFalse(torch.allclose(output_config0, output_mixed_10, atol=atol, rtol=rtol))
self.assertFalse(torch.allclose(output_config1, output_mixed_10, atol=atol, rtol=rtol))
assert peft_model_10.active_adapters == ["adapter1", "adapter0"]
assert torch.isfinite(output_mixed_10).all()
assert not torch.allclose(output_config0, output_mixed_10, atol=atol, rtol=rtol)
assert not torch.allclose(output_config1, output_mixed_10, atol=atol, rtol=rtol)
if is_commutative:
self.assertTrue(torch.allclose(output_mixed_01, output_mixed_10, atol=atol, rtol=rtol))
assert torch.allclose(output_mixed_01, output_mixed_10, atol=atol, rtol=rtol)

# turn around the order of the adapters of the 0 + 1 mixed model, should behave like the 0 + 1 mixed model
peft_model_10.set_adapter(["adapter0", "adapter1"])
@ -166,17 +167,17 @@ class TestMixedAdapterTypes(unittest.TestCase):
tuner_layers = [mod for mod in peft_model_10.modules() if isinstance(mod, BaseTunerLayer)]
tuner_types = {type(tuner_layer) for tuner_layer in tuner_layers}
if type(config0) == type(config1):
self.assertEqual(len(tuner_types), 1)
assert len(tuner_types) == 1
else:
self.assertEqual(len(tuner_types), 2)
assert len(tuner_types) == 2

self.assertEqual(peft_model_10.active_adapters, ["adapter0", "adapter1"])
self.assertTrue(torch.isfinite(output_mixed_reversed).all())
self.assertFalse(torch.allclose(output_mixed_reversed, output_config0, atol=atol, rtol=rtol))
self.assertFalse(torch.allclose(output_mixed_reversed, output_config1, atol=atol, rtol=rtol))
assert peft_model_10.active_adapters == ["adapter0", "adapter1"]
assert torch.isfinite(output_mixed_reversed).all()
assert not torch.allclose(output_mixed_reversed, output_config0, atol=atol, rtol=rtol)
assert not torch.allclose(output_mixed_reversed, output_config1, atol=atol, rtol=rtol)
if is_commutative:
self.assertTrue(torch.allclose(output_mixed_reversed, output_mixed_01, atol=atol, rtol=rtol))
self.assertTrue(torch.allclose(output_mixed_reversed, output_mixed_10, atol=atol, rtol=rtol))
assert torch.allclose(output_mixed_reversed, output_mixed_01, atol=atol, rtol=rtol)
assert torch.allclose(output_mixed_reversed, output_mixed_10, atol=atol, rtol=rtol)

def _check_merging(self, model_cls, config0, config1, input):
# Ensure that when merging mixed adapters, the result is the same as when applying the adapters separately.
@ -195,7 +196,7 @@ class TestMixedAdapterTypes(unittest.TestCase):

model_merged_01 = peft_model_01.merge_and_unload()
output_merged_01 = model_merged_01(input)
self.assertTrue(torch.allclose(output_mixed_01, output_merged_01, atol=atol, rtol=rtol))
assert torch.allclose(output_mixed_01, output_merged_01, atol=atol, rtol=rtol)

# adapter 1 + 0
peft_model_10 = self._get_model(model_cls, config1, "adapter1", seed=seed1)
@ -206,7 +207,7 @@ class TestMixedAdapterTypes(unittest.TestCase):

model_merged_10 = peft_model_10.merge_and_unload()
output_merged_10 = model_merged_10(input)
self.assertTrue(torch.allclose(output_mixed_10, output_merged_10, atol=atol, rtol=rtol))
assert torch.allclose(output_mixed_10, output_merged_10, atol=atol, rtol=rtol)

def _check_unload(self, model_cls, config0, config1, input):
# Ensure that we can unload the base model without merging
@ -229,8 +230,8 @@ class TestMixedAdapterTypes(unittest.TestCase):
model_unloaded = peft_model_01.unload()
output_unloaded = model_unloaded(input)

self.assertFalse(torch.allclose(output_mixed, output_unloaded, atol=atol, rtol=rtol))
self.assertTrue(torch.allclose(output_base, output_unloaded, atol=atol, rtol=rtol))
assert not torch.allclose(output_mixed, output_unloaded, atol=atol, rtol=rtol)
assert torch.allclose(output_base, output_unloaded, atol=atol, rtol=rtol)

def _check_disable(self, model_cls, config0, config1, input):
# Ensure that we can disable adapters
@ -249,8 +250,8 @@ class TestMixedAdapterTypes(unittest.TestCase):
with peft_model_0.disable_adapter():
output_disabled0 = peft_model_0(input)

self.assertFalse(torch.allclose(output_base, output_config0, atol=atol, rtol=rtol))
self.assertTrue(torch.allclose(output_base, output_disabled0, atol=atol, rtol=rtol))
assert not torch.allclose(output_base, output_config0, atol=atol, rtol=rtol)
assert torch.allclose(output_base, output_disabled0, atol=atol, rtol=rtol)

# adapter 1
peft_model_1 = self._get_model(model_cls, config1, "adapter1", seed=seed1)
@ -258,8 +259,8 @@ class TestMixedAdapterTypes(unittest.TestCase):
with peft_model_1.disable_adapter():
output_disabled1 = peft_model_1(input)

self.assertFalse(torch.allclose(output_base, output_config1, atol=atol, rtol=rtol))
self.assertTrue(torch.allclose(output_base, output_disabled1, atol=atol, rtol=rtol))
assert not torch.allclose(output_base, output_config1, atol=atol, rtol=rtol)
assert torch.allclose(output_base, output_disabled1, atol=atol, rtol=rtol)

# adapter 0 + 1
peft_model_01 = self._get_model(model_cls, config0, "adapter0", seed=seed0)
@ -270,8 +271,8 @@ class TestMixedAdapterTypes(unittest.TestCase):
with peft_model_01.disable_adapter():
output_disabled01 = peft_model_01(input)

self.assertFalse(torch.allclose(output_base, output_mixed_01, atol=atol, rtol=rtol))
self.assertTrue(torch.allclose(output_base, output_disabled01, atol=atol, rtol=rtol))
assert not torch.allclose(output_base, output_mixed_01, atol=atol, rtol=rtol)
assert torch.allclose(output_base, output_disabled01, atol=atol, rtol=rtol)

# adapter 1 + 0
peft_model_10 = self._get_model(model_cls, config1, "adapter1", seed=seed1)
@ -282,8 +283,8 @@ class TestMixedAdapterTypes(unittest.TestCase):
with peft_model_10.disable_adapter():
output_disabled10 = peft_model_10(input)

self.assertFalse(torch.allclose(output_base, output_mixed_10, atol=atol, rtol=rtol))
self.assertTrue(torch.allclose(output_base, output_disabled10, atol=atol, rtol=rtol))
assert not torch.allclose(output_base, output_mixed_10, atol=atol, rtol=rtol)
assert torch.allclose(output_base, output_disabled10, atol=atol, rtol=rtol)

def _check_loading(self, model_cls, config0, config1, input, *, is_commutative):
# Check that we can load two adapters into the same model
@ -331,7 +332,7 @@ class TestMixedAdapterTypes(unittest.TestCase):
base_model, os.path.join(tmp_dirname, "adapter0", "adapter0"), "adapter0"
)
output_loaded0 = peft_model_loaded0(input)
self.assertTrue(torch.allclose(output_config0, output_loaded0, atol=atol, rtol=rtol))
assert torch.allclose(output_config0, output_loaded0, atol=atol, rtol=rtol)

# adapter 1
base_model = self._get_model(model_cls)
@ -340,7 +341,7 @@ class TestMixedAdapterTypes(unittest.TestCase):
base_model, os.path.join(tmp_dirname, "adapter1", "adapter1"), "adapter1"
)
output_loaded1 = peft_model_loaded1(input)
self.assertTrue(torch.allclose(output_config1, output_loaded1, atol=atol, rtol=rtol))
assert torch.allclose(output_config1, output_loaded1, atol=atol, rtol=rtol)

# adapter 0 + 1
base_model = self._get_model(model_cls)
@ -350,18 +351,18 @@ class TestMixedAdapterTypes(unittest.TestCase):
)
peft_model_loaded_01.load_adapter(os.path.join(tmp_dirname, "adapter1", "adapter1"), "adapter1")
# at this point, "adapter0" should still be active
self.assertEqual(peft_model_loaded_01.active_adapters, ["adapter0"])
assert peft_model_loaded_01.active_adapters == ["adapter0"]
output_loaded01_0 = peft_model_loaded_01(input)
self.assertTrue(torch.allclose(output_config0, output_loaded01_0, atol=atol, rtol=rtol))
assert torch.allclose(output_config0, output_loaded01_0, atol=atol, rtol=rtol)
# activate adapter1
peft_model_loaded_01.set_adapter(["adapter1"])
self.assertEqual(peft_model_loaded_01.active_adapters, ["adapter1"])
assert peft_model_loaded_01.active_adapters == ["adapter1"]
output_loaded01_1 = peft_model_loaded_01(input)
self.assertTrue(torch.allclose(output_config1, output_loaded01_1, atol=atol, rtol=rtol))
assert torch.allclose(output_config1, output_loaded01_1, atol=atol, rtol=rtol)
# activate both adapters
peft_model_loaded_01.set_adapter(["adapter0", "adapter1"])
output_loaded01 = peft_model_loaded_01(input)
self.assertTrue(torch.allclose(output_mixed_01, output_loaded01, atol=atol, rtol=rtol))
assert torch.allclose(output_mixed_01, output_loaded01, atol=atol, rtol=rtol)

# adapter 1 + 0
base_model = self._get_model(model_cls)
@ -371,22 +372,22 @@ class TestMixedAdapterTypes(unittest.TestCase):
)
peft_model_loaded_10.load_adapter(os.path.join(tmp_dirname, "adapter0", "adapter0"), "adapter0")
# at this point, "adapter1" should still be active
self.assertEqual(peft_model_loaded_10.active_adapters, ["adapter1"])
assert peft_model_loaded_10.active_adapters == ["adapter1"]
output_loaded10_1 = peft_model_loaded_10(input)
self.assertTrue(torch.allclose(output_config1, output_loaded10_1, atol=atol, rtol=rtol))
assert torch.allclose(output_config1, output_loaded10_1, atol=atol, rtol=rtol)
# activate adapter1
peft_model_loaded_10.set_adapter(["adapter0"])
self.assertEqual(peft_model_loaded_10.active_adapters, ["adapter0"])
assert peft_model_loaded_10.active_adapters == ["adapter0"]
output_loaded10_0 = peft_model_loaded_10(input)
self.assertTrue(torch.allclose(output_config0, output_loaded10_0, atol=atol, rtol=rtol))
assert torch.allclose(output_config0, output_loaded10_0, atol=atol, rtol=rtol)
# activate both adapters
peft_model_loaded_10.set_adapter(["adapter1", "adapter0"])
output_loaded10 = peft_model_loaded_10(input)
self.assertTrue(torch.allclose(output_mixed_10, output_loaded10, atol=atol, rtol=rtol))
assert torch.allclose(output_mixed_10, output_loaded10, atol=atol, rtol=rtol)

if is_commutative:
self.assertTrue(torch.allclose(output_loaded01, output_loaded10, atol=atol, rtol=rtol))
self.assertTrue(torch.allclose(output_loaded10, output_mixed_01, atol=atol, rtol=rtol))
assert torch.allclose(output_loaded01, output_loaded10, atol=atol, rtol=rtol)
assert torch.allclose(output_loaded10, output_mixed_01, atol=atol, rtol=rtol)

@parameterized.expand(
itertools.combinations(
@ -564,42 +565,42 @@ class TestMixedAdapterTypes(unittest.TestCase):

peft_model.set_adapter(["adapter0", "adapter1", "adapter2", "adapter3", "adapter4"])
output_mixed = peft_model(input)
self.assertTrue(torch.isfinite(output_base).all())
self.assertFalse(torch.allclose(output_base, output_mixed, atol=atol, rtol=rtol))
assert torch.isfinite(output_base).all()
assert not torch.allclose(output_base, output_mixed, atol=atol, rtol=rtol)

# test disabling all adapters
with peft_model.disable_adapter():
output_disabled = peft_model(input)
self.assertTrue(torch.isfinite(output_disabled).all())
self.assertTrue(torch.allclose(output_base, output_disabled, atol=atol, rtol=rtol))
self.assertFalse(torch.allclose(output_mixed, output_disabled, atol=atol, rtol=rtol))
assert torch.isfinite(output_disabled).all()
assert torch.allclose(output_base, output_disabled, atol=atol, rtol=rtol)
assert not torch.allclose(output_mixed, output_disabled, atol=atol, rtol=rtol)

# merge and unload all adapters
model_copy = copy.deepcopy(peft_model)
model = model_copy.merge_and_unload()
output_merged = model(input)
self.assertTrue(torch.isfinite(output_merged).all())
self.assertTrue(torch.allclose(output_mixed, output_merged, atol=atol, rtol=rtol))
assert torch.isfinite(output_merged).all()
assert torch.allclose(output_mixed, output_merged, atol=atol, rtol=rtol)

# merge and unload only adapter1 and adapter3
model_copy = copy.deepcopy(peft_model)
model_copy.set_adapter(["adapter1", "adapter3"])
output_13 = model_copy(input)
self.assertTrue(torch.isfinite(output_13).all())
self.assertFalse(torch.allclose(output_mixed, output_13, atol=atol, rtol=rtol))
assert torch.isfinite(output_13).all()
assert not torch.allclose(output_mixed, output_13, atol=atol, rtol=rtol)

model_copy.set_adapter(["adapter0", "adapter1", "adapter2", "adapter3", "adapter4"])
model_merged_unloaded = model_copy.merge_and_unload(adapter_names=["adapter1", "adapter3"])
output_merged_13 = model_merged_unloaded(input)
self.assertTrue(torch.isfinite(output_merged_13).all())
self.assertTrue(torch.allclose(output_13, output_merged_13, atol=atol, rtol=rtol))
assert torch.isfinite(output_merged_13).all()
assert torch.allclose(output_13, output_merged_13, atol=atol, rtol=rtol)

# test unloading
model_copy = copy.deepcopy(peft_model)
model_unloaded = model_copy.unload()
output_unloaded = model_unloaded(input)
self.assertTrue(torch.isfinite(output_unloaded).all())
self.assertTrue(torch.allclose(output_base, output_unloaded, atol=atol, rtol=rtol))
assert torch.isfinite(output_unloaded).all()
assert torch.allclose(output_base, output_unloaded, atol=atol, rtol=rtol)

def test_delete_adapter(self):
atol = 1e-5
@ -615,7 +616,7 @@ class TestMixedAdapterTypes(unittest.TestCase):
|
||||
config0 = LoraConfig(r=4, lora_alpha=4, target_modules=["lin0", "lin1"], init_lora_weights=False)
|
||||
peft_model = get_peft_model(model, config0, "adapter0", mixed=True)
|
||||
output_0 = peft_model(input)
|
||||
self.assertFalse(torch.allclose(output_base, output_0, atol=atol, rtol=rtol))
|
||||
assert not torch.allclose(output_base, output_0, atol=atol, rtol=rtol)
|
||||
|
||||
# add adapter1
|
||||
torch.manual_seed(1)
|
||||
@ -623,17 +624,17 @@ class TestMixedAdapterTypes(unittest.TestCase):
|
||||
peft_model.add_adapter("adapter1", config1)
|
||||
peft_model.set_adapter(["adapter0", "adapter1"])
|
||||
output_01 = peft_model(input)
|
||||
self.assertFalse(torch.allclose(output_base, output_01, atol=atol, rtol=rtol))
|
||||
self.assertFalse(torch.allclose(output_0, output_01, atol=atol, rtol=rtol))
|
||||
assert not torch.allclose(output_base, output_01, atol=atol, rtol=rtol)
|
||||
assert not torch.allclose(output_0, output_01, atol=atol, rtol=rtol)
|
||||
|
||||
# delete adapter1
|
||||
peft_model.delete_adapter("adapter1")
|
||||
self.assertEqual(peft_model.active_adapters, ["adapter0"])
|
||||
assert peft_model.active_adapters == ["adapter0"]
|
||||
output_deleted_1 = peft_model(input)
|
||||
self.assertTrue(torch.allclose(output_0, output_deleted_1, atol=atol, rtol=rtol))
|
||||
assert torch.allclose(output_0, output_deleted_1, atol=atol, rtol=rtol)
|
||||
|
||||
msg = re.escape("Adapter(s) ['adapter1'] not found, available adapters: ['adapter0']")
|
||||
with self.assertRaisesRegex(ValueError, expected_regex=msg):
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
peft_model.set_adapter(["adapter0", "adapter1"])
|
||||
|
||||
# re-add adapter1
|
||||
@ -641,7 +642,7 @@ class TestMixedAdapterTypes(unittest.TestCase):
|
||||
peft_model.add_adapter("adapter1", config1)
|
||||
peft_model.set_adapter(["adapter0", "adapter1"])
|
||||
output_01_readded = peft_model(input)
|
||||
self.assertFalse(torch.allclose(output_base, output_01_readded, atol=atol, rtol=rtol))
|
||||
assert not torch.allclose(output_base, output_01_readded, atol=atol, rtol=rtol)
|
||||
|
||||
# same as above, but this time delete adapter0 first
|
||||
torch.manual_seed(0)
|
||||
@ -651,19 +652,19 @@ class TestMixedAdapterTypes(unittest.TestCase):
|
||||
torch.manual_seed(1)
|
||||
peft_model.add_adapter("adapter1", config1)
|
||||
peft_model.delete_adapter("adapter0")
|
||||
self.assertEqual(peft_model.active_adapters, ["adapter1"])
|
||||
assert peft_model.active_adapters == ["adapter1"]
|
||||
output_deleted_0 = peft_model(input)
|
||||
self.assertFalse(torch.allclose(output_deleted_0, output_base, atol=atol, rtol=rtol))
|
||||
self.assertFalse(torch.allclose(output_deleted_0, output_01, atol=atol, rtol=rtol))
|
||||
assert not torch.allclose(output_deleted_0, output_base, atol=atol, rtol=rtol)
|
||||
assert not torch.allclose(output_deleted_0, output_01, atol=atol, rtol=rtol)
|
||||
|
||||
msg = re.escape("Adapter(s) ['adapter0'] not found, available adapters: ['adapter1']")
|
||||
with self.assertRaisesRegex(ValueError, expected_regex=msg):
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
peft_model.set_adapter(["adapter0", "adapter1"])
|
||||
|
||||
peft_model.delete_adapter("adapter1")
|
||||
self.assertEqual(peft_model.active_adapters, [])
|
||||
assert peft_model.active_adapters == []
|
||||
output_deleted_01 = peft_model(input)
|
||||
self.assertTrue(torch.allclose(output_deleted_01, output_base, atol=atol, rtol=rtol))
|
||||
assert torch.allclose(output_deleted_01, output_base, atol=atol, rtol=rtol)
|
||||
|
||||
def test_modules_to_save(self):
|
||||
model = SimpleNet().eval().to(self.torch_device)
|
||||
@ -674,8 +675,7 @@ class TestMixedAdapterTypes(unittest.TestCase):
|
||||
# TODO: theoretically, we could allow this if it's the same target layer
|
||||
config1 = LoHaConfig(target_modules=["lin0"], modules_to_save=["lin1"])
|
||||
peft_model.add_adapter("adapter1", config1)
|
||||
msg = "Only one adapter can be set at a time for modules_to_save"
|
||||
with self.assertRaisesRegex(ValueError, expected_regex=msg):
|
||||
with pytest.raises(ValueError, match="Only one adapter can be set at a time for modules_to_save"):
|
||||
peft_model.set_adapter(["adapter0", "adapter1"])
|
||||
|
||||
def test_get_nb_trainable_parameters(self):
|
||||
@ -687,16 +687,16 @@ class TestMixedAdapterTypes(unittest.TestCase):
|
||||
trainable_params0, all_param0 = peft_model.get_nb_trainable_parameters()
|
||||
|
||||
params_lora = sum(p.numel() for n, p in model.named_parameters() if "adapter0" in n)
|
||||
self.assertEqual(trainable_params0, params_lora)
|
||||
self.assertEqual(all_param0, params_base + params_lora)
|
||||
assert trainable_params0 == params_lora
|
||||
assert all_param0 == (params_base + params_lora)
|
||||
|
||||
config1 = LoHaConfig(target_modules=["lin1"])
|
||||
peft_model.add_adapter("adapter1", config1)
|
||||
peft_model.set_adapter(["adapter0", "adapter1"])
|
||||
params_loha = sum(p.numel() for n, p in model.named_parameters() if "adapter1" in n)
|
||||
trainable_params1, all_param1 = peft_model.get_nb_trainable_parameters()
|
||||
self.assertEqual(trainable_params1, params_lora + params_loha)
|
||||
self.assertEqual(all_param1, params_base + params_lora + params_loha)
|
||||
assert trainable_params1 == (params_lora + params_loha)
|
||||
assert all_param1 == ((params_base + params_lora) + params_loha)
|
||||
|
||||
config2 = AdaLoraConfig(target_modules=["lin0", "lin1"])
|
||||
peft_model.add_adapter("adapter2", config2)
|
||||
@ -704,8 +704,8 @@ class TestMixedAdapterTypes(unittest.TestCase):
|
||||
params_adalora = sum(p.numel() for n, p in model.named_parameters() if "adapter2" in n)
|
||||
trainable_params2, all_param2 = peft_model.get_nb_trainable_parameters()
|
||||
# remove 2 params because we need to exclude "ranknum" for AdaLora trainable params
|
||||
self.assertEqual(trainable_params2, params_lora + params_loha + params_adalora - 2)
|
||||
self.assertEqual(all_param2, params_base + params_lora + params_loha + params_adalora)
|
||||
assert trainable_params2 == (((params_lora + params_loha) + params_adalora) - 2)
|
||||
assert all_param2 == (((params_base + params_lora) + params_loha) + params_adalora)
|
||||
|
||||
def test_incompatible_config_raises(self):
|
||||
model = SimpleNet().eval().to(self.torch_device)
|
||||
@ -714,7 +714,7 @@ class TestMixedAdapterTypes(unittest.TestCase):
|
||||
|
||||
config1 = PrefixTuningConfig()
|
||||
msg = "The provided `peft_type` 'PREFIX_TUNING' is not compatible with the `PeftMixedModel`."
|
||||
with self.assertRaisesRegex(ValueError, expected_regex=msg):
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
peft_model.add_adapter("adapter1", config1)
|
||||
|
||||
def test_decoder_model(self):
|
||||
@ -735,50 +735,50 @@ class TestMixedAdapterTypes(unittest.TestCase):
|
||||
config0 = LoraConfig(task_type="CAUSAL_LM", init_lora_weights=False)
|
||||
peft_model = get_peft_model(model, config0, "adapter0", mixed=True)
|
||||
output0 = peft_model.generate(**input_dict)
|
||||
self.assertTrue(torch.isfinite(output0).all())
|
||||
self.assertFalse(torch.allclose(output_base, output0))
|
||||
assert torch.isfinite(output0).all()
|
||||
assert not torch.allclose(output_base, output0)
|
||||
|
||||
torch.manual_seed(1)
|
||||
config1 = LoHaConfig(task_type="CAUSAL_LM", target_modules=["q_proj", "v_proj"], init_weights=False)
|
||||
peft_model.add_adapter("adapter1", config1)
|
||||
peft_model.set_adapter(["adapter0", "adapter1"])
|
||||
output1 = peft_model.generate(**input_dict)
|
||||
self.assertTrue(torch.isfinite(output1).all())
|
||||
self.assertFalse(torch.allclose(output0, output1))
|
||||
assert torch.isfinite(output1).all()
|
||||
assert not torch.allclose(output0, output1)
|
||||
|
||||
torch.manual_seed(2)
|
||||
config2 = AdaLoraConfig(task_type="CAUSAL_LM", init_lora_weights=False)
|
||||
peft_model.add_adapter("adapter2", config2)
|
||||
peft_model.set_adapter(["adapter0", "adapter1", "adapter2"])
|
||||
output2 = peft_model.generate(**input_dict)
|
||||
self.assertTrue(torch.isfinite(output2).all())
|
||||
self.assertFalse(torch.allclose(output1, output2))
|
||||
assert torch.isfinite(output2).all()
|
||||
assert not torch.allclose(output1, output2)
|
||||
|
||||
torch.manual_seed(3)
|
||||
config3 = LoKrConfig(task_type="CAUSAL_LM", target_modules=["q_proj", "v_proj"], init_weights=False)
|
||||
peft_model.add_adapter("adapter3", config3)
|
||||
peft_model.set_adapter(["adapter0", "adapter1", "adapter2", "adapter3"])
|
||||
output3 = peft_model.generate(**input_dict)
|
||||
self.assertTrue(torch.isfinite(output3).all())
|
||||
self.assertFalse(torch.allclose(output2, output3))
|
||||
assert torch.isfinite(output3).all()
|
||||
assert not torch.allclose(output2, output3)
|
||||
|
||||
torch.manual_seed(4)
|
||||
config4 = OFTConfig(task_type="CAUSAL_LM", target_modules=["q_proj", "v_proj"], init_weights=False)
|
||||
peft_model.add_adapter("adapter4", config4)
|
||||
peft_model.set_adapter(["adapter0", "adapter1", "adapter2", "adapter3", "adapter4"])
|
||||
output4 = peft_model.generate(**input_dict)
|
||||
self.assertTrue(torch.isfinite(output4).all())
|
||||
self.assertFalse(torch.allclose(output3, output4))
|
||||
assert torch.isfinite(output4).all()
|
||||
assert not torch.allclose(output3, output4)
|
||||
|
||||
with peft_model.disable_adapter():
|
||||
output_disabled = peft_model.generate(**input_dict)
|
||||
self.assertTrue(torch.isfinite(output_disabled).all())
|
||||
self.assertTrue(torch.allclose(output_base, output_disabled))
|
||||
assert torch.isfinite(output_disabled).all()
|
||||
assert torch.allclose(output_base, output_disabled)
|
||||
|
||||
model_unloaded = peft_model.merge_and_unload()
|
||||
output_unloaded = model_unloaded.generate(**input_dict)
|
||||
self.assertTrue(torch.isfinite(output_unloaded).all())
|
||||
self.assertTrue(torch.allclose(output4, output_unloaded))
|
||||
assert torch.isfinite(output_unloaded).all()
|
||||
assert torch.allclose(output4, output_unloaded)
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
# save adapter0 (use normal PeftModel, because PeftMixedModel does not support saving)
|
||||
@ -787,7 +787,7 @@ class TestMixedAdapterTypes(unittest.TestCase):
|
||||
torch.manual_seed(0)
|
||||
peft_model = get_peft_model(model, config0, "adapter0")
|
||||
output0_save = peft_model(**input_dict).logits
|
||||
self.assertTrue(torch.isfinite(output0_save).all())
|
||||
assert torch.isfinite(output0_save).all()
|
||||
peft_model.save_pretrained(tmp_dir)
|
||||
|
||||
# save adapter1
|
||||
@ -796,7 +796,7 @@ class TestMixedAdapterTypes(unittest.TestCase):
|
||||
torch.manual_seed(1)
|
||||
peft_model = get_peft_model(model, config1, "adapter1")
|
||||
output1_save = peft_model(**input_dict).logits
|
||||
self.assertTrue(torch.isfinite(output1_save).all())
|
||||
assert torch.isfinite(output1_save).all()
|
||||
peft_model.save_pretrained(tmp_dir)
|
||||
|
||||
# load adapter0 and adapter1
|
||||
@ -807,6 +807,6 @@ class TestMixedAdapterTypes(unittest.TestCase):
|
||||
output01_loaded = peft_model(**input_dict).logits
|
||||
|
||||
atol, rtol = 1e-3, 1e-3
|
||||
self.assertTrue(torch.isfinite(output01_loaded).all())
|
||||
self.assertFalse(torch.allclose(output0_save, output01_loaded, atol=atol, rtol=rtol))
|
||||
self.assertFalse(torch.allclose(output1_save, output01_loaded, atol=atol, rtol=rtol))
|
||||
assert torch.isfinite(output01_loaded).all()
|
||||
assert not torch.allclose(output0_save, output01_loaded, atol=atol, rtol=rtol)
|
||||
assert not torch.allclose(output1_save, output01_loaded, atol=atol, rtol=rtol)
|
||||
|
@ -84,7 +84,7 @@ class MultiTaskPromptTuningTester(TestCase, PeftCommonTester):
|
||||
dummy_input = torch.LongTensor([[1, 1, 1]]).to(self.torch_device)
|
||||
dummy_output = model.get_input_embeddings()(dummy_input)
|
||||
|
||||
self.assertTrue(not dummy_output.requires_grad)
|
||||
assert not dummy_output.requires_grad
|
||||
|
||||
def test_prepare_for_int8_training(self) -> None:
|
||||
model = LlamaForCausalLM(self._create_test_llama_config())
|
||||
@ -92,7 +92,7 @@ class MultiTaskPromptTuningTester(TestCase, PeftCommonTester):
|
||||
model = model.to(self.torch_device)
|
||||
|
||||
for param in model.parameters():
|
||||
self.assertTrue(not param.requires_grad)
|
||||
assert not param.requires_grad
|
||||
|
||||
model = get_peft_model(model, self._create_multitask_prompt_tuning_config())
|
||||
|
||||
@ -109,7 +109,7 @@ class MultiTaskPromptTuningTester(TestCase, PeftCommonTester):
|
||||
dummy_input = torch.LongTensor([[1, 1, 1]]).to(self.torch_device)
|
||||
dummy_output = model.get_input_embeddings()(dummy_input)
|
||||
|
||||
self.assertTrue(dummy_output.requires_grad)
|
||||
assert dummy_output.requires_grad
|
||||
|
||||
def test_save_pretrained(self) -> None:
|
||||
seed = 420
|
||||
@ -131,30 +131,28 @@ class MultiTaskPromptTuningTester(TestCase, PeftCommonTester):
|
||||
state_dict_from_pretrained = get_peft_model_state_dict(model_from_pretrained)
|
||||
|
||||
# check if same keys
|
||||
self.assertEqual(state_dict.keys(), state_dict_from_pretrained.keys())
|
||||
assert state_dict.keys() == state_dict_from_pretrained.keys()
|
||||
|
||||
# Check that the number of saved parameters is 4 -- 2 layers of (tokens and gate).
|
||||
self.assertEqual(len(list(state_dict.keys())), 3)
|
||||
assert len(state_dict) == 3
|
||||
|
||||
# check if tensors equal
|
||||
for key in state_dict.keys():
|
||||
self.assertTrue(
|
||||
torch.allclose(
|
||||
assert torch.allclose(
|
||||
state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
|
||||
)
|
||||
)
|
||||
|
||||
# check if `adapter_model.safetensors` is present
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_model.safetensors")))
|
||||
assert os.path.exists(os.path.join(tmp_dirname, "adapter_model.safetensors"))
|
||||
|
||||
# check if `adapter_config.json` is present
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_config.json")))
|
||||
assert os.path.exists(os.path.join(tmp_dirname, "adapter_config.json"))
|
||||
|
||||
# check if `pytorch_model.bin` is not present
|
||||
self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "pytorch_model.bin")))
|
||||
assert not os.path.exists(os.path.join(tmp_dirname, "pytorch_model.bin"))
|
||||
|
||||
# check if `config.json` is not present
|
||||
self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "config.json")))
|
||||
assert not os.path.exists(os.path.join(tmp_dirname, "config.json"))
|
||||
|
||||
def test_save_pretrained_regression(self) -> None:
|
||||
seed = 420
|
||||
@ -176,30 +174,28 @@ class MultiTaskPromptTuningTester(TestCase, PeftCommonTester):
|
||||
state_dict_from_pretrained = get_peft_model_state_dict(model_from_pretrained)
|
||||
|
||||
# check if same keys
|
||||
self.assertEqual(state_dict.keys(), state_dict_from_pretrained.keys())
|
||||
assert state_dict.keys() == state_dict_from_pretrained.keys()
|
||||
|
||||
# Check that the number of saved parameters is 4 -- 2 layers of (tokens and gate).
|
||||
self.assertEqual(len(list(state_dict.keys())), 3)
|
||||
assert len(state_dict) == 3
|
||||
|
||||
# check if tensors equal
|
||||
for key in state_dict.keys():
|
||||
self.assertTrue(
|
||||
torch.allclose(
|
||||
assert torch.allclose(
|
||||
state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
|
||||
)
|
||||
)
|
||||
|
||||
# check if `adapter_model.bin` is present for regression
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_model.bin")))
|
||||
assert os.path.exists(os.path.join(tmp_dirname, "adapter_model.bin"))
|
||||
|
||||
# check if `adapter_config.json` is present
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_config.json")))
|
||||
assert os.path.exists(os.path.join(tmp_dirname, "adapter_config.json"))
|
||||
|
||||
# check if `pytorch_model.bin` is not present
|
||||
self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "pytorch_model.bin")))
|
||||
assert not os.path.exists(os.path.join(tmp_dirname, "pytorch_model.bin"))
|
||||
|
||||
# check if `config.json` is not present
|
||||
self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "config.json")))
|
||||
assert not os.path.exists(os.path.join(tmp_dirname, "config.json"))
|
||||
|
||||
def test_generate(self) -> None:
|
||||
model = LlamaForCausalLM(self._create_test_llama_config())
|
||||
|
@ -54,7 +54,7 @@ class TestPoly(unittest.TestCase):
|
||||
|
||||
# generate some dummy data
|
||||
text = os.__doc__.splitlines()
|
||||
self.assertTrue(len(text) > 10)
|
||||
assert len(text) > 10
|
||||
inputs = tokenizer(text, return_tensors="pt", padding=True)
|
||||
inputs["task_ids"] = torch.arange(len(text)) % n_tasks
|
||||
inputs["labels"] = tokenizer((["A", "B"] * 100)[: len(text)], return_tensors="pt")["input_ids"]
|
||||
@ -72,7 +72,7 @@ class TestPoly(unittest.TestCase):
|
||||
losses.append(loss.item())
|
||||
|
||||
# loss improved by at least 50%
|
||||
self.assertLess(losses[-1], 0.5 * losses[0])
|
||||
assert losses[-1] < (0.5 * losses[0])
|
||||
|
||||
# check that saving and loading works
|
||||
torch.manual_seed(0)
|
||||
@ -84,8 +84,8 @@ class TestPoly(unittest.TestCase):
|
||||
logits_disabled = model(**inputs).logits
|
||||
tokens_disabled = model.generate(**inputs)
|
||||
|
||||
self.assertFalse(torch.allclose(logits_before, logits_disabled, atol=atol, rtol=rtol))
|
||||
self.assertFalse(torch.allclose(tokens_before, tokens_disabled, atol=atol, rtol=rtol))
|
||||
assert not torch.allclose(logits_before, logits_disabled, atol=atol, rtol=rtol)
|
||||
assert not torch.allclose(tokens_before, tokens_disabled, atol=atol, rtol=rtol)
|
||||
|
||||
# saving and loading
|
||||
with tempfile.TemporaryDirectory() as tmp_dir:
|
||||
@ -96,5 +96,5 @@ class TestPoly(unittest.TestCase):
|
||||
torch.manual_seed(0)
|
||||
output_after = loaded(**inputs).logits
|
||||
tokens_after = loaded.generate(**inputs)
|
||||
self.assertTrue(torch.allclose(logits_before, output_after, atol=atol, rtol=rtol))
|
||||
self.assertTrue(torch.allclose(tokens_before, tokens_after, atol=atol, rtol=rtol))
|
||||
assert torch.allclose(logits_before, output_after, atol=atol, rtol=rtol)
|
||||
assert torch.allclose(tokens_before, tokens_after, atol=atol, rtol=rtol)
|
||||
|
@ -152,7 +152,7 @@ class StableDiffusionModelTester(TestCase, PeftCommonTester):
|
||||
merged_output = np.array(model(**dummy_input).images[0]).astype(np.float32)
|
||||
|
||||
# Images are in uint8 drange, so use large atol
|
||||
self.assertTrue(np.allclose(peft_output, merged_output, atol=1.0))
|
||||
assert np.allclose(peft_output, merged_output, atol=1.0)
|
||||
|
||||
@parameterized.expand(
|
||||
PeftStableDiffusionTestConfigManager.get_grid_parameters(
|
||||
@ -184,7 +184,7 @@ class StableDiffusionModelTester(TestCase, PeftCommonTester):
|
||||
merged_output = np.array(model(**dummy_input).images[0]).astype(np.float32)
|
||||
|
||||
# Images are in uint8 drange, so use large atol
|
||||
self.assertTrue(np.allclose(peft_output, merged_output, atol=1.0))
|
||||
assert np.allclose(peft_output, merged_output, atol=1.0)
|
||||
|
||||
@parameterized.expand(
|
||||
PeftStableDiffusionTestConfigManager.get_grid_parameters(
|
||||
@ -210,10 +210,8 @@ class StableDiffusionModelTester(TestCase, PeftCommonTester):
|
||||
model.unet.add_weighted_adapter([unet_adapter_name], [0.5], "weighted_adapter_test")
|
||||
|
||||
# Assert that base adapters config did not change
|
||||
self.assertTrue(
|
||||
asdict(text_encoder_adapter_config) == asdict(model.text_encoder.peft_config[text_encoder_adapter_name])
|
||||
)
|
||||
self.assertTrue(asdict(unet_adapter_config) == asdict(model.unet.peft_config[unet_adapter_name]))
|
||||
assert asdict(text_encoder_adapter_config) == asdict(model.text_encoder.peft_config[text_encoder_adapter_name])
|
||||
assert asdict(unet_adapter_config) == asdict(model.unet.peft_config[unet_adapter_name])
|
||||
|
||||
@parameterized.expand(
|
||||
PeftStableDiffusionTestConfigManager.get_grid_parameters(
|
||||
|
@ -17,6 +17,7 @@
|
||||
import unittest
|
||||
from copy import deepcopy
|
||||
|
||||
import pytest
|
||||
from diffusers import StableDiffusionPipeline
|
||||
from parameterized import parameterized
|
||||
from torch import nn
|
||||
@ -175,7 +176,7 @@ class PeftCustomKwargsTester(unittest.TestCase):
|
||||
layers_to_transform=layers_to_transform,
|
||||
)
|
||||
actual_result = bool(check_target_module_exists(config, key))
|
||||
self.assertEqual(actual_result, expected_result)
|
||||
assert actual_result == expected_result
|
||||
|
||||
def test_module_matching_lora(self):
|
||||
# peft models that have a module matching method to inspect the matching modules to allow
|
||||
@ -197,12 +198,12 @@ class PeftCustomKwargsTester(unittest.TestCase):
|
||||
"h.3.self_attention.query_key_value",
|
||||
"h.4.self_attention.query_key_value",
|
||||
]
|
||||
self.assertEqual(matched, expected) # module lists should match exactly
|
||||
assert matched == expected # module lists should match exactly
|
||||
|
||||
# no overlap with matched modules
|
||||
unmatched = output["unmatched"]
|
||||
for key in expected:
|
||||
self.assertFalse(key in unmatched)
|
||||
assert key not in unmatched
|
||||
|
||||
def test_feedforward_matching_ia3(self):
|
||||
model_id = "hf-internal-testing/tiny-random-T5ForConditionalGeneration"
|
||||
@ -227,14 +228,14 @@ class PeftCustomKwargsTester(unittest.TestCase):
|
||||
"encoder.block.0.layer.1.DenseReluDense.wi",
|
||||
"encoder.block.0.layer.1.DenseReluDense.wo",
|
||||
]
|
||||
self.assertEqual(matched, expected) # not required since we do similar checks above, but just to be sure
|
||||
assert matched == expected # not required since we do similar checks above, but just to be sure
|
||||
module_dict = dict(model.named_modules())
|
||||
for key in matched:
|
||||
module = module_dict[key]
|
||||
if key in expected_feedforward:
|
||||
self.assertTrue(module.is_feedforward)
|
||||
assert module.is_feedforward
|
||||
else: # other IA3 modules should not be marked as feedforward
|
||||
self.assertFalse(module.is_feedforward)
|
||||
assert not module.is_feedforward
|
||||
|
||||
@parameterized.expand(MAYBE_INCLUDE_ALL_LINEAR_LAYERS_TEST_CASES)
|
||||
def test_maybe_include_all_linear_layers_lora(
|
||||
@ -277,7 +278,7 @@ class PeftCustomKwargsTester(unittest.TestCase):
|
||||
# compare the two models and assert that all layers are of the same type
|
||||
for name, actual_module in actual_model.named_modules():
|
||||
expected_module = expected_model_module_dict[name]
|
||||
self.assertEqual(type(actual_module), type(expected_module))
|
||||
assert type(actual_module) == type(expected_module)
|
||||
|
||||
def test_maybe_include_all_linear_layers_ia3_loha(self):
|
||||
model_id, initial_target_modules, expected_target_modules = (
|
||||
@ -302,17 +303,17 @@ class PeftCustomKwargsTester(unittest.TestCase):
|
||||
new_config = _maybe_include_all_linear_layers(config, model)
|
||||
if isinstance(expected_target_modules, list):
|
||||
# assert that expected and actual target_modules have the same items
|
||||
self.assertCountEqual(new_config.target_modules, expected_target_modules)
|
||||
assert set(new_config.target_modules) == set(expected_target_modules)
|
||||
else:
|
||||
self.assertEqual(new_config.target_modules, expected_target_modules)
|
||||
assert new_config.target_modules == expected_target_modules
|
||||
|
||||
def test_maybe_include_all_linear_layers_diffusion(self):
|
||||
model_id = "hf-internal-testing/tiny-stable-diffusion-torch"
|
||||
model = StableDiffusionPipeline.from_pretrained(model_id)
|
||||
config = LoraConfig(base_model_name_or_path=model_id, target_modules="all-linear")
|
||||
with self.assertRaisesRegex(
|
||||
with pytest.raises(
|
||||
ValueError,
|
||||
"Only instances of PreTrainedModel support `target_modules='all-linear'`",
|
||||
match="Only instances of PreTrainedModel support `target_modules='all-linear'`",
|
||||
):
|
||||
model.unet = get_peft_model(model.unet, config)
|
||||
|
||||
@ -336,32 +337,32 @@ class TestTargetedModuleNames(unittest.TestCase):
|
||||
def test_one_targeted_module_regex(self):
|
||||
model = MLP()
|
||||
model = get_peft_model(model, LoraConfig(target_modules="lin0"))
|
||||
self.assertEqual(model.targeted_module_names, ["lin0"])
|
||||
assert model.targeted_module_names == ["lin0"]
|
||||
|
||||
def test_two_targeted_module_regex(self):
|
||||
model = MLP()
|
||||
model = get_peft_model(model, LoraConfig(target_modules="lin.*"))
|
||||
self.assertEqual(model.targeted_module_names, ["lin0", "lin1"])
|
||||
assert model.targeted_module_names == ["lin0", "lin1"]
|
||||
|
||||
def test_one_targeted_module_list(self):
|
||||
model = MLP()
|
||||
model = get_peft_model(model, LoraConfig(target_modules=["lin0"]))
|
||||
self.assertEqual(model.targeted_module_names, ["lin0"])
|
||||
assert model.targeted_module_names == ["lin0"]
|
||||
|
||||
def test_two_targeted_module_list(self):
|
||||
model = MLP()
|
||||
model = get_peft_model(model, LoraConfig(target_modules=["lin0", "lin1"]))
|
||||
self.assertEqual(model.targeted_module_names, ["lin0", "lin1"])
|
||||
assert model.targeted_module_names == ["lin0", "lin1"]
|
||||
|
||||
def test_ia3_targeted_module_regex(self):
|
||||
model = MLP()
|
||||
model = get_peft_model(model, IA3Config(target_modules=".*lin.*", feedforward_modules=".*lin.*"))
|
||||
self.assertEqual(model.targeted_module_names, ["lin0", "lin1"])
|
||||
assert model.targeted_module_names == ["lin0", "lin1"]
|
||||
|
||||
def test_ia3_targeted_module_list(self):
|
||||
model = MLP()
|
||||
model = get_peft_model(model, IA3Config(target_modules=["lin0", "lin1"], feedforward_modules=["lin0", "lin1"]))
|
||||
self.assertEqual(model.targeted_module_names, ["lin0", "lin1"])
|
||||
assert model.targeted_module_names == ["lin0", "lin1"]
|
||||
|
||||
def test_realistic_example(self):
|
||||
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-BloomForCausalLM")
|
||||
@ -370,4 +371,4 @@ class TestTargetedModuleNames(unittest.TestCase):
|
||||
expected = [
|
||||
f"transformer.h.{i}.self_attention.query_key_value" for i in range(len(model.base_model.transformer.h))
|
||||
]
|
||||
self.assertEqual(model.targeted_module_names, expected)
|
||||
assert model.targeted_module_names == expected
|
||||
|
@ -20,6 +20,7 @@ import tempfile
|
||||
from collections import OrderedDict
|
||||
from dataclasses import replace
|
||||
|
||||
import pytest
|
||||
import torch
|
||||
import yaml
|
||||
from diffusers import StableDiffusionPipeline
|
||||
@ -172,27 +173,27 @@ class PeftCommonTester:
|
||||
def check_modelcard(self, tmp_dirname, model):
|
||||
# check the generated README.md
|
||||
filename = os.path.join(tmp_dirname, "README.md")
|
||||
self.assertTrue(os.path.exists(filename))
|
||||
assert os.path.exists(filename)
|
||||
with open(filename, encoding="utf-8") as f:
|
||||
readme = f.read()
|
||||
metainfo = re.search(r"---\n(.*?)\n---", readme, re.DOTALL).group(1)
|
||||
dct = yaml.safe_load(metainfo)
|
||||
self.assertEqual(dct["library_name"], "peft")
|
||||
assert dct["library_name"] == "peft"
|
||||
|
||||
if hasattr(model, "config"):
|
||||
self.assertEqual(dct["base_model"], model.config.to_dict()["_name_or_path"])
|
||||
assert dct["base_model"] == model.config.to_dict()["_name_or_path"]
|
||||
else: # a custom model
|
||||
self.assertTrue("base_model" not in dct)
|
||||
assert "base_model" not in dct
|
||||
|
||||
def check_config_json(self, tmp_dirname, model):
|
||||
# check the generated config.json
|
||||
filename = os.path.join(tmp_dirname, "adapter_config.json")
|
||||
self.assertTrue(os.path.exists(filename))
|
||||
assert os.path.exists(filename)
|
||||
with open(filename, encoding="utf-8") as f:
|
||||
config = json.load(f)
|
||||
|
||||
if hasattr(model, "config"): # custom models don't have a config attribute
|
||||
self.assertEqual(config["base_model_name_or_path"], model.config.to_dict()["_name_or_path"])
|
||||
assert config["base_model_name_or_path"] == model.config.to_dict()["_name_or_path"]
|
||||
|
||||
def _test_model_attr(self, model_id, config_cls, config_kwargs):
|
||||
model = self.transformers_class.from_pretrained(model_id)
|
||||
@ -202,9 +203,9 @@ class PeftCommonTester:
|
||||
)
|
||||
model = get_peft_model(model, config)
|
||||
|
||||
self.assertTrue(hasattr(model, "save_pretrained"))
|
||||
self.assertTrue(hasattr(model, "from_pretrained"))
|
||||
self.assertTrue(hasattr(model, "push_to_hub"))
|
||||
assert hasattr(model, "save_pretrained")
|
||||
assert hasattr(model, "from_pretrained")
|
||||
assert hasattr(model, "push_to_hub")
|
||||
|
||||
def _test_adapter_name(self, model_id, config_cls, config_kwargs):
|
||||
model = self.transformers_class.from_pretrained(model_id)
|
||||
@ -219,7 +220,7 @@ class PeftCommonTester:
|
||||
correctly_converted = True
|
||||
break
|
||||
|
||||
self.assertTrue(correctly_converted)
|
||||
assert correctly_converted
|
||||
|
||||
def _test_prepare_for_training(self, model_id, config_cls, config_kwargs):
|
||||
model = self.transformers_class.from_pretrained(model_id).to(self.torch_device)
|
||||
@ -232,14 +233,14 @@ class PeftCommonTester:
|
||||
dummy_input = self.prepare_inputs_for_testing()
|
||||
dummy_output = model.get_input_embeddings()(dummy_input["input_ids"])
|
||||
|
||||
self.assertFalse(dummy_output.requires_grad)
|
||||
assert not dummy_output.requires_grad
|
||||
|
||||
# load with `prepare_model_for_int8_training`
|
||||
model = self.transformers_class.from_pretrained(model_id).to(self.torch_device)
|
||||
model = prepare_model_for_int8_training(model)
|
||||
|
||||
for param in model.parameters():
|
||||
self.assertFalse(param.requires_grad)
|
||||
assert not param.requires_grad
|
||||
|
||||
config = config_cls(
|
||||
base_model_name_or_path=model_id,
|
||||
@ -260,7 +261,7 @@ class PeftCommonTester:
|
||||
dummy_input = self.prepare_inputs_for_testing()
|
||||
dummy_output = model.get_input_embeddings()(dummy_input["input_ids"])
|
||||
|
||||
self.assertTrue(dummy_output.requires_grad)
|
||||
assert dummy_output.requires_grad
|
||||
|
||||
def _test_save_pretrained(self, model_id, config_cls, config_kwargs, safe_serialization=True):
|
||||
# ensure that the weights are randomly initialized
|
||||
@ -301,25 +302,23 @@ class PeftCommonTester:
|
||||
|
||||
# check if tensors equal
|
||||
for key in state_dict.keys():
|
||||
self.assertTrue(
|
||||
torch.allclose(
|
||||
assert torch.allclose(
|
||||
state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
|
||||
)
|
||||
)
|
||||
|
||||
target_adapter_filename = "adapter_model.safetensors" if safe_serialization else "adapter_model.bin"
|
||||
|
||||
# check if `adapter_model.safetensors` is present
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dirname, target_adapter_filename)))
|
||||
assert os.path.exists(os.path.join(tmp_dirname, target_adapter_filename))
|
||||
|
||||
# check if `adapter_config.json` is present
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_config.json")))
|
||||
assert os.path.exists(os.path.join(tmp_dirname, "adapter_config.json"))
|
||||
|
||||
# check if `model.safetensors` is not present
|
||||
self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "model.safetensors")))
|
||||
assert not os.path.exists(os.path.join(tmp_dirname, "model.safetensors"))
|
||||
|
||||
# check if `config.json` is not present
|
||||
self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "config.json")))
|
||||
assert not os.path.exists(os.path.join(tmp_dirname, "config.json"))
|
||||
|
||||
self.check_modelcard(tmp_dirname, model)
|
||||
self.check_config_json(tmp_dirname, model)
|
||||
@ -376,33 +375,31 @@ class PeftCommonTester:
|
||||
state_dict_from_pretrained = get_state_dict(model_from_pretrained, unwrap_compiled=True)
|
||||
|
||||
# check if same keys
|
||||
self.assertEqual(state_dict.keys(), state_dict_from_pretrained.keys())
|
||||
assert state_dict.keys() == state_dict_from_pretrained.keys()
|
||||
|
||||
# check if tensors equal
|
||||
for key in state_dict.keys():
|
||||
self.assertTrue(
|
||||
torch.allclose(
|
||||
assert torch.allclose(
|
||||
state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
|
||||
)
|
||||
)
|
||||
|
||||
target_adapter_filename = "adapter_model.safetensors" if safe_serialization else "adapter_model.bin"
|
||||
|
||||
# check if `adapter_model.safetensors` is present
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dirname, target_adapter_filename)))
|
||||
self.assertTrue(os.path.exists(os.path.join(new_adapter_dir, target_adapter_filename)))
|
||||
assert os.path.exists(os.path.join(tmp_dirname, target_adapter_filename))
|
||||
assert os.path.exists(os.path.join(new_adapter_dir, target_adapter_filename))
|
||||
|
||||
# check if `adapter_config.json` is present
|
||||
self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_config.json")))
|
||||
self.assertTrue(os.path.exists(os.path.join(new_adapter_dir, "adapter_config.json")))
|
||||
assert os.path.exists(os.path.join(tmp_dirname, "adapter_config.json"))
|
||||
assert os.path.exists(os.path.join(new_adapter_dir, "adapter_config.json"))
|
||||
|
||||
# check if `model.safetensors` is not present
|
||||
self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "model.safetensors")))
|
||||
self.assertFalse(os.path.exists(os.path.join(new_adapter_dir, "model.safetensors")))
|
||||
assert not os.path.exists(os.path.join(tmp_dirname, "model.safetensors"))
|
||||
assert not os.path.exists(os.path.join(new_adapter_dir, "model.safetensors"))
|
||||
|
||||
# check if `config.json` is not present
|
||||
self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "config.json")))
|
||||
self.assertFalse(os.path.exists(os.path.join(new_adapter_dir, "config.json")))
|
||||
assert not os.path.exists(os.path.join(tmp_dirname, "config.json"))
|
||||
assert not os.path.exists(os.path.join(new_adapter_dir, "config.json"))
|
||||
|
||||
self.check_modelcard(tmp_dirname, model)
|
||||
self.check_config_json(tmp_dirname, model)
|
||||
@ -413,8 +410,8 @@ class PeftCommonTester:
|
||||
model_from_pretrained = self.transformers_class.from_pretrained(model_id)
|
||||
model_from_pretrained = PeftModel.from_pretrained(model_from_pretrained, tmp_dirname)
|
||||
|
||||
self.assertTrue("default" in model_from_pretrained.peft_config.keys())
|
||||
self.assertTrue("new_adapter" not in model_from_pretrained.peft_config.keys())
|
||||
assert "default" in model_from_pretrained.peft_config.keys()
|
||||
assert "new_adapter" not in model_from_pretrained.peft_config.keys()
|
||||
|
||||
def _test_from_pretrained_config_construction(self, model_id, config_cls, config_kwargs):
|
||||
model = self.transformers_class.from_pretrained(model_id)
|
||||
@ -430,8 +427,8 @@ class PeftCommonTester:
|
||||
model_from_pretrained, tmp_dirname, is_trainable=False, config=config
|
||||
)
|
||||
|
||||
self.assertTrue(model_from_pretrained.peft_config["default"].inference_mode)
|
||||
self.assertIs(model_from_pretrained.peft_config["default"], config)
|
||||
assert model_from_pretrained.peft_config["default"].inference_mode
|
||||
assert model_from_pretrained.peft_config["default"] is config
|
||||
|
||||
def _test_merge_layers_fp16(self, model_id, config_cls, config_kwargs):
|
||||
if config_cls not in (LoraConfig, IA3Config):
|
||||
@ -479,7 +476,7 @@ class PeftCommonTester:
|
||||
model = model.merge_and_unload()
|
||||
logits_merged = model(**dummy_input)[0]
|
||||
|
||||
self.assertTrue(torch.allclose(logits_unmerged, logits_merged, atol=1e-3, rtol=1e-3))
|
||||
assert torch.allclose(logits_unmerged, logits_merged, atol=1e-3, rtol=1e-3)
|
||||
|
||||
model = self.transformers_class.from_pretrained(model_id)
|
||||
config = config_cls(
|
||||
@ -493,26 +490,20 @@ class PeftCommonTester:
|
||||
if "lora_A" in name or "ia3" in name or "lora_E" in name or "lora_B" in name:
|
||||
module.data[0] = torch.nan
|
||||
|
||||
with self.assertRaises(ValueError) as error_context:
|
||||
with pytest.raises(
|
||||
ValueError, match="NaNs detected in the merged weights. The adapter default seems to be broken"
|
||||
):
|
||||
model = model.merge_and_unload(safe_merge=True)
|
||||
|
||||
self.assertEqual(
|
||||
str(error_context.exception),
|
||||
"NaNs detected in the merged weights. The adapter default seems to be broken",
|
||||
)
|
||||
|
||||
for name, module in model.named_parameters():
|
||||
if "lora_A" in name or "ia3" in name or "lora_E" in name or "lora_B" in name:
|
||||
module.data[0] = torch.inf
|
||||
|
||||
with self.assertRaises(ValueError) as error_context:
|
||||
with pytest.raises(
|
||||
ValueError, match="NaNs detected in the merged weights. The adapter default seems to be broken"
|
||||
):
|
||||
model = model.merge_and_unload(safe_merge=True)
|
||||
|
||||
self.assertEqual(
|
||||
str(error_context.exception),
|
||||
"NaNs detected in the merged weights. The adapter default seems to be broken",
|
||||
)
|
||||
|
||||
def _test_merge_layers(self, model_id, config_cls, config_kwargs):
|
||||
if issubclass(config_cls, PromptLearningConfig):
|
||||
return
|
||||
@ -543,15 +534,15 @@ class PeftCommonTester:
|
||||
if (config.peft_type == "IA3") and (model_id == "Conv2d"):
|
||||
# for some reason, the IA³ Conv2d introduces a larger error
|
||||
atol, rtol = 0.3, 0.01
|
||||
self.assertTrue(torch.allclose(logits, logits_merged, atol=atol, rtol=rtol))
|
||||
self.assertTrue(torch.allclose(logits, logits_unmerged, atol=atol, rtol=rtol))
|
||||
self.assertTrue(torch.allclose(logits, logits_merged_unloaded, atol=atol, rtol=rtol))
|
||||
assert torch.allclose(logits, logits_merged, atol=atol, rtol=rtol)
|
||||
assert torch.allclose(logits, logits_unmerged, atol=atol, rtol=rtol)
|
||||
assert torch.allclose(logits, logits_merged_unloaded, atol=atol, rtol=rtol)
|
||||
|
||||
# For this test to work, weights should not be initialized to identity transform (e.g.
|
||||
# init_lora_weights should be False).
|
||||
transformers_model = self.transformers_class.from_pretrained(model_id).to(self.torch_device)
|
||||
logits_transformers = transformers_model(**dummy_input)[0]
|
||||
self.assertFalse(torch.allclose(logits_merged, logits_transformers, atol=1e-10, rtol=1e-10))
|
||||
assert not torch.allclose(logits_merged, logits_transformers, atol=1e-10, rtol=1e-10)
|
||||
|
||||
# test that the logits are identical after a save-load-roundtrip
|
||||
if hasattr(model, "save_pretrained"):
|
||||
@ -564,7 +555,7 @@ class PeftCommonTester:
|
||||
model_from_pretrained = pickle.loads(pickle.dumps(model))
|
||||
|
||||
logits_merged_from_pretrained = model_from_pretrained(**dummy_input)[0]
|
||||
self.assertTrue(torch.allclose(logits_merged, logits_merged_from_pretrained, atol=atol, rtol=rtol))
|
||||
assert torch.allclose(logits_merged, logits_merged_from_pretrained, atol=atol, rtol=rtol)
|
||||
|
||||
def _test_merge_layers_multi(self, model_id, config_cls, config_kwargs):
|
||||
supported_peft_types = [PeftType.LORA, PeftType.LOHA, PeftType.LOKR, PeftType.IA3, PeftType.OFT]
|
||||
@ -598,14 +589,14 @@ class PeftCommonTester:
|
||||
with torch.inference_mode():
|
||||
logits_adapter_2 = model(**dummy_input)[0]
|
||||
|
||||
self.assertFalse(torch.allclose(logits_adapter_1, logits_adapter_2, atol=1e-3, rtol=1e-3))
|
||||
assert not torch.allclose(logits_adapter_1, logits_adapter_2, atol=1e-3, rtol=1e-3)
|
||||
|
||||
model.set_adapter("default")
|
||||
|
||||
with torch.inference_mode():
|
||||
logits_adapter_1_after_set = model(**dummy_input)[0]
|
||||
|
||||
self.assertTrue(torch.allclose(logits_adapter_1_after_set, logits_adapter_1, atol=1e-3, rtol=1e-3))
|
||||
assert torch.allclose(logits_adapter_1_after_set, logits_adapter_1, atol=1e-3, rtol=1e-3)
|
||||
|
||||
model_copy = copy.deepcopy(model)
|
||||
model_copy_2 = copy.deepcopy(model)
|
||||
@ -614,22 +605,22 @@ class PeftCommonTester:
|
||||
with torch.inference_mode():
|
||||
logits_merged_all = model_merged_all(**dummy_input)[0]
|
||||
|
||||
self.assertFalse(torch.allclose(logits_merged_all, logits_adapter_2, atol=1e-3, rtol=1e-3))
|
||||
self.assertFalse(torch.allclose(logits_merged_all, logits_adapter_1, atol=1e-3, rtol=1e-3))
|
||||
assert not torch.allclose(logits_merged_all, logits_adapter_2, atol=1e-3, rtol=1e-3)
|
||||
assert not torch.allclose(logits_merged_all, logits_adapter_1, atol=1e-3, rtol=1e-3)
|
||||
|
||||
model_merged_adapter_2 = model_copy.merge_and_unload(adapter_names=["adapter-2"])
|
||||
|
||||
with torch.inference_mode():
|
||||
logits_merged_adapter_2 = model_merged_adapter_2(**dummy_input)[0]
|
||||
|
||||
self.assertTrue(torch.allclose(logits_merged_adapter_2, logits_adapter_2, atol=1e-3, rtol=1e-3))
|
||||
assert torch.allclose(logits_merged_adapter_2, logits_adapter_2, atol=1e-3, rtol=1e-3)
|
||||
|
||||
model_merged_adapter_default = model_copy_2.merge_and_unload(adapter_names=["default"])
|
||||
|
||||
with torch.inference_mode():
|
||||
logits_merged_adapter_default = model_merged_adapter_default(**dummy_input)[0]
|
||||
|
||||
self.assertTrue(torch.allclose(logits_merged_adapter_default, logits_adapter_1, atol=1e-3, rtol=1e-3))
|
||||
assert torch.allclose(logits_merged_adapter_default, logits_adapter_1, atol=1e-3, rtol=1e-3)
|
||||
|
||||
def _test_merge_layers_is_idempotent(self, model_id, config_cls, config_kwargs):
|
||||
if ("gpt2" in model_id.lower()) and (config_cls != LoraConfig):
|
||||
@ -650,11 +641,11 @@ class PeftCommonTester:
|
||||
|
||||
# merging again should not change anything
|
||||
# also check warning:
|
||||
with self.assertWarnsRegex(UserWarning, "All adapters are already merged, nothing to do"):
|
||||
with pytest.warns(UserWarning, match="All adapters are already merged, nothing to do"):
|
||||
model.merge_adapter()
|
||||
logits_1 = model(**self.prepare_inputs_for_testing())[0]
|
||||
|
||||
self.assertTrue(torch.allclose(logits_0, logits_1, atol=1e-6, rtol=1e-6))
|
||||
assert torch.allclose(logits_0, logits_1, atol=1e-6, rtol=1e-6)
|
||||
|
||||
def _test_generate(self, model_id, config_cls, config_kwargs):
|
||||
model = self.transformers_class.from_pretrained(model_id)
|
||||
@ -681,7 +672,7 @@ class PeftCommonTester:
|
||||
|
||||
inputs = self.prepare_inputs_for_testing()
|
||||
if raises_err:
|
||||
with self.assertRaises(TypeError):
|
||||
with pytest.raises(TypeError):
|
||||
# check if `generate` raises an error if positional arguments are passed
|
||||
_ = model.generate(inputs["input_ids"])
|
||||
else:
|
||||
@ -719,7 +710,7 @@ class PeftCommonTester:
|
||||
model = get_peft_model(model, config)
|
||||
model = model.half()
|
||||
|
||||
self.assertEqual(model.base_model_torch_dtype, torch.float16)
|
||||
assert model.base_model_torch_dtype == torch.float16
|
||||
|
||||
def _test_training(self, model_id, config_cls, config_kwargs):
|
||||
if issubclass(config_cls, PromptLearningConfig):
|
||||
@ -745,9 +736,9 @@ class PeftCommonTester:
|
||||
parameter_prefix = model.prefix
|
||||
for n, param in model.named_parameters():
|
||||
if (parameter_prefix in n) or ("modules_to_save" in n):
|
||||
self.assertIsNotNone(param.grad)
|
||||
assert param.grad is not None
|
||||
else:
|
||||
self.assertIsNone(param.grad)
|
||||
assert param.grad is None
|
||||
|
||||
def _test_inference_safetensors(self, model_id, config_cls, config_kwargs):
|
||||
if (config_cls == PrefixTuningConfig) and ("deberta" in model_id.lower()):
|
||||
@ -778,14 +769,14 @@ class PeftCommonTester:
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dirname:
|
||||
model.save_pretrained(tmp_dirname, safe_serialization=True)
|
||||
self.assertTrue("adapter_model.safetensors" in os.listdir(tmp_dirname))
|
||||
self.assertTrue("adapter_model.bin" not in os.listdir(tmp_dirname))
|
||||
assert "adapter_model.safetensors" in os.listdir(tmp_dirname)
|
||||
assert "adapter_model.bin" not in os.listdir(tmp_dirname)
|
||||
|
||||
model_from_pretrained = self.transformers_class.from_pretrained(model_id)
|
||||
model_from_pretrained = PeftModel.from_pretrained(model_from_pretrained, tmp_dirname).to(self.torch_device)
|
||||
|
||||
logits_from_pretrained = model_from_pretrained(**inputs)[0][0]
|
||||
self.assertTrue(torch.allclose(logits, logits_from_pretrained, atol=1e-4, rtol=1e-4))
|
||||
assert torch.allclose(logits, logits_from_pretrained, atol=1e-4, rtol=1e-4)
|
||||
|
||||
def _test_training_layer_indexing(self, model_id, config_cls, config_kwargs):
|
||||
if config_cls not in (LoraConfig,):
|
||||
@ -813,10 +804,10 @@ class PeftCommonTester:
|
||||
|
||||
for n, param in model.named_parameters():
|
||||
if "lora" in n:
|
||||
self.assertIsNotNone(param.grad)
|
||||
assert param.grad is not None
|
||||
nb_trainable += 1
|
||||
else:
|
||||
self.assertIsNone(param.grad)
|
||||
assert param.grad is None
|
||||
|
||||
with tempfile.TemporaryDirectory() as tmp_dirname:
|
||||
model.save_pretrained(tmp_dirname)
|
||||
@ -825,7 +816,7 @@ class PeftCommonTester:
|
||||
model_from_pretrained = PeftModel.from_pretrained(model_from_pretrained, tmp_dirname).to(self.torch_device)
|
||||
|
||||
logits_from_pretrained = model_from_pretrained(**inputs)[0][0]
|
||||
self.assertTrue(torch.allclose(logits, logits_from_pretrained, atol=1e-4, rtol=1e-4))
|
||||
assert torch.allclose(logits, logits_from_pretrained, atol=1e-4, rtol=1e-4)
|
||||
|
||||
model = self.transformers_class.from_pretrained(model_id)
|
||||
config = config_cls(
|
||||
@ -839,7 +830,7 @@ class PeftCommonTester:
|
||||
if "lora" in n:
|
||||
nb_trainable_all += 1
|
||||
|
||||
self.assertLess(nb_trainable, nb_trainable_all)
|
||||
assert nb_trainable < nb_trainable_all
|
||||
|
||||
def _test_training_gradient_checkpointing(self, model_id, config_cls, config_kwargs):
|
||||
if issubclass(config_cls, PromptLearningConfig):
|
||||
@ -872,9 +863,9 @@ class PeftCommonTester:
|
||||
parameter_prefix = "ia3" if config_cls == IA3Config else "lora"
|
||||
for n, param in model.named_parameters():
|
||||
if parameter_prefix in n:
|
||||
self.assertIsNotNone(param.grad)
|
||||
assert param.grad is not None
|
||||
else:
|
||||
self.assertIsNone(param.grad)
|
||||
assert param.grad is None
|
||||
|
||||
def _test_peft_model_device_map(self, model_id, config_cls, config_kwargs):
|
||||
if config_cls not in (LoraConfig,):
|
||||
@ -919,7 +910,7 @@ class PeftCommonTester:
|
||||
|
||||
# check that prompt encoder has grads
|
||||
for param in model.prompt_encoder.parameters():
|
||||
self.assertIsNotNone(param.grad)
|
||||
assert param.grad is not None
|
||||
|
||||
def _test_delete_adapter(self, model_id, config_cls, config_kwargs):
|
||||
supported_peft_types = [PeftType.LORA, PeftType.LOHA, PeftType.LOKR, PeftType.IA3, PeftType.OFT]
|
||||
@ -939,20 +930,20 @@ class PeftCommonTester:
|
||||
model.set_adapter(adapter_to_delete)
|
||||
model = model.to(self.torch_device)
|
||||
model.delete_adapter(adapter_to_delete)
|
||||
self.assertFalse(adapter_to_delete in model.peft_config)
|
||||
self.assertEqual(model.active_adapters, ["default"])
|
||||
assert adapter_to_delete not in model.peft_config
|
||||
assert model.active_adapters == ["default"]
|
||||
|
||||
key_list = [key for key, _ in model.named_modules()]
|
||||
for key in key_list:
|
||||
_, target, _ = _get_submodules(model, key)
|
||||
attributes_to_check = getattr(target, "adapter_layer_names", []) + getattr(target, "other_param_names", [])
|
||||
for attr in attributes_to_check:
|
||||
self.assertFalse(adapter_to_delete in getattr(target, attr))
|
||||
assert adapter_to_delete not in getattr(target, attr)
|
||||
|
||||
# check that we can also delete the last remaining adapter
|
||||
model.delete_adapter("default")
|
||||
self.assertFalse("default" in model.peft_config)
|
||||
self.assertEqual(model.active_adapters, [])
|
||||
assert "default" not in model.peft_config
|
||||
assert model.active_adapters == []
|
||||
|
||||
input = self.prepare_inputs_for_testing()
|
||||
# note: we cannot call model(**input) because PeftModel always expects there to be at least one adapter
|
||||
@ -977,20 +968,20 @@ class PeftCommonTester:
|
||||
# "delete_me" is added but not activated
|
||||
model = model.to(self.torch_device)
|
||||
model.delete_adapter(adapter_to_delete)
|
||||
self.assertFalse(adapter_to_delete in model.peft_config)
|
||||
self.assertEqual(model.active_adapters, ["default"])
|
||||
assert adapter_to_delete not in model.peft_config
|
||||
assert model.active_adapters == ["default"]
|
||||
|
||||
key_list = [key for key, _ in model.named_modules()]
|
||||
for key in key_list:
|
||||
_, target, _ = _get_submodules(model, key)
|
||||
attributes_to_check = getattr(target, "adapter_layer_names", []) + getattr(target, "other_param_names", [])
|
||||
for attr in attributes_to_check:
|
||||
self.assertFalse(adapter_to_delete in getattr(target, attr))
|
||||
assert adapter_to_delete not in getattr(target, attr)
|
||||
|
||||
# check that we can also delete the last remaining adapter
|
||||
model.delete_adapter("default")
|
||||
self.assertFalse("default" in model.peft_config)
|
||||
self.assertEqual(model.active_adapters, [])
|
||||
assert "default" not in model.peft_config
|
||||
assert model.active_adapters == []
|
||||
|
||||
input = self.prepare_inputs_for_testing()
|
||||
# note: we cannot call model(**input) because PeftModel always expects there to be at least one adapter
|
||||
@ -1006,7 +997,7 @@ class PeftCommonTester:
|
||||
model = model.to(self.torch_device)
|
||||
|
||||
if config.peft_type not in ("LORA", "ADALORA", "IA3"):
|
||||
with self.assertRaises(AttributeError):
|
||||
with pytest.raises(AttributeError):
|
||||
model = model.unload()
|
||||
else:
|
||||
dummy_input = self.prepare_inputs_for_testing()
|
||||
@ -1019,8 +1010,8 @@ class PeftCommonTester:
|
||||
model = model.unload()
|
||||
logits_unload = model(**dummy_input)[0]
|
||||
|
||||
self.assertFalse(torch.allclose(logits_with_adapter, logits_unload, atol=1e-10, rtol=1e-10))
|
||||
self.assertTrue(torch.allclose(logits_transformers, logits_unload, atol=1e-4, rtol=1e-4))
|
||||
assert not torch.allclose(logits_with_adapter, logits_unload, atol=1e-10, rtol=1e-10)
|
||||
assert torch.allclose(logits_transformers, logits_unload, atol=1e-4, rtol=1e-4)
|
||||
|
||||
def _test_weighted_combination_of_adapters(self, model_id, config_cls, config_kwargs):
|
||||
if issubclass(config_cls, AdaLoraConfig):
|
||||
@ -1116,7 +1107,7 @@ class PeftCommonTester:
|
||||
combination_type="linear",
|
||||
)
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
with pytest.raises(ValueError):
|
||||
model.add_weighted_adapter(
|
||||
adapter_list[1:],
|
||||
weight_list[1:],
|
||||
@ -1124,7 +1115,7 @@ class PeftCommonTester:
|
||||
combination_type="linear",
|
||||
)
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
with pytest.raises(ValueError):
|
||||
model.add_weighted_adapter(
|
||||
adapter_list[1:],
|
||||
weight_list[1:],
|
||||
@ -1133,7 +1124,7 @@ class PeftCommonTester:
|
||||
density=0.5,
|
||||
)
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
with pytest.raises(ValueError):
|
||||
model.add_weighted_adapter(
|
||||
adapter_list[1:],
|
||||
weight_list[1:],
|
||||
@ -1142,7 +1133,7 @@ class PeftCommonTester:
|
||||
density=0.5,
|
||||
)
|
||||
|
||||
with self.assertRaises(ValueError):
|
||||
with pytest.raises(ValueError):
|
||||
model.add_weighted_adapter(
|
||||
adapter_list[1:],
|
||||
weight_list[1:],
|
||||
@ -1165,7 +1156,7 @@ class PeftCommonTester:
|
||||
"multi_adapter_dare_ties_reweighting",
|
||||
]
|
||||
for new_adapter in new_adapters:
|
||||
self.assertTrue(new_adapter in model.peft_config)
|
||||
assert new_adapter in model.peft_config
|
||||
|
||||
key_list = [key for key, _ in model.named_modules()]
|
||||
for key in key_list:
|
||||
@ -1175,23 +1166,21 @@ class PeftCommonTester:
|
||||
if "single" in adapter_name:
|
||||
new_delta_weight = target.get_delta_weight(adapter_name)
|
||||
weighted_original_delta_weights = target.get_delta_weight(adapter_list[0]) * weight_list[0]
|
||||
self.assertTrue(
|
||||
torch.allclose(new_delta_weight, weighted_original_delta_weights, atol=1e-4, rtol=1e-4)
|
||||
)
|
||||
assert torch.allclose(new_delta_weight, weighted_original_delta_weights, atol=1e-4, rtol=1e-4)
|
||||
elif "svd" in adapter_name:
|
||||
self.assertTrue(target.r[adapter_name] == 20)
|
||||
assert target.r[adapter_name] == 20
|
||||
elif "linear" in adapter_name:
|
||||
self.assertTrue(target.r[adapter_name] == 8)
|
||||
assert target.r[adapter_name] == 8
|
||||
elif "cat" in adapter_name:
|
||||
self.assertTrue(target.r[adapter_name] == 28)
|
||||
assert target.r[adapter_name] == 28
|
||||
|
||||
dummy_input = self.prepare_inputs_for_testing()
|
||||
model.eval()
|
||||
for adapter_name in new_adapters:
|
||||
# ensuring new adapters pass the forward loop
|
||||
model.set_adapter(adapter_name)
|
||||
self.assertTrue(model.active_adapter == adapter_name)
|
||||
self.assertTrue(model.active_adapters == [adapter_name])
|
||||
assert model.active_adapter == adapter_name
|
||||
assert model.active_adapters == [adapter_name]
|
||||
model(**dummy_input)[0]
|
||||
|
||||
def _test_disable_adapter(self, model_id, config_cls, config_kwargs):
|
||||
@ -1243,9 +1232,9 @@ class PeftCommonTester:
|
||||
# must be False
|
||||
if isinstance(peft_model, StableDiffusionPipeline):
|
||||
# for SD, check that most pixels have different values
|
||||
self.assertTrue((output_before != output_peft).float().mean() > 0.8)
|
||||
assert (output_before != output_peft).float().mean() > 0.8
|
||||
else:
|
||||
self.assertFalse(torch.allclose(output_before, output_peft))
|
||||
assert not torch.allclose(output_before, output_peft)
|
||||
|
||||
# output with DISABLED ADAPTER
|
||||
if isinstance(peft_model, StableDiffusionPipeline):
|
||||
@ -1253,11 +1242,11 @@ class PeftCommonTester:
|
||||
with peft_model.text_encoder.disable_adapter():
|
||||
output_peft_disabled = get_output(peft_model)
|
||||
# for SD, very rarely, a pixel can differ
|
||||
self.assertTrue((output_before != output_peft_disabled).float().mean() < 1e-4)
|
||||
assert (output_before != output_peft_disabled).float().mean() < 1e-4
|
||||
else:
|
||||
with peft_model.disable_adapter():
|
||||
output_peft_disabled = get_output(peft_model)
|
||||
self.assertTrue(torch.allclose(output_before, output_peft_disabled, atol=1e-6, rtol=1e-6))
|
||||
assert torch.allclose(output_before, output_peft_disabled, atol=1e-6, rtol=1e-6)
|
||||
|
||||
# TODO: add tests to check if disabling adapters works after calling merge_adapter
|
||||
|
||||
@ -1276,12 +1265,12 @@ class PeftCommonTester:
|
||||
|
||||
model = self.transformers_class.from_pretrained(model_id)
|
||||
model = get_peft_model(model, config, "adapter0")
|
||||
with self.assertRaises(ValueError):
|
||||
with pytest.raises(ValueError):
|
||||
model.add_adapter("adapter1", replace(config, r=20))
|
||||
|
||||
# (superficial) test that the model is not left in a half-initialized state when adding an adapter fails
|
||||
self.assertFalse("adapter1" in model.peft_config)
|
||||
self.assertFalse("adapter1" in model.base_model.peft_config)
|
||||
assert "adapter1" not in model.peft_config
|
||||
assert "adapter1" not in model.base_model.peft_config
|
||||
|
||||
def _test_passing_input_embeds_works(self, test_name, model_id, config_cls, config_kwargs):
|
||||
# https://github.com/huggingface/peft/issues/727
|
||||
|
Reference in New Issue
Block a user