Mirror of https://github.com/huggingface/peft.git (synced 2025-10-20 15:33:48 +08:00)
TST Use plain asserts in tests (#1448)
Use pytest style asserts instead of unittest methods. Use `pytest.raises` and `pytest.warns` where suitable.
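The conversions below follow a small set of mechanical patterns. As a rough before/after sketch of what the diff does throughout (a hypothetical test class, not code taken from this commit):

import unittest

import pytest


class ExampleTester(unittest.TestCase):
    def test_unittest_style(self):
        # the style being removed
        value = 4
        self.assertTrue(value > 0)
        self.assertFalse(value < 0)
        self.assertEqual(value, 4)
        with self.assertRaisesRegex(ValueError, expected_regex="bad value"):
            raise ValueError("bad value")

    def test_plain_pytest_style(self):
        # the same checks rewritten as plain asserts and pytest.raises
        value = 4
        assert value > 0
        assert not value < 0
        assert value == 4
        with pytest.raises(ValueError, match="bad value"):
            raise ValueError("bad value")

Plain asserts still produce readable failure messages because pytest rewrites them to show the compared values.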
@@ -15,6 +15,7 @@ extend-select = [
     "I", # Import sorting
     "UP", # Pyupgrade upgrades
     "W", # PEP8 warnings
+    "PT009", # Pytest assertions
 ]
 ignore = [
     "C901", # Function too complex
@@ -248,7 +248,7 @@ class RegressionTester(unittest.TestCase):
         base_model = self.load_base_model()
         model = PeftModel.from_pretrained(base_model, os.path.join(path, version))
         output = self.get_output(model)
-        self.assertTrue(torch.allclose(output_loaded, output, atol=self.tol, rtol=self.tol))
+        assert torch.allclose(output_loaded, output, atol=self.tol, rtol=self.tol)

     def get_output(self, model):
         raise NotImplementedError
@@ -73,9 +73,9 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
         config = AdaptionPromptConfig(adapter_layers=1, adapter_len=4)
         model = get_peft_model(model, config)

-        self.assertTrue(hasattr(model, "save_pretrained"))
-        self.assertTrue(hasattr(model, "from_pretrained"))
-        self.assertTrue(hasattr(model, "push_to_hub"))
+        assert hasattr(model, "save_pretrained")
+        assert hasattr(model, "from_pretrained")
+        assert hasattr(model, "push_to_hub")

     def test_prepare_for_training(self) -> None:
         model = LlamaForCausalLM(self._create_test_llama_config())
@@ -86,7 +86,7 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
         dummy_input = torch.LongTensor([[1, 1, 1]]).to(self.torch_device)
         dummy_output = model.get_input_embeddings()(dummy_input)

-        self.assertTrue(not dummy_output.requires_grad)
+        assert not dummy_output.requires_grad

     def test_prepare_for_int8_training(self) -> None:
         model = LlamaForCausalLM(self._create_test_llama_config())
@@ -94,7 +94,7 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
         model = model.to(self.torch_device)

         for param in model.parameters():
-            self.assertTrue(not param.requires_grad)
+            assert not param.requires_grad

         config = AdaptionPromptConfig(adapter_layers=1, adapter_len=4, task_type="CAUSAL_LM")
         model = get_peft_model(model, config)
@@ -112,7 +112,7 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
         dummy_input = torch.LongTensor([[1, 1, 1]]).to(self.torch_device)
         dummy_output = model.get_input_embeddings()(dummy_input)

-        self.assertTrue(dummy_output.requires_grad)
+        assert dummy_output.requires_grad

     def test_save_pretrained_regression(self) -> None:
         seed = 420
@@ -134,30 +134,28 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
             state_dict_from_pretrained = get_peft_model_state_dict(model_from_pretrained)

             # check if same keys
-            self.assertEqual(state_dict.keys(), state_dict_from_pretrained.keys())
+            assert state_dict.keys() == state_dict_from_pretrained.keys()

             # Check that the number of saved parameters is 4 -- 2 layers of (tokens and gate).
-            self.assertEqual(len(list(state_dict.keys())), 4)
+            assert len(state_dict) == 4

             # check if tensors equal
             for key in state_dict.keys():
-                self.assertTrue(
-                    torch.allclose(
-                        state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
-                    )
-                )
+                assert torch.allclose(
+                    state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
+                )

             # check if `adapter_model.bin` is present
-            self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_model.bin")))
+            assert os.path.exists(os.path.join(tmp_dirname, "adapter_model.bin"))

             # check if `adapter_config.json` is present
-            self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_config.json")))
+            assert os.path.exists(os.path.join(tmp_dirname, "adapter_config.json"))

             # check if `model.safetensors` is not present
-            self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "model.safetensors")))
+            assert not os.path.exists(os.path.join(tmp_dirname, "model.safetensors"))

             # check if `config.json` is not present
-            self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "config.json")))
+            assert not os.path.exists(os.path.join(tmp_dirname, "config.json"))

     def test_save_pretrained(self) -> None:
         seed = 420
@@ -179,30 +177,28 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
             state_dict_from_pretrained = get_peft_model_state_dict(model_from_pretrained)

             # check if same keys
-            self.assertEqual(state_dict.keys(), state_dict_from_pretrained.keys())
+            assert state_dict.keys() == state_dict_from_pretrained.keys()

             # Check that the number of saved parameters is 4 -- 2 layers of (tokens and gate).
-            self.assertEqual(len(list(state_dict.keys())), 4)
+            assert len(state_dict) == 4

             # check if tensors equal
             for key in state_dict.keys():
-                self.assertTrue(
-                    torch.allclose(
-                        state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
-                    )
-                )
+                assert torch.allclose(
+                    state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
+                )

             # check if `adapter_model.bin` is present
-            self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_model.safetensors")))
+            assert os.path.exists(os.path.join(tmp_dirname, "adapter_model.safetensors"))

             # check if `adapter_config.json` is present
-            self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_config.json")))
+            assert os.path.exists(os.path.join(tmp_dirname, "adapter_config.json"))

             # check if `model.safetensors` is not present
-            self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "model.safetensors")))
+            assert not os.path.exists(os.path.join(tmp_dirname, "model.safetensors"))

             # check if `config.json` is not present
-            self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "config.json")))
+            assert not os.path.exists(os.path.join(tmp_dirname, "config.json"))

     def test_save_pretrained_selected_adapters(self) -> None:
         seed = 420
@@ -229,30 +225,28 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
             state_dict_from_pretrained = get_peft_model_state_dict(model_from_pretrained)

             # check if same keys
-            self.assertEqual(state_dict.keys(), state_dict_from_pretrained.keys())
+            assert state_dict.keys() == state_dict_from_pretrained.keys()

             # Check that the number of saved parameters is 4 -- 2 layers of (tokens and gate).
-            self.assertEqual(len(list(state_dict.keys())), 4)
+            assert len(state_dict) == 4

             # check if tensors equal
             for key in state_dict.keys():
-                self.assertTrue(
-                    torch.allclose(
-                        state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
-                    )
-                )
+                assert torch.allclose(
+                    state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
+                )

             # check if `adapter_model.bin` is present
-            self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_model.safetensors")))
+            assert os.path.exists(os.path.join(tmp_dirname, "adapter_model.safetensors"))

             # check if `adapter_config.json` is present
-            self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_config.json")))
+            assert os.path.exists(os.path.join(tmp_dirname, "adapter_config.json"))

             # check if `model.safetensors` is not present
-            self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "model.safetensors")))
+            assert not os.path.exists(os.path.join(tmp_dirname, "model.safetensors"))

             # check if `config.json` is not present
-            self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "config.json")))
+            assert not os.path.exists(os.path.join(tmp_dirname, "config.json"))

     def test_generate(self) -> None:
         model = LlamaForCausalLM(self._create_test_llama_config())
@@ -299,7 +293,7 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):

         # Test that the output changed.
         default_after = adapted(input_ids=input_ids, attention_mask=attention_mask, labels=target_ids)
-        self.assertFalse(torch.allclose(default_before.logits, default_after.logits))
+        assert not torch.allclose(default_before.logits, default_after.logits)

         with adapted.disable_adapter():
             # Test that the output is the same as the original output.
@@ -320,9 +314,9 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):

         # Test that adapter 1 output changed.
         adapter_1_after = adapted(input_ids=input_ids, attention_mask=attention_mask, labels=target_ids)
-        self.assertFalse(torch.allclose(adapter_1_before.logits, adapter_1_after.logits))
-        self.assertFalse(torch.allclose(original_before.logits, adapter_1_after.logits))
-        self.assertFalse(torch.allclose(default_after.logits, adapter_1_after.logits))
+        assert not torch.allclose(adapter_1_before.logits, adapter_1_after.logits)
+        assert not torch.allclose(original_before.logits, adapter_1_after.logits)
+        assert not torch.allclose(default_after.logits, adapter_1_after.logits)

         with adapted.disable_adapter():
             # Test that the output is the same as the original output.
@@ -335,8 +329,8 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
         # Test that the output is the same as the default output after training.
         default_after_set = adapted(input_ids=input_ids, attention_mask=attention_mask, labels=target_ids)
         assert_close(default_after.logits, default_after_set.logits, rtol=0, atol=0)
-        self.assertFalse(torch.allclose(original_before.logits, default_after_set.logits))
-        self.assertFalse(torch.allclose(adapter_1_after.logits, default_after_set.logits))
+        assert not torch.allclose(original_before.logits, default_after_set.logits)
+        assert not torch.allclose(adapter_1_after.logits, default_after_set.logits)

     def test_add_and_set_while_disabled(self):
         """Test that adding and setting adapters while disabled works as intended."""
@@ -373,7 +367,7 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):

         # Test that adapter 1 output changed.
         adapter_1_after = adapted(input_ids=input_ids, attention_mask=attention_mask, labels=target_ids)
-        self.assertFalse(torch.allclose(original_before.logits, adapter_1_after.logits))
+        assert not torch.allclose(original_before.logits, adapter_1_after.logits)

         adapted.set_adapter("default")
         with adapted.disable_adapter():
@@ -434,8 +428,8 @@ class AdaptionPromptTester(TestCase, PeftCommonTester):
         # https://github.com/huggingface/peft/blob/062d95a09eb5d1de35c0e5e23d4387daba99e2db/src/peft/tuners/adaption_prompt.py#L303
         # This is fine for users but makes it difficult to test if anything happens. In the future, we will have a clean
         # way to control initialization. Until then, this test is expected to fail.
-        self.assertFalse(torch.allclose(output_before, output_peft))
+        assert not torch.allclose(output_before, output_peft)

         with model.disable_adapter():
             output_peft_disabled = model(dummy_input).logits
-        self.assertTrue(torch.allclose(output_before, output_peft_disabled))
+        assert torch.allclose(output_before, output_peft_disabled)
@@ -38,18 +38,18 @@ class PeftAutoModelTester(unittest.TestCase):
     def test_peft_causal_lm(self):
         model_id = "peft-internal-testing/tiny-OPTForCausalLM-lora"
         model = AutoPeftModelForCausalLM.from_pretrained(model_id)
-        self.assertTrue(isinstance(model, PeftModelForCausalLM))
+        assert isinstance(model, PeftModelForCausalLM)

         with tempfile.TemporaryDirectory() as tmp_dirname:
             model.save_pretrained(tmp_dirname)

             model = AutoPeftModelForCausalLM.from_pretrained(tmp_dirname)
-            self.assertTrue(isinstance(model, PeftModelForCausalLM))
+            assert isinstance(model, PeftModelForCausalLM)

         # check if kwargs are passed correctly
         model = AutoPeftModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
-        self.assertTrue(isinstance(model, PeftModelForCausalLM))
-        self.assertTrue(model.base_model.lm_head.weight.dtype == torch.bfloat16)
+        assert isinstance(model, PeftModelForCausalLM)
+        assert model.base_model.lm_head.weight.dtype == torch.bfloat16

         adapter_name = "default"
         is_trainable = False
@@ -59,12 +59,12 @@ class PeftAutoModelTester(unittest.TestCase):
     def test_peft_causal_lm_extended_vocab(self):
         model_id = "peft-internal-testing/tiny-random-OPTForCausalLM-extended-vocab"
         model = AutoPeftModelForCausalLM.from_pretrained(model_id)
-        self.assertTrue(isinstance(model, PeftModelForCausalLM))
+        assert isinstance(model, PeftModelForCausalLM)

         # check if kwargs are passed correctly
         model = AutoPeftModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
-        self.assertTrue(isinstance(model, PeftModelForCausalLM))
-        self.assertTrue(model.base_model.lm_head.weight.dtype == torch.bfloat16)
+        assert isinstance(model, PeftModelForCausalLM)
+        assert model.base_model.lm_head.weight.dtype == torch.bfloat16

         adapter_name = "default"
         is_trainable = False
@@ -74,18 +74,18 @@ class PeftAutoModelTester(unittest.TestCase):
     def test_peft_seq2seq_lm(self):
         model_id = "peft-internal-testing/tiny_T5ForSeq2SeqLM-lora"
         model = AutoPeftModelForSeq2SeqLM.from_pretrained(model_id)
-        self.assertTrue(isinstance(model, PeftModelForSeq2SeqLM))
+        assert isinstance(model, PeftModelForSeq2SeqLM)

         with tempfile.TemporaryDirectory() as tmp_dirname:
             model.save_pretrained(tmp_dirname)

             model = AutoPeftModelForSeq2SeqLM.from_pretrained(tmp_dirname)
-            self.assertTrue(isinstance(model, PeftModelForSeq2SeqLM))
+            assert isinstance(model, PeftModelForSeq2SeqLM)

         # check if kwargs are passed correctly
         model = AutoPeftModelForSeq2SeqLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
-        self.assertTrue(isinstance(model, PeftModelForSeq2SeqLM))
-        self.assertTrue(model.base_model.lm_head.weight.dtype == torch.bfloat16)
+        assert isinstance(model, PeftModelForSeq2SeqLM)
+        assert model.base_model.lm_head.weight.dtype == torch.bfloat16

         adapter_name = "default"
         is_trainable = False
@@ -95,18 +95,18 @@ class PeftAutoModelTester(unittest.TestCase):
     def test_peft_sequence_cls(self):
         model_id = "peft-internal-testing/tiny_OPTForSequenceClassification-lora"
         model = AutoPeftModelForSequenceClassification.from_pretrained(model_id)
-        self.assertTrue(isinstance(model, PeftModelForSequenceClassification))
+        assert isinstance(model, PeftModelForSequenceClassification)

         with tempfile.TemporaryDirectory() as tmp_dirname:
             model.save_pretrained(tmp_dirname)

             model = AutoPeftModelForSequenceClassification.from_pretrained(tmp_dirname)
-            self.assertTrue(isinstance(model, PeftModelForSequenceClassification))
+            assert isinstance(model, PeftModelForSequenceClassification)

         # check if kwargs are passed correctly
         model = AutoPeftModelForSequenceClassification.from_pretrained(model_id, torch_dtype=torch.bfloat16)
-        self.assertTrue(isinstance(model, PeftModelForSequenceClassification))
-        self.assertTrue(model.score.original_module.weight.dtype == torch.bfloat16)
+        assert isinstance(model, PeftModelForSequenceClassification)
+        assert model.score.original_module.weight.dtype == torch.bfloat16

         adapter_name = "default"
         is_trainable = False
@@ -118,18 +118,18 @@ class PeftAutoModelTester(unittest.TestCase):
     def test_peft_token_classification(self):
         model_id = "peft-internal-testing/tiny_GPT2ForTokenClassification-lora"
         model = AutoPeftModelForTokenClassification.from_pretrained(model_id)
-        self.assertTrue(isinstance(model, PeftModelForTokenClassification))
+        assert isinstance(model, PeftModelForTokenClassification)

         with tempfile.TemporaryDirectory() as tmp_dirname:
             model.save_pretrained(tmp_dirname)

             model = AutoPeftModelForTokenClassification.from_pretrained(tmp_dirname)
-            self.assertTrue(isinstance(model, PeftModelForTokenClassification))
+            assert isinstance(model, PeftModelForTokenClassification)

         # check if kwargs are passed correctly
         model = AutoPeftModelForTokenClassification.from_pretrained(model_id, torch_dtype=torch.bfloat16)
-        self.assertTrue(isinstance(model, PeftModelForTokenClassification))
-        self.assertTrue(model.base_model.classifier.original_module.weight.dtype == torch.bfloat16)
+        assert isinstance(model, PeftModelForTokenClassification)
+        assert model.base_model.classifier.original_module.weight.dtype == torch.bfloat16

         adapter_name = "default"
         is_trainable = False
@@ -141,18 +141,18 @@ class PeftAutoModelTester(unittest.TestCase):
     def test_peft_question_answering(self):
         model_id = "peft-internal-testing/tiny_OPTForQuestionAnswering-lora"
         model = AutoPeftModelForQuestionAnswering.from_pretrained(model_id)
-        self.assertTrue(isinstance(model, PeftModelForQuestionAnswering))
+        assert isinstance(model, PeftModelForQuestionAnswering)

         with tempfile.TemporaryDirectory() as tmp_dirname:
             model.save_pretrained(tmp_dirname)

             model = AutoPeftModelForQuestionAnswering.from_pretrained(tmp_dirname)
-            self.assertTrue(isinstance(model, PeftModelForQuestionAnswering))
+            assert isinstance(model, PeftModelForQuestionAnswering)

         # check if kwargs are passed correctly
         model = AutoPeftModelForQuestionAnswering.from_pretrained(model_id, torch_dtype=torch.bfloat16)
-        self.assertTrue(isinstance(model, PeftModelForQuestionAnswering))
-        self.assertTrue(model.base_model.qa_outputs.original_module.weight.dtype == torch.bfloat16)
+        assert isinstance(model, PeftModelForQuestionAnswering)
+        assert model.base_model.qa_outputs.original_module.weight.dtype == torch.bfloat16

         adapter_name = "default"
         is_trainable = False
@@ -164,18 +164,18 @@ class PeftAutoModelTester(unittest.TestCase):
     def test_peft_feature_extraction(self):
         model_id = "peft-internal-testing/tiny_OPTForFeatureExtraction-lora"
         model = AutoPeftModelForFeatureExtraction.from_pretrained(model_id)
-        self.assertTrue(isinstance(model, PeftModelForFeatureExtraction))
+        assert isinstance(model, PeftModelForFeatureExtraction)

         with tempfile.TemporaryDirectory() as tmp_dirname:
             model.save_pretrained(tmp_dirname)

             model = AutoPeftModelForFeatureExtraction.from_pretrained(tmp_dirname)
-            self.assertTrue(isinstance(model, PeftModelForFeatureExtraction))
+            assert isinstance(model, PeftModelForFeatureExtraction)

         # check if kwargs are passed correctly
         model = AutoPeftModelForFeatureExtraction.from_pretrained(model_id, torch_dtype=torch.bfloat16)
-        self.assertTrue(isinstance(model, PeftModelForFeatureExtraction))
-        self.assertTrue(model.base_model.model.decoder.embed_tokens.weight.dtype == torch.bfloat16)
+        assert isinstance(model, PeftModelForFeatureExtraction)
+        assert model.base_model.model.decoder.embed_tokens.weight.dtype == torch.bfloat16

         adapter_name = "default"
         is_trainable = False
@@ -187,18 +187,18 @@ class PeftAutoModelTester(unittest.TestCase):
     def test_peft_whisper(self):
         model_id = "peft-internal-testing/tiny_WhisperForConditionalGeneration-lora"
         model = AutoPeftModel.from_pretrained(model_id)
-        self.assertTrue(isinstance(model, PeftModel))
+        assert isinstance(model, PeftModel)

         with tempfile.TemporaryDirectory() as tmp_dirname:
             model.save_pretrained(tmp_dirname)

             model = AutoPeftModel.from_pretrained(tmp_dirname)
-            self.assertTrue(isinstance(model, PeftModel))
+            assert isinstance(model, PeftModel)

         # check if kwargs are passed correctly
         model = AutoPeftModel.from_pretrained(model_id, torch_dtype=torch.bfloat16)
-        self.assertTrue(isinstance(model, PeftModel))
-        self.assertTrue(model.base_model.model.model.encoder.embed_positions.weight.dtype == torch.bfloat16)
+        assert isinstance(model, PeftModel)
+        assert model.base_model.model.model.encoder.embed_positions.weight.dtype == torch.bfloat16

         adapter_name = "default"
         is_trainable = False
@@ -120,19 +120,13 @@ class PeftGPUCommonTests(unittest.TestCase):
         config = LoraConfig(r=32, lora_alpha=64, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none")

         flan_8bit = get_peft_model(flan_8bit, flan_lora_config)
-        self.assertTrue(
-            isinstance(flan_8bit.base_model.model.encoder.block[0].layer[0].SelfAttention.q, LoraLinear8bitLt)
-        )
+        assert isinstance(flan_8bit.base_model.model.encoder.block[0].layer[0].SelfAttention.q, LoraLinear8bitLt)

         opt_8bit = get_peft_model(opt_8bit, opt_lora_config)
-        self.assertTrue(
-            isinstance(opt_8bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear8bitLt)
-        )
+        assert isinstance(opt_8bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear8bitLt)

         whisper_8bit = get_peft_model(whisper_8bit, config)
-        self.assertTrue(
-            isinstance(whisper_8bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear8bitLt)
-        )
+        assert isinstance(whisper_8bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear8bitLt)

     @require_bitsandbytes
     @pytest.mark.multi_gpu_tests
@@ -170,19 +164,13 @@ class PeftGPUCommonTests(unittest.TestCase):
         config = IA3Config(target_modules=["q_proj", "v_proj", "fc2"], feedforward_modules=["fc2"])

         flan_8bit = get_peft_model(flan_8bit, flan_ia3_config)
-        self.assertTrue(
-            isinstance(flan_8bit.base_model.model.encoder.block[0].layer[0].SelfAttention.q, IA3Linear8bitLt)
-        )
+        assert isinstance(flan_8bit.base_model.model.encoder.block[0].layer[0].SelfAttention.q, IA3Linear8bitLt)

         opt_8bit = get_peft_model(opt_8bit, opt_ia3_config)
-        self.assertTrue(
-            isinstance(opt_8bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear8bitLt)
-        )
+        assert isinstance(opt_8bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear8bitLt)

         whisper_8bit = get_peft_model(whisper_8bit, config)
-        self.assertTrue(
-            isinstance(whisper_8bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear8bitLt)
-        )
+        assert isinstance(whisper_8bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear8bitLt)

     @require_bitsandbytes
     @pytest.mark.multi_gpu_tests
@@ -211,8 +199,8 @@ class PeftGPUCommonTests(unittest.TestCase):
         model.generate(input_ids=torch.LongTensor([[0, 2, 3, 1]]).to(0))

         # check that both adapters are in the same layer
-        self.assertIn("default", model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A)
-        self.assertIn("adapter2", model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A)
+        assert "default" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A
+        assert "adapter2" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A

     @require_bitsandbytes
     @pytest.mark.multi_gpu_tests
@@ -248,8 +236,8 @@ class PeftGPUCommonTests(unittest.TestCase):
         model.generate(input_ids=torch.LongTensor([[0, 2, 3, 1]]).to(0))

         # check that both adapters are in the same layer
-        self.assertIn("default", model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A)
-        self.assertIn("adapter2", model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A)
+        assert "default" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A
+        assert "adapter2" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A

     @require_bitsandbytes
     @pytest.mark.multi_gpu_tests
@@ -285,8 +273,8 @@ class PeftGPUCommonTests(unittest.TestCase):
         model.generate(input_ids=torch.LongTensor([[0, 2, 3, 1]]).to(0))

         # check that both adapters are in the same layer
-        self.assertIn("default", model.base_model.model.model.decoder.layers[0].self_attn.q_proj.ia3_l)
-        self.assertIn("adapter2", model.base_model.model.model.decoder.layers[0].self_attn.q_proj.ia3_l)
+        assert "default" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.ia3_l
+        assert "adapter2" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.ia3_l

     @pytest.mark.single_gpu_tests
     def test_lora_gptq_quantization_from_pretrained_safetensors(self):
@@ -323,8 +311,8 @@ class PeftGPUCommonTests(unittest.TestCase):
         model.generate(input_ids=torch.LongTensor([[0, 2, 3, 1]]).to(0))

         # check that both adapters are in the same layer
-        self.assertIn("default", model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A)
-        self.assertIn("adapter2", model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A)
+        assert "default" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A
+        assert "adapter2" in model.base_model.model.model.decoder.layers[0].self_attn.q_proj.lora_A

     @require_bitsandbytes
     @pytest.mark.multi_gpu_tests
@@ -367,17 +355,13 @@ class PeftGPUCommonTests(unittest.TestCase):
         config = LoraConfig(r=32, lora_alpha=64, target_modules=["q_proj", "v_proj"], lora_dropout=0.05, bias="none")

         flan_4bit = get_peft_model(flan_4bit, flan_lora_config)
-        self.assertTrue(
-            isinstance(flan_4bit.base_model.model.encoder.block[0].layer[0].SelfAttention.q, LoraLinear4bit)
-        )
+        assert isinstance(flan_4bit.base_model.model.encoder.block[0].layer[0].SelfAttention.q, LoraLinear4bit)

         opt_4bit = get_peft_model(opt_4bit, opt_lora_config)
-        self.assertTrue(isinstance(opt_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear4bit))
+        assert isinstance(opt_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear4bit)

         whisper_4bit = get_peft_model(whisper_4bit, config)
-        self.assertTrue(
-            isinstance(whisper_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear4bit)
-        )
+        assert isinstance(whisper_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear4bit)

     @require_bitsandbytes
     @pytest.mark.multi_gpu_tests
@@ -415,17 +399,13 @@ class PeftGPUCommonTests(unittest.TestCase):
         config = IA3Config(target_modules=["q_proj", "v_proj", "fc2"], feedforward_modules=["fc2"])

         flan_4bit = get_peft_model(flan_4bit, flan_ia3_config)
-        self.assertTrue(
-            isinstance(flan_4bit.base_model.model.encoder.block[0].layer[0].SelfAttention.q, IA3Linear4bit)
-        )
+        assert isinstance(flan_4bit.base_model.model.encoder.block[0].layer[0].SelfAttention.q, IA3Linear4bit)

         opt_4bit = get_peft_model(opt_4bit, opt_ia3_config)
-        self.assertTrue(isinstance(opt_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear4bit))
+        assert isinstance(opt_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear4bit)

         whisper_4bit = get_peft_model(whisper_4bit, config)
-        self.assertTrue(
-            isinstance(whisper_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear4bit)
-        )
+        assert isinstance(whisper_4bit.base_model.model.model.decoder.layers[0].self_attn.v_proj, IA3Linear4bit)

     @pytest.mark.multi_gpu_tests
     @require_torch_multi_gpu
@@ -445,10 +425,10 @@ class PeftGPUCommonTests(unittest.TestCase):
         model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, device_map="balanced")
         tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)

-        self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))
+        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))

         model = get_peft_model(model, lora_config)
-        self.assertTrue(isinstance(model, PeftModel))
+        assert isinstance(model, PeftModel)

         dummy_input = "This is a dummy input:"
         input_ids = tokenizer(dummy_input, return_tensors="pt").input_ids.to(self.device)
@@ -470,11 +450,11 @@ class PeftGPUCommonTests(unittest.TestCase):
         model = AutoModelForSeq2SeqLM.from_pretrained(self.seq2seq_model_id, device_map="balanced", load_in_8bit=True)
         tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)

-        self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))
+        assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))

         model = get_peft_model(model, lora_config)
-        self.assertTrue(isinstance(model, PeftModel))
-        self.assertTrue(isinstance(model.base_model.model.encoder.block[0].layer[0].SelfAttention.q, LoraLinear8bitLt))
+        assert isinstance(model, PeftModel)
+        assert isinstance(model.base_model.model.encoder.block[0].layer[0].SelfAttention.q, LoraLinear8bitLt)

         dummy_input = "This is a dummy input:"
         input_ids = tokenizer(dummy_input, return_tensors="pt").input_ids.to(self.device)
@@ -546,8 +526,8 @@ class PeftGPUCommonTests(unittest.TestCase):
         model = get_peft_model(model, config)
         trainable_params, all_params = model.get_nb_trainable_parameters()

-        self.assertEqual(trainable_params, EXPECTED_TRAINABLE_PARAMS)
-        self.assertEqual(all_params, EXPECTED_ALL_PARAMS)
+        assert trainable_params == EXPECTED_TRAINABLE_PARAMS
+        assert all_params == EXPECTED_ALL_PARAMS

         # test with double quant
         bnb_config = BitsAndBytesConfig(
@@ -566,8 +546,8 @@ class PeftGPUCommonTests(unittest.TestCase):
         model = get_peft_model(model, config)
         trainable_params, all_params = model.get_nb_trainable_parameters()

-        self.assertEqual(trainable_params, EXPECTED_TRAINABLE_PARAMS)
-        self.assertEqual(all_params, EXPECTED_ALL_PARAMS)
+        assert trainable_params == EXPECTED_TRAINABLE_PARAMS
+        assert all_params == EXPECTED_ALL_PARAMS

     @require_torch_gpu
     @pytest.mark.single_gpu_tests
@@ -602,9 +582,9 @@ class PeftGPUCommonTests(unittest.TestCase):
         o1 = lm_head(inputs)
         o1.mean().backward()

-        self.assertTrue(modules_to_save.weight.requires_grad is True)
-        self.assertTrue(original_module.weight.grad is None)
-        self.assertTrue(modules_to_save.weight.grad is not None)
+        assert modules_to_save.weight.requires_grad is True
+        assert original_module.weight.grad is None
+        assert modules_to_save.weight.grad is not None

     @require_torch_gpu
     @pytest.mark.single_gpu_tests
@@ -633,15 +613,11 @@ class PeftGPUCommonTests(unittest.TestCase):

         atol = 0.01
         rtol = 10
-        self.assertFalse(torch.allclose(out_base, out_before_merge, atol=atol, rtol=rtol))
-        self.assertTrue(torch.allclose(out_before_merge, out_after_merge, atol=atol, rtol=rtol))
-        self.assertTrue(isinstance(model, PeftModel))
-        self.assertTrue(
-            isinstance(model.base_model.model.model.decoder.layers[0].self_attn.q_proj, bnb.nn.Linear8bitLt)
-        )
-        self.assertTrue(
-            isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, bnb.nn.Linear8bitLt)
-        )
+        assert not torch.allclose(out_base, out_before_merge, atol=atol, rtol=rtol)
+        assert torch.allclose(out_before_merge, out_after_merge, atol=atol, rtol=rtol)
+        assert isinstance(model, PeftModel)
+        assert isinstance(model.base_model.model.model.decoder.layers[0].self_attn.q_proj, bnb.nn.Linear8bitLt)
+        assert isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, bnb.nn.Linear8bitLt)

     @require_torch_gpu
     @pytest.mark.single_gpu_tests
@@ -673,11 +649,11 @@ class PeftGPUCommonTests(unittest.TestCase):

         atol = 0.01
         rtol = 10
-        self.assertFalse(torch.allclose(out_base, out_before, atol=atol, rtol=rtol))
-        self.assertTrue(torch.allclose(out_base, out_after, atol=atol, rtol=rtol))
-        self.assertTrue(isinstance(model, PeftModel))
-        self.assertTrue(isinstance(model.base_model.model.model.decoder.layers[0].self_attn.q_proj, LoraLinear8bitLt))
-        self.assertTrue(isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear8bitLt))
+        assert not torch.allclose(out_base, out_before, atol=atol, rtol=rtol)
+        assert torch.allclose(out_base, out_after, atol=atol, rtol=rtol)
+        assert isinstance(model, PeftModel)
+        assert isinstance(model.base_model.model.model.decoder.layers[0].self_attn.q_proj, LoraLinear8bitLt)
+        assert isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear8bitLt)

     @require_torch_gpu
     @pytest.mark.single_gpu_tests
@@ -715,11 +691,11 @@ class PeftGPUCommonTests(unittest.TestCase):
         # tolerances are pretty high because some deviations are expected with quantization
         atol = 0.01
         rtol = 10
-        self.assertFalse(torch.allclose(out_base, out_before_merge, atol=atol, rtol=rtol))
-        self.assertTrue(torch.allclose(out_before_merge, out_after_merge, atol=atol, rtol=rtol))
-        self.assertTrue(isinstance(model, PeftModel))
-        self.assertTrue(isinstance(model.base_model.model.model.decoder.layers[0].self_attn.q_proj, bnb.nn.Linear4bit))
-        self.assertTrue(isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, bnb.nn.Linear4bit))
+        assert not torch.allclose(out_base, out_before_merge, atol=atol, rtol=rtol)
+        assert torch.allclose(out_before_merge, out_after_merge, atol=atol, rtol=rtol)
+        assert isinstance(model, PeftModel)
+        assert isinstance(model.base_model.model.model.decoder.layers[0].self_attn.q_proj, bnb.nn.Linear4bit)
+        assert isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, bnb.nn.Linear4bit)

     @require_torch_gpu
     @pytest.mark.single_gpu_tests
@@ -757,11 +733,11 @@ class PeftGPUCommonTests(unittest.TestCase):

         atol = 0.01
         rtol = 10
-        self.assertFalse(torch.allclose(out_base, out_before, atol=atol, rtol=rtol))
-        self.assertTrue(torch.allclose(out_base, out_after, atol=atol, rtol=rtol))
-        self.assertTrue(isinstance(model, PeftModel))
-        self.assertTrue(isinstance(model.base_model.model.model.decoder.layers[0].self_attn.q_proj, LoraLinear4bit))
-        self.assertTrue(isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear4bit))
+        assert not torch.allclose(out_base, out_before, atol=atol, rtol=rtol)
+        assert torch.allclose(out_base, out_after, atol=atol, rtol=rtol)
+        assert isinstance(model, PeftModel)
+        assert isinstance(model.base_model.model.model.decoder.layers[0].self_attn.q_proj, LoraLinear4bit)
+        assert isinstance(model.base_model.model.model.decoder.layers[0].self_attn.v_proj, LoraLinear4bit)

     @require_torch_gpu
     @pytest.mark.single_gpu_tests
@@ -68,10 +68,10 @@ class PeftConfigTester(unittest.TestCase):
         """
         # test if all configs have the expected methods
         config = config_class()
-        self.assertTrue(hasattr(config, "to_dict"))
-        self.assertTrue(hasattr(config, "save_pretrained"))
-        self.assertTrue(hasattr(config, "from_pretrained"))
-        self.assertTrue(hasattr(config, "from_json_file"))
+        assert hasattr(config, "to_dict")
+        assert hasattr(config, "save_pretrained")
+        assert hasattr(config, "from_pretrained")
+        assert hasattr(config, "from_json_file")

     @parameterized.expand(ALL_CONFIG_CLASSES)
     def test_task_type(self, config_class):
@@ -110,7 +110,7 @@ class PeftConfigTester(unittest.TestCase):
             config.save_pretrained(tmp_dirname)

             config_from_pretrained = config_class.from_pretrained(tmp_dirname)
-            self.assertEqual(config.to_dict(), config_from_pretrained.to_dict())
+            assert config.to_dict() == config_from_pretrained.to_dict()

     @parameterized.expand(ALL_CONFIG_CLASSES)
     def test_from_json_file(self, config_class):
@@ -119,7 +119,7 @@ class PeftConfigTester(unittest.TestCase):
             config.save_pretrained(tmp_dirname)

             config_from_json = config_class.from_json_file(os.path.join(tmp_dirname, "adapter_config.json"))
-            self.assertEqual(config.to_dict(), config_from_json)
+            assert config.to_dict() == config_from_json

     @parameterized.expand(ALL_CONFIG_CLASSES)
     def test_to_dict(self, config_class):
@@ -128,7 +128,7 @@ class PeftConfigTester(unittest.TestCase):
         - to_dict
         """
         config = config_class()
-        self.assertTrue(isinstance(config.to_dict(), dict))
+        assert isinstance(config.to_dict(), dict)

     @parameterized.expand(ALL_CONFIG_CLASSES)
     def test_from_pretrained_cache_dir(self, config_class):
@@ -146,7 +146,7 @@ class PeftConfigTester(unittest.TestCase):
         """
         with tempfile.TemporaryDirectory() as tmp_dirname:
             PeftConfig.from_pretrained("ybelkada/test-st-lora", cache_dir=tmp_dirname)
-            self.assertTrue("models--ybelkada--test-st-lora" in os.listdir(tmp_dirname))
+            assert "models--ybelkada--test-st-lora" in os.listdir(tmp_dirname)

     @parameterized.expand(ALL_CONFIG_CLASSES)
     def test_set_attributes(self, config_class):
@ -158,28 +158,28 @@ class PeftConfigTester(unittest.TestCase):
|
|||||||
config.save_pretrained(tmp_dirname)
|
config.save_pretrained(tmp_dirname)
|
||||||
|
|
||||||
config_from_pretrained = config_class.from_pretrained(tmp_dirname)
|
config_from_pretrained = config_class.from_pretrained(tmp_dirname)
|
||||||
self.assertEqual(config.to_dict(), config_from_pretrained.to_dict())
|
assert config.to_dict() == config_from_pretrained.to_dict()
|
||||||
|
|
||||||
@parameterized.expand(ALL_CONFIG_CLASSES)
|
@parameterized.expand(ALL_CONFIG_CLASSES)
|
||||||
def test_config_copy(self, config_class):
|
def test_config_copy(self, config_class):
|
||||||
# see https://github.com/huggingface/peft/issues/424
|
# see https://github.com/huggingface/peft/issues/424
|
||||||
config = config_class()
|
config = config_class()
|
||||||
copied = copy.copy(config)
|
copied = copy.copy(config)
|
||||||
self.assertEqual(config.to_dict(), copied.to_dict())
|
assert config.to_dict() == copied.to_dict()
|
||||||
|
|
||||||
@parameterized.expand(ALL_CONFIG_CLASSES)
|
@parameterized.expand(ALL_CONFIG_CLASSES)
|
||||||
def test_config_deepcopy(self, config_class):
|
def test_config_deepcopy(self, config_class):
|
||||||
# see https://github.com/huggingface/peft/issues/424
|
# see https://github.com/huggingface/peft/issues/424
|
||||||
config = config_class()
|
config = config_class()
|
||||||
copied = copy.deepcopy(config)
|
copied = copy.deepcopy(config)
|
||||||
self.assertEqual(config.to_dict(), copied.to_dict())
|
assert config.to_dict() == copied.to_dict()
|
||||||
|
|
||||||
@parameterized.expand(ALL_CONFIG_CLASSES)
|
@parameterized.expand(ALL_CONFIG_CLASSES)
|
||||||
def test_config_pickle_roundtrip(self, config_class):
|
def test_config_pickle_roundtrip(self, config_class):
|
||||||
# see https://github.com/huggingface/peft/issues/424
|
# see https://github.com/huggingface/peft/issues/424
|
||||||
config = config_class()
|
config = config_class()
|
||||||
copied = pickle.loads(pickle.dumps(config))
|
copied = pickle.loads(pickle.dumps(config))
|
||||||
self.assertEqual(config.to_dict(), copied.to_dict())
|
assert config.to_dict() == copied.to_dict()
|
||||||
|
|
||||||
def test_prompt_encoder_warning_num_layers(self):
|
def test_prompt_encoder_warning_num_layers(self):
|
||||||
# This test checks that if a prompt encoder config is created with an argument that is ignored, there should be
|
# This test checks that if a prompt encoder config is created with an argument that is ignored, there should be
|
||||||
@ -211,9 +211,9 @@ class PeftConfigTester(unittest.TestCase):
|
|||||||
config.save_pretrained(tmp_dirname)
|
config.save_pretrained(tmp_dirname)
|
||||||
|
|
||||||
config_from_pretrained = config_class.from_pretrained(tmp_dirname)
|
config_from_pretrained = config_class.from_pretrained(tmp_dirname)
|
||||||
self.assertEqual(config.to_dict(), config_from_pretrained.to_dict())
|
assert config.to_dict() == config_from_pretrained.to_dict()
|
||||||
# explicit test that target_modules should be converted to set
|
# explicit test that target_modules should be converted to set
|
||||||
self.assertTrue(isinstance(config_from_pretrained.target_modules, set))
|
assert isinstance(config_from_pretrained.target_modules, set)
|
||||||
|
|
||||||
def test_regex_with_layer_indexing_lora(self):
|
def test_regex_with_layer_indexing_lora(self):
|
||||||
# This test checks that an error is raised if `target_modules` is a regex expression and `layers_to_transform` or
|
# This test checks that an error is raised if `target_modules` is a regex expression and `layers_to_transform` or
|
||||||
@ -224,15 +224,10 @@ class PeftConfigTester(unittest.TestCase):
|
|||||||
|
|
||||||
valid_config = {"target_modules": ["foo"], "layers_pattern": ["bar"], "layers_to_transform": [0]}
|
valid_config = {"target_modules": ["foo"], "layers_pattern": ["bar"], "layers_to_transform": [0]}
|
||||||
|
|
||||||
with self.assertRaisesRegex(
|
with pytest.raises(ValueError, match="`layers_to_transform` cannot be used when `target_modules` is a str."):
|
||||||
ValueError,
|
|
||||||
expected_regex="`layers_to_transform` cannot be used when `target_modules` is a str.",
|
|
||||||
):
|
|
||||||
LoraConfig(**invalid_config1)
|
LoraConfig(**invalid_config1)
|
||||||
|
|
||||||
with self.assertRaisesRegex(
|
with pytest.raises(ValueError, match="`layers_pattern` cannot be used when `target_modules` is a str."):
|
||||||
ValueError, expected_regex="`layers_pattern` cannot be used when `target_modules` is a str."
|
|
||||||
):
|
|
||||||
LoraConfig(**invalid_config2)
|
LoraConfig(**invalid_config2)
|
||||||
|
|
||||||
# should run without errors
|
# should run without errors
|
||||||
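Throughout this diff, `self.assertRaisesRegex(Exc, expected_regex=pattern)` becomes `pytest.raises(Exc, match=pattern)`. In both idioms the pattern is a regular expression searched (re.search) against the exception message, so special characters must be escaped when a literal match is intended. A minimal sketch of the mapping, with made-up names that are not part of PEFT:

import pytest

def set_rank(r):
    if r <= 0:
        raise ValueError(f"rank must be positive, got {r}")

def test_set_rank_rejects_nonpositive_values():
    # unittest style: with self.assertRaisesRegex(ValueError, expected_regex="must be positive"): ...
    with pytest.raises(ValueError, match="must be positive"):
        set_rank(0)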
@ -245,9 +240,7 @@ class PeftConfigTester(unittest.TestCase):
|
|||||||
# an example invalid config
|
# an example invalid config
|
||||||
invalid_config = {"target_modules": ["k", "v"], "feedforward_modules": ["q"]}
|
invalid_config = {"target_modules": ["k", "v"], "feedforward_modules": ["q"]}
|
||||||
|
|
||||||
with self.assertRaisesRegex(
|
with pytest.raises(ValueError, match="^`feedforward_modules` should be a subset of `target_modules`$"):
|
||||||
ValueError, expected_regex="^`feedforward_modules` should be a subset of `target_modules`$"
|
|
||||||
):
|
|
||||||
IA3Config(**invalid_config)
|
IA3Config(**invalid_config)
|
||||||
|
|
||||||
def test_ia3_is_feedforward_subset_valid_config(self):
|
def test_ia3_is_feedforward_subset_valid_config(self):
|
||||||
|
@ -19,6 +19,7 @@ import os
|
|||||||
import tempfile
|
import tempfile
|
||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
|
import pytest
|
||||||
import torch
|
import torch
|
||||||
from parameterized import parameterized
|
from parameterized import parameterized
|
||||||
from torch import nn
|
from torch import nn
|
||||||
@ -540,7 +541,7 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
|||||||
model.eval()
|
model.eval()
|
||||||
with torch.no_grad():
|
with torch.no_grad():
|
||||||
output = model(**X)
|
output = model(**X)
|
||||||
self.assertTrue(torch.isfinite(output).all())
|
assert torch.isfinite(output).all()
|
||||||
|
|
||||||
@parameterized.expand(TEST_CASES)
|
@parameterized.expand(TEST_CASES)
|
||||||
def test_only_params_are_updated(self, test_name, model_id, config_cls, config_kwargs):
|
def test_only_params_are_updated(self, test_name, model_id, config_cls, config_kwargs):
|
||||||
@ -569,16 +570,16 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
|||||||
tol = 1e-4
|
tol = 1e-4
|
||||||
params_before = dict(model_before.named_parameters())
|
params_before = dict(model_before.named_parameters())
|
||||||
params_after = dict(model.named_parameters())
|
params_after = dict(model.named_parameters())
|
||||||
self.assertEqual(params_before.keys(), params_after.keys())
|
assert params_before.keys() == params_after.keys()
|
||||||
|
|
||||||
prefix = PREFIXES[config_cls]
|
prefix = PREFIXES[config_cls]
|
||||||
for name, param_before in params_before.items():
|
for name, param_before in params_before.items():
|
||||||
param_after = params_after[name]
|
param_after = params_after[name]
|
||||||
if (prefix in name) or ("modules_to_save" in name):
|
if (prefix in name) or ("modules_to_save" in name):
|
||||||
# target_modules and modules_to_save _are_ updated
|
# target_modules and modules_to_save _are_ updated
|
||||||
self.assertFalse(torch.allclose(param_before, param_after, atol=tol, rtol=tol))
|
assert not torch.allclose(param_before, param_after, atol=tol, rtol=tol)
|
||||||
else:
|
else:
|
||||||
self.assertTrue(torch.allclose(param_before, param_after, atol=tol, rtol=tol))
|
assert torch.allclose(param_before, param_after, atol=tol, rtol=tol)
|
||||||
|
|
||||||
@parameterized.expand(TEST_CASES)
|
@parameterized.expand(TEST_CASES)
|
||||||
def test_parameters_after_loading_model(self, test_name, model_id, config_cls, config_kwargs):
|
def test_parameters_after_loading_model(self, test_name, model_id, config_cls, config_kwargs):
|
||||||
@ -614,10 +615,10 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
|||||||
model_from_pretrained = PeftModel.from_pretrained(model_from_pretrained, tmp_dirname)
|
model_from_pretrained = PeftModel.from_pretrained(model_from_pretrained, tmp_dirname)
|
||||||
params_after = get_state_dict(model_from_pretrained)
|
params_after = get_state_dict(model_from_pretrained)
|
||||||
|
|
||||||
self.assertEqual(params_before.keys(), params_after.keys())
|
assert params_before.keys() == params_after.keys()
|
||||||
for name, param_before in params_before.items():
|
for name, param_before in params_before.items():
|
||||||
param_after = params_after[name]
|
param_after = params_after[name]
|
||||||
self.assertTrue(torch.allclose(param_before, param_after, atol=tol, rtol=tol))
|
assert torch.allclose(param_before, param_after, atol=tol, rtol=tol)
|
||||||
|
|
||||||
@parameterized.expand(TEST_CASES)
|
@parameterized.expand(TEST_CASES)
|
||||||
def test_disable_adapters(self, test_name, model_id, config_cls, config_kwargs):
|
def test_disable_adapters(self, test_name, model_id, config_cls, config_kwargs):
|
||||||
@ -633,7 +634,7 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
|||||||
model.eval()
|
model.eval()
|
||||||
outputs_before = model(**X)
|
outputs_before = model(**X)
|
||||||
|
|
||||||
self.assertTrue(torch.allclose(outputs_base, outputs_before))
|
assert torch.allclose(outputs_base, outputs_before)
|
||||||
|
|
||||||
model.train()
|
model.train()
|
||||||
# EmbConv1D is slow to learn for some reason
|
# EmbConv1D is slow to learn for some reason
|
||||||
@ -659,9 +660,9 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
|||||||
# check that after leaving the disable_adapter context, everything is enabled again
|
# check that after leaving the disable_adapter context, everything is enabled again
|
||||||
outputs_enabled_after_disable = model(**X)
|
outputs_enabled_after_disable = model(**X)
|
||||||
|
|
||||||
self.assertFalse(torch.allclose(outputs_before, outputs_after))
|
assert not torch.allclose(outputs_before, outputs_after)
|
||||||
self.assertTrue(torch.allclose(outputs_before, outputs_disabled))
|
assert torch.allclose(outputs_before, outputs_disabled)
|
||||||
self.assertTrue(torch.allclose(outputs_after, outputs_enabled_after_disable))
|
assert torch.allclose(outputs_after, outputs_enabled_after_disable)
|
||||||
|
|
||||||
@parameterized.expand(TEST_CASES)
|
@parameterized.expand(TEST_CASES)
|
||||||
def test_disable_adapters_with_merging(self, test_name, model_id, config_cls, config_kwargs):
|
def test_disable_adapters_with_merging(self, test_name, model_id, config_cls, config_kwargs):
|
||||||
@ -707,13 +708,13 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
|||||||
atol, rtol = 1e-3, 1e-3
|
atol, rtol = 1e-3, 1e-3
|
||||||
|
|
||||||
# check that there is a difference in results after training
|
# check that there is a difference in results after training
|
||||||
self.assertFalse(torch.allclose(outputs_before, outputs_after, atol=atol, rtol=rtol))
|
assert not torch.allclose(outputs_before, outputs_after, atol=atol, rtol=rtol)
|
||||||
|
|
||||||
# check that disabling adapters gives the same results as before training
|
# check that disabling adapters gives the same results as before training
|
||||||
self.assertTrue(torch.allclose(outputs_before, outputs_disabled, atol=atol, rtol=rtol))
|
assert torch.allclose(outputs_before, outputs_disabled, atol=atol, rtol=rtol)
|
||||||
|
|
||||||
# check that enabling + disabling adapters does not change the results
|
# check that enabling + disabling adapters does not change the results
|
||||||
self.assertTrue(torch.allclose(outputs_after, outputs_enabled_after_disable, atol=atol, rtol=rtol))
|
assert torch.allclose(outputs_after, outputs_enabled_after_disable, atol=atol, rtol=rtol)
|
||||||
|
|
||||||
@parameterized.expand(TEST_CASES)
|
@parameterized.expand(TEST_CASES)
|
||||||
def test_disable_adapter_with_bias_warns(self, test_name, model_id, config_cls, config_kwargs):
|
def test_disable_adapter_with_bias_warns(self, test_name, model_id, config_cls, config_kwargs):
|
||||||
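Most numeric checks in these hunks go through torch.allclose with explicit tolerances; elementwise it requires |input - other| <= atol + rtol * |other| for every element. A small sketch (values invented, not from the commit) of how the tolerances used above behave:

import torch

a = torch.tensor([1.0000, 2.0])
b = torch.tensor([1.0009, 2.0])

assert torch.allclose(a, b, atol=1e-3, rtol=1e-3)       # 9e-4 <= 1e-3 + 1e-3 * |b|, so this passes
assert not torch.allclose(a, b, atol=1e-5, rtol=1e-5)   # tighter tolerances reject the same pair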
@ -743,9 +744,9 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
|||||||
|
|
||||||
# check that bias=all and bias=lora_only give a warning with the correct message
|
# check that bias=all and bias=lora_only give a warning with the correct message
|
||||||
msg_start = "Careful, disabling adapter layers with bias configured to be"
|
msg_start = "Careful, disabling adapter layers with bias configured to be"
|
||||||
with self.assertWarns(UserWarning, msg=msg_start):
|
with pytest.warns(UserWarning, match=msg_start):
|
||||||
run_with_disable(config_kwargs, bias="lora_only")
|
run_with_disable(config_kwargs, bias="lora_only")
|
||||||
with self.assertWarns(UserWarning, msg=msg_start):
|
with pytest.warns(UserWarning, match=msg_start):
|
||||||
run_with_disable(config_kwargs, bias="all")
|
run_with_disable(config_kwargs, bias="all")
|
||||||
|
|
||||||
# For bias=none, there is no warning. Unfortunately, AFAIK unittest has no option to assert that no warning is
|
# For bias=none, there is no warning. Unfortunately, AFAIK unittest has no option to assert that no warning is
|
||||||
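Note that `self.assertWarns(UserWarning, msg=msg_start)` only uses `msg` as the failure message, while `pytest.warns(UserWarning, match=msg_start)` actually matches the warning text, so the converted test is slightly stricter. For the bias=none case mentioned in the comment above, pytest does have options: the `recwarn` fixture, or escalating warnings to errors inside the block. A sketch of the latter (not part of the commit; `run_quietly` stands in for the real call):

import warnings

def run_quietly():  # stand-in for run_with_disable(config_kwargs, bias="none")
    pass

def test_no_warning_for_bias_none():
    with warnings.catch_warnings():
        warnings.simplefilter("error")  # any warning raised inside this block fails the test
        run_quietly()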
@ -793,9 +794,9 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
|||||||
with open(os.path.join(tmp_dirname, "README.md")) as f:
|
with open(os.path.join(tmp_dirname, "README.md")) as f:
|
||||||
model_card = f.read()
|
model_card = f.read()
|
||||||
|
|
||||||
self.assertIn("library_name: peft", model_card)
|
assert "library_name: peft" in model_card
|
||||||
self.assertIn("meta: hello", model_card)
|
assert "meta: hello" in model_card
|
||||||
self.assertIn("This is a model card", model_card)
|
assert "This is a model card" in model_card
|
||||||
|
|
||||||
def test_non_existing_model_card(self):
|
def test_non_existing_model_card(self):
|
||||||
# ensure that if there is already a model card, it is not overwritten
|
# ensure that if there is already a model card, it is not overwritten
|
||||||
@ -808,9 +809,9 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
|||||||
with open(os.path.join(tmp_dirname, "README.md")) as f:
|
with open(os.path.join(tmp_dirname, "README.md")) as f:
|
||||||
model_card = f.read()
|
model_card = f.read()
|
||||||
|
|
||||||
self.assertIn("library_name: peft", model_card)
|
assert "library_name: peft" in model_card
|
||||||
# rough check that the model card is pre-filled
|
# rough check that the model card is pre-filled
|
||||||
self.assertGreater(len(model_card), 1000)
|
assert len(model_card) > 1000
|
||||||
|
|
||||||
@parameterized.expand(["auto", True, False])
|
@parameterized.expand(["auto", True, False])
|
||||||
def test_targeting_lora_to_embedding_layer(self, save_embedding_layers):
|
def test_targeting_lora_to_embedding_layer(self, save_embedding_layers):
|
||||||
@ -822,7 +823,7 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
|||||||
if save_embedding_layers == "auto":
|
if save_embedding_layers == "auto":
|
||||||
# assert warning
|
# assert warning
|
||||||
msg_start = "Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`."
|
msg_start = "Setting `save_embedding_layers` to `True` as embedding layers found in `target_modules`."
|
||||||
with self.assertWarns(UserWarning, msg=msg_start):
|
with pytest.warns(UserWarning, match=msg_start):
|
||||||
model.save_pretrained(tmp_dirname, save_embedding_layers=save_embedding_layers)
|
model.save_pretrained(tmp_dirname, save_embedding_layers=save_embedding_layers)
|
||||||
else:
|
else:
|
||||||
model.save_pretrained(tmp_dirname, save_embedding_layers=save_embedding_layers)
|
model.save_pretrained(tmp_dirname, save_embedding_layers=save_embedding_layers)
|
||||||
@ -830,15 +831,13 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
|||||||
|
|
||||||
state_dict = safe_load_file(os.path.join(tmp_dirname, "adapter_model.safetensors"))
|
state_dict = safe_load_file(os.path.join(tmp_dirname, "adapter_model.safetensors"))
|
||||||
if save_embedding_layers in ["auto", True]:
|
if save_embedding_layers in ["auto", True]:
|
||||||
self.assertTrue("base_model.model.embed_tokens.base_layer.weight" in state_dict)
|
assert "base_model.model.embed_tokens.base_layer.weight" in state_dict
|
||||||
self.assertTrue(
|
assert torch.allclose(
|
||||||
torch.allclose(
|
model.base_model.model.embed_tokens.base_layer.weight,
|
||||||
model.base_model.model.embed_tokens.base_layer.weight,
|
state_dict["base_model.model.embed_tokens.base_layer.weight"],
|
||||||
state_dict["base_model.model.embed_tokens.base_layer.weight"],
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
self.assertFalse("base_model.model.embed_tokens.base_layer.weight" in state_dict)
|
assert "base_model.model.embed_tokens.base_layer.weight" not in state_dict
|
||||||
del state_dict
|
del state_dict
|
||||||
|
|
||||||
@parameterized.expand(["auto", True, False])
|
@parameterized.expand(["auto", True, False])
|
||||||
@ -849,16 +848,17 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
|||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmp_dirname:
|
with tempfile.TemporaryDirectory() as tmp_dirname:
|
||||||
if save_embedding_layers is True:
|
if save_embedding_layers is True:
|
||||||
# assert warning
|
with pytest.warns(
|
||||||
msg_start = "Could not identify embedding layer(s) because the model is not a 🤗 transformers model."
|
UserWarning,
|
||||||
with self.assertWarns(UserWarning, msg=msg_start):
|
match=r"Could not identify embedding layer\(s\) because the model is not a 🤗 transformers model\.",
|
||||||
|
):
|
||||||
model.save_pretrained(tmp_dirname, save_embedding_layers=save_embedding_layers)
|
model.save_pretrained(tmp_dirname, save_embedding_layers=save_embedding_layers)
|
||||||
else:
|
else:
|
||||||
model.save_pretrained(tmp_dirname, save_embedding_layers=save_embedding_layers)
|
model.save_pretrained(tmp_dirname, save_embedding_layers=save_embedding_layers)
|
||||||
from safetensors.torch import load_file as safe_load_file
|
from safetensors.torch import load_file as safe_load_file
|
||||||
|
|
||||||
state_dict = safe_load_file(os.path.join(tmp_dirname, "adapter_model.safetensors"))
|
state_dict = safe_load_file(os.path.join(tmp_dirname, "adapter_model.safetensors"))
|
||||||
self.assertFalse("base_model.model.emb.base_layer.weight" in state_dict)
|
assert "base_model.model.emb.base_layer.weight" not in state_dict
|
||||||
del state_dict
|
del state_dict
|
||||||
|
|
||||||
@parameterized.expand(
|
@parameterized.expand(
|
||||||
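Because `match` is interpreted as a regular expression, the new warning check above escapes `(`, `)` and `.` in the message; `re.escape` produces this kind of pattern automatically. A quick sketch (not part of the commit):

import re

msg = "Could not identify embedding layer(s) because the model is not a 🤗 transformers model."
pattern = re.escape(msg)  # escapes ( ) and . so the text is matched literally
assert re.search(pattern, msg)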
@ -917,11 +917,11 @@ class PeftCustomModelTester(unittest.TestCase, PeftCommonTester):
|
|||||||
assert torch.allclose(sd_default[k0], sd_custom1[k1])
|
assert torch.allclose(sd_default[k0], sd_custom1[k1])
|
||||||
assert torch.allclose(sd_default[k0], sd_custom2[k2])
|
assert torch.allclose(sd_default[k0], sd_custom2[k2])
|
||||||
|
|
||||||
self.assertFalse(torch.allclose(output_base, output_default))
|
assert not torch.allclose(output_base, output_default)
|
||||||
self.assertFalse(torch.allclose(output_base, output_custom1))
|
assert not torch.allclose(output_base, output_custom1)
|
||||||
self.assertFalse(torch.allclose(output_base, output_custom2))
|
assert not torch.allclose(output_base, output_custom2)
|
||||||
self.assertTrue(torch.allclose(output_custom1, output_custom2))
|
assert torch.allclose(output_custom1, output_custom2)
|
||||||
self.assertTrue(torch.allclose(output_default, output_custom1))
|
assert torch.allclose(output_default, output_custom1)
|
||||||
|
|
||||||
|
|
||||||
class TestMultiRankAdapter(unittest.TestCase):
|
class TestMultiRankAdapter(unittest.TestCase):
|
||||||
@ -953,7 +953,7 @@ class TestMultiRankAdapter(unittest.TestCase):
|
|||||||
rank_current = model.lin0.lora_A["second"].weight.shape[0]
|
rank_current = model.lin0.lora_A["second"].weight.shape[0]
|
||||||
rank_expected = config_2.rank_pattern["lin0"]
|
rank_expected = config_2.rank_pattern["lin0"]
|
||||||
|
|
||||||
self.assertTrue(rank_current == rank_expected, f"Rank {rank_current} is not equal to expected {rank_expected}")
|
assert rank_current == rank_expected, f"Rank {rank_current} is not equal to expected {rank_expected}"
|
||||||
|
|
||||||
def test_multirank_2(self):
|
def test_multirank_2(self):
|
||||||
rank_pattern = {}
|
rank_pattern = {}
|
||||||
@ -987,9 +987,9 @@ class TestMultiRankAdapter(unittest.TestCase):
|
|||||||
if isinstance(module, BaseTunerLayer):
|
if isinstance(module, BaseTunerLayer):
|
||||||
rank_expected = rank_pattern.get(key, r)
|
rank_expected = rank_pattern.get(key, r)
|
||||||
rank_current = module.lora_A[adapter].weight.shape[0]
|
rank_current = module.lora_A[adapter].weight.shape[0]
|
||||||
self.assertTrue(
|
assert (
|
||||||
rank_current == rank_expected, f"Rank {rank_current} is not equal to expected {rank_expected}"
|
rank_current == rank_expected
|
||||||
)
|
), f"Rank {rank_current} is not equal to expected {rank_expected}"
|
||||||
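The rewrite above keeps the f-string message after the closing parenthesis because `assert (condition, message)` would assert a two-element tuple, which is always truthy and can never fail. A sketch of the correct and incorrect forms (ranks invented):

rank_current, rank_expected = 8, 8

assert (
    rank_current == rank_expected
), f"Rank {rank_current} is not equal to expected {rank_expected}"

# assert (rank_current == rank_expected, "ranks differ")  # wrong: a non-empty tuple always passes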
|
|
||||||
|
|
||||||
class TestRepr(unittest.TestCase):
|
class TestRepr(unittest.TestCase):
|
||||||
@ -999,45 +999,45 @@ class TestRepr(unittest.TestCase):
|
|||||||
config = LoraConfig(target_modules=["lin0"])
|
config = LoraConfig(target_modules=["lin0"])
|
||||||
model = get_peft_model(MLP(), config)
|
model = get_peft_model(MLP(), config)
|
||||||
print_output = repr(model.model.lin0)
|
print_output = repr(model.model.lin0)
|
||||||
self.assertTrue(print_output.startswith("lora.Linear"))
|
assert print_output.startswith("lora.Linear")
|
||||||
self.assertTrue("in_features=10" in print_output)
|
assert "in_features=10" in print_output
|
||||||
self.assertTrue("out_features=20" in print_output)
|
assert "out_features=20" in print_output
|
||||||
self.assertTrue("lora_A" in print_output)
|
assert "lora_A" in print_output
|
||||||
self.assertTrue("lora_B" in print_output)
|
assert "lora_B" in print_output
|
||||||
self.assertTrue("default" in print_output)
|
assert "default" in print_output
|
||||||
|
|
||||||
def test_repr_lora_embedding(self):
|
def test_repr_lora_embedding(self):
|
||||||
config = LoraConfig(target_modules=["emb"])
|
config = LoraConfig(target_modules=["emb"])
|
||||||
model = get_peft_model(ModelEmbConv1D(), config)
|
model = get_peft_model(ModelEmbConv1D(), config)
|
||||||
print_output = repr(model.model.emb)
|
print_output = repr(model.model.emb)
|
||||||
self.assertTrue(print_output.startswith("lora.Embedding"))
|
assert print_output.startswith("lora.Embedding")
|
||||||
self.assertTrue("100, 5" in print_output)
|
assert "100, 5" in print_output
|
||||||
self.assertTrue("lora_embedding_A" in print_output)
|
assert "lora_embedding_A" in print_output
|
||||||
self.assertTrue("lora_embedding_B" in print_output)
|
assert "lora_embedding_B" in print_output
|
||||||
self.assertTrue("default" in print_output)
|
assert "default" in print_output
|
||||||
|
|
||||||
def test_repr_lora_conv1d(self):
|
def test_repr_lora_conv1d(self):
|
||||||
config = LoraConfig(target_modules=["conv1d"])
|
config = LoraConfig(target_modules=["conv1d"])
|
||||||
model = get_peft_model(ModelEmbConv1D(), config)
|
model = get_peft_model(ModelEmbConv1D(), config)
|
||||||
print_output = repr(model.model.conv1d)
|
print_output = repr(model.model.conv1d)
|
||||||
self.assertTrue(print_output.startswith("lora.Linear"))
|
assert print_output.startswith("lora.Linear")
|
||||||
self.assertTrue("in_features=5" in print_output)
|
assert "in_features=5" in print_output
|
||||||
self.assertTrue("out_features=1" in print_output)
|
assert "out_features=1" in print_output
|
||||||
self.assertTrue("lora_A" in print_output)
|
assert "lora_A" in print_output
|
||||||
self.assertTrue("lora_B" in print_output)
|
assert "lora_B" in print_output
|
||||||
self.assertTrue("default" in print_output)
|
assert "default" in print_output
|
||||||
|
|
||||||
def test_repr_lora_conv2d(self):
|
def test_repr_lora_conv2d(self):
|
||||||
config = LoraConfig(target_modules=["conv2d"])
|
config = LoraConfig(target_modules=["conv2d"])
|
||||||
model = get_peft_model(ModelConv2D(), config)
|
model = get_peft_model(ModelConv2D(), config)
|
||||||
print_output = repr(model.model.conv2d)
|
print_output = repr(model.model.conv2d)
|
||||||
self.assertTrue(print_output.startswith("lora.Conv2d"))
|
assert print_output.startswith("lora.Conv2d")
|
||||||
self.assertTrue("5, 10" in print_output)
|
assert "5, 10" in print_output
|
||||||
self.assertTrue("kernel_size=(3, 3)" in print_output)
|
assert "kernel_size=(3, 3)" in print_output
|
||||||
self.assertTrue("stride=(1, 1)" in print_output)
|
assert "stride=(1, 1)" in print_output
|
||||||
self.assertTrue("lora_A" in print_output)
|
assert "lora_A" in print_output
|
||||||
self.assertTrue("lora_B" in print_output)
|
assert "lora_B" in print_output
|
||||||
self.assertTrue("default" in print_output)
|
assert "default" in print_output
|
||||||
|
|
||||||
|
|
||||||
class MultipleActiveAdaptersTester(unittest.TestCase):
|
class MultipleActiveAdaptersTester(unittest.TestCase):
|
||||||
@ -1084,9 +1084,9 @@ class MultipleActiveAdaptersTester(unittest.TestCase):
|
|||||||
self.set_multiple_active_adapters(peft_model, ["adapter_1", "adapter_2"])
|
self.set_multiple_active_adapters(peft_model, ["adapter_1", "adapter_2"])
|
||||||
combined_output = peft_model(**X)
|
combined_output = peft_model(**X)
|
||||||
|
|
||||||
self.assertFalse(torch.allclose(adapter_1_output, adapter_2_output, atol=1e-5))
|
assert not torch.allclose(adapter_1_output, adapter_2_output, atol=1e-5)
|
||||||
self.assertFalse(torch.allclose(adapter_1_output, combined_output, atol=1e-5))
|
assert not torch.allclose(adapter_1_output, combined_output, atol=1e-5)
|
||||||
self.assertFalse(torch.allclose(adapter_2_output, combined_output, atol=1e-5))
|
assert not torch.allclose(adapter_2_output, combined_output, atol=1e-5)
|
||||||
|
|
||||||
if tuner_method == "lora":
|
if tuner_method == "lora":
|
||||||
# create a weighted adapter combining both adapters and check that
|
# create a weighted adapter combining both adapters and check that
|
||||||
@ -1096,7 +1096,7 @@ class MultipleActiveAdaptersTester(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
peft_model.set_adapter("new_combined_adapter")
|
peft_model.set_adapter("new_combined_adapter")
|
||||||
new_combined_output = peft_model(**X)
|
new_combined_output = peft_model(**X)
|
||||||
self.assertTrue(torch.allclose(new_combined_output, combined_output, atol=1e-5))
|
assert torch.allclose(new_combined_output, combined_output, atol=1e-5)
|
||||||
|
|
||||||
@parameterized.expand(MULTIPLE_ACTIVE_ADAPTERS_TEST_CASES)
|
@parameterized.expand(MULTIPLE_ACTIVE_ADAPTERS_TEST_CASES)
|
||||||
def test_multiple_active_adapters_merge_and_unmerge(
|
def test_multiple_active_adapters_merge_and_unmerge(
|
||||||
@ -1120,14 +1120,14 @@ class MultipleActiveAdaptersTester(unittest.TestCase):
|
|||||||
|
|
||||||
peft_model.merge_adapter()
|
peft_model.merge_adapter()
|
||||||
merged_combined_output = peft_model(**X)
|
merged_combined_output = peft_model(**X)
|
||||||
self.assertTrue(torch.allclose(merged_combined_output, combined_output, atol=1e-5))
|
assert torch.allclose(merged_combined_output, combined_output, atol=1e-5)
|
||||||
|
|
||||||
peft_model.unmerge_adapter()
|
peft_model.unmerge_adapter()
|
||||||
|
|
||||||
with peft_model.disable_adapter():
|
with peft_model.disable_adapter():
|
||||||
disabled_adapter_output = peft_model(**X)
|
disabled_adapter_output = peft_model(**X)
|
||||||
|
|
||||||
self.assertTrue(torch.allclose(disabled_adapter_output, base_output, atol=1e-4))
|
assert torch.allclose(disabled_adapter_output, base_output, atol=1e-4)
|
||||||
|
|
||||||
@parameterized.expand(MULTIPLE_ACTIVE_ADAPTERS_TEST_CASES)
|
@parameterized.expand(MULTIPLE_ACTIVE_ADAPTERS_TEST_CASES)
|
||||||
def test_merge_layers_multi(self, test_name, tuner_method, config_cls, config_kwargs_1, config_kwargs_2):
|
def test_merge_layers_multi(self, test_name, tuner_method, config_cls, config_kwargs_1, config_kwargs_2):
|
||||||
@ -1153,14 +1153,14 @@ class MultipleActiveAdaptersTester(unittest.TestCase):
|
|||||||
with torch.inference_mode():
|
with torch.inference_mode():
|
||||||
logits_adapter_2 = model(**dummy_input)[0]
|
logits_adapter_2 = model(**dummy_input)[0]
|
||||||
|
|
||||||
self.assertFalse(torch.allclose(logits_adapter_1, logits_adapter_2, atol=1e-3, rtol=1e-3))
|
assert not torch.allclose(logits_adapter_1, logits_adapter_2, atol=1e-3, rtol=1e-3)
|
||||||
|
|
||||||
model.set_adapter("default")
|
model.set_adapter("default")
|
||||||
|
|
||||||
with torch.inference_mode():
|
with torch.inference_mode():
|
||||||
logits_adapter_1_after_set = model(**dummy_input)[0]
|
logits_adapter_1_after_set = model(**dummy_input)[0]
|
||||||
|
|
||||||
self.assertTrue(torch.allclose(logits_adapter_1_after_set, logits_adapter_1, atol=1e-3, rtol=1e-3))
|
assert torch.allclose(logits_adapter_1_after_set, logits_adapter_1, atol=1e-3, rtol=1e-3)
|
||||||
|
|
||||||
model_copy = copy.deepcopy(model)
|
model_copy = copy.deepcopy(model)
|
||||||
model_copy_2 = copy.deepcopy(model)
|
model_copy_2 = copy.deepcopy(model)
|
||||||
@ -1169,22 +1169,22 @@ class MultipleActiveAdaptersTester(unittest.TestCase):
|
|||||||
with torch.inference_mode():
|
with torch.inference_mode():
|
||||||
logits_merged_all = model_merged_all(**dummy_input)[0]
|
logits_merged_all = model_merged_all(**dummy_input)[0]
|
||||||
|
|
||||||
self.assertFalse(torch.allclose(logits_merged_all, logits_adapter_2, atol=1e-3, rtol=1e-3))
|
assert not torch.allclose(logits_merged_all, logits_adapter_2, atol=1e-3, rtol=1e-3)
|
||||||
self.assertFalse(torch.allclose(logits_merged_all, logits_adapter_1, atol=1e-3, rtol=1e-3))
|
assert not torch.allclose(logits_merged_all, logits_adapter_1, atol=1e-3, rtol=1e-3)
|
||||||
|
|
||||||
model_merged_adapter_2 = model_copy.merge_and_unload(adapter_names=["adapter-2"])
|
model_merged_adapter_2 = model_copy.merge_and_unload(adapter_names=["adapter-2"])
|
||||||
|
|
||||||
with torch.inference_mode():
|
with torch.inference_mode():
|
||||||
logits_merged_adapter_2 = model_merged_adapter_2(**dummy_input)[0]
|
logits_merged_adapter_2 = model_merged_adapter_2(**dummy_input)[0]
|
||||||
|
|
||||||
self.assertTrue(torch.allclose(logits_merged_adapter_2, logits_adapter_2, atol=1e-3, rtol=1e-3))
|
assert torch.allclose(logits_merged_adapter_2, logits_adapter_2, atol=1e-3, rtol=1e-3)
|
||||||
|
|
||||||
model_merged_adapter_default = model_copy_2.merge_and_unload(adapter_names=["default"])
|
model_merged_adapter_default = model_copy_2.merge_and_unload(adapter_names=["default"])
|
||||||
|
|
||||||
with torch.inference_mode():
|
with torch.inference_mode():
|
||||||
logits_merged_adapter_default = model_merged_adapter_default(**dummy_input)[0]
|
logits_merged_adapter_default = model_merged_adapter_default(**dummy_input)[0]
|
||||||
|
|
||||||
self.assertTrue(torch.allclose(logits_merged_adapter_default, logits_adapter_1, atol=1e-3, rtol=1e-3))
|
assert torch.allclose(logits_merged_adapter_default, logits_adapter_1, atol=1e-3, rtol=1e-3)
|
||||||
|
|
||||||
|
|
||||||
class RequiresGradTester(unittest.TestCase):
|
class RequiresGradTester(unittest.TestCase):
|
||||||
@ -1203,7 +1203,7 @@ class RequiresGradTester(unittest.TestCase):
|
|||||||
params_with_requires_grad = [name for name, param in model.named_parameters() if param.requires_grad]
|
params_with_requires_grad = [name for name, param in model.named_parameters() if param.requires_grad]
|
||||||
diff = set(params_expected).symmetric_difference(set(params_with_requires_grad))
|
diff = set(params_expected).symmetric_difference(set(params_with_requires_grad))
|
||||||
msg = f"Expected {params_expected} to require gradients, got {params_with_requires_grad}"
|
msg = f"Expected {params_expected} to require gradients, got {params_with_requires_grad}"
|
||||||
self.assertEqual(len(diff), 0, msg=msg)
|
assert len(diff) == 0, msg
|
||||||
|
|
||||||
def test_requires_grad_modules_to_save_default(self):
|
def test_requires_grad_modules_to_save_default(self):
|
||||||
config = LoraConfig(target_modules=["lin0"], modules_to_save=["lin1"])
|
config = LoraConfig(target_modules=["lin0"], modules_to_save=["lin1"])
|
||||||
|
@ -14,6 +14,7 @@
|
|||||||
import unittest
|
import unittest
|
||||||
from unittest.mock import Mock, call, patch
|
from unittest.mock import Mock, call, patch
|
||||||
|
|
||||||
|
import pytest
|
||||||
import torch
|
import torch
|
||||||
from parameterized import parameterized
|
from parameterized import parameterized
|
||||||
from transformers import AutoModelForCausalLM, AutoTokenizer
|
from transformers import AutoModelForCausalLM, AutoTokenizer
|
||||||
@ -114,14 +115,13 @@ class PeftDecoderModelTester(unittest.TestCase, PeftCommonTester):
|
|||||||
model = get_peft_model(model, config)
|
model = get_peft_model(model, config)
|
||||||
|
|
||||||
expected_call = call(model_id, trust_remote_code=True, foo="bar")
|
expected_call = call(model_id, trust_remote_code=True, foo="bar")
|
||||||
self.assertEqual(mock.call_args, expected_call)
|
assert mock.call_args == expected_call
|
||||||
|
|
||||||
def test_prompt_tuning_config_invalid_args(self):
|
def test_prompt_tuning_config_invalid_args(self):
|
||||||
# Raise an error when tokenizer_kwargs is used with prompt_tuning_init!='TEXT', because this argument has no
|
# Raise an error when tokenizer_kwargs is used with prompt_tuning_init!='TEXT', because this argument has no
|
||||||
# function in that case
|
# function in that case
|
||||||
model_id = "hf-internal-testing/tiny-random-OPTForCausalLM"
|
model_id = "hf-internal-testing/tiny-random-OPTForCausalLM"
|
||||||
msg = "tokenizer_kwargs only valid when using prompt_tuning_init='TEXT'."
|
with pytest.raises(ValueError, match="tokenizer_kwargs only valid when using prompt_tuning_init='TEXT'."):
|
||||||
with self.assertRaisesRegex(ValueError, expected_regex=msg):
|
|
||||||
PromptTuningConfig(
|
PromptTuningConfig(
|
||||||
base_model_name_or_path=model_id,
|
base_model_name_or_path=model_id,
|
||||||
tokenizer_name_or_path=model_id,
|
tokenizer_name_or_path=model_id,
|
||||||
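The hunk above also rewrites a `unittest.mock` check as a plain comparison; `mock.call_args == call(...)` compares both the positional and keyword arguments of the most recent call. A minimal sketch with an invented mock (not the actual PEFT code):

from unittest.mock import Mock, call

loader = Mock()
loader("tiny-model", trust_remote_code=True, foo="bar")

assert loader.call_args == call("tiny-model", trust_remote_code=True, foo="bar")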
|
@ -137,7 +137,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
|
|||||||
training = model.training
|
training = model.training
|
||||||
model.eval()
|
model.eval()
|
||||||
output = model(**batch.to(model.device))
|
output = model(**batch.to(model.device))
|
||||||
self.assertTrue(torch.isfinite(output.logits).all())
|
assert torch.isfinite(output.logits).all()
|
||||||
model.train(training)
|
model.train(training)
|
||||||
|
|
||||||
@pytest.mark.single_gpu_tests
|
@pytest.mark.single_gpu_tests
|
||||||
@ -192,11 +192,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
|
|||||||
|
|
||||||
model.cpu().save_pretrained(tmp_dir)
|
model.cpu().save_pretrained(tmp_dir)
|
||||||
|
|
||||||
self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
|
assert "adapter_config.json" in os.listdir(tmp_dir)
|
||||||
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
|
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
|
||||||
|
|
||||||
# assert loss is not None
|
# assert loss is not None
|
||||||
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
|
assert trainer.state.log_history[-1]["train_loss"] is not None
|
||||||
|
|
||||||
@pytest.mark.single_gpu_tests
|
@pytest.mark.single_gpu_tests
|
||||||
def test_causal_lm_training_4bit(self):
|
def test_causal_lm_training_4bit(self):
|
||||||
@ -250,11 +250,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
|
|||||||
|
|
||||||
model.cpu().save_pretrained(tmp_dir)
|
model.cpu().save_pretrained(tmp_dir)
|
||||||
|
|
||||||
self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
|
assert "adapter_config.json" in os.listdir(tmp_dir)
|
||||||
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
|
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
|
||||||
|
|
||||||
# assert loss is not None
|
# assert loss is not None
|
||||||
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
|
assert trainer.state.log_history[-1]["train_loss"] is not None
|
||||||
|
|
||||||
@pytest.mark.multi_gpu_tests
|
@pytest.mark.multi_gpu_tests
|
||||||
def test_causal_lm_training_multi_gpu_4bit(self):
|
def test_causal_lm_training_multi_gpu_4bit(self):
|
||||||
@ -270,7 +270,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
|
|||||||
load_in_4bit=True,
|
load_in_4bit=True,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))
|
assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
|
||||||
|
|
||||||
model = prepare_model_for_kbit_training(model)
|
model = prepare_model_for_kbit_training(model)
|
||||||
|
|
||||||
@ -311,11 +311,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
|
|||||||
|
|
||||||
model.cpu().save_pretrained(tmp_dir)
|
model.cpu().save_pretrained(tmp_dir)
|
||||||
|
|
||||||
self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
|
assert "adapter_config.json" in os.listdir(tmp_dir)
|
||||||
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
|
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
|
||||||
|
|
||||||
# assert loss is not None
|
# assert loss is not None
|
||||||
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
|
assert trainer.state.log_history[-1]["train_loss"] is not None
|
||||||
|
|
||||||
@pytest.mark.single_gpu_tests
|
@pytest.mark.single_gpu_tests
|
||||||
@require_torch_gpu
|
@require_torch_gpu
|
||||||
@ -375,11 +375,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
|
|||||||
|
|
||||||
model.cpu().save_pretrained(tmp_dir)
|
model.cpu().save_pretrained(tmp_dir)
|
||||||
|
|
||||||
self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
|
assert "adapter_config.json" in os.listdir(tmp_dir)
|
||||||
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
|
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
|
||||||
|
|
||||||
# assert loss is not None
|
# assert loss is not None
|
||||||
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
|
assert trainer.state.log_history[-1]["train_loss"] is not None
|
||||||
|
|
||||||
@pytest.mark.single_gpu_tests
|
@pytest.mark.single_gpu_tests
|
||||||
@require_torch_gpu
|
@require_torch_gpu
|
||||||
@ -438,11 +438,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
|
|||||||
|
|
||||||
model.cpu().save_pretrained(tmp_dir)
|
model.cpu().save_pretrained(tmp_dir)
|
||||||
|
|
||||||
self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
|
assert "adapter_config.json" in os.listdir(tmp_dir)
|
||||||
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
|
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
|
||||||
|
|
||||||
# assert loss is not None
|
# assert loss is not None
|
||||||
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
|
assert trainer.state.log_history[-1]["train_loss"] is not None
|
||||||
|
|
||||||
@pytest.mark.multi_gpu_tests
|
@pytest.mark.multi_gpu_tests
|
||||||
@require_torch_multi_gpu
|
@require_torch_multi_gpu
|
||||||
@ -460,7 +460,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
|
|||||||
device_map="auto",
|
device_map="auto",
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))
|
assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(self.causal_lm_model_id)
|
tokenizer = AutoTokenizer.from_pretrained(self.causal_lm_model_id)
|
||||||
model = prepare_model_for_int8_training(model)
|
model = prepare_model_for_int8_training(model)
|
||||||
@ -502,11 +502,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
|
|||||||
|
|
||||||
model.cpu().save_pretrained(tmp_dir)
|
model.cpu().save_pretrained(tmp_dir)
|
||||||
|
|
||||||
self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
|
assert "adapter_config.json" in os.listdir(tmp_dir)
|
||||||
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
|
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
|
||||||
|
|
||||||
# assert loss is not None
|
# assert loss is not None
|
||||||
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
|
assert trainer.state.log_history[-1]["train_loss"] is not None
|
||||||
|
|
||||||
@pytest.mark.single_gpu_tests
|
@pytest.mark.single_gpu_tests
|
||||||
def test_seq2seq_lm_training_single_gpu(self):
|
def test_seq2seq_lm_training_single_gpu(self):
|
||||||
@ -523,7 +523,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
|
|||||||
device_map={"": 0},
|
device_map={"": 0},
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(set(model.hf_device_map.values()), {0})
|
assert set(model.hf_device_map.values()) == {0}
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)
|
tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)
|
||||||
model = prepare_model_for_int8_training(model)
|
model = prepare_model_for_int8_training(model)
|
||||||
@ -562,11 +562,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
|
|||||||
|
|
||||||
model.cpu().save_pretrained(tmp_dir)
|
model.cpu().save_pretrained(tmp_dir)
|
||||||
|
|
||||||
self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
|
assert "adapter_config.json" in os.listdir(tmp_dir)
|
||||||
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
|
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
|
||||||
|
|
||||||
# assert loss is not None
|
# assert loss is not None
|
||||||
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
|
assert trainer.state.log_history[-1]["train_loss"] is not None
|
||||||
|
|
||||||
@pytest.mark.multi_gpu_tests
|
@pytest.mark.multi_gpu_tests
|
||||||
@require_torch_multi_gpu
|
@require_torch_multi_gpu
|
||||||
@ -584,7 +584,7 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
|
|||||||
device_map="balanced",
|
device_map="balanced",
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))
|
assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
|
||||||
|
|
||||||
tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)
|
tokenizer = AutoTokenizer.from_pretrained(self.seq2seq_model_id)
|
||||||
model = prepare_model_for_int8_training(model)
|
model = prepare_model_for_int8_training(model)
|
||||||
@ -623,11 +623,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
|
|||||||
|
|
||||||
model.cpu().save_pretrained(tmp_dir)
|
model.cpu().save_pretrained(tmp_dir)
|
||||||
|
|
||||||
self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
|
assert "adapter_config.json" in os.listdir(tmp_dir)
|
||||||
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
|
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
|
||||||
|
|
||||||
# assert loss is not None
|
# assert loss is not None
|
||||||
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
|
assert trainer.state.log_history[-1]["train_loss"] is not None
|
||||||
|
|
||||||
@pytest.mark.single_gpu_tests
|
@pytest.mark.single_gpu_tests
|
||||||
def test_audio_model_training(self):
|
def test_audio_model_training(self):
|
||||||
@ -721,11 +721,11 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
|
|||||||
|
|
||||||
model.cpu().save_pretrained(tmp_dir)
|
model.cpu().save_pretrained(tmp_dir)
|
||||||
|
|
||||||
self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
|
assert "adapter_config.json" in os.listdir(tmp_dir)
|
||||||
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
|
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
|
||||||
|
|
||||||
# assert loss is not None
|
# assert loss is not None
|
||||||
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
|
assert trainer.state.log_history[-1]["train_loss"] is not None
|
||||||
|
|
||||||
@pytest.mark.single_gpu_tests
|
@pytest.mark.single_gpu_tests
|
||||||
def test_4bit_non_default_adapter_name(self):
|
def test_4bit_non_default_adapter_name(self):
|
||||||
@ -757,9 +757,10 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
|
|||||||
model = get_peft_model(model, config, adapter_name="other")
|
model = get_peft_model(model, config, adapter_name="other")
|
||||||
n_trainable_other, n_total_other = model.get_nb_trainable_parameters()
|
n_trainable_other, n_total_other = model.get_nb_trainable_parameters()
|
||||||
|
|
||||||
self.assertGreater(n_trainable_other, 0) # sanity check
|
assert n_trainable_other > 0
|
||||||
self.assertEqual(n_trainable_default, n_trainable_other)
|
# sanity check
|
||||||
self.assertEqual(n_total_default, n_total_other)
|
assert n_trainable_default == n_trainable_other
|
||||||
|
assert n_total_default == n_total_other
|
||||||
|
|
||||||
@pytest.mark.single_gpu_tests
|
@pytest.mark.single_gpu_tests
|
||||||
def test_8bit_non_default_adapter_name(self):
|
def test_8bit_non_default_adapter_name(self):
|
||||||
@ -791,9 +792,10 @@ class PeftBnbGPUExampleTests(unittest.TestCase):
|
|||||||
model = get_peft_model(model, config, adapter_name="other")
|
model = get_peft_model(model, config, adapter_name="other")
|
||||||
n_trainable_other, n_total_other = model.get_nb_trainable_parameters()
|
n_trainable_other, n_total_other = model.get_nb_trainable_parameters()
|
||||||
|
|
||||||
self.assertGreater(n_trainable_other, 0) # sanity check
|
assert n_trainable_other > 0
|
||||||
self.assertEqual(n_trainable_default, n_trainable_other)
|
# sanity check
|
||||||
self.assertEqual(n_total_default, n_total_other)
|
assert n_trainable_default == n_trainable_other
|
||||||
|
assert n_total_default == n_total_other
|
||||||
|
|
||||||
|
|
||||||
@require_torch_gpu
|
@require_torch_gpu
|
||||||
@ -825,7 +827,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
|
|||||||
training = model.training
|
training = model.training
|
||||||
model.eval()
|
model.eval()
|
||||||
output = model(**batch.to(model.device))
|
output = model(**batch.to(model.device))
|
||||||
self.assertTrue(torch.isfinite(output.logits).all())
|
assert torch.isfinite(output.logits).all()
|
||||||
model.train(training)
|
model.train(training)
|
||||||
|
|
||||||
@pytest.mark.single_gpu_tests
|
@pytest.mark.single_gpu_tests
|
||||||
@ -876,11 +878,11 @@ class PeftGPTQGPUTests(unittest.TestCase):
|
|||||||
|
|
||||||
model.cpu().save_pretrained(tmp_dir)
|
model.cpu().save_pretrained(tmp_dir)
|
||||||
|
|
||||||
self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
|
assert "adapter_config.json" in os.listdir(tmp_dir)
|
||||||
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
|
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
|
||||||
|
|
||||||
# assert loss is not None
|
# assert loss is not None
|
||||||
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
|
assert trainer.state.log_history[-1]["train_loss"] is not None
|
||||||
|
|
||||||
@pytest.mark.single_gpu_tests
|
@pytest.mark.single_gpu_tests
|
||||||
def test_adalora_causalLM(self):
|
def test_adalora_causalLM(self):
|
||||||
@ -941,11 +943,11 @@ class PeftGPTQGPUTests(unittest.TestCase):
|
|||||||
|
|
||||||
model.cpu().save_pretrained(tmp_dir)
|
model.cpu().save_pretrained(tmp_dir)
|
||||||
|
|
||||||
self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
|
assert "adapter_config.json" in os.listdir(tmp_dir)
|
||||||
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
|
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
|
||||||
|
|
||||||
# assert loss is not None
|
# assert loss is not None
|
||||||
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
|
assert trainer.state.log_history[-1]["train_loss"] is not None
|
||||||
|
|
||||||
@pytest.mark.multi_gpu_tests
|
@pytest.mark.multi_gpu_tests
|
||||||
@require_torch_multi_gpu
|
@require_torch_multi_gpu
|
||||||
@ -963,7 +965,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
|
|||||||
quantization_config=self.quantization_config,
|
quantization_config=self.quantization_config,
|
||||||
)
|
)
|
||||||
|
|
||||||
self.assertEqual(set(model.hf_device_map.values()), set(range(torch.cuda.device_count())))
|
assert set(model.hf_device_map.values()) == set(range(torch.cuda.device_count()))
|
||||||
|
|
||||||
model = prepare_model_for_kbit_training(model)
|
model = prepare_model_for_kbit_training(model)
|
||||||
|
|
||||||
@ -1004,11 +1006,11 @@ class PeftGPTQGPUTests(unittest.TestCase):
|
|||||||
|
|
||||||
model.cpu().save_pretrained(tmp_dir)
|
model.cpu().save_pretrained(tmp_dir)
|
||||||
|
|
||||||
self.assertTrue("adapter_config.json" in os.listdir(tmp_dir))
|
assert "adapter_config.json" in os.listdir(tmp_dir)
|
||||||
self.assertTrue(SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir))
|
assert SAFETENSORS_WEIGHTS_NAME in os.listdir(tmp_dir)
|
||||||
|
|
||||||
# assert loss is not None
|
# assert loss is not None
|
||||||
self.assertIsNotNone(trainer.state.log_history[-1]["train_loss"])
|
assert trainer.state.log_history[-1]["train_loss"] is not None
|
||||||
|
|
||||||
@pytest.mark.single_gpu_tests
|
@pytest.mark.single_gpu_tests
|
||||||
def test_non_default_adapter_name(self):
|
def test_non_default_adapter_name(self):
|
||||||
@ -1041,9 +1043,10 @@ class PeftGPTQGPUTests(unittest.TestCase):
|
|||||||
model = get_peft_model(model, config, adapter_name="other")
|
model = get_peft_model(model, config, adapter_name="other")
|
||||||
n_trainable_other, n_total_other = model.get_nb_trainable_parameters()
|
n_trainable_other, n_total_other = model.get_nb_trainable_parameters()
|
||||||
|
|
||||||
self.assertGreater(n_trainable_other, 0) # sanity check
|
assert n_trainable_other > 0
|
||||||
self.assertEqual(n_trainable_default, n_trainable_other)
|
# sanity check
|
||||||
self.assertEqual(n_total_default, n_total_other)
|
assert n_trainable_default == n_trainable_other
|
||||||
|
assert n_total_default == n_total_other
|
||||||
|
|
||||||
|
|
||||||
@require_torch_gpu
|
@require_torch_gpu
|
||||||
@ -1072,8 +1075,8 @@ class OffloadSaveTests(unittest.TestCase):
|
|||||||
memory_limits = {0: "0.4GIB", "cpu": "5GIB"}
|
memory_limits = {0: "0.4GIB", "cpu": "5GIB"}
|
||||||
# offloads around half of all transformer modules
|
# offloads around half of all transformer modules
|
||||||
device_map = infer_auto_device_map(model, max_memory=memory_limits)
|
device_map = infer_auto_device_map(model, max_memory=memory_limits)
|
||||||
self.assertTrue(0 in device_map.values())
|
assert 0 in device_map.values()
|
||||||
self.assertTrue("cpu" in device_map.values())
|
assert "cpu" in device_map.values()
|
||||||
|
|
||||||
config = LoraConfig(task_type="CAUSAL_LM", init_lora_weights=False, target_modules=["c_attn"])
|
config = LoraConfig(task_type="CAUSAL_LM", init_lora_weights=False, target_modules=["c_attn"])
|
||||||
|
|
||||||
@ -1082,7 +1085,7 @@ class OffloadSaveTests(unittest.TestCase):
|
|||||||
model.save_pretrained(tmp_dir)
|
model.save_pretrained(tmp_dir)
|
||||||
# load the model with device_map
|
# load the model with device_map
|
||||||
model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, device_map=device_map).eval()
|
model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, device_map=device_map).eval()
|
||||||
self.assertTrue(len({p.device for p in model.parameters()}) == 2)
|
assert len({p.device for p in model.parameters()}) == 2
|
||||||
model = PeftModel.from_pretrained(model, tmp_dir, max_memory=memory_limits)
|
model = PeftModel.from_pretrained(model, tmp_dir, max_memory=memory_limits)
|
||||||
|
|
||||||
input_tokens = tokenizer.encode("Four score and seven years ago", return_tensors="pt")
|
input_tokens = tokenizer.encode("Four score and seven years ago", return_tensors="pt")
|
||||||
@ -1092,17 +1095,17 @@ class OffloadSaveTests(unittest.TestCase):
|
|||||||
pre_merge_olayer = model(input_tokens)[0]
|
pre_merge_olayer = model(input_tokens)[0]
|
||||||
model.merge_adapter()
|
model.merge_adapter()
|
||||||
post_merge_olayer = model(input_tokens)[0]
|
post_merge_olayer = model(input_tokens)[0]
|
||||||
self.assertTrue(torch.allclose(post_merge_olayer, pre_merge_olayer))
|
assert torch.allclose(post_merge_olayer, pre_merge_olayer)
|
||||||
|
|
||||||
# test peft model adapter unmerge
|
# test peft model adapter unmerge
|
||||||
model.unmerge_adapter()
|
model.unmerge_adapter()
|
||||||
post_unmerge_olayer = model(input_tokens)[0]
|
post_unmerge_olayer = model(input_tokens)[0]
|
||||||
self.assertTrue(torch.allclose(post_unmerge_olayer, pre_merge_olayer))
|
assert torch.allclose(post_unmerge_olayer, pre_merge_olayer)
|
||||||
|
|
||||||
# test LoRA merge and unload
|
# test LoRA merge and unload
|
||||||
model = model.merge_and_unload()
|
model = model.merge_and_unload()
|
||||||
post_unload_merge_olayer = model(input_tokens)[0]
|
post_unload_merge_olayer = model(input_tokens)[0]
|
||||||
self.assertTrue(torch.allclose(post_unload_merge_olayer, pre_merge_olayer))
|
assert torch.allclose(post_unload_merge_olayer, pre_merge_olayer)
|
||||||
|
|
||||||
|
|
||||||
@require_torch_gpu
|
@require_torch_gpu
|
||||||
@ -1203,15 +1206,15 @@ class LoftQTests(unittest.TestCase):
|
|||||||
|
|
||||||
mae_quantized, mse_quantized, mae_loftq, mse_loftq = self.get_errors(bits=4, device=device)
|
mae_quantized, mse_quantized, mae_loftq, mse_loftq = self.get_errors(bits=4, device=device)
|
||||||
# first, sanity check that all errors are > 0.0
|
# first, sanity check that all errors are > 0.0
|
||||||
self.assertTrue(mae_quantized > 0.0)
|
assert mae_quantized > 0.0
|
||||||
self.assertTrue(mse_quantized > 0.0)
|
assert mse_quantized > 0.0
|
||||||
self.assertTrue(mae_loftq > 0.0)
|
assert mae_loftq > 0.0
|
||||||
self.assertTrue(mse_loftq > 0.0)
|
assert mse_loftq > 0.0
|
||||||
|
|
||||||
# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
|
# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
|
||||||
factor = 3
|
factor = 3
|
||||||
self.assertTrue(mae_loftq < mae_quantized / factor)
|
assert mae_loftq < (mae_quantized / factor)
|
||||||
self.assertTrue(mse_loftq < mse_quantized / factor)
|
assert mse_loftq < (mse_quantized / factor)
|
||||||
|
|
||||||
@parameterized.expand(["cuda", "cpu"])
|
@parameterized.expand(["cuda", "cpu"])
|
||||||
def test_bloomz_loftq_4bit_iter_5(self, device):
|
def test_bloomz_loftq_4bit_iter_5(self, device):
|
||||||
@ -1219,14 +1222,14 @@ class LoftQTests(unittest.TestCase):
|
|||||||
# iterations, but in practice the difference is not that large, at least not for this small base model.
|
# iterations, but in practice the difference is not that large, at least not for this small base model.
|
||||||
mae_quantized, mse_quantized, mae_loftq, mse_loftq = self.get_errors(bits=4, loftq_iter=5, device=device)
|
mae_quantized, mse_quantized, mae_loftq, mse_loftq = self.get_errors(bits=4, loftq_iter=5, device=device)
|
||||||
# first, sanity check that all errors are > 0.0
|
# first, sanity check that all errors are > 0.0
|
||||||
self.assertTrue(mae_quantized > 0.0)
|
assert mae_quantized > 0.0
|
||||||
self.assertTrue(mse_quantized > 0.0)
|
assert mse_quantized > 0.0
|
||||||
self.assertTrue(mae_loftq > 0.0)
|
assert mae_loftq > 0.0
|
||||||
self.assertTrue(mse_loftq > 0.0)
|
assert mse_loftq > 0.0
|
||||||
|
|
||||||
# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
|
# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
|
||||||
self.assertTrue(mae_loftq < mae_quantized / self.error_factor)
|
assert mae_loftq < (mae_quantized / self.error_factor)
|
||||||
self.assertTrue(mse_loftq < mse_quantized / self.error_factor)
|
assert mse_loftq < (mse_quantized / self.error_factor)
|
||||||
|
|
||||||
@parameterized.expand(["cuda", "cpu"])
|
@parameterized.expand(["cuda", "cpu"])
|
||||||
def test_bloomz_loftq_8bit(self, device):
|
def test_bloomz_loftq_8bit(self, device):
|
||||||
@ -1234,14 +1237,14 @@ class LoftQTests(unittest.TestCase):
|
|||||||
mae_quantized, mse_quantized, mae_loftq, mse_loftq = self.get_errors(bits=8, device=device)
|
mae_quantized, mse_quantized, mae_loftq, mse_loftq = self.get_errors(bits=8, device=device)
|
||||||
|
|
||||||
# first, sanity check that all errors are > 0.0
|
# first, sanity check that all errors are > 0.0
|
||||||
self.assertTrue(mae_quantized > 0.0)
|
assert mae_quantized > 0.0
|
||||||
self.assertTrue(mse_quantized > 0.0)
|
assert mse_quantized > 0.0
|
||||||
self.assertTrue(mae_loftq > 0.0)
|
assert mae_loftq > 0.0
|
||||||
self.assertTrue(mse_loftq > 0.0)
|
assert mse_loftq > 0.0
|
||||||
|
|
||||||
# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
|
# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
|
||||||
self.assertTrue(mae_loftq < mae_quantized / self.error_factor)
|
assert mae_loftq < (mae_quantized / self.error_factor)
|
||||||
self.assertTrue(mse_loftq < mse_quantized / self.error_factor)
|
assert mse_loftq < (mse_quantized / self.error_factor)
|
||||||
|
|
||||||
@parameterized.expand(["cuda", "cpu"])
|
@parameterized.expand(["cuda", "cpu"])
|
||||||
def test_bloomz_loftq_8bit_iter_5(self, device):
|
def test_bloomz_loftq_8bit_iter_5(self, device):
|
||||||
@ -1249,14 +1252,14 @@ class LoftQTests(unittest.TestCase):
|
|||||||
mae_quantized, mse_quantized, mae_loftq, mse_loftq = self.get_errors(bits=8, loftq_iter=5, device=device)
|
mae_quantized, mse_quantized, mae_loftq, mse_loftq = self.get_errors(bits=8, loftq_iter=5, device=device)
|
||||||
|
|
||||||
# first, sanity check that all errors are > 0.0
|
# first, sanity check that all errors are > 0.0
|
||||||
self.assertTrue(mae_quantized > 0.0)
|
assert mae_quantized > 0.0
|
||||||
self.assertTrue(mse_quantized > 0.0)
|
assert mse_quantized > 0.0
|
||||||
self.assertTrue(mae_loftq > 0.0)
|
assert mae_loftq > 0.0
|
||||||
self.assertTrue(mse_loftq > 0.0)
|
assert mse_loftq > 0.0
|
||||||
|
|
||||||
# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
|
# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
|
||||||
self.assertTrue(mae_loftq < mae_quantized / self.error_factor)
|
assert mae_loftq < (mae_quantized / self.error_factor)
|
||||||
self.assertTrue(mse_loftq < mse_quantized / self.error_factor)
|
assert mse_loftq < (mse_quantized / self.error_factor)
|
||||||
|
|
||||||
@parameterized.expand(["cuda", "cpu"])
|
@parameterized.expand(["cuda", "cpu"])
|
||||||
def test_t5_loftq_4bit(self, device):
|
def test_t5_loftq_4bit(self, device):
|
||||||
@ -1264,15 +1267,15 @@ class LoftQTests(unittest.TestCase):
|
|||||||
bits=4, device=device, model_id="t5-small"
|
bits=4, device=device, model_id="t5-small"
|
||||||
)
|
)
|
||||||
# first, sanity check that all errors are > 0.0
|
# first, sanity check that all errors are > 0.0
|
||||||
self.assertTrue(mae_quantized > 0.0)
|
assert mae_quantized > 0.0
|
||||||
self.assertTrue(mse_quantized > 0.0)
|
assert mse_quantized > 0.0
|
||||||
self.assertTrue(mae_loftq > 0.0)
|
assert mae_loftq > 0.0
|
||||||
self.assertTrue(mse_loftq > 0.0)
|
assert mse_loftq > 0.0
|
||||||
|
|
||||||
# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
|
# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
|
||||||
factor = 3
|
factor = 3
|
||||||
self.assertTrue(mae_loftq < mae_quantized / factor)
|
assert mae_loftq < (mae_quantized / factor)
|
||||||
self.assertTrue(mse_loftq < mse_quantized / factor)
|
assert mse_loftq < (mse_quantized / factor)
|
||||||
|
|
||||||
@parameterized.expand(["cuda", "cpu"])
|
@parameterized.expand(["cuda", "cpu"])
|
||||||
def test_t5_loftq_8bit(self, device):
|
def test_t5_loftq_8bit(self, device):
|
||||||
@ -1280,15 +1283,15 @@ class LoftQTests(unittest.TestCase):
|
|||||||
bits=8, device=device, model_id="t5-small"
|
bits=8, device=device, model_id="t5-small"
|
||||||
)
|
)
|
||||||
# first, sanity check that all errors are > 0.0
|
# first, sanity check that all errors are > 0.0
|
||||||
self.assertTrue(mae_quantized > 0.0)
|
assert mae_quantized > 0.0
|
||||||
self.assertTrue(mse_quantized > 0.0)
|
assert mse_quantized > 0.0
|
||||||
self.assertTrue(mae_loftq > 0.0)
|
assert mae_loftq > 0.0
|
||||||
self.assertTrue(mse_loftq > 0.0)
|
assert mse_loftq > 0.0
|
||||||
|
|
||||||
# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
|
# next, check that LoftQ quantization errors are smaller than LoRA errors by a certain margin
|
||||||
factor = 3
|
factor = 3
|
||||||
self.assertTrue(mae_loftq < mae_quantized / factor)
|
assert mae_loftq < (mae_quantized / factor)
|
||||||
self.assertTrue(mse_loftq < mse_quantized / factor)
|
assert mse_loftq < (mse_quantized / factor)
|
||||||
|
|
||||||
|
|
||||||
@require_bitsandbytes
|
@require_bitsandbytes
|
||||||
@ -1347,8 +1350,7 @@ class MixedPrecisionTests(unittest.TestCase):
|
|||||||
),
|
),
|
||||||
data_collator=DataCollatorForLanguageModeling(self.tokenizer, mlm=False),
|
data_collator=DataCollatorForLanguageModeling(self.tokenizer, mlm=False),
|
||||||
)
|
)
|
||||||
msg = "Attempting to unscale FP16 gradients."
|
with pytest.raises(ValueError, match="Attempting to unscale FP16 gradients."):
|
||||||
with self.assertRaisesRegex(ValueError, msg):
|
|
||||||
trainer.train()
|
trainer.train()
|
||||||
|
|
||||||
@pytest.mark.single_gpu_tests
|
@pytest.mark.single_gpu_tests
|
||||||
|
@ -34,4 +34,4 @@ class PeftHubFeaturesTester(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
model = PeftModel.from_pretrained(model, model_id, subfolder=subfolder)
|
model = PeftModel.from_pretrained(model, model_id, subfolder=subfolder)
|
||||||
|
|
||||||
self.assertTrue(isinstance(model, PeftModel))
|
assert isinstance(model, PeftModel)
|
||||||
|
@@ -64,15 +64,15 @@ class InitializationTest(unittest.TestCase):
         # use statistical test to check if weight A is from a uniform distribution
         unif = self.get_uniform(weight_A.min().item(), weight_A.max().item())
         _, p_value = stats.kstest(weight_A.detach().flatten().cpu().numpy(), unif.flatten().cpu().numpy())
-        self.assertGreater(p_value, 0.5)
+        assert p_value > 0.5

         # check that weight A is *not* from a normal distribution
         normal = self.get_normal(weight_A.mean().item(), weight_A.std().item())
         _, p_value = stats.kstest(weight_A.detach().flatten().cpu().numpy(), normal.flatten().cpu().numpy())
-        self.assertLess(p_value, 0.05)
+        assert p_value < 0.05

         # check that weight B is zero
-        self.assertTrue((weight_B == 0.0).all())
+        assert (weight_B == 0.0).all()

     def test_lora_linear_init_gaussian(self):
         # use gaussian init
@@ -92,15 +92,15 @@ class InitializationTest(unittest.TestCase):
         # x = weight_A.detach().flatten().cpu().numpy()
         # breakpoint()

-        self.assertGreater(p_value, 0.5)
+        assert p_value > 0.5

         # check that weight A is *not* from a uniform distribution
         unif = self.get_uniform(weight_A.min().item(), weight_A.max().item())
         _, p_value = stats.kstest(weight_A.detach().flatten().cpu().numpy(), unif.flatten().cpu().numpy())
-        self.assertLess(p_value, 0.05)
+        assert p_value < 0.05

         # check that weight B is zero
-        self.assertTrue((weight_B == 0.0).all())
+        assert (weight_B == 0.0).all()

     def test_lora_linear_false(self):
         torch.manual_seed(0)
@@ -112,7 +112,7 @@ class InitializationTest(unittest.TestCase):

         # with init_lora_weights=False, weight B should *not* be zero. We don't care so much about the actual values
         # as long as they are not zero, in order to avoid identity transformation.
-        self.assertFalse(torch.allclose(weight_B, torch.zeros_like(weight_B)))
+        assert not torch.allclose(weight_B, torch.zeros_like(weight_B))

     def test_lora_embedding_default(self):
         # embedding is initialized as a normal distribution, not kaiming uniform
@@ -127,15 +127,15 @@ class InitializationTest(unittest.TestCase):
         # use statistical test to check if weight B is from a normal distribution
         normal = self.get_normal(0.0, 1.0)
         _, p_value = stats.kstest(weight_B.detach().flatten().cpu().numpy(), normal.flatten().cpu().numpy())
-        self.assertGreater(p_value, 0.5)
+        assert p_value > 0.5

         # check that weight B is *not* from a uniform distribution
         unif = self.get_uniform(weight_B.min().item(), weight_B.max().item())
         _, p_value = stats.kstest(weight_B.detach().flatten().cpu().numpy(), unif.flatten().cpu().numpy())
-        self.assertLess(p_value, 0.05)
+        assert p_value < 0.05

         # check that weight A is zero
-        self.assertTrue((weight_A == 0.0).all())
+        assert (weight_A == 0.0).all()

     def test_lora_embedding_gaussian(self):
         # embedding does not change with init_lora_weights="gaussian" vs True
@@ -150,15 +150,15 @@ class InitializationTest(unittest.TestCase):
         # use statistical test to check if weight B is from a normal distribution
         normal = self.get_normal(0.0, 1.0)
         _, p_value = stats.kstest(weight_B.detach().flatten().cpu().numpy(), normal.flatten().cpu().numpy())
-        self.assertGreater(p_value, 0.5)
+        assert p_value > 0.5

         # check that weight B is *not* from a uniform distribution
         unif = self.get_uniform(weight_B.min().item(), weight_B.max().item())
         _, p_value = stats.kstest(weight_B.detach().flatten().cpu().numpy(), unif.flatten().cpu().numpy())
-        self.assertLess(p_value, 0.05)
+        assert p_value < 0.05

         # check that weight A is zero
-        self.assertTrue((weight_A == 0.0).all())
+        assert (weight_A == 0.0).all()

     def test_lora_embedding_false(self):
         torch.manual_seed(0)
@@ -170,7 +170,7 @@ class InitializationTest(unittest.TestCase):

         # with init_lora_weights=False, weight A should *not* be zero. We don't care so much about the actual values
         # as long as they are not zero, in order to avoid identity transformation.
-        self.assertFalse(torch.allclose(weight_A, torch.zeros_like(weight_A)))
+        assert not torch.allclose(weight_A, torch.zeros_like(weight_A))

     def test_lora_conv2d_default(self):
         # default is True
@@ -185,15 +185,15 @@ class InitializationTest(unittest.TestCase):
         # use statistical test to check if weight A is from a uniform distribution
         unif = self.get_uniform(weight_A.min().item(), weight_A.max().item())
         _, p_value = stats.kstest(weight_A.detach().flatten().cpu().numpy(), unif.flatten().cpu().numpy())
-        self.assertGreater(p_value, 0.5)
+        assert p_value > 0.5

         # check that weight A is *not* from a normal distribution
         normal = self.get_normal(weight_A.mean().item(), weight_A.std().item())
         _, p_value = stats.kstest(weight_A.detach().flatten().cpu().numpy(), normal.flatten().cpu().numpy())
-        self.assertLess(p_value, 0.05)
+        assert p_value < 0.05

         # check that weight B is zero
-        self.assertTrue((weight_B == 0.0).all())
+        assert (weight_B == 0.0).all()

     def test_lora_conv2d_init_gaussian(self):
         # use gaussian init
@@ -208,15 +208,15 @@ class InitializationTest(unittest.TestCase):
         # use statistical test to check if weight A is from a normal distribution
         normal = self.get_normal(0.0, 1 / config.r)
         _, p_value = stats.kstest(weight_A.detach().flatten().cpu().numpy(), normal.flatten().cpu().numpy())
-        self.assertGreater(p_value, 0.5)
+        assert p_value > 0.5

         # check that weight A is *not* from a uniform distribution
         unif = self.get_uniform(weight_A.min().item(), weight_A.max().item())
         _, p_value = stats.kstest(weight_A.detach().flatten().cpu().numpy(), unif.flatten().cpu().numpy())
-        self.assertLess(p_value, 0.05)
+        assert p_value < 0.05

         # check that weight B is zero
-        self.assertTrue((weight_B == 0.0).all())
+        assert (weight_B == 0.0).all()

     def test_lora_conv2d_false(self):
         torch.manual_seed(0)
@@ -228,7 +228,7 @@ class InitializationTest(unittest.TestCase):

         # with init_lora_weights=False, weight B should *not* be zero. We don't care so much about the actual values
         # as long as they are not zero, in order to avoid identity transformation.
-        self.assertFalse(torch.allclose(weight_B, torch.zeros_like(weight_B)))
+        assert not torch.allclose(weight_B, torch.zeros_like(weight_B))

     def test_lora_scaling_default(self):
         # default is True
@@ -242,9 +242,9 @@ class InitializationTest(unittest.TestCase):

         expected_scaling = config.lora_alpha / config.r

-        self.assertTrue(model.linear.scaling["default"] == expected_scaling)
-        self.assertTrue(model.embed.scaling["default"] == expected_scaling)
-        self.assertTrue(model.conv2d.scaling["default"] == expected_scaling)
+        assert model.linear.scaling["default"] == expected_scaling
+        assert model.embed.scaling["default"] == expected_scaling
+        assert model.conv2d.scaling["default"] == expected_scaling

     def test_rslora_scaling(self):
         # default is True
@@ -258,9 +258,9 @@ class InitializationTest(unittest.TestCase):

         expected_scaling = config.lora_alpha / (config.r**0.5)

-        self.assertTrue(model.linear.scaling["default"] == expected_scaling)
-        self.assertTrue(model.embed.scaling["default"] == expected_scaling)
-        self.assertTrue(model.conv2d.scaling["default"] == expected_scaling)
+        assert model.linear.scaling["default"] == expected_scaling
+        assert model.embed.scaling["default"] == expected_scaling
+        assert model.conv2d.scaling["default"] == expected_scaling

     def test_lora_default_scaling_pattern(self):
         # default is True
@@ -285,9 +285,9 @@ class InitializationTest(unittest.TestCase):
             "conv2d": config.alpha_pattern["conv2d"] / config.rank_pattern["conv2d"],
         }

-        self.assertTrue(model.linear.scaling["default"] == expected_scaling["linear"])
-        self.assertTrue(model.embed.scaling["default"] == expected_scaling["embed"])
-        self.assertTrue(model.conv2d.scaling["default"] == expected_scaling["conv2d"])
+        assert model.linear.scaling["default"] == expected_scaling["linear"]
+        assert model.embed.scaling["default"] == expected_scaling["embed"]
+        assert model.conv2d.scaling["default"] == expected_scaling["conv2d"]

     def test_rslora_scaling_pattern(self):
         # default is True
@@ -312,6 +312,6 @@ class InitializationTest(unittest.TestCase):
             "conv2d": config.alpha_pattern["conv2d"] / (config.rank_pattern["conv2d"] ** 0.5),
         }

-        self.assertTrue(model.linear.scaling["default"] == expected_scaling["linear"])
-        self.assertTrue(model.embed.scaling["default"] == expected_scaling["embed"])
-        self.assertTrue(model.conv2d.scaling["default"] == expected_scaling["conv2d"])
+        assert model.linear.scaling["default"] == expected_scaling["linear"]
+        assert model.embed.scaling["default"] == expected_scaling["embed"]
+        assert model.conv2d.scaling["default"] == expected_scaling["conv2d"]
@@ -123,21 +123,21 @@ if is_megatron_available():

         def test_megatron_lora_module(self):
             megatron_module = self.megatron_module
-            self.assertTrue(isinstance(megatron_module, PeftModel))
+            assert isinstance(megatron_module, PeftModel)

             for name, module in megatron_module.named_modules():
                 if name.endswith("linear"):
-                    self.assertTrue(hasattr(module, "lora_A"))
-                    self.assertTrue(hasattr(module, "lora_B"))
+                    assert hasattr(module, "lora_A")
+                    assert hasattr(module, "lora_B")
                 if name.endswith("linear.lora_A.default"):
-                    self.assertTrue(isinstance(module, torch.nn.Linear))
+                    assert isinstance(module, torch.nn.Linear)
                 if name.endswith("linear.lora_B.default"):
-                    self.assertTrue(isinstance(module, tensor_parallel.ColumnParallelLinear))
+                    assert isinstance(module, tensor_parallel.ColumnParallelLinear)

                 if name.endswith("lm_head.lora_A.default"):
-                    self.assertTrue(isinstance(module, tensor_parallel.RowParallelLinear))
+                    assert isinstance(module, tensor_parallel.RowParallelLinear)
                 if name.endswith("lm_head.lora_B.default"):
-                    self.assertTrue(isinstance(module, torch.nn.Linear))
+                    assert isinstance(module, torch.nn.Linear)

         def test_forward(self):
             x = torch.ones((2, 4, 10)).cuda()
@@ -145,7 +145,7 @@ if is_megatron_available():
             dummt_module_result = self.dummy_module(x)

             # Because lora_B is initialized with 0, the forward results of two models should be equal before backward.
-            self.assertTrue(megatron_module_result.equal(dummt_module_result))
+            assert megatron_module_result.equal(dummt_module_result)

         def test_backward(self):
             optimizer = torch.optim.AdamW(self.megatron_module.parameters())
@@ -165,4 +165,4 @@ if is_megatron_available():
             peft_state_dict = get_peft_model_state_dict(self.megatron_module)

             for key in peft_state_dict.keys():
-                self.assertTrue("lora" in key)
+                assert "lora" in key
@@ -56,14 +56,14 @@ class TestPeft(unittest.TestCase):

         for name, module in self.model.named_modules():
             if name == "linear":
-                self.assertTrue(hasattr(module, "lora_A"))
-                self.assertTrue(hasattr(module, "lora_B"))
+                assert hasattr(module, "lora_A")
+                assert hasattr(module, "lora_B")

     def test_get_peft_model_state_dict(self):
         peft_state_dict = get_peft_model_state_dict(self.model)

         for key in peft_state_dict.keys():
-            self.assertTrue("lora" in key)
+            assert "lora" in key

     def test_modules_to_save(self):
         self.model = DummyModel()
@@ -81,13 +81,13 @@ class TestPeft(unittest.TestCase):

         for name, module in self.model.named_modules():
             if name == "linear":
-                self.assertTrue(hasattr(module, "lora_A"))
-                self.assertTrue(hasattr(module, "lora_B"))
+                assert hasattr(module, "lora_A")
+                assert hasattr(module, "lora_B")
             elif name == "embedding":
-                self.assertTrue(isinstance(module, ModulesToSaveWrapper))
+                assert isinstance(module, ModulesToSaveWrapper)

         state_dict = get_peft_model_state_dict(self.model)

-        self.assertTrue("embedding.weight" in state_dict.keys())
+        assert "embedding.weight" in state_dict.keys()

-        self.assertTrue(hasattr(self.model.embedding, "weight"))
+        assert hasattr(self.model.embedding, "weight")
@@ -19,6 +19,7 @@ import re
 import tempfile
 import unittest

+import pytest
 import torch
 from parameterized import parameterized
 from torch import nn
@@ -94,22 +95,22 @@ class TestMixedAdapterTypes(unittest.TestCase):
         # base model
         base_model = self._get_model(model_cls)
         output_base = base_model(input)
-        self.assertTrue(torch.isfinite(output_base).all())
+        assert torch.isfinite(output_base).all()

         # adapter 0
         peft_model_0 = self._get_model(model_cls, config0, "adapter0", seed=seed0)
         output_config0 = peft_model_0(input)

-        self.assertTrue(torch.isfinite(output_config0).all())
-        self.assertFalse(torch.allclose(output_base, output_config0, atol=atol, rtol=rtol))
+        assert torch.isfinite(output_config0).all()
+        assert not torch.allclose(output_base, output_config0, atol=atol, rtol=rtol)

         # adapter 1
         peft_model_1 = self._get_model(model_cls, config1, "adapter1", seed=seed1)
         output_config1 = peft_model_1(input)

-        self.assertTrue(torch.isfinite(output_config1).all())
-        self.assertFalse(torch.allclose(output_base, output_config1, atol=atol, rtol=rtol))
-        self.assertFalse(torch.allclose(output_config0, output_config1, atol=atol, rtol=rtol))
+        assert torch.isfinite(output_config1).all()
+        assert not torch.allclose(output_base, output_config1, atol=atol, rtol=rtol)
+        assert not torch.allclose(output_config0, output_config1, atol=atol, rtol=rtol)

         # adapter 0 + 1
         peft_model_01 = self._get_model(model_cls, config0, "adapter0", seed=seed0)
@@ -122,19 +123,19 @@ class TestMixedAdapterTypes(unittest.TestCase):
         tuner_layers = [mod for mod in peft_model_01.modules() if isinstance(mod, BaseTunerLayer)]
         tuner_types = {type(tuner_layer) for tuner_layer in tuner_layers}
         if type(config0) == type(config1):
-            self.assertEqual(len(tuner_types), 1)
+            assert len(tuner_types) == 1
         else:
-            self.assertEqual(len(tuner_types), 2)
+            assert len(tuner_types) == 2

-        self.assertEqual(peft_model_01.active_adapters, ["adapter0", "adapter1"])
-        self.assertTrue(torch.isfinite(output_mixed_01).all())
-        self.assertFalse(torch.allclose(output_config0, output_mixed_01, atol=atol, rtol=rtol))
-        self.assertFalse(torch.allclose(output_config1, output_mixed_01, atol=atol, rtol=rtol))
+        assert peft_model_01.active_adapters == ["adapter0", "adapter1"]
+        assert torch.isfinite(output_mixed_01).all()
+        assert not torch.allclose(output_config0, output_mixed_01, atol=atol, rtol=rtol)
+        assert not torch.allclose(output_config1, output_mixed_01, atol=atol, rtol=rtol)
         if is_commutative:
             delta0 = output_config0 - output_base
             delta1 = output_config1 - output_base
             delta_mixed_01 = output_mixed_01 - output_base
-            self.assertTrue(torch.allclose(delta0 + delta1, delta_mixed_01, atol=atol, rtol=rtol))
+            assert torch.allclose((delta0 + delta1), delta_mixed_01, atol=atol, rtol=rtol)

         # adapter 1 + 0
         peft_model_10 = self._get_model(model_cls, config1, "adapter1", seed=seed1)
@@ -147,16 +148,16 @@ class TestMixedAdapterTypes(unittest.TestCase):
         tuner_layers = [mod for mod in peft_model_10.modules() if isinstance(mod, BaseTunerLayer)]
         tuner_types = {type(tuner_layer) for tuner_layer in tuner_layers}
         if type(config0) == type(config1):
-            self.assertEqual(len(tuner_types), 1)
+            assert len(tuner_types) == 1
         else:
-            self.assertEqual(len(tuner_types), 2)
+            assert len(tuner_types) == 2

-        self.assertEqual(peft_model_10.active_adapters, ["adapter1", "adapter0"])
-        self.assertTrue(torch.isfinite(output_mixed_10).all())
-        self.assertFalse(torch.allclose(output_config0, output_mixed_10, atol=atol, rtol=rtol))
-        self.assertFalse(torch.allclose(output_config1, output_mixed_10, atol=atol, rtol=rtol))
+        assert peft_model_10.active_adapters == ["adapter1", "adapter0"]
+        assert torch.isfinite(output_mixed_10).all()
+        assert not torch.allclose(output_config0, output_mixed_10, atol=atol, rtol=rtol)
+        assert not torch.allclose(output_config1, output_mixed_10, atol=atol, rtol=rtol)
         if is_commutative:
-            self.assertTrue(torch.allclose(output_mixed_01, output_mixed_10, atol=atol, rtol=rtol))
+            assert torch.allclose(output_mixed_01, output_mixed_10, atol=atol, rtol=rtol)

         # turn around the order of the adapters of the 0 + 1 mixed model, should behave like the 0 + 1 mixed model
         peft_model_10.set_adapter(["adapter0", "adapter1"])
@@ -166,17 +167,17 @@ class TestMixedAdapterTypes(unittest.TestCase):
         tuner_layers = [mod for mod in peft_model_10.modules() if isinstance(mod, BaseTunerLayer)]
         tuner_types = {type(tuner_layer) for tuner_layer in tuner_layers}
         if type(config0) == type(config1):
-            self.assertEqual(len(tuner_types), 1)
+            assert len(tuner_types) == 1
         else:
-            self.assertEqual(len(tuner_types), 2)
+            assert len(tuner_types) == 2

-        self.assertEqual(peft_model_10.active_adapters, ["adapter0", "adapter1"])
-        self.assertTrue(torch.isfinite(output_mixed_reversed).all())
-        self.assertFalse(torch.allclose(output_mixed_reversed, output_config0, atol=atol, rtol=rtol))
-        self.assertFalse(torch.allclose(output_mixed_reversed, output_config1, atol=atol, rtol=rtol))
+        assert peft_model_10.active_adapters == ["adapter0", "adapter1"]
+        assert torch.isfinite(output_mixed_reversed).all()
+        assert not torch.allclose(output_mixed_reversed, output_config0, atol=atol, rtol=rtol)
+        assert not torch.allclose(output_mixed_reversed, output_config1, atol=atol, rtol=rtol)
         if is_commutative:
-            self.assertTrue(torch.allclose(output_mixed_reversed, output_mixed_01, atol=atol, rtol=rtol))
-            self.assertTrue(torch.allclose(output_mixed_reversed, output_mixed_10, atol=atol, rtol=rtol))
+            assert torch.allclose(output_mixed_reversed, output_mixed_01, atol=atol, rtol=rtol)
+            assert torch.allclose(output_mixed_reversed, output_mixed_10, atol=atol, rtol=rtol)

     def _check_merging(self, model_cls, config0, config1, input):
         # Ensure that when merging mixed adapters, the result is the same as when applying the adapters separately.
@@ -195,7 +196,7 @@ class TestMixedAdapterTypes(unittest.TestCase):

         model_merged_01 = peft_model_01.merge_and_unload()
         output_merged_01 = model_merged_01(input)
-        self.assertTrue(torch.allclose(output_mixed_01, output_merged_01, atol=atol, rtol=rtol))
+        assert torch.allclose(output_mixed_01, output_merged_01, atol=atol, rtol=rtol)

         # adapter 1 + 0
         peft_model_10 = self._get_model(model_cls, config1, "adapter1", seed=seed1)
@@ -206,7 +207,7 @@ class TestMixedAdapterTypes(unittest.TestCase):

         model_merged_10 = peft_model_10.merge_and_unload()
         output_merged_10 = model_merged_10(input)
-        self.assertTrue(torch.allclose(output_mixed_10, output_merged_10, atol=atol, rtol=rtol))
+        assert torch.allclose(output_mixed_10, output_merged_10, atol=atol, rtol=rtol)

     def _check_unload(self, model_cls, config0, config1, input):
         # Ensure that we can unload the base model without merging
@@ -229,8 +230,8 @@ class TestMixedAdapterTypes(unittest.TestCase):
         model_unloaded = peft_model_01.unload()
         output_unloaded = model_unloaded(input)

-        self.assertFalse(torch.allclose(output_mixed, output_unloaded, atol=atol, rtol=rtol))
-        self.assertTrue(torch.allclose(output_base, output_unloaded, atol=atol, rtol=rtol))
+        assert not torch.allclose(output_mixed, output_unloaded, atol=atol, rtol=rtol)
+        assert torch.allclose(output_base, output_unloaded, atol=atol, rtol=rtol)

     def _check_disable(self, model_cls, config0, config1, input):
         # Ensure that we can disable adapters
@@ -249,8 +250,8 @@ class TestMixedAdapterTypes(unittest.TestCase):
         with peft_model_0.disable_adapter():
             output_disabled0 = peft_model_0(input)

-        self.assertFalse(torch.allclose(output_base, output_config0, atol=atol, rtol=rtol))
-        self.assertTrue(torch.allclose(output_base, output_disabled0, atol=atol, rtol=rtol))
+        assert not torch.allclose(output_base, output_config0, atol=atol, rtol=rtol)
+        assert torch.allclose(output_base, output_disabled0, atol=atol, rtol=rtol)

         # adapter 1
         peft_model_1 = self._get_model(model_cls, config1, "adapter1", seed=seed1)
@@ -258,8 +259,8 @@ class TestMixedAdapterTypes(unittest.TestCase):
         with peft_model_1.disable_adapter():
             output_disabled1 = peft_model_1(input)

-        self.assertFalse(torch.allclose(output_base, output_config1, atol=atol, rtol=rtol))
-        self.assertTrue(torch.allclose(output_base, output_disabled1, atol=atol, rtol=rtol))
+        assert not torch.allclose(output_base, output_config1, atol=atol, rtol=rtol)
+        assert torch.allclose(output_base, output_disabled1, atol=atol, rtol=rtol)

         # adapter 0 + 1
         peft_model_01 = self._get_model(model_cls, config0, "adapter0", seed=seed0)
@@ -270,8 +271,8 @@ class TestMixedAdapterTypes(unittest.TestCase):
         with peft_model_01.disable_adapter():
             output_disabled01 = peft_model_01(input)

-        self.assertFalse(torch.allclose(output_base, output_mixed_01, atol=atol, rtol=rtol))
-        self.assertTrue(torch.allclose(output_base, output_disabled01, atol=atol, rtol=rtol))
+        assert not torch.allclose(output_base, output_mixed_01, atol=atol, rtol=rtol)
+        assert torch.allclose(output_base, output_disabled01, atol=atol, rtol=rtol)

         # adapter 1 + 0
         peft_model_10 = self._get_model(model_cls, config1, "adapter1", seed=seed1)
@@ -282,8 +283,8 @@ class TestMixedAdapterTypes(unittest.TestCase):
         with peft_model_10.disable_adapter():
             output_disabled10 = peft_model_10(input)

-        self.assertFalse(torch.allclose(output_base, output_mixed_10, atol=atol, rtol=rtol))
-        self.assertTrue(torch.allclose(output_base, output_disabled10, atol=atol, rtol=rtol))
+        assert not torch.allclose(output_base, output_mixed_10, atol=atol, rtol=rtol)
+        assert torch.allclose(output_base, output_disabled10, atol=atol, rtol=rtol)

     def _check_loading(self, model_cls, config0, config1, input, *, is_commutative):
         # Check that we can load two adapters into the same model
@@ -331,7 +332,7 @@ class TestMixedAdapterTypes(unittest.TestCase):
                 base_model, os.path.join(tmp_dirname, "adapter0", "adapter0"), "adapter0"
             )
             output_loaded0 = peft_model_loaded0(input)
-            self.assertTrue(torch.allclose(output_config0, output_loaded0, atol=atol, rtol=rtol))
+            assert torch.allclose(output_config0, output_loaded0, atol=atol, rtol=rtol)

             # adapter 1
             base_model = self._get_model(model_cls)
@@ -340,7 +341,7 @@ class TestMixedAdapterTypes(unittest.TestCase):
                 base_model, os.path.join(tmp_dirname, "adapter1", "adapter1"), "adapter1"
             )
             output_loaded1 = peft_model_loaded1(input)
-            self.assertTrue(torch.allclose(output_config1, output_loaded1, atol=atol, rtol=rtol))
+            assert torch.allclose(output_config1, output_loaded1, atol=atol, rtol=rtol)

             # adapter 0 + 1
             base_model = self._get_model(model_cls)
@@ -350,18 +351,18 @@ class TestMixedAdapterTypes(unittest.TestCase):
             )
             peft_model_loaded_01.load_adapter(os.path.join(tmp_dirname, "adapter1", "adapter1"), "adapter1")
             # at this point, "adapter0" should still be active
-            self.assertEqual(peft_model_loaded_01.active_adapters, ["adapter0"])
+            assert peft_model_loaded_01.active_adapters == ["adapter0"]
             output_loaded01_0 = peft_model_loaded_01(input)
-            self.assertTrue(torch.allclose(output_config0, output_loaded01_0, atol=atol, rtol=rtol))
+            assert torch.allclose(output_config0, output_loaded01_0, atol=atol, rtol=rtol)
             # activate adapter1
             peft_model_loaded_01.set_adapter(["adapter1"])
-            self.assertEqual(peft_model_loaded_01.active_adapters, ["adapter1"])
+            assert peft_model_loaded_01.active_adapters == ["adapter1"]
             output_loaded01_1 = peft_model_loaded_01(input)
-            self.assertTrue(torch.allclose(output_config1, output_loaded01_1, atol=atol, rtol=rtol))
+            assert torch.allclose(output_config1, output_loaded01_1, atol=atol, rtol=rtol)
             # activate both adapters
             peft_model_loaded_01.set_adapter(["adapter0", "adapter1"])
             output_loaded01 = peft_model_loaded_01(input)
-            self.assertTrue(torch.allclose(output_mixed_01, output_loaded01, atol=atol, rtol=rtol))
+            assert torch.allclose(output_mixed_01, output_loaded01, atol=atol, rtol=rtol)

             # adapter 1 + 0
             base_model = self._get_model(model_cls)
@@ -371,22 +372,22 @@ class TestMixedAdapterTypes(unittest.TestCase):
             )
             peft_model_loaded_10.load_adapter(os.path.join(tmp_dirname, "adapter0", "adapter0"), "adapter0")
             # at this point, "adapter1" should still be active
-            self.assertEqual(peft_model_loaded_10.active_adapters, ["adapter1"])
+            assert peft_model_loaded_10.active_adapters == ["adapter1"]
             output_loaded10_1 = peft_model_loaded_10(input)
-            self.assertTrue(torch.allclose(output_config1, output_loaded10_1, atol=atol, rtol=rtol))
+            assert torch.allclose(output_config1, output_loaded10_1, atol=atol, rtol=rtol)
             # activate adapter1
             peft_model_loaded_10.set_adapter(["adapter0"])
-            self.assertEqual(peft_model_loaded_10.active_adapters, ["adapter0"])
+            assert peft_model_loaded_10.active_adapters == ["adapter0"]
             output_loaded10_0 = peft_model_loaded_10(input)
-            self.assertTrue(torch.allclose(output_config0, output_loaded10_0, atol=atol, rtol=rtol))
+            assert torch.allclose(output_config0, output_loaded10_0, atol=atol, rtol=rtol)
             # activate both adapters
             peft_model_loaded_10.set_adapter(["adapter1", "adapter0"])
             output_loaded10 = peft_model_loaded_10(input)
-            self.assertTrue(torch.allclose(output_mixed_10, output_loaded10, atol=atol, rtol=rtol))
+            assert torch.allclose(output_mixed_10, output_loaded10, atol=atol, rtol=rtol)

             if is_commutative:
-                self.assertTrue(torch.allclose(output_loaded01, output_loaded10, atol=atol, rtol=rtol))
-                self.assertTrue(torch.allclose(output_loaded10, output_mixed_01, atol=atol, rtol=rtol))
+                assert torch.allclose(output_loaded01, output_loaded10, atol=atol, rtol=rtol)
+                assert torch.allclose(output_loaded10, output_mixed_01, atol=atol, rtol=rtol)

     @parameterized.expand(
         itertools.combinations(
@@ -564,42 +565,42 @@ class TestMixedAdapterTypes(unittest.TestCase):

         peft_model.set_adapter(["adapter0", "adapter1", "adapter2", "adapter3", "adapter4"])
         output_mixed = peft_model(input)
-        self.assertTrue(torch.isfinite(output_base).all())
-        self.assertFalse(torch.allclose(output_base, output_mixed, atol=atol, rtol=rtol))
+        assert torch.isfinite(output_base).all()
+        assert not torch.allclose(output_base, output_mixed, atol=atol, rtol=rtol)

         # test disabling all adapters
         with peft_model.disable_adapter():
             output_disabled = peft_model(input)
-        self.assertTrue(torch.isfinite(output_disabled).all())
-        self.assertTrue(torch.allclose(output_base, output_disabled, atol=atol, rtol=rtol))
-        self.assertFalse(torch.allclose(output_mixed, output_disabled, atol=atol, rtol=rtol))
+        assert torch.isfinite(output_disabled).all()
+        assert torch.allclose(output_base, output_disabled, atol=atol, rtol=rtol)
+        assert not torch.allclose(output_mixed, output_disabled, atol=atol, rtol=rtol)

         # merge and unload all adapters
         model_copy = copy.deepcopy(peft_model)
         model = model_copy.merge_and_unload()
         output_merged = model(input)
-        self.assertTrue(torch.isfinite(output_merged).all())
-        self.assertTrue(torch.allclose(output_mixed, output_merged, atol=atol, rtol=rtol))
+        assert torch.isfinite(output_merged).all()
+        assert torch.allclose(output_mixed, output_merged, atol=atol, rtol=rtol)

         # merge and unload only adapter1 and adapter3
         model_copy = copy.deepcopy(peft_model)
         model_copy.set_adapter(["adapter1", "adapter3"])
         output_13 = model_copy(input)
-        self.assertTrue(torch.isfinite(output_13).all())
-        self.assertFalse(torch.allclose(output_mixed, output_13, atol=atol, rtol=rtol))
+        assert torch.isfinite(output_13).all()
+        assert not torch.allclose(output_mixed, output_13, atol=atol, rtol=rtol)

         model_copy.set_adapter(["adapter0", "adapter1", "adapter2", "adapter3", "adapter4"])
         model_merged_unloaded = model_copy.merge_and_unload(adapter_names=["adapter1", "adapter3"])
         output_merged_13 = model_merged_unloaded(input)
-        self.assertTrue(torch.isfinite(output_merged_13).all())
-        self.assertTrue(torch.allclose(output_13, output_merged_13, atol=atol, rtol=rtol))
+        assert torch.isfinite(output_merged_13).all()
+        assert torch.allclose(output_13, output_merged_13, atol=atol, rtol=rtol)

         # test unloading
         model_copy = copy.deepcopy(peft_model)
         model_unloaded = model_copy.unload()
         output_unloaded = model_unloaded(input)
-        self.assertTrue(torch.isfinite(output_unloaded).all())
-        self.assertTrue(torch.allclose(output_base, output_unloaded, atol=atol, rtol=rtol))
+        assert torch.isfinite(output_unloaded).all()
+        assert torch.allclose(output_base, output_unloaded, atol=atol, rtol=rtol)

     def test_delete_adapter(self):
         atol = 1e-5
@@ -615,7 +616,7 @@ class TestMixedAdapterTypes(unittest.TestCase):
         config0 = LoraConfig(r=4, lora_alpha=4, target_modules=["lin0", "lin1"], init_lora_weights=False)
         peft_model = get_peft_model(model, config0, "adapter0", mixed=True)
         output_0 = peft_model(input)
-        self.assertFalse(torch.allclose(output_base, output_0, atol=atol, rtol=rtol))
+        assert not torch.allclose(output_base, output_0, atol=atol, rtol=rtol)

         # add adapter1
         torch.manual_seed(1)
@@ -623,17 +624,17 @@ class TestMixedAdapterTypes(unittest.TestCase):
         peft_model.add_adapter("adapter1", config1)
         peft_model.set_adapter(["adapter0", "adapter1"])
         output_01 = peft_model(input)
-        self.assertFalse(torch.allclose(output_base, output_01, atol=atol, rtol=rtol))
-        self.assertFalse(torch.allclose(output_0, output_01, atol=atol, rtol=rtol))
+        assert not torch.allclose(output_base, output_01, atol=atol, rtol=rtol)
+        assert not torch.allclose(output_0, output_01, atol=atol, rtol=rtol)

         # delete adapter1
         peft_model.delete_adapter("adapter1")
-        self.assertEqual(peft_model.active_adapters, ["adapter0"])
+        assert peft_model.active_adapters == ["adapter0"]
         output_deleted_1 = peft_model(input)
-        self.assertTrue(torch.allclose(output_0, output_deleted_1, atol=atol, rtol=rtol))
+        assert torch.allclose(output_0, output_deleted_1, atol=atol, rtol=rtol)

         msg = re.escape("Adapter(s) ['adapter1'] not found, available adapters: ['adapter0']")
-        with self.assertRaisesRegex(ValueError, expected_regex=msg):
+        with pytest.raises(ValueError, match=msg):
             peft_model.set_adapter(["adapter0", "adapter1"])

         # re-add adapter1
@@ -641,7 +642,7 @@ class TestMixedAdapterTypes(unittest.TestCase):
         peft_model.add_adapter("adapter1", config1)
         peft_model.set_adapter(["adapter0", "adapter1"])
         output_01_readded = peft_model(input)
-        self.assertFalse(torch.allclose(output_base, output_01_readded, atol=atol, rtol=rtol))
+        assert not torch.allclose(output_base, output_01_readded, atol=atol, rtol=rtol)

         # same as above, but this time delete adapter0 first
         torch.manual_seed(0)
@@ -651,19 +652,19 @@ class TestMixedAdapterTypes(unittest.TestCase):
         torch.manual_seed(1)
         peft_model.add_adapter("adapter1", config1)
         peft_model.delete_adapter("adapter0")
-        self.assertEqual(peft_model.active_adapters, ["adapter1"])
+        assert peft_model.active_adapters == ["adapter1"]
         output_deleted_0 = peft_model(input)
-        self.assertFalse(torch.allclose(output_deleted_0, output_base, atol=atol, rtol=rtol))
-        self.assertFalse(torch.allclose(output_deleted_0, output_01, atol=atol, rtol=rtol))
+        assert not torch.allclose(output_deleted_0, output_base, atol=atol, rtol=rtol)
+        assert not torch.allclose(output_deleted_0, output_01, atol=atol, rtol=rtol)

         msg = re.escape("Adapter(s) ['adapter0'] not found, available adapters: ['adapter1']")
-        with self.assertRaisesRegex(ValueError, expected_regex=msg):
+        with pytest.raises(ValueError, match=msg):
             peft_model.set_adapter(["adapter0", "adapter1"])

         peft_model.delete_adapter("adapter1")
-        self.assertEqual(peft_model.active_adapters, [])
+        assert peft_model.active_adapters == []
         output_deleted_01 = peft_model(input)
-        self.assertTrue(torch.allclose(output_deleted_01, output_base, atol=atol, rtol=rtol))
+        assert torch.allclose(output_deleted_01, output_base, atol=atol, rtol=rtol)

     def test_modules_to_save(self):
         model = SimpleNet().eval().to(self.torch_device)
@@ -674,8 +675,7 @@ class TestMixedAdapterTypes(unittest.TestCase):
         # TODO: theoretically, we could allow this if it's the same target layer
         config1 = LoHaConfig(target_modules=["lin0"], modules_to_save=["lin1"])
         peft_model.add_adapter("adapter1", config1)
-        msg = "Only one adapter can be set at a time for modules_to_save"
-        with self.assertRaisesRegex(ValueError, expected_regex=msg):
+        with pytest.raises(ValueError, match="Only one adapter can be set at a time for modules_to_save"):
             peft_model.set_adapter(["adapter0", "adapter1"])

     def test_get_nb_trainable_parameters(self):
@ -687,16 +687,16 @@ class TestMixedAdapterTypes(unittest.TestCase):
|
|||||||
trainable_params0, all_param0 = peft_model.get_nb_trainable_parameters()
|
trainable_params0, all_param0 = peft_model.get_nb_trainable_parameters()
|
||||||
|
|
||||||
params_lora = sum(p.numel() for n, p in model.named_parameters() if "adapter0" in n)
|
params_lora = sum(p.numel() for n, p in model.named_parameters() if "adapter0" in n)
|
||||||
self.assertEqual(trainable_params0, params_lora)
|
assert trainable_params0 == params_lora
|
||||||
self.assertEqual(all_param0, params_base + params_lora)
|
assert all_param0 == (params_base + params_lora)
|
||||||
|
|
||||||
config1 = LoHaConfig(target_modules=["lin1"])
|
config1 = LoHaConfig(target_modules=["lin1"])
|
||||||
peft_model.add_adapter("adapter1", config1)
|
peft_model.add_adapter("adapter1", config1)
|
||||||
peft_model.set_adapter(["adapter0", "adapter1"])
|
peft_model.set_adapter(["adapter0", "adapter1"])
|
||||||
params_loha = sum(p.numel() for n, p in model.named_parameters() if "adapter1" in n)
|
params_loha = sum(p.numel() for n, p in model.named_parameters() if "adapter1" in n)
|
||||||
trainable_params1, all_param1 = peft_model.get_nb_trainable_parameters()
|
trainable_params1, all_param1 = peft_model.get_nb_trainable_parameters()
|
||||||
self.assertEqual(trainable_params1, params_lora + params_loha)
|
assert trainable_params1 == (params_lora + params_loha)
|
||||||
self.assertEqual(all_param1, params_base + params_lora + params_loha)
|
assert all_param1 == ((params_base + params_lora) + params_loha)
|
||||||
|
|
||||||
config2 = AdaLoraConfig(target_modules=["lin0", "lin1"])
|
config2 = AdaLoraConfig(target_modules=["lin0", "lin1"])
|
||||||
peft_model.add_adapter("adapter2", config2)
|
peft_model.add_adapter("adapter2", config2)
|
||||||
@ -704,8 +704,8 @@ class TestMixedAdapterTypes(unittest.TestCase):
|
|||||||
params_adalora = sum(p.numel() for n, p in model.named_parameters() if "adapter2" in n)
|
params_adalora = sum(p.numel() for n, p in model.named_parameters() if "adapter2" in n)
|
||||||
trainable_params2, all_param2 = peft_model.get_nb_trainable_parameters()
|
trainable_params2, all_param2 = peft_model.get_nb_trainable_parameters()
|
||||||
# remove 2 params because we need to exclude "ranknum" for AdaLora trainable params
|
# remove 2 params because we need to exclude "ranknum" for AdaLora trainable params
|
||||||
self.assertEqual(trainable_params2, params_lora + params_loha + params_adalora - 2)
|
assert trainable_params2 == (((params_lora + params_loha) + params_adalora) - 2)
|
||||||
self.assertEqual(all_param2, params_base + params_lora + params_loha + params_adalora)
|
assert all_param2 == (((params_base + params_lora) + params_loha) + params_adalora)
|
||||||
|
|
||||||
def test_incompatible_config_raises(self):
|
def test_incompatible_config_raises(self):
|
||||||
model = SimpleNet().eval().to(self.torch_device)
|
model = SimpleNet().eval().to(self.torch_device)
|
||||||
@ -714,7 +714,7 @@ class TestMixedAdapterTypes(unittest.TestCase):
|
|||||||
|
|
||||||
config1 = PrefixTuningConfig()
|
config1 = PrefixTuningConfig()
|
||||||
msg = "The provided `peft_type` 'PREFIX_TUNING' is not compatible with the `PeftMixedModel`."
|
msg = "The provided `peft_type` 'PREFIX_TUNING' is not compatible with the `PeftMixedModel`."
|
||||||
with self.assertRaisesRegex(ValueError, expected_regex=msg):
|
with pytest.raises(ValueError, match=msg):
|
||||||
peft_model.add_adapter("adapter1", config1)
|
peft_model.add_adapter("adapter1", config1)
|
||||||
|
|
||||||
def test_decoder_model(self):
|
def test_decoder_model(self):
|
||||||
@ -735,50 +735,50 @@ class TestMixedAdapterTypes(unittest.TestCase):
config0 = LoraConfig(task_type="CAUSAL_LM", init_lora_weights=False)
peft_model = get_peft_model(model, config0, "adapter0", mixed=True)
output0 = peft_model.generate(**input_dict)
-self.assertTrue(torch.isfinite(output0).all())
+assert torch.isfinite(output0).all()
-self.assertFalse(torch.allclose(output_base, output0))
+assert not torch.allclose(output_base, output0)

torch.manual_seed(1)
config1 = LoHaConfig(task_type="CAUSAL_LM", target_modules=["q_proj", "v_proj"], init_weights=False)
peft_model.add_adapter("adapter1", config1)
peft_model.set_adapter(["adapter0", "adapter1"])
output1 = peft_model.generate(**input_dict)
-self.assertTrue(torch.isfinite(output1).all())
+assert torch.isfinite(output1).all()
-self.assertFalse(torch.allclose(output0, output1))
+assert not torch.allclose(output0, output1)

torch.manual_seed(2)
config2 = AdaLoraConfig(task_type="CAUSAL_LM", init_lora_weights=False)
peft_model.add_adapter("adapter2", config2)
peft_model.set_adapter(["adapter0", "adapter1", "adapter2"])
output2 = peft_model.generate(**input_dict)
-self.assertTrue(torch.isfinite(output2).all())
+assert torch.isfinite(output2).all()
-self.assertFalse(torch.allclose(output1, output2))
+assert not torch.allclose(output1, output2)

torch.manual_seed(3)
config3 = LoKrConfig(task_type="CAUSAL_LM", target_modules=["q_proj", "v_proj"], init_weights=False)
peft_model.add_adapter("adapter3", config3)
peft_model.set_adapter(["adapter0", "adapter1", "adapter2", "adapter3"])
output3 = peft_model.generate(**input_dict)
-self.assertTrue(torch.isfinite(output3).all())
+assert torch.isfinite(output3).all()
-self.assertFalse(torch.allclose(output2, output3))
+assert not torch.allclose(output2, output3)

torch.manual_seed(4)
config4 = OFTConfig(task_type="CAUSAL_LM", target_modules=["q_proj", "v_proj"], init_weights=False)
peft_model.add_adapter("adapter4", config4)
peft_model.set_adapter(["adapter0", "adapter1", "adapter2", "adapter3", "adapter4"])
output4 = peft_model.generate(**input_dict)
-self.assertTrue(torch.isfinite(output4).all())
+assert torch.isfinite(output4).all()
-self.assertFalse(torch.allclose(output3, output4))
+assert not torch.allclose(output3, output4)

with peft_model.disable_adapter():
output_disabled = peft_model.generate(**input_dict)
-self.assertTrue(torch.isfinite(output_disabled).all())
+assert torch.isfinite(output_disabled).all()
-self.assertTrue(torch.allclose(output_base, output_disabled))
+assert torch.allclose(output_base, output_disabled)

model_unloaded = peft_model.merge_and_unload()
output_unloaded = model_unloaded.generate(**input_dict)
-self.assertTrue(torch.isfinite(output_unloaded).all())
+assert torch.isfinite(output_unloaded).all()
-self.assertTrue(torch.allclose(output4, output_unloaded))
+assert torch.allclose(output4, output_unloaded)

with tempfile.TemporaryDirectory() as tmp_dir:
# save adapter0 (use normal PeftModel, because PeftMixedModel does not support saving)
@ -787,7 +787,7 @@ class TestMixedAdapterTypes(unittest.TestCase):
torch.manual_seed(0)
peft_model = get_peft_model(model, config0, "adapter0")
output0_save = peft_model(**input_dict).logits
-self.assertTrue(torch.isfinite(output0_save).all())
+assert torch.isfinite(output0_save).all()
peft_model.save_pretrained(tmp_dir)

# save adapter1
@ -796,7 +796,7 @@ class TestMixedAdapterTypes(unittest.TestCase):
torch.manual_seed(1)
peft_model = get_peft_model(model, config1, "adapter1")
output1_save = peft_model(**input_dict).logits
-self.assertTrue(torch.isfinite(output1_save).all())
+assert torch.isfinite(output1_save).all()
peft_model.save_pretrained(tmp_dir)

# load adapter0 and adapter1
@ -807,6 +807,6 @@ class TestMixedAdapterTypes(unittest.TestCase):
output01_loaded = peft_model(**input_dict).logits

atol, rtol = 1e-3, 1e-3
-self.assertTrue(torch.isfinite(output01_loaded).all())
+assert torch.isfinite(output01_loaded).all()
-self.assertFalse(torch.allclose(output0_save, output01_loaded, atol=atol, rtol=rtol))
+assert not torch.allclose(output0_save, output01_loaded, atol=atol, rtol=rtol)
-self.assertFalse(torch.allclose(output1_save, output01_loaded, atol=atol, rtol=rtol))
+assert not torch.allclose(output1_save, output01_loaded, atol=atol, rtol=rtol)
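
The pattern throughout this hunk is the same: `self.assertTrue`/`self.assertFalse` wrappers become bare `assert` statements, so pytest's assertion rewriting can show the operands when a check fails. A minimal sketch (the tensors are invented for illustration) of why the rewritten form is more informative:

import torch


def test_adapter_changes_output():
    output_base = torch.tensor([0.0, 1.0])
    output0 = torch.tensor([0.5, 1.5])
    # On failure, pytest prints both tensors; assertFalse(...) would only say
    # "True is not false" without showing the values being compared.
    assert not torch.allclose(output_base, output0)
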
@ -84,7 +84,7 @@ class MultiTaskPromptTuningTester(TestCase, PeftCommonTester):
dummy_input = torch.LongTensor([[1, 1, 1]]).to(self.torch_device)
dummy_output = model.get_input_embeddings()(dummy_input)

-self.assertTrue(not dummy_output.requires_grad)
+assert not dummy_output.requires_grad

def test_prepare_for_int8_training(self) -> None:
model = LlamaForCausalLM(self._create_test_llama_config())
@ -92,7 +92,7 @@ class MultiTaskPromptTuningTester(TestCase, PeftCommonTester):
model = model.to(self.torch_device)

for param in model.parameters():
-self.assertTrue(not param.requires_grad)
+assert not param.requires_grad

model = get_peft_model(model, self._create_multitask_prompt_tuning_config())

@ -109,7 +109,7 @@ class MultiTaskPromptTuningTester(TestCase, PeftCommonTester):
dummy_input = torch.LongTensor([[1, 1, 1]]).to(self.torch_device)
dummy_output = model.get_input_embeddings()(dummy_input)

-self.assertTrue(dummy_output.requires_grad)
+assert dummy_output.requires_grad

def test_save_pretrained(self) -> None:
seed = 420
@ -131,30 +131,28 @@ class MultiTaskPromptTuningTester(TestCase, PeftCommonTester):
state_dict_from_pretrained = get_peft_model_state_dict(model_from_pretrained)

# check if same keys
-self.assertEqual(state_dict.keys(), state_dict_from_pretrained.keys())
+assert state_dict.keys() == state_dict_from_pretrained.keys()

# Check that the number of saved parameters is 4 -- 2 layers of (tokens and gate).
-self.assertEqual(len(list(state_dict.keys())), 3)
+assert len(state_dict) == 3

# check if tensors equal
for key in state_dict.keys():
-self.assertTrue(
-torch.allclose(
-state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
-)
+assert torch.allclose(
+state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
)

# check if `adapter_model.safetensors` is present
-self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_model.safetensors")))
+assert os.path.exists(os.path.join(tmp_dirname, "adapter_model.safetensors"))

# check if `adapter_config.json` is present
-self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_config.json")))
+assert os.path.exists(os.path.join(tmp_dirname, "adapter_config.json"))

# check if `pytorch_model.bin` is not present
-self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "pytorch_model.bin")))
+assert not os.path.exists(os.path.join(tmp_dirname, "pytorch_model.bin"))

# check if `config.json` is not present
-self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "config.json")))
+assert not os.path.exists(os.path.join(tmp_dirname, "config.json"))

def test_save_pretrained_regression(self) -> None:
seed = 420
@ -176,30 +174,28 @@ class MultiTaskPromptTuningTester(TestCase, PeftCommonTester):
state_dict_from_pretrained = get_peft_model_state_dict(model_from_pretrained)

# check if same keys
-self.assertEqual(state_dict.keys(), state_dict_from_pretrained.keys())
+assert state_dict.keys() == state_dict_from_pretrained.keys()

# Check that the number of saved parameters is 4 -- 2 layers of (tokens and gate).
-self.assertEqual(len(list(state_dict.keys())), 3)
+assert len(state_dict) == 3

# check if tensors equal
for key in state_dict.keys():
-self.assertTrue(
-torch.allclose(
-state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
-)
+assert torch.allclose(
+state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
)

# check if `adapter_model.bin` is present for regression
-self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_model.bin")))
+assert os.path.exists(os.path.join(tmp_dirname, "adapter_model.bin"))

# check if `adapter_config.json` is present
-self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_config.json")))
+assert os.path.exists(os.path.join(tmp_dirname, "adapter_config.json"))

# check if `pytorch_model.bin` is not present
-self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "pytorch_model.bin")))
+assert not os.path.exists(os.path.join(tmp_dirname, "pytorch_model.bin"))

# check if `config.json` is not present
-self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "config.json")))
+assert not os.path.exists(os.path.join(tmp_dirname, "config.json"))

def test_generate(self) -> None:
model = LlamaForCausalLM(self._create_test_llama_config())
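
In the state-dict checks above, the multi-line `self.assertTrue(torch.allclose(...))` wrapper collapses into a single `assert torch.allclose(...)`. As a hedged aside (not what this commit uses), `torch.testing.assert_close` is an alternative that reports which elements mismatch; a minimal sketch with invented inputs:

import torch


def compare_state_dicts(state_dict, state_dict_from_pretrained, device="cpu"):
    assert state_dict.keys() == state_dict_from_pretrained.keys()
    for key in state_dict:
        # assert_close raises with a detailed mismatch report instead of a bare False
        torch.testing.assert_close(
            state_dict[key].to(device), state_dict_from_pretrained[key].to(device)
        )


compare_state_dicts({"w": torch.ones(2)}, {"w": torch.ones(2)})
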
@ -54,7 +54,7 @@ class TestPoly(unittest.TestCase):

# generate some dummy data
text = os.__doc__.splitlines()
-self.assertTrue(len(text) > 10)
+assert len(text) > 10
inputs = tokenizer(text, return_tensors="pt", padding=True)
inputs["task_ids"] = torch.arange(len(text)) % n_tasks
inputs["labels"] = tokenizer((["A", "B"] * 100)[: len(text)], return_tensors="pt")["input_ids"]
@ -72,7 +72,7 @@ class TestPoly(unittest.TestCase):
losses.append(loss.item())

# loss improved by at least 50%
-self.assertLess(losses[-1], 0.5 * losses[0])
+assert losses[-1] < (0.5 * losses[0])

# check that saving and loading works
torch.manual_seed(0)
@ -84,8 +84,8 @@ class TestPoly(unittest.TestCase):
logits_disabled = model(**inputs).logits
tokens_disabled = model.generate(**inputs)

-self.assertFalse(torch.allclose(logits_before, logits_disabled, atol=atol, rtol=rtol))
+assert not torch.allclose(logits_before, logits_disabled, atol=atol, rtol=rtol)
-self.assertFalse(torch.allclose(tokens_before, tokens_disabled, atol=atol, rtol=rtol))
+assert not torch.allclose(tokens_before, tokens_disabled, atol=atol, rtol=rtol)

# saving and loading
with tempfile.TemporaryDirectory() as tmp_dir:
@ -96,5 +96,5 @@ class TestPoly(unittest.TestCase):
torch.manual_seed(0)
output_after = loaded(**inputs).logits
tokens_after = loaded.generate(**inputs)
-self.assertTrue(torch.allclose(logits_before, output_after, atol=atol, rtol=rtol))
+assert torch.allclose(logits_before, output_after, atol=atol, rtol=rtol)
-self.assertTrue(torch.allclose(tokens_before, tokens_after, atol=atol, rtol=rtol))
+assert torch.allclose(tokens_before, tokens_after, atol=atol, rtol=rtol)
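
The comparison-style asserts above (`assertLess` becomes `<`, `allclose` keeps its explicit tolerances) carry over directly. For scalar float checks, `pytest.approx` is a common companion to plain asserts; a small sketch with made-up loss values:

import pytest

losses = [2.0, 0.9]

# strict inequality needs no tolerance ...
assert losses[-1] < 0.5 * losses[0]
# ... but float equality usually does
assert losses[-1] == pytest.approx(0.9, rel=1e-6)
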
@ -152,7 +152,7 @@ class StableDiffusionModelTester(TestCase, PeftCommonTester):
merged_output = np.array(model(**dummy_input).images[0]).astype(np.float32)

# Images are in uint8 drange, so use large atol
-self.assertTrue(np.allclose(peft_output, merged_output, atol=1.0))
+assert np.allclose(peft_output, merged_output, atol=1.0)

@parameterized.expand(
PeftStableDiffusionTestConfigManager.get_grid_parameters(
@ -184,7 +184,7 @@ class StableDiffusionModelTester(TestCase, PeftCommonTester):
merged_output = np.array(model(**dummy_input).images[0]).astype(np.float32)

# Images are in uint8 drange, so use large atol
-self.assertTrue(np.allclose(peft_output, merged_output, atol=1.0))
+assert np.allclose(peft_output, merged_output, atol=1.0)

@parameterized.expand(
PeftStableDiffusionTestConfigManager.get_grid_parameters(
@ -210,10 +210,8 @@ class StableDiffusionModelTester(TestCase, PeftCommonTester):
model.unet.add_weighted_adapter([unet_adapter_name], [0.5], "weighted_adapter_test")

# Assert that base adapters config did not change
-self.assertTrue(
-asdict(text_encoder_adapter_config) == asdict(model.text_encoder.peft_config[text_encoder_adapter_name])
-)
-self.assertTrue(asdict(unet_adapter_config) == asdict(model.unet.peft_config[unet_adapter_name]))
+assert asdict(text_encoder_adapter_config) == asdict(model.text_encoder.peft_config[text_encoder_adapter_name])
+assert asdict(unet_adapter_config) == asdict(model.unet.peft_config[unet_adapter_name])

@parameterized.expand(
PeftStableDiffusionTestConfigManager.get_grid_parameters(
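
`assert np.allclose(...)` only yields a single boolean, so a failure does not say how far the images diverge. As an aside (not used in this commit), `numpy.testing.assert_allclose` prints mismatch statistics; a sketch with dummy arrays standing in for the pipeline outputs:

import numpy as np

peft_output = np.zeros((4, 4), dtype=np.float32)
merged_output = np.full((4, 4), 0.5, dtype=np.float32)

# Raises an AssertionError with max absolute/relative error if the tolerance is exceeded.
np.testing.assert_allclose(peft_output, merged_output, atol=1.0)
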
@ -17,6 +17,7 @@
import unittest
from copy import deepcopy

+import pytest
from diffusers import StableDiffusionPipeline
from parameterized import parameterized
from torch import nn
@ -175,7 +176,7 @@ class PeftCustomKwargsTester(unittest.TestCase):
layers_to_transform=layers_to_transform,
)
actual_result = bool(check_target_module_exists(config, key))
-self.assertEqual(actual_result, expected_result)
+assert actual_result == expected_result

def test_module_matching_lora(self):
# peft models that have a module matching method to inspect the matching modules to allow
@ -197,12 +198,12 @@ class PeftCustomKwargsTester(unittest.TestCase):
"h.3.self_attention.query_key_value",
"h.4.self_attention.query_key_value",
]
-self.assertEqual(matched, expected) # module lists should match exactly
+assert matched == expected # module lists should match exactly

# no overlap with matched modules
unmatched = output["unmatched"]
for key in expected:
-self.assertFalse(key in unmatched)
+assert key not in unmatched

def test_feedforward_matching_ia3(self):
model_id = "hf-internal-testing/tiny-random-T5ForConditionalGeneration"
@ -227,14 +228,14 @@ class PeftCustomKwargsTester(unittest.TestCase):
"encoder.block.0.layer.1.DenseReluDense.wi",
"encoder.block.0.layer.1.DenseReluDense.wo",
]
-self.assertEqual(matched, expected) # not required since we do similar checks above, but just to be sure
+assert matched == expected # not required since we do similar checks above, but just to be sure
module_dict = dict(model.named_modules())
for key in matched:
module = module_dict[key]
if key in expected_feedforward:
-self.assertTrue(module.is_feedforward)
+assert module.is_feedforward
else: # other IA3 modules should not be marked as feedforward
-self.assertFalse(module.is_feedforward)
+assert not module.is_feedforward

@parameterized.expand(MAYBE_INCLUDE_ALL_LINEAR_LAYERS_TEST_CASES)
def test_maybe_include_all_linear_layers_lora(
@ -277,7 +278,7 @@ class PeftCustomKwargsTester(unittest.TestCase):
# compare the two models and assert that all layers are of the same type
for name, actual_module in actual_model.named_modules():
expected_module = expected_model_module_dict[name]
-self.assertEqual(type(actual_module), type(expected_module))
+assert type(actual_module) == type(expected_module)

def test_maybe_include_all_linear_layers_ia3_loha(self):
model_id, initial_target_modules, expected_target_modules = (
@ -302,17 +303,17 @@ class PeftCustomKwargsTester(unittest.TestCase):
new_config = _maybe_include_all_linear_layers(config, model)
if isinstance(expected_target_modules, list):
# assert that expected and actual target_modules have the same items
-self.assertCountEqual(new_config.target_modules, expected_target_modules)
+assert set(new_config.target_modules) == set(expected_target_modules)
else:
-self.assertEqual(new_config.target_modules, expected_target_modules)
+assert new_config.target_modules == expected_target_modules

def test_maybe_include_all_linear_layers_diffusion(self):
model_id = "hf-internal-testing/tiny-stable-diffusion-torch"
model = StableDiffusionPipeline.from_pretrained(model_id)
config = LoraConfig(base_model_name_or_path=model_id, target_modules="all-linear")
-with self.assertRaisesRegex(
+with pytest.raises(
ValueError,
-"Only instances of PreTrainedModel support `target_modules='all-linear'`",
+match="Only instances of PreTrainedModel support `target_modules='all-linear'`",
):
model.unet = get_peft_model(model.unet, config)

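
One behavioural nuance in the hunk above: `assertCountEqual` also verified that each element appears the same number of times, while comparing `set(...)` ignores duplicates. Target-module lists normally contain no duplicates, so the relaxation should be harmless; if multiplicity ever matters, `collections.Counter` restores the stricter check. Sketch with invented module names:

from collections import Counter

expected_target_modules = ["q_proj", "v_proj", "q_proj"]
new_target_modules = ["q_proj", "v_proj"]

assert set(new_target_modules) == set(expected_target_modules)  # duplicates ignored
assert Counter(new_target_modules) != Counter(expected_target_modules)  # multiplicity differs
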
@ -336,32 +337,32 @@ class TestTargetedModuleNames(unittest.TestCase):
def test_one_targeted_module_regex(self):
model = MLP()
model = get_peft_model(model, LoraConfig(target_modules="lin0"))
-self.assertEqual(model.targeted_module_names, ["lin0"])
+assert model.targeted_module_names == ["lin0"]

def test_two_targeted_module_regex(self):
model = MLP()
model = get_peft_model(model, LoraConfig(target_modules="lin.*"))
-self.assertEqual(model.targeted_module_names, ["lin0", "lin1"])
+assert model.targeted_module_names == ["lin0", "lin1"]

def test_one_targeted_module_list(self):
model = MLP()
model = get_peft_model(model, LoraConfig(target_modules=["lin0"]))
-self.assertEqual(model.targeted_module_names, ["lin0"])
+assert model.targeted_module_names == ["lin0"]

def test_two_targeted_module_list(self):
model = MLP()
model = get_peft_model(model, LoraConfig(target_modules=["lin0", "lin1"]))
-self.assertEqual(model.targeted_module_names, ["lin0", "lin1"])
+assert model.targeted_module_names == ["lin0", "lin1"]

def test_ia3_targeted_module_regex(self):
model = MLP()
model = get_peft_model(model, IA3Config(target_modules=".*lin.*", feedforward_modules=".*lin.*"))
-self.assertEqual(model.targeted_module_names, ["lin0", "lin1"])
+assert model.targeted_module_names == ["lin0", "lin1"]

def test_ia3_targeted_module_list(self):
model = MLP()
model = get_peft_model(model, IA3Config(target_modules=["lin0", "lin1"], feedforward_modules=["lin0", "lin1"]))
-self.assertEqual(model.targeted_module_names, ["lin0", "lin1"])
+assert model.targeted_module_names == ["lin0", "lin1"]

def test_realistic_example(self):
model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-BloomForCausalLM")
@ -370,4 +371,4 @@ class TestTargetedModuleNames(unittest.TestCase):
expected = [
f"transformer.h.{i}.self_attention.query_key_value" for i in range(len(model.base_model.transformer.h))
]
-self.assertEqual(model.targeted_module_names, expected)
+assert model.targeted_module_names == expected
@ -20,6 +20,7 @@ import tempfile
from collections import OrderedDict
from dataclasses import replace

+import pytest
import torch
import yaml
from diffusers import StableDiffusionPipeline
@ -172,27 +173,27 @@ class PeftCommonTester:
def check_modelcard(self, tmp_dirname, model):
# check the generated README.md
filename = os.path.join(tmp_dirname, "README.md")
-self.assertTrue(os.path.exists(filename))
+assert os.path.exists(filename)
with open(filename, encoding="utf-8") as f:
readme = f.read()
metainfo = re.search(r"---\n(.*?)\n---", readme, re.DOTALL).group(1)
dct = yaml.safe_load(metainfo)
-self.assertEqual(dct["library_name"], "peft")
+assert dct["library_name"] == "peft"

if hasattr(model, "config"):
-self.assertEqual(dct["base_model"], model.config.to_dict()["_name_or_path"])
+assert dct["base_model"] == model.config.to_dict()["_name_or_path"]
else: # a custom model
-self.assertTrue("base_model" not in dct)
+assert "base_model" not in dct

def check_config_json(self, tmp_dirname, model):
# check the generated config.json
filename = os.path.join(tmp_dirname, "adapter_config.json")
-self.assertTrue(os.path.exists(filename))
+assert os.path.exists(filename)
with open(filename, encoding="utf-8") as f:
config = json.load(f)

if hasattr(model, "config"): # custom models don't have a config attribute
-self.assertEqual(config["base_model_name_or_path"], model.config.to_dict()["_name_or_path"])
+assert config["base_model_name_or_path"] == model.config.to_dict()["_name_or_path"]

def _test_model_attr(self, model_id, config_cls, config_kwargs):
model = self.transformers_class.from_pretrained(model_id)
@ -202,9 +203,9 @@ class PeftCommonTester:
)
model = get_peft_model(model, config)

-self.assertTrue(hasattr(model, "save_pretrained"))
+assert hasattr(model, "save_pretrained")
-self.assertTrue(hasattr(model, "from_pretrained"))
+assert hasattr(model, "from_pretrained")
-self.assertTrue(hasattr(model, "push_to_hub"))
+assert hasattr(model, "push_to_hub")

def _test_adapter_name(self, model_id, config_cls, config_kwargs):
model = self.transformers_class.from_pretrained(model_id)
@ -219,7 +220,7 @@ class PeftCommonTester:
correctly_converted = True
break

-self.assertTrue(correctly_converted)
+assert correctly_converted

def _test_prepare_for_training(self, model_id, config_cls, config_kwargs):
model = self.transformers_class.from_pretrained(model_id).to(self.torch_device)
@ -232,14 +233,14 @@ class PeftCommonTester:
dummy_input = self.prepare_inputs_for_testing()
dummy_output = model.get_input_embeddings()(dummy_input["input_ids"])

-self.assertFalse(dummy_output.requires_grad)
+assert not dummy_output.requires_grad

# load with `prepare_model_for_int8_training`
model = self.transformers_class.from_pretrained(model_id).to(self.torch_device)
model = prepare_model_for_int8_training(model)

for param in model.parameters():
-self.assertFalse(param.requires_grad)
+assert not param.requires_grad

config = config_cls(
base_model_name_or_path=model_id,
@ -260,7 +261,7 @@ class PeftCommonTester:
dummy_input = self.prepare_inputs_for_testing()
dummy_output = model.get_input_embeddings()(dummy_input["input_ids"])

-self.assertTrue(dummy_output.requires_grad)
+assert dummy_output.requires_grad

def _test_save_pretrained(self, model_id, config_cls, config_kwargs, safe_serialization=True):
# ensure that the weights are randomly initialized
@ -301,25 +302,23 @@ class PeftCommonTester:

# check if tensors equal
for key in state_dict.keys():
-self.assertTrue(
-torch.allclose(
-state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
-)
+assert torch.allclose(
+state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
)

target_adapter_filename = "adapter_model.safetensors" if safe_serialization else "adapter_model.bin"

# check if `adapter_model.safetensors` is present
-self.assertTrue(os.path.exists(os.path.join(tmp_dirname, target_adapter_filename)))
+assert os.path.exists(os.path.join(tmp_dirname, target_adapter_filename))

# check if `adapter_config.json` is present
-self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_config.json")))
+assert os.path.exists(os.path.join(tmp_dirname, "adapter_config.json"))

# check if `model.safetensors` is not present
-self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "model.safetensors")))
+assert not os.path.exists(os.path.join(tmp_dirname, "model.safetensors"))

# check if `config.json` is not present
-self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "config.json")))
+assert not os.path.exists(os.path.join(tmp_dirname, "config.json"))

self.check_modelcard(tmp_dirname, model)
self.check_config_json(tmp_dirname, model)
@ -376,33 +375,31 @@ class PeftCommonTester:
state_dict_from_pretrained = get_state_dict(model_from_pretrained, unwrap_compiled=True)

# check if same keys
-self.assertEqual(state_dict.keys(), state_dict_from_pretrained.keys())
+assert state_dict.keys() == state_dict_from_pretrained.keys()

# check if tensors equal
for key in state_dict.keys():
-self.assertTrue(
-torch.allclose(
-state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
-)
+assert torch.allclose(
+state_dict[key].to(self.torch_device), state_dict_from_pretrained[key].to(self.torch_device)
)

target_adapter_filename = "adapter_model.safetensors" if safe_serialization else "adapter_model.bin"

# check if `adapter_model.safetensors` is present
-self.assertTrue(os.path.exists(os.path.join(tmp_dirname, target_adapter_filename)))
+assert os.path.exists(os.path.join(tmp_dirname, target_adapter_filename))
-self.assertTrue(os.path.exists(os.path.join(new_adapter_dir, target_adapter_filename)))
+assert os.path.exists(os.path.join(new_adapter_dir, target_adapter_filename))

# check if `adapter_config.json` is present
-self.assertTrue(os.path.exists(os.path.join(tmp_dirname, "adapter_config.json")))
+assert os.path.exists(os.path.join(tmp_dirname, "adapter_config.json"))
-self.assertTrue(os.path.exists(os.path.join(new_adapter_dir, "adapter_config.json")))
+assert os.path.exists(os.path.join(new_adapter_dir, "adapter_config.json"))

# check if `model.safetensors` is not present
-self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "model.safetensors")))
+assert not os.path.exists(os.path.join(tmp_dirname, "model.safetensors"))
-self.assertFalse(os.path.exists(os.path.join(new_adapter_dir, "model.safetensors")))
+assert not os.path.exists(os.path.join(new_adapter_dir, "model.safetensors"))

# check if `config.json` is not present
-self.assertFalse(os.path.exists(os.path.join(tmp_dirname, "config.json")))
+assert not os.path.exists(os.path.join(tmp_dirname, "config.json"))
-self.assertFalse(os.path.exists(os.path.join(new_adapter_dir, "config.json")))
+assert not os.path.exists(os.path.join(new_adapter_dir, "config.json"))

self.check_modelcard(tmp_dirname, model)
self.check_config_json(tmp_dirname, model)
@ -413,8 +410,8 @@ class PeftCommonTester:
model_from_pretrained = self.transformers_class.from_pretrained(model_id)
model_from_pretrained = PeftModel.from_pretrained(model_from_pretrained, tmp_dirname)

-self.assertTrue("default" in model_from_pretrained.peft_config.keys())
+assert "default" in model_from_pretrained.peft_config.keys()
-self.assertTrue("new_adapter" not in model_from_pretrained.peft_config.keys())
+assert "new_adapter" not in model_from_pretrained.peft_config.keys()

def _test_from_pretrained_config_construction(self, model_id, config_cls, config_kwargs):
model = self.transformers_class.from_pretrained(model_id)
@ -430,8 +427,8 @@ class PeftCommonTester:
model_from_pretrained, tmp_dirname, is_trainable=False, config=config
)

-self.assertTrue(model_from_pretrained.peft_config["default"].inference_mode)
+assert model_from_pretrained.peft_config["default"].inference_mode
-self.assertIs(model_from_pretrained.peft_config["default"], config)
+assert model_from_pretrained.peft_config["default"] is config

def _test_merge_layers_fp16(self, model_id, config_cls, config_kwargs):
if config_cls not in (LoraConfig, IA3Config):
@ -479,7 +476,7 @@ class PeftCommonTester:
model = model.merge_and_unload()
logits_merged = model(**dummy_input)[0]

-self.assertTrue(torch.allclose(logits_unmerged, logits_merged, atol=1e-3, rtol=1e-3))
+assert torch.allclose(logits_unmerged, logits_merged, atol=1e-3, rtol=1e-3)

model = self.transformers_class.from_pretrained(model_id)
config = config_cls(
@ -493,26 +490,20 @@ class PeftCommonTester:
if "lora_A" in name or "ia3" in name or "lora_E" in name or "lora_B" in name:
module.data[0] = torch.nan

-with self.assertRaises(ValueError) as error_context:
+with pytest.raises(
+ValueError, match="NaNs detected in the merged weights. The adapter default seems to be broken"
+):
model = model.merge_and_unload(safe_merge=True)

-self.assertEqual(
-str(error_context.exception),
-"NaNs detected in the merged weights. The adapter default seems to be broken",
-)

for name, module in model.named_parameters():
if "lora_A" in name or "ia3" in name or "lora_E" in name or "lora_B" in name:
module.data[0] = torch.inf

-with self.assertRaises(ValueError) as error_context:
+with pytest.raises(
+ValueError, match="NaNs detected in the merged weights. The adapter default seems to be broken"
+):
model = model.merge_and_unload(safe_merge=True)

-self.assertEqual(
-str(error_context.exception),
-"NaNs detected in the merged weights. The adapter default seems to be broken",
-)

def _test_merge_layers(self, model_id, config_cls, config_kwargs):
if issubclass(config_cls, PromptLearningConfig):
return
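
The old code compared `str(error_context.exception)` for exact equality; `pytest.raises(match=...)` performs a regex search instead, so it is a slightly looser check. If exact matching is ever needed, anchoring an escaped pattern reproduces it; a self-contained sketch (the helper function is invented for illustration):

import re

import pytest


def broken_merge():
    raise ValueError("NaNs detected in the merged weights. The adapter default seems to be broken")


def test_exact_error_message():
    msg = "NaNs detected in the merged weights. The adapter default seems to be broken"
    with pytest.raises(ValueError, match=f"^{re.escape(msg)}$"):
        broken_merge()
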
@ -543,15 +534,15 @@ class PeftCommonTester:
if (config.peft_type == "IA3") and (model_id == "Conv2d"):
# for some reason, the IA³ Conv2d introduces a larger error
atol, rtol = 0.3, 0.01
-self.assertTrue(torch.allclose(logits, logits_merged, atol=atol, rtol=rtol))
+assert torch.allclose(logits, logits_merged, atol=atol, rtol=rtol)
-self.assertTrue(torch.allclose(logits, logits_unmerged, atol=atol, rtol=rtol))
+assert torch.allclose(logits, logits_unmerged, atol=atol, rtol=rtol)
-self.assertTrue(torch.allclose(logits, logits_merged_unloaded, atol=atol, rtol=rtol))
+assert torch.allclose(logits, logits_merged_unloaded, atol=atol, rtol=rtol)

# For this test to work, weights should not be initialized to identity transform (e.g.
# init_lora_weights should be False).
transformers_model = self.transformers_class.from_pretrained(model_id).to(self.torch_device)
logits_transformers = transformers_model(**dummy_input)[0]
-self.assertFalse(torch.allclose(logits_merged, logits_transformers, atol=1e-10, rtol=1e-10))
+assert not torch.allclose(logits_merged, logits_transformers, atol=1e-10, rtol=1e-10)

# test that the logits are identical after a save-load-roundtrip
if hasattr(model, "save_pretrained"):
@ -564,7 +555,7 @@ class PeftCommonTester:
model_from_pretrained = pickle.loads(pickle.dumps(model))

logits_merged_from_pretrained = model_from_pretrained(**dummy_input)[0]
-self.assertTrue(torch.allclose(logits_merged, logits_merged_from_pretrained, atol=atol, rtol=rtol))
+assert torch.allclose(logits_merged, logits_merged_from_pretrained, atol=atol, rtol=rtol)

def _test_merge_layers_multi(self, model_id, config_cls, config_kwargs):
supported_peft_types = [PeftType.LORA, PeftType.LOHA, PeftType.LOKR, PeftType.IA3, PeftType.OFT]
@ -598,14 +589,14 @@ class PeftCommonTester:
with torch.inference_mode():
logits_adapter_2 = model(**dummy_input)[0]

-self.assertFalse(torch.allclose(logits_adapter_1, logits_adapter_2, atol=1e-3, rtol=1e-3))
+assert not torch.allclose(logits_adapter_1, logits_adapter_2, atol=1e-3, rtol=1e-3)

model.set_adapter("default")

with torch.inference_mode():
logits_adapter_1_after_set = model(**dummy_input)[0]

-self.assertTrue(torch.allclose(logits_adapter_1_after_set, logits_adapter_1, atol=1e-3, rtol=1e-3))
+assert torch.allclose(logits_adapter_1_after_set, logits_adapter_1, atol=1e-3, rtol=1e-3)

model_copy = copy.deepcopy(model)
model_copy_2 = copy.deepcopy(model)
@ -614,22 +605,22 @@ class PeftCommonTester:
with torch.inference_mode():
logits_merged_all = model_merged_all(**dummy_input)[0]

-self.assertFalse(torch.allclose(logits_merged_all, logits_adapter_2, atol=1e-3, rtol=1e-3))
+assert not torch.allclose(logits_merged_all, logits_adapter_2, atol=1e-3, rtol=1e-3)
-self.assertFalse(torch.allclose(logits_merged_all, logits_adapter_1, atol=1e-3, rtol=1e-3))
+assert not torch.allclose(logits_merged_all, logits_adapter_1, atol=1e-3, rtol=1e-3)

model_merged_adapter_2 = model_copy.merge_and_unload(adapter_names=["adapter-2"])

with torch.inference_mode():
logits_merged_adapter_2 = model_merged_adapter_2(**dummy_input)[0]

-self.assertTrue(torch.allclose(logits_merged_adapter_2, logits_adapter_2, atol=1e-3, rtol=1e-3))
+assert torch.allclose(logits_merged_adapter_2, logits_adapter_2, atol=1e-3, rtol=1e-3)

model_merged_adapter_default = model_copy_2.merge_and_unload(adapter_names=["default"])

with torch.inference_mode():
logits_merged_adapter_default = model_merged_adapter_default(**dummy_input)[0]

-self.assertTrue(torch.allclose(logits_merged_adapter_default, logits_adapter_1, atol=1e-3, rtol=1e-3))
+assert torch.allclose(logits_merged_adapter_default, logits_adapter_1, atol=1e-3, rtol=1e-3)

def _test_merge_layers_is_idempotent(self, model_id, config_cls, config_kwargs):
if ("gpt2" in model_id.lower()) and (config_cls != LoraConfig):
@ -650,11 +641,11 @@ class PeftCommonTester:

# merging again should not change anything
# also check warning:
-with self.assertWarnsRegex(UserWarning, "All adapters are already merged, nothing to do"):
+with pytest.warns(UserWarning, match="All adapters are already merged, nothing to do"):
model.merge_adapter()
logits_1 = model(**self.prepare_inputs_for_testing())[0]

-self.assertTrue(torch.allclose(logits_0, logits_1, atol=1e-6, rtol=1e-6))
+assert torch.allclose(logits_0, logits_1, atol=1e-6, rtol=1e-6)

def _test_generate(self, model_id, config_cls, config_kwargs):
model = self.transformers_class.from_pretrained(model_id)
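
`pytest.warns` mirrors `assertWarnsRegex`: it fails the test if no warning of the given category is emitted and checks the message with a regex search. A minimal sketch (the warning function below is a stand-in, not PEFT code):

import warnings

import pytest


def merge_adapter_again():
    warnings.warn("All adapters are already merged, nothing to do.", UserWarning)


def test_merge_is_idempotent_warns():
    with pytest.warns(UserWarning, match="All adapters are already merged, nothing to do"):
        merge_adapter_again()
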
@ -681,7 +672,7 @@ class PeftCommonTester:
|
|||||||
|
|
||||||
inputs = self.prepare_inputs_for_testing()
|
inputs = self.prepare_inputs_for_testing()
|
||||||
if raises_err:
|
if raises_err:
|
||||||
with self.assertRaises(TypeError):
|
with pytest.raises(TypeError):
|
||||||
# check if `generate` raises an error if positional arguments are passed
|
# check if `generate` raises an error if positional arguments are passed
|
||||||
_ = model.generate(inputs["input_ids"])
|
_ = model.generate(inputs["input_ids"])
|
||||||
else:
|
else:
|
||||||
@ -719,7 +710,7 @@ class PeftCommonTester:
|
|||||||
model = get_peft_model(model, config)
|
model = get_peft_model(model, config)
|
||||||
model = model.half()
|
model = model.half()
|
||||||
|
|
||||||
self.assertEqual(model.base_model_torch_dtype, torch.float16)
|
assert model.base_model_torch_dtype == torch.float16
|
||||||
|
|
||||||
def _test_training(self, model_id, config_cls, config_kwargs):
|
def _test_training(self, model_id, config_cls, config_kwargs):
|
||||||
if issubclass(config_cls, PromptLearningConfig):
|
if issubclass(config_cls, PromptLearningConfig):
|
||||||
@ -745,9 +736,9 @@ class PeftCommonTester:
|
|||||||
parameter_prefix = model.prefix
|
parameter_prefix = model.prefix
|
||||||
for n, param in model.named_parameters():
|
for n, param in model.named_parameters():
|
||||||
if (parameter_prefix in n) or ("modules_to_save" in n):
|
if (parameter_prefix in n) or ("modules_to_save" in n):
|
||||||
self.assertIsNotNone(param.grad)
|
assert param.grad is not None
|
||||||
else:
|
else:
|
||||||
self.assertIsNone(param.grad)
|
assert param.grad is None
|
||||||
|
|
||||||
def _test_inference_safetensors(self, model_id, config_cls, config_kwargs):
|
def _test_inference_safetensors(self, model_id, config_cls, config_kwargs):
|
||||||
if (config_cls == PrefixTuningConfig) and ("deberta" in model_id.lower()):
|
if (config_cls == PrefixTuningConfig) and ("deberta" in model_id.lower()):
|
||||||
@ -778,14 +769,14 @@ class PeftCommonTester:
|
|||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmp_dirname:
|
with tempfile.TemporaryDirectory() as tmp_dirname:
|
||||||
model.save_pretrained(tmp_dirname, safe_serialization=True)
|
model.save_pretrained(tmp_dirname, safe_serialization=True)
|
||||||
self.assertTrue("adapter_model.safetensors" in os.listdir(tmp_dirname))
|
assert "adapter_model.safetensors" in os.listdir(tmp_dirname)
|
||||||
self.assertTrue("adapter_model.bin" not in os.listdir(tmp_dirname))
|
assert "adapter_model.bin" not in os.listdir(tmp_dirname)
|
||||||
|
|
||||||
model_from_pretrained = self.transformers_class.from_pretrained(model_id)
|
model_from_pretrained = self.transformers_class.from_pretrained(model_id)
|
||||||
model_from_pretrained = PeftModel.from_pretrained(model_from_pretrained, tmp_dirname).to(self.torch_device)
|
model_from_pretrained = PeftModel.from_pretrained(model_from_pretrained, tmp_dirname).to(self.torch_device)
|
||||||
|
|
||||||
logits_from_pretrained = model_from_pretrained(**inputs)[0][0]
|
logits_from_pretrained = model_from_pretrained(**inputs)[0][0]
|
||||||
self.assertTrue(torch.allclose(logits, logits_from_pretrained, atol=1e-4, rtol=1e-4))
|
assert torch.allclose(logits, logits_from_pretrained, atol=1e-4, rtol=1e-4)
|
||||||
|
|
||||||
def _test_training_layer_indexing(self, model_id, config_cls, config_kwargs):
|
def _test_training_layer_indexing(self, model_id, config_cls, config_kwargs):
|
||||||
if config_cls not in (LoraConfig,):
|
if config_cls not in (LoraConfig,):
|
||||||
@ -813,10 +804,10 @@ class PeftCommonTester:
|
|||||||
|
|
||||||
for n, param in model.named_parameters():
|
for n, param in model.named_parameters():
|
||||||
if "lora" in n:
|
if "lora" in n:
|
||||||
self.assertIsNotNone(param.grad)
|
assert param.grad is not None
|
||||||
nb_trainable += 1
|
nb_trainable += 1
|
||||||
else:
|
else:
|
||||||
self.assertIsNone(param.grad)
|
assert param.grad is None
|
||||||
|
|
||||||
with tempfile.TemporaryDirectory() as tmp_dirname:
|
with tempfile.TemporaryDirectory() as tmp_dirname:
|
||||||
model.save_pretrained(tmp_dirname)
|
model.save_pretrained(tmp_dirname)
|
||||||
@ -825,7 +816,7 @@ class PeftCommonTester:
|
|||||||
model_from_pretrained = PeftModel.from_pretrained(model_from_pretrained, tmp_dirname).to(self.torch_device)
|
model_from_pretrained = PeftModel.from_pretrained(model_from_pretrained, tmp_dirname).to(self.torch_device)
|
||||||
|
|
||||||
logits_from_pretrained = model_from_pretrained(**inputs)[0][0]
|
logits_from_pretrained = model_from_pretrained(**inputs)[0][0]
|
||||||
self.assertTrue(torch.allclose(logits, logits_from_pretrained, atol=1e-4, rtol=1e-4))
|
assert torch.allclose(logits, logits_from_pretrained, atol=1e-4, rtol=1e-4)
|
||||||
|
|
||||||
model = self.transformers_class.from_pretrained(model_id)
|
model = self.transformers_class.from_pretrained(model_id)
|
||||||
config = config_cls(
|
config = config_cls(
|
||||||
@ -839,7 +830,7 @@ class PeftCommonTester:
|
|||||||
if "lora" in n:
|
if "lora" in n:
|
||||||
nb_trainable_all += 1
|
nb_trainable_all += 1
|
||||||
|
|
||||||
self.assertLess(nb_trainable, nb_trainable_all)
|
assert nb_trainable < nb_trainable_all
|
||||||
|
|
||||||
def _test_training_gradient_checkpointing(self, model_id, config_cls, config_kwargs):
|
def _test_training_gradient_checkpointing(self, model_id, config_cls, config_kwargs):
|
||||||
if issubclass(config_cls, PromptLearningConfig):
|
if issubclass(config_cls, PromptLearningConfig):
|
||||||
@ -872,9 +863,9 @@ class PeftCommonTester:
|
|||||||
parameter_prefix = "ia3" if config_cls == IA3Config else "lora"
|
parameter_prefix = "ia3" if config_cls == IA3Config else "lora"
|
||||||
for n, param in model.named_parameters():
|
for n, param in model.named_parameters():
|
||||||
if parameter_prefix in n:
|
if parameter_prefix in n:
|
||||||
self.assertIsNotNone(param.grad)
|
assert param.grad is not None
|
||||||
else:
|
else:
|
||||||
self.assertIsNone(param.grad)
|
assert param.grad is None
|
||||||
|
|
||||||
def _test_peft_model_device_map(self, model_id, config_cls, config_kwargs):
|
def _test_peft_model_device_map(self, model_id, config_cls, config_kwargs):
|
||||||
if config_cls not in (LoraConfig,):
|
if config_cls not in (LoraConfig,):
|
||||||
@ -919,7 +910,7 @@ class PeftCommonTester:
|
|||||||
|
|
||||||
# check that prompt encoder has grads
|
# check that prompt encoder has grads
|
||||||
for param in model.prompt_encoder.parameters():
|
for param in model.prompt_encoder.parameters():
|
||||||
self.assertIsNotNone(param.grad)
|
assert param.grad is not None
|
||||||
|
|
||||||
def _test_delete_adapter(self, model_id, config_cls, config_kwargs):
|
def _test_delete_adapter(self, model_id, config_cls, config_kwargs):
|
||||||
supported_peft_types = [PeftType.LORA, PeftType.LOHA, PeftType.LOKR, PeftType.IA3, PeftType.OFT]
|
supported_peft_types = [PeftType.LORA, PeftType.LOHA, PeftType.LOKR, PeftType.IA3, PeftType.OFT]
|
||||||
@ -939,20 +930,20 @@ class PeftCommonTester:
         model.set_adapter(adapter_to_delete)
         model = model.to(self.torch_device)
         model.delete_adapter(adapter_to_delete)
-        self.assertFalse(adapter_to_delete in model.peft_config)
-        self.assertEqual(model.active_adapters, ["default"])
+        assert adapter_to_delete not in model.peft_config
+        assert model.active_adapters == ["default"]

         key_list = [key for key, _ in model.named_modules()]
         for key in key_list:
             _, target, _ = _get_submodules(model, key)
             attributes_to_check = getattr(target, "adapter_layer_names", []) + getattr(target, "other_param_names", [])
             for attr in attributes_to_check:
-                self.assertFalse(adapter_to_delete in getattr(target, attr))
+                assert adapter_to_delete not in getattr(target, attr)

         # check that we can also delete the last remaining adapter
         model.delete_adapter("default")
-        self.assertFalse("default" in model.peft_config)
-        self.assertEqual(model.active_adapters, [])
+        assert "default" not in model.peft_config
+        assert model.active_adapters == []

         input = self.prepare_inputs_for_testing()
         # note: we cannot call model(**input) because PeftModel always expects there to be at least one adapter
@ -977,20 +968,20 @@ class PeftCommonTester:
         # "delete_me" is added but not activated
         model = model.to(self.torch_device)
         model.delete_adapter(adapter_to_delete)
-        self.assertFalse(adapter_to_delete in model.peft_config)
-        self.assertEqual(model.active_adapters, ["default"])
+        assert adapter_to_delete not in model.peft_config
+        assert model.active_adapters == ["default"]

         key_list = [key for key, _ in model.named_modules()]
         for key in key_list:
             _, target, _ = _get_submodules(model, key)
             attributes_to_check = getattr(target, "adapter_layer_names", []) + getattr(target, "other_param_names", [])
             for attr in attributes_to_check:
-                self.assertFalse(adapter_to_delete in getattr(target, attr))
+                assert adapter_to_delete not in getattr(target, attr)

         # check that we can also delete the last remaining adapter
         model.delete_adapter("default")
-        self.assertFalse("default" in model.peft_config)
-        self.assertEqual(model.active_adapters, [])
+        assert "default" not in model.peft_config
+        assert model.active_adapters == []

         input = self.prepare_inputs_for_testing()
         # note: we cannot call model(**input) because PeftModel always expects there to be at least one adapter
@ -1006,7 +997,7 @@ class PeftCommonTester:
         model = model.to(self.torch_device)

         if config.peft_type not in ("LORA", "ADALORA", "IA3"):
-            with self.assertRaises(AttributeError):
+            with pytest.raises(AttributeError):
                 model = model.unload()
         else:
             dummy_input = self.prepare_inputs_for_testing()
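Editor's aside (not part of the patch): the `self.assertRaises` → `pytest.raises` rewrite in the hunk above keeps the same semantics; both are context managers that fail the test unless the named exception is raised inside the block. A minimal illustrative sketch of the two styles, using a hypothetical `divide` helper:

import unittest

import pytest


def divide(a, b):
    # hypothetical helper, used only to contrast the two assertion styles
    return a / b


class OldStyle(unittest.TestCase):
    # unittest style: the exception check is a TestCase method
    def test_divide_by_zero(self):
        with self.assertRaises(ZeroDivisionError):
            divide(1, 0)


# pytest style: a plain test function with a plain context manager
def test_divide_by_zero_pytest():
    with pytest.raises(ZeroDivisionError):
        divide(1, 0)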
@ -1019,8 +1010,8 @@ class PeftCommonTester:
             model = model.unload()
             logits_unload = model(**dummy_input)[0]

-            self.assertFalse(torch.allclose(logits_with_adapter, logits_unload, atol=1e-10, rtol=1e-10))
-            self.assertTrue(torch.allclose(logits_transformers, logits_unload, atol=1e-4, rtol=1e-4))
+            assert not torch.allclose(logits_with_adapter, logits_unload, atol=1e-10, rtol=1e-10)
+            assert torch.allclose(logits_transformers, logits_unload, atol=1e-4, rtol=1e-4)

     def _test_weighted_combination_of_adapters(self, model_id, config_cls, config_kwargs):
         if issubclass(config_cls, AdaLoraConfig):
@ -1116,7 +1107,7 @@ class PeftCommonTester:
             combination_type="linear",
         )

-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             model.add_weighted_adapter(
                 adapter_list[1:],
                 weight_list[1:],
@ -1124,7 +1115,7 @@ class PeftCommonTester:
                 combination_type="linear",
             )

-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             model.add_weighted_adapter(
                 adapter_list[1:],
                 weight_list[1:],
@ -1133,7 +1124,7 @@ class PeftCommonTester:
                 density=0.5,
             )

-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             model.add_weighted_adapter(
                 adapter_list[1:],
                 weight_list[1:],
@ -1142,7 +1133,7 @@ class PeftCommonTester:
                 density=0.5,
             )

-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             model.add_weighted_adapter(
                 adapter_list[1:],
                 weight_list[1:],
@ -1165,7 +1156,7 @@ class PeftCommonTester:
             "multi_adapter_dare_ties_reweighting",
         ]
         for new_adapter in new_adapters:
-            self.assertTrue(new_adapter in model.peft_config)
+            assert new_adapter in model.peft_config

         key_list = [key for key, _ in model.named_modules()]
         for key in key_list:
@ -1175,23 +1166,21 @@ class PeftCommonTester:
                 if "single" in adapter_name:
                     new_delta_weight = target.get_delta_weight(adapter_name)
                     weighted_original_delta_weights = target.get_delta_weight(adapter_list[0]) * weight_list[0]
-                    self.assertTrue(
-                        torch.allclose(new_delta_weight, weighted_original_delta_weights, atol=1e-4, rtol=1e-4)
-                    )
+                    assert torch.allclose(new_delta_weight, weighted_original_delta_weights, atol=1e-4, rtol=1e-4)
                 elif "svd" in adapter_name:
-                    self.assertTrue(target.r[adapter_name] == 20)
+                    assert target.r[adapter_name] == 20
                 elif "linear" in adapter_name:
-                    self.assertTrue(target.r[adapter_name] == 8)
+                    assert target.r[adapter_name] == 8
                 elif "cat" in adapter_name:
-                    self.assertTrue(target.r[adapter_name] == 28)
+                    assert target.r[adapter_name] == 28

         dummy_input = self.prepare_inputs_for_testing()
         model.eval()
         for adapter_name in new_adapters:
             # ensuring new adapters pass the forward loop
             model.set_adapter(adapter_name)
-            self.assertTrue(model.active_adapter == adapter_name)
-            self.assertTrue(model.active_adapters == [adapter_name])
+            assert model.active_adapter == adapter_name
+            assert model.active_adapters == [adapter_name]
             model(**dummy_input)[0]

     def _test_disable_adapter(self, model_id, config_cls, config_kwargs):
@ -1243,9 +1232,9 @@ class PeftCommonTester:
         # must be False
         if isinstance(peft_model, StableDiffusionPipeline):
             # for SD, check that most pixels have different values
-            self.assertTrue((output_before != output_peft).float().mean() > 0.8)
+            assert (output_before != output_peft).float().mean() > 0.8
         else:
-            self.assertFalse(torch.allclose(output_before, output_peft))
+            assert not torch.allclose(output_before, output_peft)

         # output with DISABLED ADAPTER
         if isinstance(peft_model, StableDiffusionPipeline):
@ -1253,11 +1242,11 @@ class PeftCommonTester:
             with peft_model.text_encoder.disable_adapter():
                 output_peft_disabled = get_output(peft_model)
             # for SD, very rarely, a pixel can differ
-            self.assertTrue((output_before != output_peft_disabled).float().mean() < 1e-4)
+            assert (output_before != output_peft_disabled).float().mean() < 1e-4
         else:
             with peft_model.disable_adapter():
                 output_peft_disabled = get_output(peft_model)
-            self.assertTrue(torch.allclose(output_before, output_peft_disabled, atol=1e-6, rtol=1e-6))
+            assert torch.allclose(output_before, output_peft_disabled, atol=1e-6, rtol=1e-6)

         # TODO: add tests to check if disabling adapters works after calling merge_adapter
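Editor's aside (not part of the patch): the `disable_adapter()` context manager exercised in the two hunks above temporarily routes computation through the base weights, which is why the disabled output is compared against `output_before` with tight tolerances. A minimal sketch of the same call pattern, assuming `gpt2` as an arbitrary stand-in base model:

import torch
from transformers import AutoModelForCausalLM

from peft import LoraConfig, get_peft_model

# "gpt2" is a hypothetical choice made only for illustration
base = AutoModelForCausalLM.from_pretrained("gpt2")
model = get_peft_model(base, LoraConfig(task_type="CAUSAL_LM"))
inputs = {"input_ids": torch.tensor([[10, 20, 30]])}

model.eval()
with torch.no_grad():
    out_adapted = model(**inputs).logits
    with model.disable_adapter():
        out_disabled = model(**inputs).logits

# with a freshly initialized LoRA adapter the two outputs coincide (LoRA's B
# matrix starts at zero); after training they are expected to differ, while
# disable_adapter() still recovers the base model's behaviour
print(torch.allclose(out_adapted, out_disabled))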
@ -1276,12 +1265,12 @@ class PeftCommonTester:

         model = self.transformers_class.from_pretrained(model_id)
         model = get_peft_model(model, config, "adapter0")
-        with self.assertRaises(ValueError):
+        with pytest.raises(ValueError):
             model.add_adapter("adapter1", replace(config, r=20))

         # (superficial) test that the model is not left in a half-initialized state when adding an adapter fails
-        self.assertFalse("adapter1" in model.peft_config)
-        self.assertFalse("adapter1" in model.base_model.peft_config)
+        assert "adapter1" not in model.peft_config
+        assert "adapter1" not in model.base_model.peft_config

     def _test_passing_input_embeds_works(self, test_name, model_id, config_cls, config_kwargs):
         # https://github.com/huggingface/peft/issues/727
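For reference, a small illustrative script (not part of the patch) collecting the rewrite patterns this commit applies; the `pytest.warns` case comes from the commit message rather than from any hunk shown above:

import warnings

import pytest
import torch

x = torch.zeros(3)
y = torch.zeros(3)

# self.assertTrue(torch.allclose(x, y))   ->  plain assert
assert torch.allclose(x, y)

# self.assertFalse("adapter1" in cfg)     ->  "not in"
cfg = {"default": None}
assert "adapter1" not in cfg

# self.assertEqual(active, ["default"])   ->  "=="
active = ["default"]
assert active == ["default"]

# with self.assertRaises(ValueError): ... ->  pytest.raises
with pytest.raises(ValueError):
    raise ValueError("expected")

# with self.assertWarns(UserWarning): ... ->  pytest.warns
with pytest.warns(UserWarning):
    warnings.warn("expected", UserWarning)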