TST: Add more HF Hub model caching (#2682)

A bunch of tests in test_tuners_utils.py didn't use the hub_online_once
caching helper so far, which is now fixed. This should hopefully help
reduce timeouts.

Moreover, the iris dataset loading is now moved to a module-scoped
fixture (before, it was simply loaded at module level). This doesn't
help with caching, but it avoids loading the dataset when the
corresponding tests are not run at all.
Author: Benjamin Bossan
Date: 2025-07-30 20:02:07 +02:00
Committed by: GitHub
Parent: 46ae69ac29
Commit: 5e00266e85
2 changed files with 59 additions and 24 deletions
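
For context, the change in test_tuners_utils.py consists of wrapping every from_pretrained call in a `with hub_online_once(model_id):` block. The helper itself is imported from tests/testing_common.py and its implementation is not part of this diff; the snippet below is only a rough sketch of how such a guard could work, and every detail beyond the import path is an assumption rather than the actual PEFT code.

# Hypothetical sketch only: not the real helper from tests/testing_common.py.
# Idea: the first test that requests a given model_id downloads it from the HF
# Hub as usual; later requests for the same model_id run with offline mode
# requested, so they should be served from the local cache instead of hitting
# the Hub again.
import os
from contextlib import contextmanager

_already_fetched = set()  # model ids loaded earlier in this test session


@contextmanager
def hub_online_once(model_id):
    previous = os.environ.get("HF_HUB_OFFLINE")
    try:
        if model_id in _already_fetched:
            # HF_HUB_OFFLINE is the documented offline switch for huggingface_hub;
            # libraries that read it only at import time may additionally need
            # their cached constant patched (omitted in this sketch).
            os.environ["HF_HUB_OFFLINE"] = "1"
        yield
        _already_fetched.add(model_id)
    finally:
        if previous is None:
            os.environ.pop("HF_HUB_OFFLINE", None)
        else:
            os.environ["HF_HUB_OFFLINE"] = previous

Keying the guard on the model id keeps the first download of each tiny test model online while letting later loads of the same model stay local, which is what makes the extra `with` blocks in the diff below cheap to add.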


@@ -24,10 +24,13 @@ from peft.utils.incremental_pca import IncrementalPCA
 torch.manual_seed(1999)
-iris = load_dataset("scikit-learn/iris", split="train")
+
+
+@pytest.fixture(scope="module")
+def iris():
+    return load_dataset("scikit-learn/iris", split="train")
 
 
-def test_incremental_pca():
+def test_incremental_pca(iris):
     # Incremental PCA on dense arrays.
     n_components = 2
     X = torch.tensor([iris["SepalLengthCm"], iris["SepalWidthCm"], iris["PetalLengthCm"], iris["PetalWidthCm"]]).T
@@ -170,7 +173,7 @@ def test_incremental_pca_partial_fit():
     assert_close(ipca.components_, pipca.components_, rtol=1e-3, atol=1e-3)
 
 
-def test_incremental_pca_lowrank():
+def test_incremental_pca_lowrank(iris):
     # Test that lowrank mode is equivalent to non-lowrank mode.
     n_components = 2
     X = torch.tensor([iris["SepalLengthCm"], iris["SepalWidthCm"], iris["PetalLengthCm"], iris["PetalWidthCm"]]).T


@@ -58,6 +58,7 @@ from peft.tuners.tuners_utils import (
 from peft.utils import INCLUDE_LINEAR_LAYERS_SHORTHAND, ModulesToSaveWrapper, infer_device
 from peft.utils.constants import DUMMY_MODEL_CONFIG, MIN_TARGET_MODULES_FOR_OPTIMIZATION
 
+from .testing_common import hub_online_once
 from .testing_utils import require_bitsandbytes, require_non_cpu
@@ -208,7 +209,8 @@ class PeftCustomKwargsTester(unittest.TestCase):
         # configs that could exist. This is okay as the method calls `check_target_module_exists` internally, which
         # has been extensively tested above.
         model_id = "hf-internal-testing/tiny-random-BloomForCausalLM"
-        model = AutoModel.from_pretrained(model_id)
+        with hub_online_once(model_id):
+            model = AutoModel.from_pretrained(model_id)
         # by default, this model matches query_key_value
         config = LoraConfig()
         peft_model = get_peft_model(model, config)
@@ -231,7 +233,8 @@ class PeftCustomKwargsTester(unittest.TestCase):
 
     def test_feedforward_matching_ia3(self):
         model_id = "hf-internal-testing/tiny-random-T5ForConditionalGeneration"
-        model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
+        with hub_online_once(model_id):
+            model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         # simple example for just one t5 block for testing
         config_kwargs = {
             "target_modules": ".*encoder.*block.0.*(SelfAttention|EncDecAttention|DenseReluDense).(k|q|v|wo|wi)$",
@@ -265,7 +268,8 @@ class PeftCustomKwargsTester(unittest.TestCase):
     def test_maybe_include_all_linear_layers_lora(
         self, model_id, model_type, initial_target_modules, expected_target_modules
     ):
-        model = self.transformers_class_map[model_type].from_pretrained(model_id)
+        with hub_online_once(model_id):
+            model = self.transformers_class_map[model_type].from_pretrained(model_id)
         config_cls = LoraConfig
         self._check_match_with_expected_target_modules(
             model_id, model, config_cls, initial_target_modules, expected_target_modules
@@ -281,7 +285,11 @@ class PeftCustomKwargsTester(unittest.TestCase):
             config_kwargs = {"quantization_config": BitsAndBytesConfig(load_in_4bit=True)}
         elif quantization == "8bit":
             config_kwargs = {"quantization_config": BitsAndBytesConfig(load_in_8bit=True)}
-        model = self.transformers_class_map[model_type].from_pretrained(model_id, device_map="auto", **config_kwargs)
+        with hub_online_once(model_id):
+            model = self.transformers_class_map[model_type].from_pretrained(
+                model_id, device_map="auto", **config_kwargs
+            )
         config_cls = LoraConfig
         self._check_match_with_expected_target_modules(
             model_id, model, config_cls, initial_target_modules, expected_target_modules
@@ -310,7 +318,8 @@ class PeftCustomKwargsTester(unittest.TestCase):
             INCLUDE_LINEAR_LAYERS_SHORTHAND,
             ["k_proj", "v_proj", "q_proj", "o_proj", "down_proj", "up_proj", "gate_proj"],
         )
-        model_ia3 = AutoModelForCausalLM.from_pretrained(model_id)
+        with hub_online_once(model_id):
+            model_ia3 = AutoModelForCausalLM.from_pretrained(model_id)
         model_loha = deepcopy(model_ia3)
         config_classes = [IA3Config, LoHaConfig]
         models = [model_ia3, model_loha]
@@ -322,7 +331,8 @@ class PeftCustomKwargsTester(unittest.TestCase):
     @parameterized.expand(MAYBE_INCLUDE_ALL_LINEAR_LAYERS_TEST_INTERNALS)
     def test_maybe_include_all_linear_layers_internals(self, initial_target_modules, expected_target_modules):
         model_id = "HuggingFaceH4/tiny-random-LlamaForCausalLM"
-        model = AutoModelForCausalLM.from_pretrained(model_id)
+        with hub_online_once(model_id):
+            model = AutoModelForCausalLM.from_pretrained(model_id)
         config = LoraConfig(base_model_name_or_path=model_id, target_modules=initial_target_modules)
         new_config = _maybe_include_all_linear_layers(config, model)
         if isinstance(expected_target_modules, list):
@@ -333,7 +343,8 @@ class PeftCustomKwargsTester(unittest.TestCase):
 
     def test_maybe_include_all_linear_layers_diffusion(self):
         model_id = "hf-internal-testing/tiny-sd-pipe"
-        model = StableDiffusionPipeline.from_pretrained(model_id)
+        with hub_online_once(model_id):
+            model = StableDiffusionPipeline.from_pretrained(model_id)
         config = LoraConfig(base_model_name_or_path=model_id, target_modules="all-linear")
 
         # all linear layers should be converted
@@ -347,7 +358,8 @@ class PeftCustomKwargsTester(unittest.TestCase):
         # Ensure that if a SEQ_CLS model is being used with target_modules="all-linear", the classification head is not
         # targeted by the adapter layer.
         model_id = "HuggingFaceH4/tiny-random-LlamaForCausalLM"
-        model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=10)
+        with hub_online_once(model_id):
+            model = AutoModelForSequenceClassification.from_pretrained(model_id, num_labels=10)
 
         # sanity check
         assert isinstance(model.score, nn.Linear)
@@ -372,7 +384,8 @@ class PeftCustomKwargsTester(unittest.TestCase):
         # See 2390
         # Ensure that if adapter layers are already applied, we don't get nested adapter layers (e.g. LoRA targeting the
         # lora_A, lora_B layers)
-        model = self.transformers_class_map[model_type].from_pretrained(model_id)
+        with hub_online_once(model_id):
+            model = self.transformers_class_map[model_type].from_pretrained(model_id)
         config_cls = LoraConfig
         self._check_match_with_expected_target_modules(
             model_id, model, config_cls, initial_target_modules, expected_target_modules
@@ -386,7 +399,8 @@ class PeftCustomKwargsTester(unittest.TestCase):
         # See 2390 Similar test to test_all_linear_nested_targets_correct_layers above, but using add_adapter instead of
         # calling get_peft_model in an already adapted model
         model_id = "HuggingFaceH4/tiny-random-LlamaForCausalLM"
-        model = AutoModelForCausalLM.from_pretrained(model_id)
+        with hub_online_once(model_id):
+            model = AutoModelForCausalLM.from_pretrained(model_id)
 
         # important: don't reuse the first config, since config.target_modules will be overwritten, which would make the
         # test pass trivially.
@@ -467,7 +481,9 @@ class TestTargetedModuleNames(unittest.TestCase):
         assert model.targeted_module_names == ["lin0", "lin1"]
 
     def test_realistic_example(self):
-        model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-BloomForCausalLM")
+        model_id = "hf-internal-testing/tiny-random-BloomForCausalLM"
+        with hub_online_once(model_id):
+            model = AutoModelForCausalLM.from_pretrained(model_id)
         config = LoraConfig(task_type="CAUSAL_LM")
         model = get_peft_model(model, config)
         expected = [
@@ -493,7 +509,9 @@ class TestTargetedParameterNames(unittest.TestCase):
         assert model.targeted_parameter_names == ["lin0.weight", "lin1.weight"]
 
     def test_realistic_example(self):
-        model = AutoModelForCausalLM.from_pretrained("trl-internal-testing/tiny-random-LlamaForCausalLM")
+        model_id = "trl-internal-testing/tiny-random-LlamaForCausalLM"
+        with hub_online_once(model_id):
+            model = AutoModelForCausalLM.from_pretrained(model_id)
         config = LoraConfig(target_modules=[], task_type="CAUSAL_LM", target_parameters=["v_proj.weight"])
         model = get_peft_model(model, config)
         expected = [
@@ -564,7 +582,9 @@ class TestExcludedModuleNames(unittest.TestCase):
            get_peft_model(model, LoraConfig(target_modules=["lin1"], exclude_modules=["non_existent_module"]))
 
     def test_realistic_example(self):
-        model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-BloomForCausalLM")
+        model_id = "hf-internal-testing/tiny-random-BloomForCausalLM"
+        with hub_online_once(model_id):
+            model = AutoModelForCausalLM.from_pretrained(model_id)
         config = LoraConfig(task_type="CAUSAL_LM", exclude_modules="transformer.h.2.self_attention.query_key_value")
         model = get_peft_model(model, config)
         expected = [
@@ -843,7 +863,8 @@ class TestModelAndLayerStatus:
     def test_base_model_type_transformers_automodel(self):
         # ensure that this also works with transformers AutoModels
         model_id = "google/flan-t5-small"
-        model = AutoModel.from_pretrained(model_id)
+        with hub_online_once(model_id):
+            model = AutoModel.from_pretrained(model_id)
         model = get_peft_model(model, LoraConfig())
         model_status = model.get_model_status()
         assert model_status.base_model_type == "T5Model"
@@ -1144,7 +1165,8 @@ class TestModelAndLayerStatus:
         model_id = "peft-internal-testing/gpt2-lora-random"
         # note that loading through AutoModelForCausalLM.from_pretrained does not enable training mode, hence
         # requires_grad=False
-        model = AutoModelForCausalLM.from_pretrained(model_id)
+        with hub_online_once(model_id):
+            model = AutoModelForCausalLM.from_pretrained(model_id)
         model_status = get_model_status(model)
         layer_status = get_layer_status(model)
@@ -1215,7 +1237,9 @@ class TestModelAndLayerStatus:
             get_model_status(model)
 
     def test_transformer_model_without_adapter_raises(self):
-        model = AutoModelForCausalLM.from_pretrained("gpt2")
+        model_id = "gpt2"
+        with hub_online_once(model_id):
+            model = AutoModelForCausalLM.from_pretrained(model_id)
         # note: full error message is longer
         with pytest.raises(ValueError, match="No adapter layers found in the model"):
             get_layer_status(model)
@@ -1224,7 +1248,9 @@ class TestModelAndLayerStatus:
             get_model_status(model)
 
     def test_prefix_tuning(self):
-        model = AutoModelForSeq2SeqLM.from_pretrained("hf-internal-testing/tiny-random-BartForConditionalGeneration")
+        model_id = "hf-internal-testing/tiny-random-BartForConditionalGeneration"
+        with hub_online_once(model_id):
+            model = AutoModelForSeq2SeqLM.from_pretrained(model_id)
         config = PromptTuningConfig(task_type="SEQ_2_SEQ_LM", num_virtual_tokens=10)
         model = get_peft_model(model, config)
@@ -1236,7 +1262,9 @@ class TestModelAndLayerStatus:
             model.get_model_status()
 
     def test_adaption_prompt(self):
-        model = AutoModelForCausalLM.from_pretrained("HuggingFaceH4/tiny-random-LlamaForCausalLM")
+        model_id = "HuggingFaceH4/tiny-random-LlamaForCausalLM"
+        with hub_online_once(model_id):
+            model = AutoModelForCausalLM.from_pretrained(model_id)
         config = AdaptionPromptConfig(adapter_layers=1, adapter_len=4)
         model = get_peft_model(model, config)
@@ -1322,8 +1350,10 @@ class TestBaseTunerWarnForTiedEmbeddings:
     )
 
     def _get_peft_model(self, tie_word_embeddings, target_module):
+        with hub_online_once(self.model_id):
+            base_model = AutoModelForCausalLM.from_pretrained(self.model_id, tie_word_embeddings=tie_word_embeddings)
         model = get_peft_model(
-            AutoModelForCausalLM.from_pretrained(self.model_id, tie_word_embeddings=tie_word_embeddings),
+            base_model,
             LoraConfig(target_modules=[target_module]),
         )
         return model
@@ -1454,7 +1484,8 @@ class TestFindMinimalTargetModules:
         # target_modules is big enough. The resulting model itself should be unaffected.
         torch.manual_seed(0)
         model_id = "facebook/opt-125m"  # must be big enough for optimization to trigger
-        model = AutoModelForCausalLM.from_pretrained(model_id)
+        with hub_online_once(model_id):
+            model = AutoModelForCausalLM.from_pretrained(model_id)
 
         # base case: specify target_modules in a minimal fashion
         config = LoraConfig(init_lora_weights=False, target_modules=["q_proj", "v_proj"])
@@ -1475,7 +1506,8 @@ class TestFindMinimalTargetModules:
         del model
 
         torch.manual_seed(0)
-        model = AutoModelForCausalLM.from_pretrained(model_id)
+        with hub_online_once(model_id):
+            model = AutoModelForCausalLM.from_pretrained(model_id)
         # pass the big target_modules to config
         config = LoraConfig(init_lora_weights=False, target_modules=big_target_modules)
         model = get_peft_model(model, config)