mirror of
https://github.com/huggingface/peft.git
synced 2025-10-20 15:33:48 +08:00
Compare commits
2 Commits
b774fd901e
...
f6b0a2dd43
Author | SHA1 | Date | |
---|---|---|---|
f6b0a2dd43 | |||
f1b83646a6 |
@ -143,5 +143,7 @@
|
||||
title: Helpers
|
||||
- local: package_reference/hotswap
|
||||
title: Hotswapping adapters
|
||||
- local: package_reference/functional
|
||||
title: Functions for PEFT integration
|
||||
title: Utilities
|
||||
title: API reference
|
||||
|
33
docs/source/package_reference/functional.md
Normal file
33
docs/source/package_reference/functional.md
Normal file
@ -0,0 +1,33 @@
|
||||
<!--⚠️ Note that this file is in Markdown but contain specific syntax for our doc-builder (similar to MDX) that may not be
|
||||
rendered properly in your Markdown viewer.
|
||||
-->
|
||||
|
||||
# Functions for PEFT integration
|
||||
|
||||
A collection of functions that could be useful for non-PeftModel models, e.g. transformers or diffusers integration
|
||||
|
||||
The functions provided here can be considered "public API" of PEFT and hence are safe to be used by packages that provide PEFT integrations.
|
||||
|
||||
## Cast the adapter weight dtypes
|
||||
[[autodoc]] functional.cast_adapter_dtype
|
||||
- all
|
||||
|
||||
## Delete the PEFT adapter from model
|
||||
[[autodoc]] functional.delete_adapter
|
||||
- all
|
||||
|
||||
## Get the state dict of the PEFT adapter
|
||||
[[autodoc]] functional.get_peft_model_state_dict
|
||||
- all
|
||||
|
||||
## Inject a PEFT adapter into the model based on a PEFT config
|
||||
[[autodoc]] functional.inject_adapter_in_model
|
||||
- all
|
||||
|
||||
## Set the active PEFT adapter(s) of the model
|
||||
[[autodoc]] functional.set_adapter
|
||||
- all
|
||||
|
||||
## Load the weights of the PEFT state dict into the model
|
||||
[[autodoc]] functional.set_peft_model_state_dict
|
||||
- all
|
33
src/peft/functional.py
Normal file
33
src/peft/functional.py
Normal file
@ -0,0 +1,33 @@
|
||||
# Copyright 2025-present the HuggingFace Inc. team.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
"""
|
||||
Functions that are useful for integration with non-PeftModel models, e.g. transformers or diffusers.
|
||||
|
||||
The functions provided here can be considered "public API" of PEFT and hence are safe to be used by packages that
|
||||
provide PEFT integrations.
|
||||
"""
|
||||
|
||||
from peft.mapping import inject_adapter_in_model
|
||||
from peft.tuners.tuners_utils import cast_adapter_dtype, delete_adapter, set_adapter
|
||||
from peft.utils import get_peft_model_state_dict, set_peft_model_state_dict
|
||||
|
||||
|
||||
__all__ = [
|
||||
"cast_adapter_dtype",
|
||||
"delete_adapter",
|
||||
"get_peft_model_state_dict",
|
||||
"inject_adapter_in_model",
|
||||
"set_adapter",
|
||||
"set_peft_model_state_dict",
|
||||
]
|
@ -52,13 +52,16 @@ def inject_adapter_in_model(
|
||||
state_dict: Optional[dict[str, torch.Tensor]] = None,
|
||||
) -> torch.nn.Module:
|
||||
r"""
|
||||
A simple API to create and inject adapter in-place into a model. Currently the API does not support prompt learning
|
||||
methods and adaption prompt. Make sure to have the correct `target_names` set in the `peft_config` object. The API
|
||||
calls `get_peft_model` under the hood but would be restricted only to non-prompt learning methods.
|
||||
Create PEFT layers and inject them into the model in-place.
|
||||
|
||||
Currently the API does not support prompt learning methods and adaption prompt.
|
||||
|
||||
This function is similar to [`get_peft_model`] but it does not return a [`PeftModel`] instance. Instead, it returns
|
||||
the original, mutated instance of the passed model.
|
||||
|
||||
Args:
|
||||
peft_config (`PeftConfig`):
|
||||
Configuration object containing the parameters of the Peft model.
|
||||
Configuration object containing the parameters of the PEFT model.
|
||||
model (`torch.nn.Module`):
|
||||
The input model where the adapter will be injected.
|
||||
adapter_name (`str`, `optional`, defaults to `"default"`):
|
||||
@ -66,9 +69,9 @@ def inject_adapter_in_model(
|
||||
low_cpu_mem_usage (`bool`, `optional`, defaults to `False`):
|
||||
Create empty adapter weights on meta device. Useful to speed up the loading process.
|
||||
state_dict (`dict`, *optional*, defaults to `None`)
|
||||
If a state_dict is passed here, the adapters will be injected based on the entries of the state_dict. This
|
||||
can be useful when the exact `target_modules` of the PEFT method is unknown, for instance because the
|
||||
checkpoint was created without meta data. Note that the values from the state_dict are not used, only the
|
||||
If a `state_dict` is passed here, the adapters will be injected based on the entries of the state_dict.
|
||||
This can be useful when the exact `target_modules` of the PEFT method is unknown, for instance because the
|
||||
checkpoint was created without meta data. Note that the values from the `state_dict` are not used, only the
|
||||
keys are used to determine the correct layers that should be adapted.
|
||||
"""
|
||||
if peft_config.is_prompt_learning or peft_config.is_adaption_prompt:
|
||||
|
@ -63,7 +63,8 @@ class AdaLoraModel(LoraModel):
|
||||
- **peft_config** ([`AdaLoraConfig`]): The configuration of the AdaLora model.
|
||||
"""
|
||||
|
||||
# Note: don't redefine prefix here, it should be inherited from LoraModel
|
||||
# Note: don't redefine prefix or tuner_layer_cls here, it should be inherited from LoraModel
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING
|
||||
|
||||
def __init__(self, model, config, adapter_name, **kwargs):
|
||||
super().__init__(model, config, adapter_name, **kwargs)
|
||||
@ -221,25 +222,6 @@ class AdaLoraModel(LoraModel):
|
||||
|
||||
return new_module
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
peft_config.target_modules = TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING[
|
||||
model_config["model_type"]
|
||||
]
|
||||
return peft_config
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "model": # see #1892: prevent infinite recursion if class is not initialized
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
def forward(self, *args, **kwargs):
|
||||
outputs = self.model.forward(*args, **kwargs)
|
||||
|
||||
|
@ -16,27 +16,15 @@
|
||||
# via Butterfly Factorization" (https://huggingface.co/papers/2311.06243) in ICLR 2024.
|
||||
|
||||
import warnings
|
||||
from dataclasses import asdict
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
from tqdm import tqdm
|
||||
|
||||
from peft.tuners.tuners_utils import (
|
||||
BaseTuner,
|
||||
BaseTunerLayer,
|
||||
check_target_module_exists,
|
||||
onload_layer,
|
||||
)
|
||||
from peft.utils import (
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
|
||||
ModulesToSaveWrapper,
|
||||
_get_submodules,
|
||||
)
|
||||
from peft.utils import TRANSFORMERS_MODELS_TO_BOFT_TARGET_MODULES_MAPPING
|
||||
|
||||
from .config import BOFTConfig
|
||||
from .layer import BOFTLayer, Conv2d, Linear
|
||||
|
||||
|
||||
@ -73,25 +61,8 @@ class BOFTModel(BaseTuner):
|
||||
"""
|
||||
|
||||
prefix: str = "boft_"
|
||||
|
||||
def _check_new_adapter_config(self, config: BOFTConfig) -> None:
|
||||
"""
|
||||
A helper method to check the config when a new adapter is being added.
|
||||
|
||||
Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.
|
||||
|
||||
"""
|
||||
# TODO: there should be a check if any of the existing adapters actually has bias != "none", or else the check
|
||||
# does not fully correspond to the error message.
|
||||
if (len(self.peft_config) > 1) and (config.bias != "none"):
|
||||
raise ValueError(
|
||||
f"{self.__class__.__name__} supports only 1 adapter with bias. When using multiple adapters, "
|
||||
"set bias to 'none' for all adapters."
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(boft_config, key):
|
||||
return check_target_module_exists(boft_config, key)
|
||||
tuner_layer_cls = BOFTLayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_BOFT_TARGET_MODULES_MAPPING
|
||||
|
||||
def _create_and_replace(
|
||||
self,
|
||||
@ -134,55 +105,6 @@ class BOFTModel(BaseTuner):
|
||||
init_weights=boft_config.init_weights,
|
||||
)
|
||||
|
||||
def _replace_module(self, parent, child_name, new_module, child):
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
|
||||
# child layer wraps the original module, unpack it
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
if not hasattr(new_module, "base_layer"):
|
||||
new_module.weight = child.weight
|
||||
if hasattr(child, "bias"):
|
||||
new_module.bias = child.bias
|
||||
|
||||
if getattr(child, "state", None) is not None:
|
||||
if hasattr(new_module, "base_layer"):
|
||||
new_module.base_layer.state = child.state
|
||||
else:
|
||||
new_module.state = child.state
|
||||
new_module.to(child.weight.device)
|
||||
|
||||
meta = torch.device("meta")
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if self.prefix in name:
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(child.weight.device)
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None:
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
for active_adapter in self.active_adapters:
|
||||
bias = self.peft_config[active_adapter].bias
|
||||
if bias == "none":
|
||||
continue
|
||||
|
||||
if bias == "all":
|
||||
for n, p in model.named_parameters():
|
||||
if "bias" in n:
|
||||
p.requires_grad = True
|
||||
elif bias == "boft_only":
|
||||
for name, m in model.named_modules():
|
||||
if isinstance(m, BOFTLayer) and hasattr(m, "bias") and m.bias is not None:
|
||||
m.bias.requires_grad = True
|
||||
else:
|
||||
raise NotImplementedError(f"Requested bias: {bias}, is not implemented.")
|
||||
|
||||
@staticmethod
|
||||
def _create_new_module(boft_config, adapter_name, target, **kwargs):
|
||||
if isinstance(target, BaseTunerLayer):
|
||||
@ -207,146 +129,3 @@ class BOFTModel(BaseTuner):
|
||||
)
|
||||
|
||||
return new_module
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "model": # see #1892: prevent infinite recursion if class is not initialized
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
def get_peft_config_as_dict(self, inference: bool = False):
|
||||
config_dict = {}
|
||||
for key, value in self.peft_config.items():
|
||||
config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(value).items()}
|
||||
if inference:
|
||||
config["inference_mode"] = True
|
||||
config_dict[key] = config
|
||||
return config
|
||||
|
||||
def _set_adapter_layers(self, enabled=True):
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self):
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self):
|
||||
for active_adapter in self.active_adapters:
|
||||
val = self.peft_config[active_adapter].bias
|
||||
if val != "none":
|
||||
msg = (
|
||||
f"Careful, disabling adapter layers with bias configured to be '{val}' does not produce the same "
|
||||
"output as the base model would without adaption."
|
||||
)
|
||||
warnings.warn(msg)
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def set_adapter(self, adapter_name, inference_mode: bool = False):
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, BOFTLayer):
|
||||
if module.merged:
|
||||
warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
|
||||
module.unmerge()
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
self.active_adapter = adapter_name
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
peft_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
return peft_config
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
merge=True,
|
||||
progressbar: bool = False,
|
||||
safe_merge: bool = False,
|
||||
adapter_names: Optional[list[str]] = None,
|
||||
):
|
||||
if merge:
|
||||
self._check_merge_allowed()
|
||||
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
desc = "Unloading " + ("and merging " if merge else "") + "model"
|
||||
for key in tqdm(key_list, disable=not progressbar, desc=desc):
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
with onload_layer(target):
|
||||
if hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
elif isinstance(target, ModulesToSaveWrapper):
|
||||
# save any additional trainable modules part of `modules_to_save`
|
||||
new_module = target.modules_to_save[target.active_adapter]
|
||||
if hasattr(new_module, "base_layer"):
|
||||
# check if the module is itself a tuner layer
|
||||
if merge:
|
||||
new_module.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
new_module = new_module.get_base_layer()
|
||||
setattr(parent, target_name, new_module)
|
||||
|
||||
return self.model
|
||||
|
||||
def delete_adapter(self, adapter_name: str) -> None:
|
||||
"""
|
||||
Deletes an existing adapter.
|
||||
|
||||
Args:
|
||||
adapter_name (str): Name of the adapter to be deleted.
|
||||
"""
|
||||
if adapter_name not in list(self.peft_config.keys()):
|
||||
raise ValueError(f"Adapter {adapter_name} does not exist")
|
||||
del self.peft_config[adapter_name]
|
||||
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
new_adapter = None
|
||||
for key in key_list:
|
||||
_, target, _ = _get_submodules(self.model, key)
|
||||
if isinstance(target, BOFTLayer):
|
||||
target.delete_adapter(adapter_name)
|
||||
if new_adapter is None:
|
||||
new_adapter = target.active_adapters[:]
|
||||
|
||||
self.active_adapter = new_adapter or []
|
||||
self._delete_auxiliary_adapter(adapter_name, new_active_adapters=new_adapter)
|
||||
|
||||
def merge_and_unload(
|
||||
self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
|
||||
) -> torch.nn.Module:
|
||||
r"""
|
||||
This method merges the BOFT layers into the base model. This is needed if someone wants to use the base model
|
||||
as a standalone model.
|
||||
|
||||
Args:
|
||||
progressbar (`bool`):
|
||||
whether to show a progressbar indicating the unload and merge process
|
||||
safe_merge (`bool`):
|
||||
whether to activate the safe merging check to check if there is any potential Nan in the adapter
|
||||
weights
|
||||
adapter_names (`List[str]`, *optional*):
|
||||
The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
|
||||
to `None`.
|
||||
|
||||
"""
|
||||
return self._unload_and_optionally_merge(
|
||||
progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
|
||||
def unload(self) -> torch.nn.Module:
|
||||
"""
|
||||
Gets back the base model by removing all the boft modules without merging. This gives back the original base
|
||||
model.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(merge=False)
|
||||
|
@ -96,7 +96,7 @@ class BoneConfig(PeftConfig):
|
||||
"help": "The layer pattern name, used only if `layers_to_transform` is different to None and if the layer pattern is not in the common layers pattern."
|
||||
},
|
||||
)
|
||||
bias: str = field(default="none", metadata={"help": "Bias type for Bone. Can be 'none', 'all' or 'Bone_only'"})
|
||||
bias: str = field(default="none", metadata={"help": "Bias type for Bone. Can be 'none', 'all' or 'bone_only'"})
|
||||
modules_to_save: Optional[list[str]] = field(
|
||||
default=None,
|
||||
metadata={
|
||||
|
@ -12,23 +12,12 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import warnings
|
||||
from dataclasses import asdict
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
from tqdm import tqdm
|
||||
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists
|
||||
from peft.utils import (
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
|
||||
ModulesToSaveWrapper,
|
||||
_get_submodules,
|
||||
)
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer
|
||||
from peft.utils import TRANSFORMERS_MODELS_TO_BONE_TARGET_MODULES_MAPPING
|
||||
|
||||
from .config import BoneConfig
|
||||
from .layer import BoneLayer, BoneLinear
|
||||
|
||||
|
||||
@ -83,25 +72,8 @@ class BoneModel(BaseTuner):
|
||||
"""
|
||||
|
||||
prefix: str = "bone_"
|
||||
|
||||
def _check_new_adapter_config(self, config: BoneConfig) -> None:
|
||||
"""
|
||||
A helper method to check the config when a new adapter is being added.
|
||||
|
||||
Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.
|
||||
|
||||
"""
|
||||
# TODO: there should be a check if any of the existing adapters actually has bias != "none", or else the check
|
||||
# does not fully correspond to the error message.
|
||||
if (len(self.peft_config) > 1) and (config.bias != "none"):
|
||||
raise ValueError(
|
||||
f"{self.__class__.__name__} supports only 1 adapter with bias. When using multiple adapters, "
|
||||
"set bias to 'none' for all adapters."
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(bone_config, key):
|
||||
return check_target_module_exists(bone_config, key)
|
||||
tuner_layer_cls = BoneLayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_BONE_TARGET_MODULES_MAPPING
|
||||
|
||||
def _create_and_replace(
|
||||
self,
|
||||
@ -137,55 +109,6 @@ class BoneModel(BaseTuner):
|
||||
init_weights=bone_config.init_weights,
|
||||
)
|
||||
|
||||
def _replace_module(self, parent, child_name, new_module, child):
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
|
||||
# child layer wraps the original module, unpack it
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
if not hasattr(new_module, "base_layer"):
|
||||
new_module.weight = child.weight
|
||||
if hasattr(child, "bias"):
|
||||
new_module.bias = child.bias
|
||||
|
||||
if getattr(child, "state", None) is not None:
|
||||
if hasattr(new_module, "base_layer"):
|
||||
new_module.base_layer.state = child.state
|
||||
else:
|
||||
new_module.state = child.state
|
||||
new_module.to(child.weight.device)
|
||||
|
||||
meta = torch.device("meta")
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if self.prefix in name:
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(child.weight.device)
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None:
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
for active_adapter in self.active_adapters:
|
||||
bias = self.peft_config[active_adapter].bias
|
||||
if bias == "none":
|
||||
continue
|
||||
|
||||
if bias == "all":
|
||||
for n, p in model.named_parameters():
|
||||
if "bias" in n:
|
||||
p.requires_grad = True
|
||||
elif bias == "bone_only":
|
||||
for name, m in model.named_modules():
|
||||
if isinstance(m, BoneLayer) and hasattr(m, "bias") and m.bias is not None:
|
||||
m.bias.requires_grad = True
|
||||
else:
|
||||
raise NotImplementedError(f"Requested bias: {bias}, is not implemented.")
|
||||
|
||||
@staticmethod
|
||||
def _create_new_module(bone_config, adapter_name, target, **kwargs):
|
||||
if isinstance(target, BaseTunerLayer):
|
||||
@ -201,138 +124,3 @@ class BoneModel(BaseTuner):
|
||||
)
|
||||
|
||||
return new_module
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "base_model":
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
def get_peft_config_as_dict(self, inference: bool = False):
|
||||
config_dict = {}
|
||||
for key, value in self.peft_config.items():
|
||||
config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(value).items()}
|
||||
if inference:
|
||||
config["inference_mode"] = True
|
||||
config_dict[key] = config
|
||||
return config
|
||||
|
||||
def _set_adapter_layers(self, enabled=True):
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self):
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self):
|
||||
for active_adapter in self.active_adapters:
|
||||
val = self.peft_config[active_adapter].bias
|
||||
if val != "none":
|
||||
msg = (
|
||||
f"Careful, disabling adapter layers with bias configured to be '{val}' does not produce the same "
|
||||
"output as the base model would without adaption."
|
||||
)
|
||||
warnings.warn(msg)
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def set_adapter(self, adapter_name, inference_mode: bool = False):
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, BoneLayer):
|
||||
if module.merged:
|
||||
warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
|
||||
module.unmerge()
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
self.active_adapter = adapter_name
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
peft_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
return peft_config
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
merge=True,
|
||||
progressbar: bool = False,
|
||||
safe_merge: bool = False,
|
||||
adapter_names: Optional[list[str]] = None,
|
||||
):
|
||||
self._unloading_checks(adapter_names)
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
desc = "Unloading " + ("and merging " if merge else "") + "model"
|
||||
for key in tqdm(key_list, disable=not progressbar, desc=desc):
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
|
||||
if hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
elif isinstance(target, ModulesToSaveWrapper):
|
||||
# save any additional trainable modules part of `modules_to_save`
|
||||
setattr(parent, target_name, target.modules_to_save[target.active_adapter])
|
||||
|
||||
return self.model
|
||||
|
||||
def delete_adapter(self, adapter_name: str) -> None:
|
||||
"""
|
||||
Deletes an existing adapter.
|
||||
|
||||
Args:
|
||||
adapter_name (str): Name of the adapter to be deleted.
|
||||
"""
|
||||
if adapter_name not in list(self.peft_config.keys()):
|
||||
raise ValueError(f"Adapter {adapter_name} does not exist")
|
||||
del self.peft_config[adapter_name]
|
||||
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
new_adapter = None
|
||||
for key in key_list:
|
||||
_, target, _ = _get_submodules(self.model, key)
|
||||
if isinstance(target, BoneLayer):
|
||||
target.delete_adapter(adapter_name)
|
||||
if new_adapter is None:
|
||||
new_adapter = target.active_adapters[:]
|
||||
|
||||
self.active_adapter = new_adapter or []
|
||||
self._delete_auxiliary_adapter(adapter_name, new_active_adapters=new_adapter)
|
||||
|
||||
def merge_and_unload(
|
||||
self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
|
||||
) -> torch.nn.Module:
|
||||
r"""
|
||||
This method merges the Bone layers into the base model. This is needed if someone wants to use the base model
|
||||
as a standalone model.
|
||||
|
||||
Args:
|
||||
progressbar (`bool`):
|
||||
whether to show a progressbar indicating the unload and merge process
|
||||
safe_merge (`bool`):
|
||||
whether to activate the safe merging check to check if there is any potential Nan in the adapter
|
||||
weights
|
||||
adapter_names (`List[str]`, *optional*):
|
||||
The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
|
||||
to `None`.
|
||||
|
||||
"""
|
||||
return self._unload_and_optionally_merge(
|
||||
progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
|
||||
def unload(self) -> torch.nn.Module:
|
||||
"""
|
||||
Gets back the base model by removing all the bone modules without merging. This gives back the original base
|
||||
model.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(merge=False)
|
||||
|
@ -14,23 +14,15 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import warnings
|
||||
from dataclasses import asdict
|
||||
from enum import Enum
|
||||
from itertools import chain
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from tqdm import tqdm
|
||||
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer
|
||||
from peft.utils import (
|
||||
TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING,
|
||||
ModulesToSaveWrapper,
|
||||
_get_submodules,
|
||||
)
|
||||
|
||||
from .config import C3AConfig
|
||||
from .layer import C3ALayer, C3ALinear
|
||||
|
||||
|
||||
@ -54,25 +46,8 @@ class C3AModel(BaseTuner):
|
||||
"""
|
||||
|
||||
prefix: str = "c3a_"
|
||||
|
||||
def _check_new_adapter_config(self, config: C3AConfig) -> None:
|
||||
"""
|
||||
A helper method to check the config when a new adapter is being added.
|
||||
|
||||
Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.
|
||||
|
||||
"""
|
||||
# TODO: there should be a check if any of the existing adapters actually has bias != "none", or else the check
|
||||
# does not fully correspond to the error message.
|
||||
if (len(self.peft_config) > 1) and (config.bias != "none"):
|
||||
raise ValueError(
|
||||
f"{self.__class__.__name__} supports only 1 adapter with bias. When using multiple adapters, "
|
||||
"set bias to 'none' for all adapters."
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(c3a_config, key):
|
||||
return check_target_module_exists(c3a_config, key)
|
||||
tuner_layer_cls = C3ALayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING
|
||||
|
||||
def _create_and_replace(
|
||||
self,
|
||||
@ -109,55 +84,6 @@ class C3AModel(BaseTuner):
|
||||
new_module.requires_grad_(False)
|
||||
self._replace_module(parent, target_name, new_module, target)
|
||||
|
||||
def _replace_module(self, parent, child_name, new_module, child):
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
|
||||
# child layer wraps the original module, unpack it
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
if not hasattr(new_module, "base_layer"):
|
||||
new_module.weight = child.weight
|
||||
if hasattr(child, "bias"):
|
||||
new_module.bias = child.bias
|
||||
|
||||
if getattr(child, "state", None) is not None:
|
||||
if hasattr(new_module, "base_layer"):
|
||||
new_module.base_layer.state = child.state
|
||||
else:
|
||||
new_module.state = child.state
|
||||
new_module.to(child.weight.device)
|
||||
|
||||
meta = torch.device("meta")
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if self.prefix in name:
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(child.weight.device)
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: torch.nn.Module) -> None:
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
for active_adapter in self.active_adapters:
|
||||
bias = self.peft_config[active_adapter].bias
|
||||
if bias == "none":
|
||||
continue
|
||||
|
||||
if bias == "all":
|
||||
for n, p in model.named_parameters():
|
||||
if "bias" in n:
|
||||
p.requires_grad = True
|
||||
elif bias == "c3a_only":
|
||||
for m in model.modules():
|
||||
if isinstance(m, C3ALayer) and hasattr(m, "bias") and m.bias is not None:
|
||||
m.bias.requires_grad = True
|
||||
else:
|
||||
raise NotImplementedError(f"Requested bias: {bias}, is not implemented.")
|
||||
|
||||
@staticmethod
|
||||
def _create_new_module(c3a_config, adapter_name, target, **kwargs):
|
||||
if isinstance(target, BaseTunerLayer):
|
||||
@ -169,127 +95,3 @@ class C3AModel(BaseTuner):
|
||||
new_module = C3ALinear(target, adapter_name, **kwargs)
|
||||
|
||||
return new_module
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
return getattr(self.model, name)
|
||||
|
||||
def get_peft_config_as_dict(self, inference: bool = False):
|
||||
config_dict = {}
|
||||
for key, value in self.peft_config.items():
|
||||
config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(value).items()}
|
||||
if inference:
|
||||
config["inference_mode"] = True
|
||||
config_dict[key] = config
|
||||
return config
|
||||
|
||||
def _set_adapter_layers(self, enabled: bool = True) -> None:
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self) -> None:
|
||||
"""Enable all adapters.
|
||||
|
||||
Call this if you have previously disabled all adapters and want to re-enable them.
|
||||
"""
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self) -> None:
|
||||
"""Disable all adapters.
|
||||
|
||||
When disabling all adapters, the model output corresponds to the output of the base model.
|
||||
"""
|
||||
for active_adapter in self.active_adapters:
|
||||
val = self.peft_config[active_adapter].bias
|
||||
if val != "none":
|
||||
msg = (
|
||||
f"Careful, disabling adapter layers with bias configured to be '{val}' does not produce the same "
|
||||
"output as the the base model would without adaption."
|
||||
)
|
||||
warnings.warn(msg)
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def set_adapter(self, adapter_name: str | list[str], inference_mode: bool = False) -> None:
|
||||
"""Set the active adapter(s).
|
||||
|
||||
Args:
|
||||
adapter_name (`str` or `list[str]`):
|
||||
Name(s) of the adapter(s) to be activated.
|
||||
inference_mode (bool, optional):
|
||||
Whether the activated adapter should be frozen (i.e. `requires_grad=False`). Default is False.
|
||||
"""
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, C3ALayer):
|
||||
if module.merged:
|
||||
warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
|
||||
module.unmerge()
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
self.active_adapter = adapter_name
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
peft_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
return peft_config
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
merge=True,
|
||||
progressbar: bool = False,
|
||||
safe_merge: bool = False,
|
||||
adapter_names: Optional[list[str]] = None,
|
||||
):
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
desc = "Unloading " + ("and merging " if merge else "") + "model"
|
||||
for key in tqdm(key_list, disable=not progressbar, desc=desc):
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
|
||||
if hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
elif isinstance(target, ModulesToSaveWrapper):
|
||||
# save any additional trainable modules part of `modules_to_save`
|
||||
setattr(parent, target_name, target.modules_to_save[target.active_adapter])
|
||||
|
||||
return self.model
|
||||
|
||||
def merge_and_unload(
|
||||
self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
|
||||
) -> torch.nn.Module:
|
||||
r"""
|
||||
This method merges the C3A layers into the base model. This is needed if someone wants to use the base model as
|
||||
a standalone model.
|
||||
|
||||
Args:
|
||||
progressbar (`bool`):
|
||||
whether to show a progressbar indicating the unload and merge process
|
||||
safe_merge (`bool`):
|
||||
whether to activate the safe merging check to check if there is any potential Nan in the adapter
|
||||
weights
|
||||
adapter_names (`list[str]`, *optional*):
|
||||
The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
|
||||
to `None`.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(
|
||||
progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
|
||||
def unload(self) -> torch.nn.Module:
|
||||
"""
|
||||
Gets back the base model by removing all the C3A modules without merging. This gives back the original base
|
||||
model.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(merge=False)
|
||||
|
@ -15,23 +15,16 @@ from __future__ import annotations
|
||||
|
||||
import re
|
||||
import warnings
|
||||
from dataclasses import asdict
|
||||
from enum import Enum
|
||||
from itertools import chain
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from tqdm import tqdm
|
||||
from transformers.pytorch_utils import Conv1D
|
||||
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer
|
||||
from peft.utils import (
|
||||
TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING,
|
||||
ModulesToSaveWrapper,
|
||||
_get_submodules,
|
||||
)
|
||||
|
||||
from .config import FourierFTConfig
|
||||
from .layer import FourierFTLayer, FourierFTLinear
|
||||
|
||||
|
||||
@ -57,25 +50,8 @@ class FourierFTModel(BaseTuner):
|
||||
"""
|
||||
|
||||
prefix: str = "fourierft_"
|
||||
|
||||
def _check_new_adapter_config(self, config: FourierFTConfig) -> None:
|
||||
"""
|
||||
A helper method to check the config when a new adapter is being added.
|
||||
|
||||
Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.
|
||||
|
||||
"""
|
||||
# TODO: there should be a check if any of the existing adapters actually has bias != "none", or else the check
|
||||
# does not fully correspond to the error message.
|
||||
if (len(self.peft_config) > 1) and (config.bias != "none"):
|
||||
raise ValueError(
|
||||
f"{self.__class__.__name__} supports only 1 adapter with bias. When using multiple adapters, "
|
||||
"set bias to 'none' for all adapters."
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(fourierft_config, key):
|
||||
return check_target_module_exists(fourierft_config, key)
|
||||
tuner_layer_cls = FourierFTLayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING
|
||||
|
||||
def _create_and_replace(
|
||||
self,
|
||||
@ -120,55 +96,6 @@ class FourierFTModel(BaseTuner):
|
||||
new_module.requires_grad_(False)
|
||||
self._replace_module(parent, target_name, new_module, target)
|
||||
|
||||
def _replace_module(self, parent, child_name, new_module, child):
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
|
||||
# child layer wraps the original module, unpack it
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
if not hasattr(new_module, "base_layer"):
|
||||
new_module.weight = child.weight
|
||||
if hasattr(child, "bias"):
|
||||
new_module.bias = child.bias
|
||||
|
||||
if getattr(child, "state", None) is not None:
|
||||
if hasattr(new_module, "base_layer"):
|
||||
new_module.base_layer.state = child.state
|
||||
else:
|
||||
new_module.state = child.state
|
||||
new_module.to(child.weight.device)
|
||||
|
||||
meta = torch.device("meta")
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if "fourierft_" in name:
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(child.weight.device)
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: torch.nn.Module) -> None:
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
for active_adapter in self.active_adapters:
|
||||
bias = self.peft_config[active_adapter].bias
|
||||
if bias == "none":
|
||||
continue
|
||||
|
||||
if bias == "all":
|
||||
for n, p in model.named_parameters():
|
||||
if "bias" in n:
|
||||
p.requires_grad = True
|
||||
elif bias == "fourier_only":
|
||||
for m in model.modules():
|
||||
if isinstance(m, FourierFTLayer) and hasattr(m, "bias") and m.bias is not None:
|
||||
m.bias.requires_grad = True
|
||||
else:
|
||||
raise NotImplementedError(f"Requested bias: {bias}, is not implemented.")
|
||||
|
||||
@staticmethod
|
||||
def _create_new_module(fourierft_config, adapter_name, target, **kwargs):
|
||||
if isinstance(target, BaseTunerLayer):
|
||||
@ -199,153 +126,3 @@ class FourierFTModel(BaseTuner):
|
||||
new_module = FourierFTLinear(target, adapter_name, **kwargs)
|
||||
|
||||
return new_module
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "model":
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
def get_peft_config_as_dict(self, inference: bool = False):
|
||||
config_dict = {}
|
||||
for key, value in self.peft_config.items():
|
||||
config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(value).items()}
|
||||
if inference:
|
||||
config["inference_mode"] = True
|
||||
config_dict[key] = config
|
||||
return config
|
||||
|
||||
def _set_adapter_layers(self, enabled: bool = True) -> None:
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self) -> None:
|
||||
"""Enable all adapters.
|
||||
|
||||
Call this if you have previously disabled all adapters and want to re-enable them.
|
||||
"""
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self) -> None:
|
||||
"""Disable all adapters.
|
||||
|
||||
When disabling all adapters, the model output corresponds to the output of the base model.
|
||||
"""
|
||||
for active_adapter in self.active_adapters:
|
||||
val = self.peft_config[active_adapter].bias
|
||||
if val != "none":
|
||||
msg = (
|
||||
f"Careful, disabling adapter layers with bias configured to be '{val}' does not produce the same "
|
||||
"output as the base model would without adaption."
|
||||
)
|
||||
warnings.warn(msg)
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def set_adapter(self, adapter_name: str | list[str], inference_mode: bool = False) -> None:
|
||||
"""Set the active adapter(s).
|
||||
|
||||
Args:
|
||||
adapter_name (`str` or `list[str]`):
|
||||
Name(s) of the adapter(s) to be activated.
|
||||
inference_mode (bool, optional):
|
||||
Whether the activated adapter should be frozen (i.e. `requires_grad=False`). Default is False.
|
||||
"""
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, FourierFTLayer):
|
||||
if module.merged:
|
||||
warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
|
||||
module.unmerge()
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
self.active_adapter = adapter_name
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
peft_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
return peft_config
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
merge=True,
|
||||
progressbar: bool = False,
|
||||
safe_merge: bool = False,
|
||||
adapter_names: Optional[list[str]] = None,
|
||||
):
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
desc = "Unloading " + ("and merging " if merge else "") + "model"
|
||||
for key in tqdm(key_list, disable=not progressbar, desc=desc):
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
|
||||
if hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
elif isinstance(target, ModulesToSaveWrapper):
|
||||
# save any additional trainable modules part of `modules_to_save`
|
||||
setattr(parent, target_name, target.modules_to_save[target.active_adapter])
|
||||
|
||||
return self.model
|
||||
|
||||
def delete_adapter(self, adapter_name: str):
|
||||
"""
|
||||
Deletes an existing adapter.
|
||||
|
||||
Args:
|
||||
adapter_name (str): Name of the adapter to be deleted.
|
||||
"""
|
||||
if adapter_name not in list(self.peft_config.keys()):
|
||||
raise ValueError(f"Adapter {adapter_name} does not exist")
|
||||
del self.peft_config[adapter_name]
|
||||
|
||||
# we cannot use self.prefix as we want to include non-trainable fourierft parameters
|
||||
key_list = [key for key, _ in self.model.named_modules() if "fourierft" not in key]
|
||||
new_adapter = None
|
||||
for key in key_list:
|
||||
_, target, _ = _get_submodules(self.model, key)
|
||||
if isinstance(target, FourierFTLayer):
|
||||
target.delete_adapter(adapter_name)
|
||||
if new_adapter is None:
|
||||
new_adapter = target.active_adapter[:]
|
||||
|
||||
self.active_adapter = new_adapter or []
|
||||
self._delete_auxiliary_adapter(adapter_name, new_active_adapters=new_adapter)
|
||||
|
||||
def merge_and_unload(
|
||||
self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
|
||||
) -> torch.nn.Module:
|
||||
r"""
|
||||
This method merges the Fourier layers into the base model. This is needed if someone wants to use the base
|
||||
model as a standalone model.
|
||||
|
||||
Args:
|
||||
progressbar (`bool`):
|
||||
whether to show a progressbar indicating the unload and merge process
|
||||
safe_merge (`bool`):
|
||||
whether to activate the safe merging check to check if there is any potential Nan in the adapter
|
||||
weights
|
||||
adapter_names (`List[str]`, *optional*):
|
||||
The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
|
||||
to `None`.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(
|
||||
progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
|
||||
def unload(self) -> torch.nn.Module:
|
||||
"""
|
||||
Gets back the base model by removing all the Fourier modules without merging. This gives back the original base
|
||||
model.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(merge=False)
|
||||
|
@ -12,23 +12,12 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import warnings
|
||||
from dataclasses import asdict
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
from tqdm import tqdm
|
||||
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists
|
||||
from peft.utils import (
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
|
||||
ModulesToSaveWrapper,
|
||||
_get_submodules,
|
||||
)
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer
|
||||
from peft.utils import TRANSFORMERS_MODELS_TO_HRA_TARGET_MODULES_MAPPING
|
||||
|
||||
from .config import HRAConfig
|
||||
from .layer import HRAConv2d, HRALayer, HRALinear
|
||||
|
||||
|
||||
@ -83,25 +72,8 @@ class HRAModel(BaseTuner):
|
||||
"""
|
||||
|
||||
prefix: str = "hra_"
|
||||
|
||||
def _check_new_adapter_config(self, config: HRAConfig) -> None:
|
||||
"""
|
||||
A helper method to check the config when a new adapter is being added.
|
||||
|
||||
Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.
|
||||
|
||||
"""
|
||||
# TODO: there should be a check if any of the existing adapters actually has bias != "none", or else the check
|
||||
# does not fully correspond to the error message.
|
||||
if (len(self.peft_config) > 1) and (config.bias != "none"):
|
||||
raise ValueError(
|
||||
f"{self.__class__.__name__} supports only 1 adapter with bias. When using multiple adapters, "
|
||||
"set bias to 'none' for all adapters."
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(hra_config, key):
|
||||
return check_target_module_exists(hra_config, key)
|
||||
tuner_layer_cls = HRALayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_HRA_TARGET_MODULES_MAPPING
|
||||
|
||||
def _create_and_replace(
|
||||
self,
|
||||
@ -139,55 +111,6 @@ class HRAModel(BaseTuner):
|
||||
init_weights=hra_config.init_weights,
|
||||
)
|
||||
|
||||
def _replace_module(self, parent, child_name, new_module, child):
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
|
||||
# child layer wraps the original module, unpack it
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
if not hasattr(new_module, "base_layer"):
|
||||
new_module.weight = child.weight
|
||||
if hasattr(child, "bias"):
|
||||
new_module.bias = child.bias
|
||||
|
||||
if getattr(child, "state", None) is not None:
|
||||
if hasattr(new_module, "base_layer"):
|
||||
new_module.base_layer.state = child.state
|
||||
else:
|
||||
new_module.state = child.state
|
||||
new_module.to(child.weight.device)
|
||||
|
||||
meta = torch.device("meta")
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if self.prefix in name:
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(child.weight.device)
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None:
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
for active_adapter in self.active_adapters:
|
||||
bias = self.peft_config[active_adapter].bias
|
||||
if bias == "none":
|
||||
continue
|
||||
|
||||
if bias == "all":
|
||||
for n, p in model.named_parameters():
|
||||
if "bias" in n:
|
||||
p.requires_grad = True
|
||||
elif bias == "hra_only":
|
||||
for name, m in model.named_modules():
|
||||
if isinstance(m, HRALayer) and hasattr(m, "bias") and m.bias is not None:
|
||||
m.bias.requires_grad = True
|
||||
else:
|
||||
raise NotImplementedError(f"Requested bias: {bias}, is not implemented.")
|
||||
|
||||
@staticmethod
|
||||
def _create_new_module(hra_config, adapter_name, target, **kwargs):
|
||||
if isinstance(target, BaseTunerLayer):
|
||||
@ -206,138 +129,3 @@ class HRAModel(BaseTuner):
|
||||
)
|
||||
|
||||
return new_module
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "base_model":
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
def get_peft_config_as_dict(self, inference: bool = False):
|
||||
config_dict = {}
|
||||
for key, value in self.peft_config.items():
|
||||
config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(value).items()}
|
||||
if inference:
|
||||
config["inference_mode"] = True
|
||||
config_dict[key] = config
|
||||
return config
|
||||
|
||||
def _set_adapter_layers(self, enabled=True):
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self):
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self):
|
||||
for active_adapter in self.active_adapters:
|
||||
val = self.peft_config[active_adapter].bias
|
||||
if val != "none":
|
||||
msg = (
|
||||
f"Careful, disabling adapter layers with bias configured to be '{val}' does not produce the same "
|
||||
"output as the base model would without adaption."
|
||||
)
|
||||
warnings.warn(msg)
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def set_adapter(self, adapter_name, inference_mode: bool = False):
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, HRALayer):
|
||||
if module.merged:
|
||||
warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
|
||||
module.unmerge()
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
self.active_adapter = adapter_name
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
peft_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
return peft_config
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
merge=True,
|
||||
progressbar: bool = False,
|
||||
safe_merge: bool = False,
|
||||
adapter_names: Optional[list[str]] = None,
|
||||
):
|
||||
self._unloading_checks(adapter_names)
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
desc = "Unloading " + ("and merging " if merge else "") + "model"
|
||||
for key in tqdm(key_list, disable=not progressbar, desc=desc):
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
|
||||
if hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
elif isinstance(target, ModulesToSaveWrapper):
|
||||
# save any additional trainable modules part of `modules_to_save`
|
||||
setattr(parent, target_name, target.modules_to_save[target.active_adapter])
|
||||
|
||||
return self.model
|
||||
|
||||
def delete_adapter(self, adapter_name: str) -> None:
|
||||
"""
|
||||
Deletes an existing adapter.
|
||||
|
||||
Args:
|
||||
adapter_name (str): Name of the adapter to be deleted.
|
||||
"""
|
||||
if adapter_name not in list(self.peft_config.keys()):
|
||||
raise ValueError(f"Adapter {adapter_name} does not exist")
|
||||
del self.peft_config[adapter_name]
|
||||
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
new_adapter = None
|
||||
for key in key_list:
|
||||
_, target, _ = _get_submodules(self.model, key)
|
||||
if isinstance(target, HRALayer):
|
||||
target.delete_adapter(adapter_name)
|
||||
if new_adapter is None:
|
||||
new_adapter = target.active_adapters[:]
|
||||
|
||||
self.active_adapter = new_adapter or []
|
||||
self._delete_auxiliary_adapter(adapter_name, new_active_adapters=new_adapter)
|
||||
|
||||
    def merge_and_unload(
        self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
    ) -> torch.nn.Module:
        r"""
        This method merges the HRA layers into the base model. This is needed if someone wants to use the base model as
        a standalone model.

        Args:
            progressbar (`bool`):
                whether to show a progressbar indicating the unload and merge process
            safe_merge (`bool`):
                whether to activate the safe merging check to check if there is any potential NaN in the adapter
                weights
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.

        """
        return self._unload_and_optionally_merge(
            progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
        )

    def unload(self) -> torch.nn.Module:
        """
        Gets back the base model by removing all the HRA modules without merging. This gives back the original base
        model.
        """
        return self._unload_and_optionally_merge(merge=False)
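As a minimal sketch of how these two exit points are typically reached through the public API (the checkpoint name and adapter settings below are illustrative assumptions, not taken from this change):

```py
from transformers import AutoModelForCausalLM

from peft import HRAConfig, get_peft_model

base = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")  # assumed small example checkpoint
model = get_peft_model(base, HRAConfig(target_modules=["q_proj", "v_proj"]))
# ... train the HRA adapter ...
merged = model.merge_and_unload()  # plain transformers model with the HRA weights folded in
# model.unload() would instead drop the HRA modules without merging them
```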
@ -15,16 +15,13 @@ from __future__ import annotations
|
||||
|
||||
import re
|
||||
import warnings
|
||||
from dataclasses import asdict, replace
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
from dataclasses import replace
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
from transformers.pytorch_utils import Conv1D
|
||||
|
||||
from peft.import_utils import is_bnb_4bit_available, is_bnb_available
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer
|
||||
from peft.utils import (
|
||||
TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING,
|
||||
@ -74,6 +71,7 @@ class IA3Model(BaseTuner):
|
||||
"""
|
||||
|
||||
prefix: str = "ia3_"
|
||||
tuner_layer_cls = IA3Layer
|
||||
|
||||
@staticmethod
|
||||
def _create_new_module(ia3_config, adapter_name, target, **kwargs):
|
||||
@ -143,15 +141,6 @@ class IA3Model(BaseTuner):
|
||||
)
|
||||
return new_module
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(ia3_config, key):
|
||||
return check_target_module_exists(ia3_config, key)
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None:
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
def _create_and_replace(
|
||||
self,
|
||||
ia3_config,
|
||||
@ -196,88 +185,6 @@ class IA3Model(BaseTuner):
|
||||
is_feedforward = any(key.endswith(target_key) for target_key in ia3_config.feedforward_modules)
|
||||
return is_feedforward
|
||||
|
||||
def _replace_module(self, parent, child_name, new_module, child):
|
||||
setattr(parent, child_name, new_module)
|
||||
|
||||
# child layer wraps the original module, unpack it
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
# layers with base_layer don't need the weight to be copied, as they have a reference already
|
||||
if not hasattr(new_module, "base_layer"):
|
||||
new_module.weight = child.weight
|
||||
if hasattr(child, "bias"):
|
||||
new_module.bias = child.bias
|
||||
|
||||
if getattr(child, "state", None) is not None:
|
||||
if hasattr(new_module, "base_layer"):
|
||||
new_module.base_layer.state = child.state
|
||||
else:
|
||||
new_module.state = child.state
|
||||
new_module.to(child.weight.device)
|
||||
|
||||
meta = torch.device("meta")
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if self.prefix in name:
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(child.weight.device)
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "model": # see #1892: prevent infinite recursion if class is not initialized
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
def get_peft_config_as_dict(self, inference: bool = False):
|
||||
config_dict = {}
|
||||
for key, value in self.peft_config.items():
|
||||
config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(value).items()}
|
||||
if inference:
|
||||
config["inference_mode"] = True
|
||||
config_dict[key] = config
|
||||
return config
|
||||
|
||||
def _set_adapter_layers(self, enabled=True):
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (IA3Layer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self) -> None:
|
||||
"""Enable all adapters.
|
||||
|
||||
Call this if you have previously disabled all adapters and want to re-enable them.
|
||||
"""
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self) -> None:
|
||||
"""Disable all adapters.
|
||||
|
||||
When disabling all adapters, the model output corresponds to the output of the base model.
|
||||
"""
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def set_adapter(self, adapter_name: str | list[str], inference_mode: bool = False) -> None:
|
||||
"""Set the active adapter(s).
|
||||
|
||||
Args:
|
||||
adapter_name (`str` or `list[str]`):
|
||||
Name(s) of the adapter(s) to be activated.
|
||||
inference_mode (bool, optional):
|
||||
Whether the activated adapter should be frozen (i.e. `requires_grad=False`). Default is False.
|
||||
"""
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, IA3Layer):
|
||||
if module.merged:
|
||||
warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
|
||||
module.unmerge()
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
self.active_adapter = adapter_name
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
@ -294,9 +201,7 @@ class IA3Model(BaseTuner):
|
||||
)
|
||||
return peft_config
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self, merge: bool = True, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
|
||||
):
|
||||
def _unload_and_optionally_merge(self, *args, **kwargs):
|
||||
r"""
|
||||
This method merges the (IA)^3 layers into the base model. This is needed if someone wants to use the base model
|
||||
as a standalone model.
|
||||
@ -316,86 +221,7 @@ class IA3Model(BaseTuner):
|
||||
if getattr(self.model, "is_loaded_in_4bit", False):
|
||||
raise ValueError("Cannot merge ia3 layers when the model is loaded in 4-bit mode")
|
||||
|
||||
self._unloading_checks(adapter_names)
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
for key in key_list:
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
|
||||
if hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
elif isinstance(target, ModulesToSaveWrapper):
|
||||
# save any additional trainable modules part of `modules_to_save`
|
||||
new_module = target.modules_to_save[target.active_adapter]
|
||||
if hasattr(new_module, "base_layer"):
|
||||
# check if the module is itself a tuner layer
|
||||
if merge:
|
||||
new_module.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
new_module = new_module.get_base_layer()
|
||||
setattr(parent, target_name, new_module)
|
||||
|
||||
return self.model
|
||||
|
||||
    def merge_and_unload(self, safe_merge: bool = False, adapter_names: Optional[list[str]] = None) -> torch.nn.Module:
        r"""
        This method merges the IA³ layers into the base model. This is needed if someone wants to use the base model as
        a standalone model.

        Args:
            safe_merge (`bool`):
                whether to activate the safe merging check to check if there is any potential NaN in the adapter
                weights
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.

        Example:

        ```py
        >>> from transformers import AutoModelForCausalLM
        >>> from peft import PeftModel

        >>> base_model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-40b")
        >>> peft_model_id = "smangrul/falcon-40B-int4-peft-lora-sfttrainer-sample"
        >>> model = PeftModel.from_pretrained(base_model, peft_model_id)
        >>> merged_model = model.merge_and_unload()
        ```
        """
        return self._unload_and_optionally_merge(safe_merge=safe_merge, adapter_names=adapter_names)

    def unload(self) -> torch.nn.Module:
        """
        Gets back the base model by removing all the IA³ modules without merging. This gives back the original base
        model.
        """
        return self._unload_and_optionally_merge(merge=False)
|
||||
|
||||
def delete_adapter(self, adapter_name: str) -> None:
|
||||
"""
|
||||
Deletes an existing adapter.
|
||||
|
||||
Args:
|
||||
adapter_name (str): Name of the adapter to be deleted.
|
||||
"""
|
||||
if adapter_name not in self.peft_config:
|
||||
raise ValueError(f"Adapter {adapter_name} does not exist")
|
||||
del self.peft_config[adapter_name]
|
||||
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
new_adapter = None
|
||||
for key in key_list:
|
||||
_, target, _ = _get_submodules(self.model, key)
|
||||
if isinstance(target, IA3Layer):
|
||||
target.delete_adapter(adapter_name)
|
||||
if new_adapter is None:
|
||||
new_adapter = target.active_adapters[:]
|
||||
|
||||
self.active_adapter = new_adapter or []
|
||||
self._delete_auxiliary_adapter(adapter_name, new_active_adapters=new_adapter)
|
||||
return super()._unload_and_optionally_merge(*args, **kwargs)
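A hedged sketch of the `delete_adapter` bookkeeping shown above, i.e. how the remaining active adapters survive a deletion (the checkpoint and module names are assumptions for illustration):

```py
from transformers import AutoModelForCausalLM

from peft import IA3Config, get_peft_model

base = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")  # assumed example checkpoint
model = get_peft_model(
    base, IA3Config(target_modules=["k_proj", "v_proj", "fc2"], feedforward_modules=["fc2"]), adapter_name="first"
)
model.add_adapter("second", IA3Config(target_modules=["k_proj", "v_proj", "fc2"], feedforward_modules=["fc2"]))
model.delete_adapter("second")  # removes the config and layer weights for "second"
# the surviving adapter(s) stay active, e.g. model.base_model.active_adapter -> ["first"]
```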
|
||||
|
||||
def _check_add_weighted_adapter(self, adapters: list[str]) -> tuple[str, str]:
|
||||
"""
|
||||
|
@ -13,16 +13,14 @@
|
||||
# limitations under the License.
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from typing import Optional
|
||||
|
||||
from torch import nn
|
||||
from torch.nn.modules import Module
|
||||
from tqdm import tqdm
|
||||
|
||||
from peft.config import PeftConfig
|
||||
from peft.tuners.tuners_utils import BaseTuner, _get_submodules, check_target_module_exists
|
||||
from peft.utils import TRANSFORMERS_MODELS_TO_LNTUNING_TARGET_MODULES_MAPPING, ModulesToSaveWrapper
|
||||
from peft.tuners.tuners_utils import BaseTuner, _get_submodules
|
||||
from peft.utils import TRANSFORMERS_MODELS_TO_LNTUNING_TARGET_MODULES_MAPPING
|
||||
|
||||
from .layer import LNTuningLayer
|
||||
|
||||
@ -64,26 +62,8 @@ class LNTuningModel(BaseTuner):
|
||||
"""
|
||||
|
||||
prefix: str = "ln_tuning_"
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "model": # see #1892: prevent infinite recursion if class is not initialized
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
# TODO: here need to handle the modules_to_save rather than the target_modules
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config: PeftConfig, model_config: dict) -> PeftConfig:
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LNTUNING_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
peft_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_LNTUNING_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
return peft_config
|
||||
tuner_layer_cls = LNTuningLayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_LNTUNING_TARGET_MODULES_MAPPING
|
||||
|
||||
def _create_and_replace(
|
||||
self,
|
||||
@ -113,59 +93,13 @@ class LNTuningModel(BaseTuner):
|
||||
new_module.update_layer(target.base_layer, adapter_name)
|
||||
return new_module
|
||||
|
||||
def _replace_module(self, parent: Module, child_name: str, new_module: Module, child: Module) -> None:
|
||||
setattr(parent, child_name, new_module)
|
||||
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
if getattr(child, "state", None) is not None:
|
||||
if hasattr(new_module, "base_layer"):
|
||||
new_module.base_layer.state = child.state
|
||||
else:
|
||||
new_module.state = child.state
|
||||
new_module.to(child.weight.device)
|
||||
|
||||
for name, module in new_module.named_modules():
|
||||
weight = child.qweight if hasattr(child, "qweight") else child.weight
|
||||
module.to(weight.device)
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: Module):
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
def _check_target_module_exists(self, peft_config: PeftConfig, key: str) -> bool:
|
||||
return check_target_module_exists(peft_config, key)
|
||||
|
||||
def _set_adapter_layers(self, enabled: bool) -> None:
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (LNTuningLayer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self) -> None:
|
||||
"""Enable all adapters.
|
||||
|
||||
Call this if you have previously disabled all adapters and want to re-enable them.
|
||||
"""
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self) -> None:
|
||||
"""Disable all adapters.
|
||||
|
||||
When disabling all adapters, the model output corresponds to the output of the base model.
|
||||
"""
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def set_adapter(self, adapter_name: str, inference_mode: bool = False) -> None:
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, LNTuningLayer):
|
||||
if module.merged:
|
||||
warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
|
||||
module.unmerge()
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
self.active_adapter = adapter_name
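The `inference_mode` flag used here can be exercised as in this sketch (the checkpoint and layer-norm module names are assumptions):

```py
from transformers import AutoModelForCausalLM

from peft import LNTuningConfig, get_peft_model

base = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")  # assumed example checkpoint
config = LNTuningConfig(target_modules=["self_attn_layer_norm", "final_layer_norm"])
model = get_peft_model(base, config, adapter_name="default")
# activate the adapter but keep its parameters frozen
model.base_model.set_adapter("default", inference_mode=True)
assert not any(p.requires_grad for n, p in model.named_parameters() if "ln_tuning_" in n)
```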
|
||||
def _unloading_checks(self, adapter_names: Optional[list[str]]):
|
||||
adapters_to_consider = adapter_names or self.active_adapters
|
||||
is_modules_to_save_available = any(
|
||||
self.peft_config[adapter].modules_to_save for adapter in adapters_to_consider
|
||||
)
|
||||
if is_modules_to_save_available and len(adapters_to_consider) > 1:
|
||||
raise ValueError("Cannot unload multiple adapters that specify `modules_to_save`.")
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
@ -191,14 +125,6 @@ class LNTuningModel(BaseTuner):
|
||||
|
||||
return self.model
|
||||
|
||||
def unload(self):
|
||||
return self._unload_and_optionally_merge(merge=False)
|
||||
|
||||
def merge_and_unload(
|
||||
self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
|
||||
) -> nn.Module:
|
||||
return self._unload_and_optionally_merge(merge=True)
|
||||
|
||||
def _cast_adapter_dtype(self, adapter_name: str, autocast_adapter_dtype: bool = True) -> None:
|
||||
# Note: LN Tuning does not add adapter layers, instead it creates copies of the original layer. For this reason,
|
||||
# we need to skip adapter autocasting, otherwise we would change the dtype of copies of the original layer,
|
||||
|
@ -83,6 +83,8 @@ class LoHaModel(LycorisTuner):
|
||||
"""
|
||||
|
||||
prefix: str = "hada_"
|
||||
tuner_layer_cls = LoHaLayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_LOHA_TARGET_MODULES_MAPPING
|
||||
layers_mapping: dict[type[torch.nn.Module], type[LoHaLayer]] = {
|
||||
torch.nn.Conv2d: Conv2d,
|
||||
torch.nn.Conv1d: Conv1d,
|
||||
@ -112,13 +114,3 @@ class LoHaModel(LycorisTuner):
|
||||
else:
|
||||
new_module = self._create_new_module(config, adapter_name, target, **kwargs)
|
||||
self._replace_module(parent, target_name, new_module, target)
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LOHA_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
peft_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_LOHA_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
return peft_config
|
||||
|
@ -84,6 +84,8 @@ class LoKrModel(LycorisTuner):
|
||||
"""
|
||||
|
||||
prefix: str = "lokr_"
|
||||
tuner_layer_cls = LoKrLayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_LOKR_TARGET_MODULES_MAPPING
|
||||
layers_mapping: dict[type[torch.nn.Module], type[LoKrLayer]] = {
|
||||
torch.nn.Conv2d: Conv2d,
|
||||
torch.nn.Conv1d: Conv1d,
|
||||
@ -114,13 +116,3 @@ class LoKrModel(LycorisTuner):
|
||||
else:
|
||||
new_module = self._create_new_module(config, adapter_name, target, **kwargs)
|
||||
self._replace_module(parent, target_name, new_module, target)
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LOKR_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
peft_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_LOKR_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
return peft_config
|
||||
|
@ -17,21 +17,17 @@ import math
|
||||
import operator
|
||||
import warnings
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import asdict, replace
|
||||
from enum import Enum
|
||||
from dataclasses import replace
|
||||
from functools import partial, reduce
|
||||
from typing import Literal, Optional
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
from tqdm import tqdm
|
||||
|
||||
from peft.import_utils import is_bnb_4bit_available, is_bnb_available
|
||||
from peft.tuners.tuners_utils import (
|
||||
BaseTuner,
|
||||
BaseTunerLayer,
|
||||
check_target_module_exists,
|
||||
onload_layer,
|
||||
replicate_layers,
|
||||
)
|
||||
from peft.utils import (
|
||||
@ -143,25 +139,8 @@ class LoraModel(BaseTuner):
|
||||
"""
|
||||
|
||||
prefix: str = "lora_"
|
||||
|
||||
def _check_new_adapter_config(self, config: LoraConfig) -> None:
|
||||
"""
|
||||
A helper method to check the config when a new adapter is being added.
|
||||
|
||||
Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.
|
||||
|
||||
"""
|
||||
# TODO: there should be a check if any of the existing adapters actually has bias != "none", or else the check
|
||||
# does not fully correspond to the error message.
|
||||
if (len(self.peft_config) > 1) and (config.bias != "none"):
|
||||
raise ValueError(
|
||||
f"{self.__class__.__name__} supports only 1 adapter with bias. When using multiple adapters, "
|
||||
"set bias to 'none' for all adapters."
|
||||
)
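In other words, a second adapter that also configures `bias` is rejected; a sketch (model id and target modules are assumptions):

```py
from transformers import AutoModelForCausalLM

from peft import LoraConfig, get_peft_model

base = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")  # assumed example checkpoint
model = get_peft_model(base, LoraConfig(target_modules=["q_proj", "v_proj"], bias="lora_only"), adapter_name="first")
try:
    model.add_adapter("second", LoraConfig(target_modules=["q_proj", "v_proj"], bias="all"))
except ValueError as exc:
    print(exc)  # "... supports only 1 adapter with bias ..."
```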
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(lora_config, key):
|
||||
return check_target_module_exists(lora_config, key)
|
||||
tuner_layer_cls = LoraLayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING
|
||||
|
||||
def _prepare_model(self, peft_config: LoraConfig, model: nn.Module):
|
||||
r"""
|
||||
@ -273,6 +252,8 @@ class LoraModel(BaseTuner):
|
||||
self._replace_module(parent, target_name, new_module, target)
|
||||
|
||||
def _replace_module(self, parent, child_name, new_module, child):
|
||||
# override in LoraModel to handle quantized weights properly
|
||||
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
@ -298,27 +279,6 @@ class LoraModel(BaseTuner):
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(weight.device)
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None:
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
for active_adapter in self.active_adapters:
|
||||
bias = self.peft_config[active_adapter].bias
|
||||
if bias == "none":
|
||||
continue
|
||||
|
||||
if bias == "all":
|
||||
for n, p in model.named_parameters():
|
||||
if "bias" in n:
|
||||
p.requires_grad = True
|
||||
elif bias == "lora_only":
|
||||
for m in model.modules():
|
||||
if isinstance(m, LoraLayer) and hasattr(m, "bias") and m.bias is not None:
|
||||
m.bias.requires_grad = True
|
||||
else:
|
||||
raise NotImplementedError(f"Requested bias: {bias}, is not implemented.")
|
||||
|
||||
@staticmethod
|
||||
def _create_new_module(lora_config, adapter_name, target, **kwargs):
|
||||
# Collect dispatcher functions to decide what backend to use for the replaced LoRA layer. The order matters,
|
||||
@ -386,69 +346,6 @@ class LoraModel(BaseTuner):
|
||||
|
||||
return new_module
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "model": # see #1892: prevent infinite recursion if class is not initialized
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
def get_peft_config_as_dict(self, inference: bool = False):
|
||||
config_dict = {}
|
||||
for key, value in self.peft_config.items():
|
||||
config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(value).items()}
|
||||
if inference:
|
||||
config["inference_mode"] = True
|
||||
config_dict[key] = config
|
||||
return config
|
||||
|
||||
def _set_adapter_layers(self, enabled: bool = True) -> None:
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, AuxiliaryTrainingWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self) -> None:
|
||||
"""Enable all adapters.
|
||||
|
||||
Call this if you have previously disabled all adapters and want to re-enable them.
|
||||
"""
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self) -> None:
|
||||
"""Disable all adapters.
|
||||
|
||||
When disabling all adapters, the model output corresponds to the output of the base model.
|
||||
"""
|
||||
for active_adapter in self.active_adapters:
|
||||
val = self.peft_config[active_adapter].bias
|
||||
if val != "none":
|
||||
                msg = (
                    f"Careful, disabling adapter layers with bias configured to be '{val}' does not produce the same "
                    "output as the base model would without adaptation."
|
||||
)
|
||||
warnings.warn(msg)
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
    def set_adapter(self, adapter_name: str | list[str], inference_mode: bool = False) -> None:
        """Set the active adapter(s).
|
||||
|
||||
Args:
|
||||
adapter_name (str, list[str]):
|
||||
The name(s) of the adapter(s) to set as active
|
||||
inference_mode (bool, optional):
|
||||
Whether the activated adapter should be frozen (i.e. `requires_grad=False`). Default is False.
|
||||
"""
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, LoraLayer):
|
||||
if module.merged:
|
||||
warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
|
||||
module.unmerge()
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
self.active_adapter = adapter_name
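A short sketch of switching adapters through this method (checkpoint and adapter names are illustrative assumptions):

```py
from transformers import AutoModelForCausalLM

from peft import LoraConfig, get_peft_model

base = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")  # assumed example checkpoint
model = get_peft_model(base, LoraConfig(target_modules=["q_proj", "v_proj"]), adapter_name="task_a")
model.add_adapter("task_b", LoraConfig(target_modules=["q_proj", "v_proj"]))
model.base_model.set_adapter("task_b")              # a single active adapter
model.base_model.set_adapter(["task_a", "task_b"])  # or several at once
```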
|
||||
|
||||
@contextmanager
|
||||
def _enable_peft_forward_hooks(self, *args, **kwargs):
|
||||
# If adapter_names is passed as an argument, we inject it into the forward arguments.
|
||||
@ -532,48 +429,14 @@ class LoraModel(BaseTuner):
|
||||
if self.peft_config.get("layer_replication"):
|
||||
raise ValueError("Cannot merge LORA layers when base model layers are replicated")
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
def _prepare_adapter_config(self, peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING:
|
||||
peft_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
if model_config["model_type"] in self.target_module_mapping:
|
||||
peft_config.target_modules = set(self.target_module_mapping[model_config["model_type"]])
|
||||
elif not peft_config.target_parameters:
|
||||
                raise ValueError("Please specify `target_modules` or `target_parameters` in `peft_config`")
|
||||
return peft_config
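For a concrete sense of the mapping lookup above (the `"llama"` entry and its value are cited only as an example and may differ between releases):

```py
from peft import LoraConfig
from peft.utils import TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING

config = LoraConfig()  # no target_modules and no target_parameters given
defaults = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING["llama"]
config.target_modules = set(defaults)  # e.g. {"q_proj", "v_proj"} for LLaMA-style models
```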
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
merge=True,
|
||||
progressbar: bool = False,
|
||||
safe_merge: bool = False,
|
||||
adapter_names: Optional[list[str]] = None,
|
||||
):
|
||||
if merge:
|
||||
self._check_merge_allowed()
|
||||
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
desc = "Unloading " + ("and merging " if merge else "") + "model"
|
||||
for key in tqdm(key_list, disable=not progressbar, desc=desc):
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
with onload_layer(target):
|
||||
if hasattr(target, "unload_and_optionally_merge_module"):
|
||||
# if layers have special unloading method, like MultiheadAttention, use that
|
||||
unloaded_module = target.unload_and_optionally_merge_module(
|
||||
merge=merge, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
self._replace_module(parent, target_name, unloaded_module, target)
|
||||
elif hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
|
||||
return self.model
|
||||
|
||||
def _check_add_weighted_adapter(
|
||||
self, adapters: list[str], combination_type: str, svd_rank: int | None
|
||||
) -> tuple[str, int, str]:
|
||||
@ -901,68 +764,6 @@ class LoraModel(BaseTuner):
|
||||
lora_deltas = [delta.to(dtype) for delta in lora_deltas]
|
||||
return lora_deltas
|
||||
|
||||
def delete_adapter(self, adapter_name: str) -> None:
|
||||
"""
|
||||
Deletes an existing adapter.
|
||||
|
||||
Args:
|
||||
adapter_name (str): Name of the adapter to be deleted.
|
||||
"""
|
||||
if adapter_name not in list(self.peft_config.keys()):
|
||||
raise ValueError(f"Adapter {adapter_name} does not exist")
|
||||
del self.peft_config[adapter_name]
|
||||
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
new_adapter = None
|
||||
for key in key_list:
|
||||
_, target, _ = _get_submodules(self.model, key)
|
||||
if isinstance(target, LoraLayer):
|
||||
target.delete_adapter(adapter_name)
|
||||
if new_adapter is None:
|
||||
new_adapter = target.active_adapters[:]
|
||||
|
||||
self.active_adapter = new_adapter or []
|
||||
self._delete_auxiliary_adapter(adapter_name, new_active_adapters=new_adapter)
|
||||
|
||||
    def merge_and_unload(
        self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
    ) -> torch.nn.Module:
        r"""
        This method merges the LoRA layers into the base model. This is needed if someone wants to use the base model
        as a standalone model.

        Args:
            progressbar (`bool`):
                whether to show a progressbar indicating the unload and merge process
            safe_merge (`bool`):
                whether to activate the safe merging check to check if there is any potential NaN in the adapter
                weights
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.

        Example:

        ```py
        >>> from transformers import AutoModelForCausalLM
        >>> from peft import PeftModel

        >>> base_model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-40b")
        >>> peft_model_id = "smangrul/falcon-40B-int4-peft-lora-sfttrainer-sample"
        >>> model = PeftModel.from_pretrained(base_model, peft_model_id)
        >>> merged_model = model.merge_and_unload()
        ```
        """
        return self._unload_and_optionally_merge(
            progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
        )

    def unload(self) -> torch.nn.Module:
        """
        Gets back the base model by removing all the LoRA modules without merging. This gives back the original base
        model.
        """
        return self._unload_and_optionally_merge(merge=False)
|
||||
|
||||
def subtract_mutated_init(self, output_state_dict: dict[str, torch.Tensor], adapter_name: str, kwargs=None):
|
||||
"""
|
||||
This function can calculate the updates of the PiSSA/CorDA/OLoRA by comparing the parameters of the
|
||||
|
@ -20,15 +20,10 @@ from typing import Any, Optional, Union
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from tqdm import tqdm
|
||||
|
||||
from peft.config import PeftConfig
|
||||
from peft.utils import (
|
||||
ModulesToSaveWrapper,
|
||||
_get_submodules,
|
||||
)
|
||||
|
||||
from .tuners_utils import BaseTuner, BaseTunerLayer, check_adapters_to_merge, check_target_module_exists
|
||||
from .tuners_utils import BaseTuner, BaseTunerLayer, check_adapters_to_merge
|
||||
|
||||
|
||||
@dataclass
|
||||
@ -201,21 +196,9 @@ class LycorisTuner(BaseTuner):
|
||||
"""
|
||||
|
||||
prefix: str
|
||||
tuner_layer_cls = LycorisLayer
|
||||
layers_mapping: dict[type[torch.nn.Module], type[LycorisLayer]]
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "model": # see #1892: prevent infinite recursion if class is not initialized
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(config, key):
|
||||
return check_target_module_exists(config, key)
|
||||
|
||||
@abstractmethod
|
||||
def _create_and_replace(
|
||||
self,
|
||||
@ -269,163 +252,3 @@ class LycorisTuner(BaseTuner):
|
||||
)
|
||||
|
||||
return new_module
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None:
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
return peft_config
|
||||
|
||||
def _replace_module(self, parent, child_name, new_module, child):
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
|
||||
if not hasattr(new_module, "base_layer"):
|
||||
new_module.weight = child.weight
|
||||
if hasattr(child, "bias"):
|
||||
new_module.bias = child.bias
|
||||
|
||||
if getattr(child, "state", None) is not None:
|
||||
if hasattr(new_module, "base_layer"):
|
||||
new_module.base_layer.state = child.state
|
||||
else:
|
||||
new_module.state = child.state
|
||||
new_module.to(child.weight.device)
|
||||
|
||||
meta = torch.device("meta")
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if self.prefix in name:
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(child.weight.device)
|
||||
|
||||
def _set_adapter_layers(self, enabled=True):
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
merge: bool = True,
|
||||
progressbar: bool = False,
|
||||
safe_merge: bool = False,
|
||||
adapter_names: Optional[list[str]] = None,
|
||||
):
|
||||
if merge:
|
||||
if getattr(self.model, "quantization_method", None) == "gptq":
|
||||
raise ValueError("Cannot merge LOHA layers when the model is gptq quantized")
|
||||
|
||||
self._unloading_checks(adapter_names)
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
desc = "Unloading " + ("and merging " if merge else "") + "model"
|
||||
for key in tqdm(key_list, disable=not progressbar, desc=desc):
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
|
||||
if hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
elif isinstance(target, ModulesToSaveWrapper):
|
||||
# save any additional trainable modules part of `modules_to_save`
|
||||
new_module = target.modules_to_save[target.active_adapter]
|
||||
if hasattr(new_module, "base_layer"):
|
||||
# check if the module is itself a tuner layer
|
||||
if merge:
|
||||
new_module.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
new_module = new_module.get_base_layer()
|
||||
setattr(parent, target_name, new_module)
|
||||
|
||||
return self.model
|
||||
|
||||
def enable_adapter_layers(self) -> None:
|
||||
"""Enable all adapters.
|
||||
|
||||
Call this if you have previously disabled all adapters and want to re-enable them.
|
||||
"""
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self) -> None:
|
||||
"""Disable all adapters.
|
||||
|
||||
When disabling all adapters, the model output corresponds to the output of the base model.
|
||||
"""
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
    def merge_and_unload(
        self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
    ) -> torch.nn.Module:
        r"""
        This method merges the adapter layers into the base model. This is needed if someone wants to use the base
        model as a standalone model.

        Args:
            progressbar (`bool`):
                whether to show a progressbar indicating the unload and merge process
            safe_merge (`bool`):
                whether to activate the safe merging check to check if there is any potential NaN in the adapter
                weights
            adapter_names (`List[str]`, *optional*):
                The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
                to `None`.

        """
        return self._unload_and_optionally_merge(
            progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
        )

    def unload(self) -> torch.nn.Module:
        """
        Gets back the base model by removing all the adapter modules without merging. This gives back the original base
        model.
        """
        return self._unload_and_optionally_merge(merge=False)
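The same pattern applies to the LyCORIS subclasses; a minimal LoHa sketch (checkpoint and target modules are assumptions):

```py
from transformers import AutoModelForCausalLM

from peft import LoHaConfig, get_peft_model

base = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")  # assumed example checkpoint
model = get_peft_model(base, LoHaConfig(target_modules=["q_proj", "v_proj"]))
merged = model.merge_and_unload()  # LoHa deltas folded into the base weights
# model.unload() would restore the original modules without merging
```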
def set_adapter(self, adapter_name: str | list[str], inference_mode: bool = False) -> None:
|
||||
"""Set the active adapter(s).
|
||||
|
||||
Args:
|
||||
adapter_name (`str` or `list[str]`):
|
||||
Name(s) of the adapter(s) to be activated.
|
||||
inference_mode (bool, optional):
|
||||
Whether the activated adapter should be frozen (i.e. `requires_grad=False`). Default is False.
|
||||
"""
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, LycorisLayer):
|
||||
if module.merged:
|
||||
warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
|
||||
module.unmerge()
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
self.active_adapter = adapter_name
|
||||
|
||||
def delete_adapter(self, adapter_name: str) -> None:
|
||||
"""
|
||||
Deletes an existing adapter.
|
||||
|
||||
Args:
|
||||
adapter_name (`str`): Name of the adapter to be deleted.
|
||||
"""
|
||||
if adapter_name not in list(self.peft_config.keys()):
|
||||
raise ValueError(f"Adapter {adapter_name} does not exist")
|
||||
del self.peft_config[adapter_name]
|
||||
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
new_adapter = None
|
||||
for key in key_list:
|
||||
_, target, _ = _get_submodules(self.model, key)
|
||||
if isinstance(target, LycorisLayer):
|
||||
target.delete_adapter(adapter_name)
|
||||
if new_adapter is None:
|
||||
new_adapter = target.active_adapters[:]
|
||||
|
||||
self.active_adapter = new_adapter or []
|
||||
self._delete_auxiliary_adapter(adapter_name, new_active_adapters=new_adapter)
|
||||
|
@ -12,23 +12,12 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import warnings
|
||||
from dataclasses import asdict
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
from tqdm import tqdm
|
||||
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists
|
||||
from peft.utils import (
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
|
||||
ModulesToSaveWrapper,
|
||||
_get_submodules,
|
||||
)
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer
|
||||
from peft.utils import TRANSFORMERS_MODELS_TO_MISS_TARGET_MODULES_MAPPING
|
||||
|
||||
from .config import MissConfig
|
||||
from .layer import MissLayer, MissLinear
|
||||
|
||||
|
||||
@ -83,25 +72,8 @@ class MissModel(BaseTuner):
|
||||
"""
|
||||
|
||||
prefix: str = "miss_"
|
||||
|
||||
def _check_new_adapter_config(self, config: MissConfig) -> None:
|
||||
"""
|
||||
A helper method to check the config when a new adapter is being added.
|
||||
|
||||
Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.
|
||||
|
||||
"""
|
||||
# TODO: there should be a check if any of the existing adapters actually has bias != "none", or else the check
|
||||
# does not fully correspond to the error message.
|
||||
if (len(self.peft_config) > 1) and (config.bias != "none"):
|
||||
raise ValueError(
|
||||
f"{self.__class__.__name__} supports only 1 adapter with bias. When using multiple adapters, "
|
||||
"set bias to 'none' for all adapters."
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(miss_config, key):
|
||||
return check_target_module_exists(miss_config, key)
|
||||
tuner_layer_cls = MissLayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_MISS_TARGET_MODULES_MAPPING
|
||||
|
||||
def _create_and_replace(
|
||||
self,
|
||||
@ -141,55 +113,6 @@ class MissModel(BaseTuner):
|
||||
mini_r=miss_config.mini_r,
|
||||
)
|
||||
|
||||
def _replace_module(self, parent, child_name, new_module, child):
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
|
||||
# child layer wraps the original module, unpack it
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
if not hasattr(new_module, "base_layer"):
|
||||
new_module.weight = child.weight
|
||||
if hasattr(child, "bias"):
|
||||
new_module.bias = child.bias
|
||||
|
||||
if getattr(child, "state", None) is not None:
|
||||
if hasattr(new_module, "base_layer"):
|
||||
new_module.base_layer.state = child.state
|
||||
else:
|
||||
new_module.state = child.state
|
||||
new_module.to(child.weight.device)
|
||||
|
||||
meta = torch.device("meta")
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if self.prefix in name:
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(child.weight.device)
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None:
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
for active_adapter in self.active_adapters:
|
||||
bias = self.peft_config[active_adapter].bias
|
||||
if bias == "none":
|
||||
continue
|
||||
|
||||
if bias == "all":
|
||||
for n, p in model.named_parameters():
|
||||
if "bias" in n:
|
||||
p.requires_grad = True
|
||||
elif bias == "miss_only":
|
||||
for name, m in model.named_modules():
|
||||
if isinstance(m, MissLayer) and hasattr(m, "bias") and m.bias is not None:
|
||||
m.bias.requires_grad = True
|
||||
else:
|
||||
raise NotImplementedError(f"Requested bias: {bias}, is not implemented.")
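The three `bias` modes handled above are selected on the config; a sketch, assuming `MissConfig` exposes the `bias` field referenced here:

```py
from peft import MissConfig

# "none": no extra bias training; "all": every bias in the model stays trainable;
# "miss_only": only bias terms attached to MiSS layers stay trainable
config = MissConfig(target_modules=["q_proj", "v_proj"], bias="miss_only")
```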
|
||||
|
||||
@staticmethod
|
||||
def _create_new_module(miss_config, adapter_name, target, **kwargs):
|
||||
if isinstance(target, BaseTunerLayer):
|
||||
@ -205,138 +128,3 @@ class MissModel(BaseTuner):
|
||||
)
|
||||
|
||||
return new_module
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "base_model":
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
def get_peft_config_as_dict(self, inference: bool = False):
|
||||
config_dict = {}
|
||||
for key, value in self.peft_config.items():
|
||||
config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(value).items()}
|
||||
if inference:
|
||||
config["inference_mode"] = True
|
||||
config_dict[key] = config
|
||||
return config
|
||||
|
||||
def _set_adapter_layers(self, enabled=True):
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self):
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self):
|
||||
for active_adapter in self.active_adapters:
|
||||
val = self.peft_config[active_adapter].bias
|
||||
if val != "none":
|
||||
                msg = (
                    f"Careful, disabling adapter layers with bias configured to be '{val}' does not produce the same "
                    "output as the base model would without adaptation."
|
||||
)
|
||||
warnings.warn(msg)
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def set_adapter(self, adapter_name, inference_mode: bool = False):
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, MissLayer):
|
||||
if module.merged:
|
||||
warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
|
||||
module.unmerge()
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
self.active_adapter = adapter_name
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
peft_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
return peft_config
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
merge=True,
|
||||
progressbar: bool = False,
|
||||
safe_merge: bool = False,
|
||||
adapter_names: Optional[list[str]] = None,
|
||||
):
|
||||
self._unloading_checks(adapter_names)
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
desc = "Unloading " + ("and merging " if merge else "") + "model"
|
||||
for key in tqdm(key_list, disable=not progressbar, desc=desc):
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
|
||||
if hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
elif isinstance(target, ModulesToSaveWrapper):
|
||||
# save any additional trainable modules part of `modules_to_save`
|
||||
setattr(parent, target_name, target.modules_to_save[target.active_adapter])
|
||||
|
||||
return self.model
|
||||
|
||||
def delete_adapter(self, adapter_name: str) -> None:
|
||||
"""
|
||||
Deletes an existing adapter.
|
||||
|
||||
Args:
|
||||
adapter_name (str): Name of the adapter to be deleted.
|
||||
"""
|
||||
if adapter_name not in list(self.peft_config.keys()):
|
||||
raise ValueError(f"Adapter {adapter_name} does not exist")
|
||||
del self.peft_config[adapter_name]
|
||||
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
new_adapter = None
|
||||
for key in key_list:
|
||||
_, target, _ = _get_submodules(self.model, key)
|
||||
if isinstance(target, MissLayer):
|
||||
target.delete_adapter(adapter_name)
|
||||
if new_adapter is None:
|
||||
new_adapter = target.active_adapters[:]
|
||||
|
||||
self.active_adapter = new_adapter or []
|
||||
self._delete_auxiliary_adapter(adapter_name, new_active_adapters=new_adapter)
|
||||
|
||||
def merge_and_unload(
|
||||
self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
|
||||
) -> torch.nn.Module:
|
||||
r"""
|
||||
This method merges the MiSS layers into the base model. This is needed if someone wants to use the base model
|
||||
as a standalone model.
|
||||
|
||||
Args:
|
||||
progressbar (`bool`):
|
||||
whether to show a progressbar indicating the unload and merge process
|
||||
safe_merge (`bool`):
|
||||
                whether to activate the safe merging check to check if there is any potential NaN in the adapter
                weights
|
||||
adapter_names (`List[str]`, *optional*):
|
||||
The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
|
||||
to `None`.
|
||||
|
||||
"""
|
||||
return self._unload_and_optionally_merge(
|
||||
progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
|
||||
def unload(self) -> torch.nn.Module:
|
||||
"""
|
||||
        Gets back the base model by removing all the MiSS modules without merging. This gives back the original base
|
||||
model.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(merge=False)
|
||||
|
@ -20,7 +20,7 @@ from torch import nn
|
||||
from tqdm import tqdm
|
||||
|
||||
from peft.tuners import adalora, loha, lokr, lora, oft, shira
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, _delete_auxiliary_adapter
|
||||
from peft.utils import (
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
|
||||
ModulesToSaveWrapper,
|
||||
@ -83,17 +83,7 @@ class MixedModel(BaseTuner):
|
||||
f"{self.__class__.__name__} only supports {COMPATIBLE_TUNER_TYPES} configs, but got {type(config)}."
|
||||
)
|
||||
|
||||
biases = (getattr(config, "bias", None) for config in self.peft_config)
|
||||
biases = [bias for bias in biases if bias not in (None, "none")]
|
||||
if len(biases) > 1:
|
||||
raise ValueError(
|
||||
f"{self.__class__.__name__} supports only 1 adapter with bias. When using multiple adapters, "
|
||||
"set bias to 'none' for all adapters."
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(config: Configs, key: str):
|
||||
return check_target_module_exists(config, key)
|
||||
super()._check_new_adapter_config(config)
|
||||
|
||||
def _create_and_replace(
|
||||
self,
|
||||
@ -197,34 +187,6 @@ class MixedModel(BaseTuner):
|
||||
raise ValueError(f"Unknown config type {type(config)}, should be one of {COMPATIBLE_TUNER_TYPES}.")
|
||||
return new_module
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "model": # see #1892: prevent infinite recursion if class is not initialized
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
def _set_adapter_layers(self, enabled=True):
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self):
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self):
|
||||
for active_adapter in self.active_adapters:
|
||||
val = getattr(self.peft_config[active_adapter], "bias", "none")
|
||||
if val != "none":
|
||||
                msg = (
                    f"Careful, disabling adapter layers with bias configured to be '{val}' does not produce the same "
                    "output as the base model would without adaptation."
|
||||
)
|
||||
warnings.warn(msg)
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def set_adapter(self, adapter_name: Union[str, list[str]], inference_mode: bool = False) -> None:
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
@ -328,35 +290,7 @@ class MixedModel(BaseTuner):
|
||||
new_adapter = target.active_adapters[:]
|
||||
|
||||
self.active_adapter = new_adapter or []
|
||||
self._delete_auxiliary_adapter(adapter_name, new_active_adapters=new_adapter)
|
||||
|
||||
def merge_and_unload(
|
||||
self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
|
||||
) -> nn.Module:
|
||||
r"""
|
||||
This method merges the layers into the base model. This is needed if someone wants to use the base model as a
|
||||
standalone model.
|
||||
|
||||
Args:
|
||||
progressbar (`bool`):
|
||||
whether to show a progressbar indicating the unload and merge process
|
||||
safe_merge (`bool`):
|
||||
                whether to activate the safe merging check to check if there is any potential NaN in the adapter
                weights
|
||||
adapter_names (`List[str]`, *optional*):
|
||||
The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
|
||||
to `None`.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(
|
||||
progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
|
||||
def unload(self) -> nn.Module:
|
||||
"""
|
||||
        Gets back the base model by removing all the adapter modules without merging. This gives back the original base
|
||||
model.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(merge=False)
|
||||
_delete_auxiliary_adapter(self.model, adapter_name, new_active_adapters=new_adapter)
|
||||
|
||||
def generate(self, *args: Any, **kwargs: Any):
|
||||
return self.model.generate(*args, **kwargs)
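The mixed tuner is normally reached through the public mixed-model entry point; a sketch, assuming the `mixed=True` argument of `get_peft_model` and compatible adapter types (checkpoint and adapter names are illustrative):

```py
from transformers import AutoModelForCausalLM

from peft import LoHaConfig, LoraConfig, get_peft_model

base = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")  # assumed example checkpoint
model = get_peft_model(base, LoraConfig(target_modules=["q_proj", "v_proj"]), adapter_name="lora", mixed=True)
model.add_adapter("loha", LoHaConfig(target_modules=["q_proj", "v_proj"]))
model.set_adapter(["lora", "loha"])  # both adapters contribute to the forward pass
# model.generate(...) now runs with both adapters applied
```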
|
||||
|
@ -12,32 +12,18 @@
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import warnings
|
||||
from dataclasses import asdict
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
from tqdm import tqdm
|
||||
|
||||
from peft.import_utils import is_bnb_4bit_available, is_bnb_available
|
||||
from peft.tuners.tuners_utils import (
|
||||
BaseTuner,
|
||||
BaseTunerLayer,
|
||||
check_target_module_exists,
|
||||
onload_layer,
|
||||
)
|
||||
from peft.utils import (
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
|
||||
ModulesToSaveWrapper,
|
||||
_get_submodules,
|
||||
TRANSFORMERS_MODELS_TO_OFT_TARGET_MODULES_MAPPING,
|
||||
get_quantization_config,
|
||||
)
|
||||
|
||||
from .aqlm import dispatch_aqlm
|
||||
from .awq import dispatch_awq
|
||||
from .config import OFTConfig
|
||||
from .eetq import dispatch_eetq
|
||||
from .gptq import dispatch_gptq
|
||||
from .hqq import dispatch_hqq
|
||||
@ -98,25 +84,8 @@ class OFTModel(BaseTuner):
|
||||
"""
|
||||
|
||||
prefix: str = "oft_"
|
||||
|
||||
def _check_new_adapter_config(self, config: OFTConfig) -> None:
|
||||
"""
|
||||
A helper method to check the config when a new adapter is being added.
|
||||
|
||||
Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.
|
||||
|
||||
"""
|
||||
# TODO: there should be a check if any of the existing adapters actually has bias != "none", or else the check
|
||||
# does not fully correspond to the error message.
|
||||
if (len(self.peft_config) > 1) and (config.bias != "none"):
|
||||
raise ValueError(
|
||||
f"{self.__class__.__name__} supports only 1 adapter with bias. When using multiple adapters, "
|
||||
"set bias to 'none' for all adapters."
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(oft_config, key):
|
||||
return check_target_module_exists(oft_config, key)
|
||||
tuner_layer_cls = OFTLayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_OFT_TARGET_MODULES_MAPPING
|
||||
|
||||
def _create_and_replace(
|
||||
self,
|
||||
@ -174,53 +143,6 @@ class OFTModel(BaseTuner):
|
||||
init_weights=oft_config.init_weights,
|
||||
)
|
||||
|
||||
def _replace_module(self, parent, child_name, new_module, child):
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
|
||||
# child layer wraps the original module, unpack it
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
meta = torch.device("meta")
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if (self.prefix in name) or ("ranknum" in name):
|
||||
if hasattr(child, "qweight"):
|
||||
weight = child.qweight
|
||||
elif hasattr(child, "W_q"):
|
||||
weight = child.W_q
|
||||
elif hasattr(child, "weight"):
|
||||
weight = child.weight
|
||||
elif getattr(child, "in_proj_weight", None) is not None: # MHA
|
||||
weight = child.in_proj_weight
|
||||
else:
|
||||
weight = next(child.parameters())
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(weight.device)
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None:
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
for active_adapter in self.active_adapters:
|
||||
bias = self.peft_config[active_adapter].bias
|
||||
if bias == "none":
|
||||
continue
|
||||
|
||||
if bias == "all":
|
||||
for n, p in model.named_parameters():
|
||||
if "bias" in n:
|
||||
p.requires_grad = True
|
||||
elif bias == "oft_only":
|
||||
for name, m in model.named_modules():
|
||||
if isinstance(m, OFTLayer) and hasattr(m, "bias") and m.bias is not None:
|
||||
m.bias.requires_grad = True
|
||||
else:
|
||||
raise NotImplementedError(f"Requested bias: {bias}, is not implemented.")
|
||||
|
||||
@staticmethod
|
||||
def _create_new_module(oft_config, adapter_name, target, **kwargs):
|
||||
# Collect dispatcher functions to decide what backend to use for the replaced OFT layer. The order matters,
|
||||
@ -265,67 +187,6 @@ class OFTModel(BaseTuner):
|
||||
|
||||
return new_module
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "model": # see #1892: prevent infinite recursion if class is not initialized
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
def get_peft_config_as_dict(self, inference: bool = False):
|
||||
config_dict = {}
|
||||
for key, value in self.peft_config.items():
|
||||
config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(value).items()}
|
||||
if inference:
|
||||
config["inference_mode"] = True
|
||||
config_dict[key] = config
|
||||
return config
|
||||
|
||||
def _set_adapter_layers(self, enabled=True):
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self) -> None:
|
||||
"""Enable all adapters.
|
||||
|
||||
Call this if you have previously disabled all adapters and want to re-enable them.
|
||||
"""
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self):
|
||||
for active_adapter in self.active_adapters:
|
||||
val = self.peft_config[active_adapter].bias
|
||||
if val != "none":
|
||||
msg = (
|
||||
f"Careful, disabling adapter layers with bias configured to be '{val}' does not produce the same "
|
||||
"output as the base model would without adaption."
|
||||
)
|
||||
warnings.warn(msg)
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def set_adapter(self, adapter_name, inference_mode: bool = False):
|
||||
"""Set the active adapter(s).
|
||||
|
||||
Additionally, this function will set the specified adapters to trainable (i.e., requires_grad=True). If this is
|
||||
not desired, use the following code.
|
||||
|
||||
adapter_name (`str` or `list[str]`):
|
||||
Name(s) of the adapter(s) to be activated.
|
||||
inference_mode (bool, optional):
|
||||
Whether the activated adapter should be frozen (i.e. `requires_grad=False`). Default is False.
|
||||
"""
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, OFTLayer):
|
||||
if module.merged:
|
||||
warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
|
||||
module.unmerge()
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
self.active_adapter = adapter_name
|
||||
|
||||
def _check_merge_allowed(self):
|
||||
"""Verify that the configuration supports merging.
|
||||
|
||||
@@ -336,96 +197,3 @@ class OFTModel(BaseTuner):
|
||||
raise ValueError("Cannot merge OFT layers when the model is gptq quantized")
|
||||
if self.peft_config.get("layer_replication"):
|
||||
raise ValueError("Cannot merge OFT layers when base model layers are replicated")
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
peft_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
return peft_config
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
merge=True,
|
||||
progressbar: bool = False,
|
||||
safe_merge: bool = False,
|
||||
adapter_names: Optional[list[str]] = None,
|
||||
):
|
||||
if merge:
|
||||
self._check_merge_allowed()
|
||||
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
desc = "Unloading " + ("and merging " if merge else "") + "model"
|
||||
for key in tqdm(key_list, disable=not progressbar, desc=desc):
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
with onload_layer(target):
|
||||
if hasattr(target, "unload_and_optionally_merge_module"):
|
||||
# if layers have special unloading method, like MultiheadAttention, use that
|
||||
unloaded_module = target.unload_and_optionally_merge_module(
|
||||
merge=merge, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
self._replace_module(parent, target_name, unloaded_module, target)
|
||||
elif hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
|
||||
return self.model
|
||||
|
||||
def delete_adapter(self, adapter_name: str) -> None:
|
||||
"""
|
||||
Deletes an existing adapter.
|
||||
|
||||
Args:
|
||||
adapter_name (str): Name of the adapter to be deleted.
|
||||
"""
|
||||
if adapter_name not in list(self.peft_config.keys()):
|
||||
raise ValueError(f"Adapter {adapter_name} does not exist")
|
||||
del self.peft_config[adapter_name]
|
||||
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
new_adapter = None
|
||||
for key in key_list:
|
||||
_, target, _ = _get_submodules(self.model, key)
|
||||
if isinstance(target, OFTLayer):
|
||||
target.delete_adapter(adapter_name)
|
||||
if new_adapter is None:
|
||||
new_adapter = target.active_adapters[:]
|
||||
|
||||
self.active_adapter = new_adapter or []
|
||||
self._delete_auxiliary_adapter(adapter_name, new_active_adapters=new_adapter)
|
||||
|
||||
def merge_and_unload(
|
||||
self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
|
||||
) -> torch.nn.Module:
|
||||
r"""
|
||||
This method merges the OFT layers into the base model. This is needed if someone wants to use the base model as
|
||||
a standalone model.
|
||||
|
||||
Args:
|
||||
progressbar (`bool`):
|
||||
whether to show a progressbar indicating the unload and merge process
|
||||
safe_merge (`bool`):
|
||||
whether to activate the safe merging check to check if there is any potential Nan in the adapter
|
||||
weights
|
||||
adapter_names (`List[str]`, *optional*):
|
||||
The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
|
||||
to `None`.
|
||||
|
||||
"""
|
||||
return self._unload_and_optionally_merge(
|
||||
progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
|
||||
def unload(self) -> torch.nn.Module:
|
||||
"""
|
||||
Gets back the base model by removing all the oft modules without merging. This gives back the original base
|
||||
model.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(merge=False)
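The two paths documented above (`merge_and_unload` and `unload`) both hand back a plain `torch.nn.Module`. As a rough usage sketch for the OFT case (the model name and config values here are illustrative, not taken from this diff):

```py
>>> from transformers import AutoModelForCausalLM
>>> from peft import OFTConfig, get_peft_model

>>> base_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
>>> model = get_peft_model(base_model, OFTConfig(target_modules=["q_proj", "v_proj"]))
>>> ## [train the adapter] ##
>>> merged_model = model.merge_and_unload()  # folds the OFT rotations into the base weights
>>> # Alternatively, model.unload() removes the OFT modules without merging them.
```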
|
||||
|
@@ -13,18 +13,13 @@
|
||||
# limitations under the License.
|
||||
|
||||
from contextlib import contextmanager
|
||||
from dataclasses import asdict
|
||||
from enum import Enum
|
||||
from typing import Any
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists
|
||||
from peft.utils import (
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
|
||||
ModulesToSaveWrapper,
|
||||
)
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer
|
||||
from peft.utils import TRANSFORMERS_MODELS_TO_POLY_TARGET_MODULES_MAPPING
|
||||
|
||||
from .config import PolyConfig
|
||||
from .layer import Linear, PolyLayer
|
||||
@@ -32,10 +27,8 @@ from .layer import Linear, PolyLayer
|
||||
|
||||
class PolyModel(BaseTuner):
|
||||
prefix: str = "poly_"
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(poly_config, key):
|
||||
return check_target_module_exists(poly_config, key)
|
||||
tuner_layer_cls = PolyLayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_POLY_TARGET_MODULES_MAPPING
|
||||
|
||||
def _create_and_replace(
|
||||
self,
|
||||
@@ -59,38 +52,6 @@ class PolyModel(BaseTuner):
|
||||
new_module.requires_grad_(False)
|
||||
self._replace_module(parent, target_name, new_module, target)
|
||||
|
||||
def _replace_module(self, parent, child_name, new_module, child):
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
|
||||
# child layer wraps the original module, unpack it
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
if not hasattr(new_module, "base_layer"):
|
||||
new_module.weight = child.weight
|
||||
if hasattr(child, "bias"):
|
||||
new_module.bias = child.bias
|
||||
|
||||
if getattr(child, "state", None) is not None:
|
||||
if hasattr(new_module, "base_layer"):
|
||||
new_module.base_layer.state = child.state
|
||||
else:
|
||||
new_module.state = child.state
|
||||
new_module.to(child.weight.device)
|
||||
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if (self.prefix in name) or ("ranknum" in name):
|
||||
weight = child.qweight if hasattr(child, "qweight") else child.weight
|
||||
module.to(weight.device)
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None:
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
@staticmethod
|
||||
def _create_new_module(poly_config, adapter_name, target, **kwargs):
|
||||
if isinstance(target, BaseTunerLayer):
|
||||
@@ -106,50 +67,6 @@ class PolyModel(BaseTuner):
|
||||
"`torch.nn.Linear`."
|
||||
)
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "model": # see #1892: prevent infinite recursion if class is not initialized
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
def get_peft_config_as_dict(self, inference: bool = False):
|
||||
config_dict = {}
|
||||
for key, value in self.peft_config.items():
|
||||
config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(value).items()}
|
||||
if inference:
|
||||
config["inference_mode"] = True
|
||||
config_dict[key] = config
|
||||
return config
|
||||
|
||||
def _set_adapter_layers(self, enabled=True):
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (PolyLayer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self):
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self):
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def set_adapter(self, adapter_name, inference_mode: bool = False):
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, PolyLayer):
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
|
||||
def _prepare_adapter_config(self, peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
peft_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
return peft_config
|
||||
|
||||
def _register_pre_hooks(self, task_ids):
|
||||
"""Helper method to register pre hooks."""
|
||||
if task_ids is None:
|
||||
|
@@ -16,22 +16,17 @@ from __future__ import annotations
|
||||
|
||||
import math
|
||||
import warnings
|
||||
from dataclasses import asdict
|
||||
from enum import Enum
|
||||
from typing import Optional, Union
|
||||
from typing import Union
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from accelerate.utils.imports import is_bf16_available
|
||||
from tqdm import tqdm
|
||||
from transformers.pytorch_utils import Conv1D
|
||||
|
||||
from peft.import_utils import is_bnb_4bit_available, is_bnb_available
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer
|
||||
from peft.utils import (
|
||||
TRANSFORMERS_MODELS_TO_RANDLORA_TARGET_MODULES_MAPPING,
|
||||
ModulesToSaveWrapper,
|
||||
_get_submodules,
|
||||
)
|
||||
|
||||
from .._buffer_dict import BufferDict
|
||||
@@ -100,6 +95,8 @@ class RandLoraModel(BaseTuner):
|
||||
"""
|
||||
|
||||
prefix: str = "randlora_"
|
||||
tuner_layer_cls = RandLoraLayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_RANDLORA_TARGET_MODULES_MAPPING
|
||||
|
||||
def _find_dim(self, config) -> tuple[int, int]:
|
||||
"""
|
||||
@@ -224,14 +221,7 @@ class RandLoraModel(BaseTuner):
|
||||
Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.
|
||||
|
||||
"""
|
||||
# the below todo is copied from LoRA
|
||||
# TODO: there should be a check if any of the existing adapters actually has bias != "none", or else the check
|
||||
# does not fully correspond to the error message.
|
||||
if (len(self.peft_config) > 1) and (config.bias != "none"):
|
||||
raise ValueError(
|
||||
f"{self.__class__.__name__} supports only 1 adapter with bias. When using multiple adapters, "
|
||||
"set bias to 'none' for all adapters."
|
||||
)
|
||||
super()._check_new_adapter_config(config)
|
||||
|
||||
for existing_config in self.peft_config.values():
|
||||
if existing_config is config:
|
||||
@@ -251,10 +241,6 @@ class RandLoraModel(BaseTuner):
|
||||
f"{save_project_unique_values}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(randlora_config, key):
|
||||
return check_target_module_exists(randlora_config, key)
|
||||
|
||||
def _create_and_replace(
|
||||
self,
|
||||
randlora_config,
|
||||
@@ -299,56 +285,6 @@ class RandLoraModel(BaseTuner):
|
||||
new_module.requires_grad_(False)
|
||||
self._replace_module(parent, target_name, new_module, target)
|
||||
|
||||
@staticmethod
|
||||
def _replace_module(parent, child_name, new_module, child):
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
|
||||
# child layer wraps the original module, unpack it
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
if not hasattr(new_module, "base_layer"):
|
||||
new_module.weight = child.weight
|
||||
if hasattr(child, "bias"):
|
||||
new_module.bias = child.bias
|
||||
|
||||
if getattr(child, "state", None) is not None:
|
||||
if hasattr(new_module, "base_layer"):
|
||||
new_module.base_layer.state = child.state
|
||||
else:
|
||||
new_module.state = child.state
|
||||
new_module.to(child.weight.device)
|
||||
|
||||
meta = torch.device("meta")
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if "randlora_" in name:
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(child.weight.device)
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None:
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
for active_adapter in self.active_adapters:
|
||||
bias = self.peft_config[active_adapter].bias
|
||||
if bias == "none":
|
||||
continue
|
||||
|
||||
if bias == "all":
|
||||
for n, p in model.named_parameters():
|
||||
if "bias" in n:
|
||||
p.requires_grad = True
|
||||
elif bias == "randlora_only":
|
||||
for m in model.modules():
|
||||
if isinstance(m, RandLoraLayer) and hasattr(m, "bias") and m.bias is not None:
|
||||
m.bias.requires_grad = True
|
||||
else:
|
||||
raise NotImplementedError(f"Requested bias: {bias}, is not implemented.")
|
||||
|
||||
@staticmethod
|
||||
def _create_new_module(randlora_config, randlora_A, randlora_B, adapter_name, target, **kwargs):
|
||||
# avoid eager bnb import
|
||||
@@ -418,151 +354,3 @@ class RandLoraModel(BaseTuner):
|
||||
)
|
||||
|
||||
return new_module
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "model": # see #1892: prevent infinite recursion if class is not initialized
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
def get_peft_config_as_dict(self, inference: bool = False):
|
||||
config_dict = {}
|
||||
for key, value in self.peft_config.items():
|
||||
config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(value).items()}
|
||||
if inference:
|
||||
config["inference_mode"] = True
|
||||
config_dict[key] = config
|
||||
return config
|
||||
|
||||
def _set_adapter_layers(self, enabled=True):
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self):
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self):
|
||||
for active_adapter in self.active_adapters:
|
||||
val = self.peft_config[active_adapter].bias
|
||||
if val != "none":
|
||||
msg = (
|
||||
f"Careful, disabling adapter layers with bias configured to be '{val}' does not produce the same "
|
||||
"output as the base model would without adaption."
|
||||
)
|
||||
warnings.warn(msg)
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def set_adapter(self, adapter_name, inference_mode: bool = False):
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, RandLoraLayer):
|
||||
if module.merged:
|
||||
warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
|
||||
module.unmerge()
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
self.active_adapter = adapter_name
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_RANDLORA_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
peft_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_RANDLORA_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
return peft_config
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
merge=True,
|
||||
progressbar: bool = False,
|
||||
safe_merge: bool = False,
|
||||
adapter_names: Optional[list[str]] = None,
|
||||
):
|
||||
# we cannot use self.prefix as we want to include non-trainable randlora parameters
|
||||
key_list = [key for key, _ in self.model.named_modules() if "randlora" not in key]
|
||||
desc = "Unloading " + ("and merging " if merge else "") + "model"
|
||||
for key in tqdm(key_list, disable=not progressbar, desc=desc):
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
|
||||
if hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
elif isinstance(target, ModulesToSaveWrapper):
|
||||
# save any additional trainable modules part of `modules_to_save`
|
||||
setattr(parent, target_name, target.modules_to_save[target.active_adapter])
|
||||
|
||||
return self.model
|
||||
|
||||
def delete_adapter(self, adapter_name: str):
|
||||
"""
|
||||
Deletes an existing adapter.
|
||||
|
||||
Args:
|
||||
adapter_name (str): Name of the adapter to be deleted.
|
||||
"""
|
||||
if adapter_name not in list(self.peft_config.keys()):
|
||||
raise ValueError(f"Adapter {adapter_name} does not exist")
|
||||
del self.peft_config[adapter_name]
|
||||
|
||||
# we cannot use self.prefix as we want to include non-trainable randlora parameters
|
||||
key_list = [key for key, _ in self.model.named_modules() if "randlora" not in key]
|
||||
new_adapter = None
|
||||
for key in key_list:
|
||||
_, target, _ = _get_submodules(self.model, key)
|
||||
if isinstance(target, RandLoraLayer):
|
||||
target.delete_adapter(adapter_name)
|
||||
if new_adapter is None:
|
||||
new_adapter = target.active_adapter[:]
|
||||
|
||||
self.active_adapter = new_adapter or []
|
||||
self._delete_auxiliary_adapter(adapter_name, new_active_adapters=new_adapter)
|
||||
|
||||
def merge_and_unload(
|
||||
self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
|
||||
):
|
||||
r"""
|
||||
This method merges the RandLora layers into the base model. This is needed if someone wants to use the base
|
||||
model as a standalone model.
|
||||
|
||||
Args:
|
||||
progressbar (`bool`):
|
||||
whether to show a progressbar indicating the unload and merge process
|
||||
safe_merge (`bool`):
|
||||
whether to activate the safe merging check to check if there is any potential Nan in the adapter
|
||||
weights
|
||||
adapter_names (`list[str]`, *optional*):
|
||||
The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
|
||||
to `None`.
|
||||
|
||||
Example:
|
||||
|
||||
```py
|
||||
>>> from transformers import AutoModelForCausalLM
|
||||
>>> from peft import PeftModel
|
||||
|
||||
>>> base_model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-40b")
|
||||
>>> peft_model_id = "smangrul/falcon-40B-int4-peft-lora-sfttrainer-sample"
|
||||
>>> model = PeftModel.from_pretrained(base_model, peft_model_id)
|
||||
>>> merged_model = model.merge_and_unload()
|
||||
```
|
||||
"""
|
||||
return self._unload_and_optionally_merge(
|
||||
progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
|
||||
def unload(self):
|
||||
"""
|
||||
Gets back the base model by removing all the RandLora modules without merging. This gives back the original
|
||||
base model.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(merge=False)
|
||||
|
@@ -16,25 +16,15 @@ from __future__ import annotations
|
||||
import operator
|
||||
from contextlib import contextmanager
|
||||
from functools import partial
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
from torch import nn
|
||||
from tqdm import tqdm
|
||||
|
||||
from peft.import_utils import is_bnb_4bit_available, is_bnb_available
|
||||
from peft.tuners.road.config import RoadConfig
|
||||
from peft.tuners.tuners_utils import (
|
||||
BaseTuner,
|
||||
BaseTunerLayer,
|
||||
check_target_module_exists,
|
||||
onload_layer,
|
||||
)
|
||||
from peft.utils import (
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
|
||||
ModulesToSaveWrapper,
|
||||
_get_submodules,
|
||||
)
|
||||
from peft.utils import TRANSFORMERS_MODELS_TO_ROAD_TARGET_MODULES_MAPPING
|
||||
|
||||
from .layer import RoadLayer, dispatch_default
|
||||
|
||||
@@ -49,20 +39,8 @@ class RoadModel(BaseTuner):
|
||||
""" """
|
||||
|
||||
prefix: str = "road_"
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(road_config: RoadConfig, model_config: dict) -> RoadConfig:
|
||||
if road_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
road_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
return road_config
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(road_config, key):
|
||||
return check_target_module_exists(road_config, key)
|
||||
tuner_layer_cls = RoadLayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_ROAD_TARGET_MODULES_MAPPING
|
||||
|
||||
def _create_and_replace(
|
||||
self,
|
||||
@@ -110,32 +88,6 @@ class RoadModel(BaseTuner):
|
||||
new_module.requires_grad_(False)
|
||||
self._replace_module(parent, target_name, new_module, target)
|
||||
|
||||
def _replace_module(self, parent, child_name, new_module, child):
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
|
||||
# child layer wraps the original module, unpack it
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
meta = torch.device("meta")
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if (self.prefix in name) or ("ranknum" in name):
|
||||
if hasattr(child, "qweight"):
|
||||
weight = child.qweight
|
||||
elif hasattr(child, "W_q"):
|
||||
weight = child.W_q
|
||||
elif hasattr(child, "weight"):
|
||||
weight = child.weight
|
||||
elif getattr(child, "in_proj_weight", None) is not None: # MHA
|
||||
weight = child.in_proj_weight
|
||||
else:
|
||||
weight = next(child.parameters())
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(weight.device)
|
||||
|
||||
@staticmethod
|
||||
def _create_new_module(road_config: RoadConfig, adapter_name, target, **kwargs):
|
||||
dispatchers = []
|
||||
@@ -172,46 +124,6 @@ class RoadModel(BaseTuner):
|
||||
|
||||
return new_module
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module):
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
def _set_adapter_layers(self, enabled: bool = True) -> None:
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def disable_adapter_layers(self) -> None:
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def enable_adapter_layers(self) -> None:
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def set_adapter(self, adapter_name: str | list[str], inference_mode: bool = False) -> None:
"""Set the active adapter(s).

Args:
adapter_name (`str` or `list[str]`):
Name(s) of the adapter(s) to be activated.
inference_mode (bool, optional):
Whether the activated adapter should be frozen (i.e. `requires_grad=False`). Default is False.
"""
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
for module in self.model.modules():
if isinstance(module, RoadLayer):
module.set_adapter(adapter_name, inference_mode=inference_mode)
self.active_adapter = adapter_name
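As a hedged sketch of the semantics documented above (the adapter names and config arguments are made up for illustration): activating an adapter with `inference_mode=True` leaves it frozen, so no RoAd parameter requires gradients afterwards.

```py
>>> from transformers import AutoModelForCausalLM
>>> from peft import get_peft_model
>>> from peft.tuners.road.config import RoadConfig

>>> base_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
>>> model = get_peft_model(base_model, RoadConfig(target_modules=["q_proj", "v_proj"]), adapter_name="first")
>>> model.add_adapter("second", RoadConfig(target_modules=["q_proj", "v_proj"]))
>>> model.base_model.set_adapter("second", inference_mode=True)  # activate "second" and keep it frozen
>>> any(p.requires_grad for n, p in model.named_parameters() if "road_" in n)
False
```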
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "model": # see #1892: prevent infinite recursion if class is not initialized
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
@contextmanager
|
||||
def _enable_peft_forward_hooks(self, *args, **kwargs):
|
||||
# If adapter_names is passed as an argument, we inject it into the forward arguments.
|
||||
@@ -249,89 +161,3 @@ class RoadModel(BaseTuner):
|
||||
|
||||
for handle in hook_handles:
|
||||
handle.remove()
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
merge=True,
|
||||
progressbar: bool = False,
|
||||
safe_merge: bool = False,
|
||||
adapter_names: Optional[list[str]] = None,
|
||||
):
|
||||
if merge:
|
||||
self._check_merge_allowed()
|
||||
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
desc = "Unloading " + ("and merging " if merge else "") + "model"
|
||||
for key in tqdm(key_list, disable=not progressbar, desc=desc):
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
with onload_layer(target):
|
||||
if hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
elif isinstance(target, ModulesToSaveWrapper):
|
||||
# save any additional trainable modules part of `modules_to_save`
|
||||
new_module = target.modules_to_save[target.active_adapter]
|
||||
if hasattr(new_module, "base_layer"):
|
||||
# check if the module is itself a tuner layer
|
||||
if merge:
|
||||
new_module.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
new_module = new_module.get_base_layer()
|
||||
setattr(parent, target_name, new_module)
|
||||
|
||||
return self.model
|
||||
|
||||
def delete_adapter(self, adapter_name: str) -> None:
|
||||
"""
|
||||
Deletes an existing adapter.
|
||||
|
||||
Args:
|
||||
adapter_name (str): Name of the adapter to be deleted.
|
||||
"""
|
||||
if adapter_name not in list(self.peft_config.keys()):
|
||||
raise ValueError(f"Adapter {adapter_name} does not exist")
|
||||
del self.peft_config[adapter_name]
|
||||
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
new_adapter = None
|
||||
for key in key_list:
|
||||
_, target, _ = _get_submodules(self.model, key)
|
||||
if isinstance(target, RoadLayer):
|
||||
target.delete_adapter(adapter_name)
|
||||
if new_adapter is None:
|
||||
new_adapter = target.active_adapters[:]
|
||||
|
||||
self.active_adapter = new_adapter or []
|
||||
self._delete_auxiliary_adapter(adapter_name, new_active_adapters=new_adapter)
|
||||
|
||||
def merge_and_unload(
|
||||
self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
|
||||
) -> torch.nn.Module:
|
||||
r"""
|
||||
This method merges the RoAd layers into the base model. This is needed if someone wants to use the base model
|
||||
as a standalone model.
|
||||
|
||||
Args:
|
||||
progressbar (`bool`):
|
||||
whether to show a progressbar indicating the unload and merge process
|
||||
safe_merge (`bool`):
|
||||
whether to activate the safe merging check to check if there is any potential Nan in the adapter
|
||||
weights
|
||||
adapter_names (`List[str]`, *optional*):
|
||||
The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
|
||||
to `None`.
|
||||
|
||||
"""
|
||||
return self._unload_and_optionally_merge(
|
||||
progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
|
||||
def unload(self) -> torch.nn.Module:
|
||||
"""
|
||||
Gets back the base model by removing all the road modules without merging. This gives back the original base
|
||||
model.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(merge=False)
|
||||
|
@@ -15,22 +15,14 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from dataclasses import asdict
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from tqdm import tqdm
|
||||
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer
|
||||
from peft.utils import (
|
||||
TRANSFORMERS_MODELS_TO_SHIRA_TARGET_MODULES_MAPPING,
|
||||
ModulesToSaveWrapper,
|
||||
_get_submodules,
|
||||
)
|
||||
|
||||
from .config import ShiraConfig
|
||||
from .layer import Linear, ShiraLayer
|
||||
|
||||
|
||||
@@ -63,22 +55,8 @@ class ShiraModel(BaseTuner):
|
||||
"""
|
||||
|
||||
prefix: str = "shira_"
|
||||
|
||||
def _check_new_adapter_config(self, config: ShiraConfig) -> None:
|
||||
"""
|
||||
A helper method to check the config when a new adapter is being added.
|
||||
|
||||
Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.
|
||||
|
||||
"""
|
||||
for existing_config in self.peft_config.values():
|
||||
if existing_config is config:
|
||||
# skip the current config
|
||||
continue
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(shira_config, key):
|
||||
return check_target_module_exists(shira_config, key)
|
||||
tuner_layer_cls = ShiraLayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_SHIRA_TARGET_MODULES_MAPPING
|
||||
|
||||
def _create_and_replace(
|
||||
self,
|
||||
@@ -121,40 +99,6 @@ class ShiraModel(BaseTuner):
|
||||
new_module.requires_grad_(False)
|
||||
self._replace_module(parent, target_name, new_module, target)
|
||||
|
||||
@staticmethod
|
||||
def _replace_module(parent, child_name, new_module, child):
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
|
||||
# child layer wraps the original module, unpack it
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
if not hasattr(new_module, "base_layer"):
|
||||
new_module.weight = child.weight
|
||||
if hasattr(child, "bias"):
|
||||
new_module.bias = child.bias
|
||||
|
||||
if getattr(child, "state", None) is not None:
|
||||
if hasattr(new_module, "base_layer"):
|
||||
new_module.base_layer.state = child.state
|
||||
else:
|
||||
new_module.state = child.state
|
||||
new_module.to(child.weight.device)
|
||||
|
||||
meta = torch.device("meta")
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if "shira_" in name:
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(child.weight.device)
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None:
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
@staticmethod
|
||||
def _create_new_module(shira_config, adapter_name, target, **kwargs):
|
||||
fan_in_fan_out = shira_config.fan_in_fan_out
|
||||
@@ -196,143 +140,3 @@ class ShiraModel(BaseTuner):
|
||||
)
|
||||
|
||||
return new_module
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "model": # see #1892: prevent infinite recursion if class is not initialized
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
def get_peft_config_as_dict(self, inference: bool = False):
|
||||
config_dict = {}
|
||||
for key, value in self.peft_config.items():
|
||||
config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(value).items()}
|
||||
if inference:
|
||||
config["inference_mode"] = True
|
||||
config_dict[key] = config
|
||||
return config
|
||||
|
||||
def _set_adapter_layers(self, enabled=True):
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self):
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self):
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def set_adapter(self, adapter_name, inference_mode: bool = False):
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, ShiraLayer):
|
||||
if module.merged:
|
||||
warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
|
||||
module.unmerge()
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
self.active_adapter = adapter_name
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_SHIRA_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
peft_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_SHIRA_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
return peft_config
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
merge=True,
|
||||
progressbar: bool = False,
|
||||
safe_merge: bool = False,
|
||||
adapter_names: Optional[list[str]] = None,
|
||||
):
|
||||
# we cannot use self.prefix as we want to include non-trainable shira parameters
|
||||
key_list = [key for key, _ in self.model.named_modules() if "shira" not in key]
|
||||
desc = "Unloading " + ("and merging " if merge else "") + "model"
|
||||
for key in tqdm(key_list, disable=not progressbar, desc=desc):
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
|
||||
if hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
elif isinstance(target, ModulesToSaveWrapper):
|
||||
# save any additional trainable modules part of `modules_to_save`
|
||||
setattr(parent, target_name, target.modules_to_save[target.active_adapter])
|
||||
|
||||
return self.model
|
||||
|
||||
def delete_adapter(self, adapter_name: str):
|
||||
"""
|
||||
Deletes an existing adapter.
|
||||
|
||||
Args:
|
||||
adapter_name (str): Name of the adapter to be deleted.
|
||||
"""
|
||||
if adapter_name not in list(self.peft_config.keys()):
|
||||
raise ValueError(f"Adapter {adapter_name} does not exist")
|
||||
del self.peft_config[adapter_name]
|
||||
|
||||
# we cannot use self.prefix as we want to include non-trainable shira parameters
|
||||
key_list = [key for key, _ in self.model.named_modules() if "shira" not in key]
|
||||
new_adapter = None
|
||||
for key in key_list:
|
||||
_, target, _ = _get_submodules(self.model, key)
|
||||
if isinstance(target, ShiraLayer):
|
||||
target.delete_adapter(adapter_name)
|
||||
if new_adapter is None:
|
||||
new_adapter = target.active_adapter[:]
|
||||
|
||||
self.active_adapter = new_adapter or []
|
||||
|
||||
def merge_and_unload(
|
||||
self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
|
||||
):
|
||||
r"""
|
||||
This method merges the Shira layers into the base model. This is needed if someone wants to use the base model
|
||||
as a standalone model.
|
||||
|
||||
Args:
|
||||
progressbar (`bool`):
|
||||
whether to show a progressbar indicating the unload and merge process
|
||||
safe_merge (`bool`):
|
||||
whether to activate the safe merging check to check if there is any potential Nan in the adapter
|
||||
weights
|
||||
adapter_names (`list[str]`, *optional*):
|
||||
The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
|
||||
to `None`.
|
||||
|
||||
Example:
|
||||
|
||||
```py
>>> from transformers import AutoModelForCausalLM
>>> from peft import ShiraConfig, get_peft_model

>>> base_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
>>> config = ShiraConfig(r=32)
>>> model = get_peft_model(base_model, config)
>>> ## [Train the adapter] ##
>>> merged_model = model.merge_and_unload()
```
|
||||
"""
|
||||
return self._unload_and_optionally_merge(
|
||||
progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
|
||||
def unload(self):
|
||||
"""
|
||||
Gets back the base model by removing all the Shira modules without merging. This gives back the original base
|
||||
model.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(merge=False)
|
||||
|
@@ -14,29 +14,18 @@
|
||||
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from tqdm import tqdm
|
||||
|
||||
from peft.config import PeftConfig
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists, onload_layer
|
||||
from peft.utils import AuxiliaryTrainingWrapper, _get_input_embeddings_name, _get_submodules
|
||||
from peft.tuners.tuners_utils import BaseTuner
|
||||
from peft.utils import _get_input_embeddings_name, _get_submodules
|
||||
|
||||
from .layer import TrainableTokensLayer
|
||||
|
||||
|
||||
class TrainableTokensModel(BaseTuner):
|
||||
prefix: str = "trainable_tokens_"
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
return getattr(self.model, name)
|
||||
tuner_layer_cls = TrainableTokensLayer
|
||||
|
||||
def _prepare_adapter_config(self, peft_config, model_config):
|
||||
# target_modules can be none which prompts us to infer the embedding layer name ourselves.
|
||||
@@ -137,9 +126,6 @@ class TrainableTokensModel(BaseTuner):
|
||||
kwargs = peft_config.to_dict()
|
||||
self._create_and_replace_dict(kwargs, adapter_name, target, target_name, parent, current_key)
|
||||
|
||||
def _check_target_module_exists(self, peft_config: PeftConfig, key: str) -> bool:
|
||||
return check_target_module_exists(peft_config, key)
|
||||
|
||||
@staticmethod
|
||||
def _create_new_module(peft_config, adapter_name, target, **kwargs):
|
||||
new_module = TrainableTokensLayer(target, adapter_name, **kwargs)
|
||||
@@ -151,127 +137,3 @@ class TrainableTokensModel(BaseTuner):
|
||||
)
|
||||
|
||||
return new_module
|
||||
|
||||
def _replace_module(self, parent, child_name, new_module, child):
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
|
||||
# child layer wraps the original module, unpack it
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
if not hasattr(new_module, "base_layer"):
|
||||
new_module.weight = child.weight
|
||||
if hasattr(child, "bias"):
|
||||
new_module.bias = child.bias
|
||||
|
||||
if getattr(child, "state", None) is not None:
|
||||
if hasattr(new_module, "base_layer"):
|
||||
new_module.base_layer.state = child.state
|
||||
else:
|
||||
new_module.state = child.state
|
||||
new_module.to(child.weight.device)
|
||||
|
||||
meta = torch.device("meta")
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if self.prefix in name:
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(child.weight.device)
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None:
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
def _set_adapter_layers(self, enabled: bool = True) -> None:
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, AuxiliaryTrainingWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self) -> None:
|
||||
"""Enable all adapters.
|
||||
|
||||
Call this if you have previously disabled all adapters and want to re-enable them.
|
||||
"""
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self) -> None:
|
||||
"""Disable all adapters.
|
||||
|
||||
When disabling all adapters, the model output corresponds to the output of the base model.
|
||||
"""
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def set_adapter(self, adapter_name: str | list[str], inference_mode: bool = False) -> None:
|
||||
"""Set the active adapter(s).
|
||||
|
||||
Args:
|
||||
adapter_name (`str` or `list[str]`):
|
||||
Name(s) of the adapter(s) to be activated.
|
||||
inference_mode (bool, optional):
|
||||
Whether the activated adapter should be frozen (i.e. `requires_grad=False`). Default is False.
|
||||
"""
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, TrainableTokensLayer):
|
||||
if module.merged:
|
||||
warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
|
||||
module.unmerge()
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
self.active_adapter = adapter_name
|
||||
|
||||
def unload(self) -> torch.nn.Module:
|
||||
"""
|
||||
Gets back the base model by removing all the trainable tokens modules without merging.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(merge=False)
|
||||
|
||||
def merge_and_unload(
|
||||
self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
|
||||
) -> torch.nn.Module:
|
||||
r"""
|
||||
This method merges the trained tokens into the targeted embedding layer(s) of the base model. This is needed if
|
||||
someone wants to use the base model as a standalone model.
|
||||
|
||||
Args:
|
||||
progressbar (`bool`):
|
||||
whether to show a progressbar indicating the unload and merge process
|
||||
safe_merge (`bool`):
|
||||
whether to activate the safe merging check to check if there is any potential Nan in the adapter
|
||||
weights
|
||||
adapter_names (`List[str]`, *optional*):
|
||||
The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
|
||||
to `None`.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(
|
||||
progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
merge=True,
|
||||
progressbar: bool = False,
|
||||
safe_merge: bool = False,
|
||||
adapter_names: Optional[list[str]] = None,
|
||||
):
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
desc = "Unloading " + ("and merging " if merge else "") + "model"
|
||||
for key in tqdm(key_list, disable=not progressbar, desc=desc):
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
with onload_layer(target):
|
||||
if hasattr(target, "unload_and_optionally_merge_module"):
|
||||
# if layers have special unloading method, like MultiheadAttention, use that
|
||||
unloaded_module = target.unload_and_optionally_merge_module(
|
||||
merge=merge, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
self._replace_module(parent, target_name, unloaded_module, target)
|
||||
elif hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
|
||||
return self.model
|
||||
|
@@ -27,6 +27,7 @@ import torch
|
||||
from accelerate.hooks import AlignDevicesHook
|
||||
from accelerate.utils import named_module_tensors, offload_state_dict
|
||||
from torch import nn
|
||||
from tqdm import tqdm
|
||||
from transformers import PreTrainedModel
|
||||
from transformers.pytorch_utils import Conv1D
|
||||
|
||||
@@ -178,8 +179,21 @@ class BaseTuner(nn.Module, ABC):
targeted_parameter_names (`list[str]`):
The list of parameter names that were actually adapted. Can be useful to inspect if you want to quickly
double-check that the `config.target_parameters` were specified correctly.
prefix (`str`)
The PEFT-method specific unique prefix. E.g. `"lora_"` for LoRA.
"""

# Required attributes for child classes:

# The unique prefix for this PEFT method, e.g. 'lora_' for LoRA.
prefix: str
# The class of the tuner layer, e.g. `LoraLayer` for LoRA.
tuner_layer_cls: type[BaseTunerLayer]
# The default target modules for various transformers model architectures, like Llama. This is useful to allow users
# to skip specifying the `target_modules` in the config of the PEFT method. The default is often something like
# `{'llama': ['q_proj', 'v_proj'], ...}`.
target_module_mapping: dict[str, list[str]]
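Taken together, these three class attributes are what let the per-method `model.py` files above drop their copies of `_mark_only_adapters_as_trainable`, `_prepare_adapter_config`, `delete_adapter`, and friends. A hypothetical sketch of what a tuner subclass is left to declare under this scheme (`MyTunerLayer` and `MY_TARGET_MODULES_MAPPING` are placeholder names, not part of this diff):

```py
class MyTunerModel(BaseTuner):
    # placeholder names for illustration only
    prefix: str = "mytuner_"
    tuner_layer_cls = MyTunerLayer
    target_module_mapping = MY_TARGET_MODULES_MAPPING

    def _create_and_replace(self, config, adapter_name, target, target_name, parent, current_key):
        # only the genuinely method-specific logic remains in the subclass
        new_module = self._create_new_module(config, adapter_name, target)
        self._replace_module(parent, target_name, new_module, target)
```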
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
model,
|
||||
@@ -242,23 +256,33 @@ class BaseTuner(nn.Module, ABC):
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def _prepare_adapter_config(self, peft_config: PeftConfig, model_config: dict) -> PeftConfig:
r"""
A private method to eventually prepare the adapter config. For transformers based models, if
`peft_config.target_modules` is None, we can automatically infer the target modules from the
`TRANSFORMERS_MODELS_TO_XXX_TARGET_MODULES_MAPPING`. This method can be further refactored in the future to
automatically infer it for all tuner models.
A private method to prepare the adapter config.

Check out `peft.tuner.lora.LoraModel._prepare_adapter_config` for an example.
For transformers based models, if `peft_config.target_modules` is None, for some model architectures, we can
automatically infer the target modules from the `TRANSFORMERS_MODELS_TO_XXX_TARGET_MODULES_MAPPING`.

Args:
peft_config (`PeftConfig`):
The adapter config.
model_config (`dict`):
The transformers model config, that config should contain the `model_type` key.

Returns:
peft_config (`PeftConfig`):
The PEFT config with updated `target_modules`.

Raises:
ValueError:
Raises an error if the model type was not recognized.
"""
...
if peft_config.target_modules is None:
target_modules = self.target_module_mapping.get(model_config["model_type"])
if target_modules is None:
raise ValueError("Please specify `target_modules` in `peft_config`")
peft_config.target_modules = set(target_modules)
return peft_config
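Concretely, a config that leaves `target_modules` unset now gets it filled from `target_module_mapping` via the model's `model_type`. A small sketch, assuming an architecture that is present in the mapping (for OPT the LoRA default is typically `q_proj`/`v_proj`):

```py
>>> from transformers import AutoModelForCausalLM
>>> from peft import LoraConfig, get_peft_model

>>> base_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
>>> model = get_peft_model(base_model, LoraConfig())  # no target_modules given
>>> sorted(model.peft_config["default"].target_modules)
['q_proj', 'v_proj']
```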
|
||||
|
||||
def _prepare_model(self, peft_config: PeftConfig, model: nn.Module):
|
||||
r"""
|
||||
@@ -274,19 +298,23 @@ class BaseTuner(nn.Module, ABC):
|
||||
"""
|
||||
pass
|
||||
|
||||
@abstractmethod
|
||||
def _check_target_module_exists(peft_config: PeftConfig, key: str) -> bool:
|
||||
r"""
|
||||
A helper private method to check if the passed module's key name matches any of the target modules in the
|
||||
`peft_config.target_modules` list. If it does, return `True`, else return `False`.
|
||||
@staticmethod
|
||||
def _check_target_module_exists(peft_config: PeftConfig, key: str) -> bool | re.Match[str] | None:
|
||||
"""
|
||||
A helper method to check if the passed module's key name matches any of the target modules in the
|
||||
adapter_config.
|
||||
|
||||
Args:
|
||||
peft_config (`PeftConfig`):
|
||||
The adapter config.
|
||||
config (`PeftConfig`):
|
||||
A config to match target modules from.
|
||||
key (`str`):
|
||||
The module's key name.
|
||||
A key to search any matches in config.
|
||||
|
||||
Returns:
|
||||
`bool` | `re.Match[str]` | `None`:
|
||||
True or re.Match object if key matches any target modules from config, False or None if no match found.
|
||||
"""
|
||||
...
|
||||
return check_target_module_exists(peft_config, key)
|
||||
|
||||
@abstractmethod
|
||||
def _create_and_replace(
|
||||
@@ -323,38 +351,99 @@ class BaseTuner(nn.Module, ABC):
|
||||
"""
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module):
|
||||
r"""
|
||||
A helper method to mark only the adapter layers as trainable (i.e. module.requires_grad = False) This needs to
|
||||
be overridden for all tuner classes to match the correct key names.
|
||||
|
||||
Check `peft.tuners.lora.LoraModel._mark_only_adapters_as_trainable` for an example.
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None:
"""
...
A helper method to mark only the adapter layers as trainable (i.e. module.requires_grad = False).
"""
for n, p in model.named_parameters():
if self.prefix not in n:
p.requires_grad = False

for active_adapter in self.active_adapters:
bias = getattr(self.peft_config[active_adapter], "bias", "none")
if bias == "none":
continue

if bias == "all":
for n, p in model.named_parameters():
if "bias" in n:
p.requires_grad = True
elif bias.endswith("_only"):  # e.g. "lora_only" or "boft_only"
for m in model.modules():
if isinstance(m, self.tuner_layer_cls) and hasattr(m, "bias") and m.bias is not None:
m.bias.requires_grad = True
else:
raise NotImplementedError(f"Requested bias: {bias}, is not implemented.")
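The effect of this shared freezing logic, sketched for LoRA with `bias="all"` (any tuner that reuses the default behaves the same way): everything except the adapter parameters and the bias terms ends up frozen.

```py
>>> from transformers import AutoModelForCausalLM
>>> from peft import LoraConfig, get_peft_model

>>> base_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
>>> model = get_peft_model(base_model, LoraConfig(bias="all"))
>>> trainable = [n for n, p in model.named_parameters() if p.requires_grad]
>>> all(("lora_" in n) or ("bias" in n) for n in trainable)
True
```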
|
||||
|
||||
def _set_adapter_layers(self, enabled: bool = True) -> None:
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, AuxiliaryTrainingWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
@abstractmethod
|
||||
def disable_adapter_layers(self) -> None:
|
||||
"""
|
||||
Disable all adapters in-place.
|
||||
"""
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
When disabling all adapters, the model output corresponds to the output of the base model.
|
||||
"""
|
||||
# TODO: deprecate in favor of enable_adapters
|
||||
for active_adapter in self.active_adapters:
|
||||
bias_val = getattr(self.peft_config[active_adapter], "bias", "none")
|
||||
if bias_val != "none":
|
||||
msg = (
|
||||
f"Careful, disabling adapter layers with bias configured to be '{bias_val}' does not produce the "
|
||||
"same output as the base model would without adaption."
|
||||
)
|
||||
warnings.warn(msg)
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def enable_adapter_layers(self) -> None:
|
||||
"""
|
||||
Enable all adapters in-place
|
||||
"""
|
||||
...
|
||||
# TODO: deprecate in favor of enable_adapters
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def delete_adapter(self, adapter_name: str) -> None:
"""
Deletes an existing adapter.

Args:
adapter_name (str): Name of the adapter to be deleted.
"""
if adapter_name not in list(self.peft_config.keys()):
raise ValueError(f"Adapter {adapter_name} does not exist")
del self.peft_config[adapter_name]

new_adapter = delete_adapter(
model=self.model, adapter_name=adapter_name, prefix=self.prefix, layer_cls=self.tuner_layer_cls
)
self.active_adapter = new_adapter or []
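In user-facing terms, this shared implementation keeps the set of active adapters consistent after a deletion. A minimal sketch (the adapter names are invented for the example):

```py
>>> from transformers import AutoModelForCausalLM
>>> from peft import LoraConfig, get_peft_model

>>> base_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
>>> model = get_peft_model(base_model, LoraConfig(), adapter_name="first")
>>> model.add_adapter("second", LoraConfig())
>>> model.delete_adapter("second")
>>> list(model.peft_config)
['first']
```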
|
||||
|
||||
def _check_new_adapter_config(self, config: PeftConfig) -> None:
"""
A helper method to check the config when a new adapter is being added.
A helper method to check the config of a new adapter being added.

Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.

"""
pass
if len(self.peft_config) <= 1:
return

# It is assumed that the config was added to self.peft_config *before* calling this check. We should thus never
# encounter the error below. Still, it is better to verify this, or else subsequent checks could be incorrect.
if not any(conf is config for conf in self.peft_config.values()):
raise ValueError(
"_check_new_peft_config was called incorrectly, this should not happen. Please open an issue and "
"report the error: https://github.com/huggingface/peft/issues"
)

bias_values = [getattr(conf, "bias", "none") for conf in self.peft_config.values()]
if sum(bias_value != "none" for bias_value in bias_values) > 1:
raise ValueError(
f"{self.__class__.__name__} supports only 1 adapter with bias. When using multiple adapters, "
"set bias to 'none' for all adapters."
)
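In other words, at most one of the registered adapters may use a non-`"none"` bias setting; the second one is rejected when it is added. A short sketch of the failure mode (the adapter name is invented):

```py
>>> from transformers import AutoModelForCausalLM
>>> from peft import LoraConfig, get_peft_model

>>> base_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
>>> model = get_peft_model(base_model, LoraConfig(bias="all"))
>>> model.add_adapter("other", LoraConfig(bias="all"))  # raises ValueError: only 1 adapter with bias is supported
```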
|
||||
|
||||
def _cast_adapter_dtype(self, adapter_name: str, autocast_adapter_dtype: bool = True) -> None:
|
||||
"""
|
||||
@@ -369,35 +458,7 @@ class BaseTuner(nn.Module, ABC):
|
||||
Whether to autocast the adapter dtype. Defaults to `True`.
|
||||
|
||||
"""
|
||||
if not autocast_adapter_dtype:
|
||||
return
|
||||
|
||||
dtypes_to_convert_to_fp32 = {torch.float16, torch.bfloat16}
|
||||
|
||||
for module in self.model.modules():
|
||||
if not isinstance(module, BaseTunerLayer):
|
||||
continue
|
||||
|
||||
for submodule in module.modules():
|
||||
if not isinstance(submodule, (nn.ModuleDict, nn.ParameterDict, BufferDict)):
|
||||
continue
|
||||
|
||||
if adapter_name not in submodule:
|
||||
continue
|
||||
|
||||
if isinstance(submodule[adapter_name], nn.Parameter):
|
||||
if submodule[adapter_name].dtype in dtypes_to_convert_to_fp32:
|
||||
submodule[adapter_name].data = submodule[adapter_name].data.to(torch.float32)
|
||||
continue
|
||||
|
||||
if isinstance(submodule[adapter_name], torch.Tensor): # e.g. from a BufferDict
|
||||
if submodule[adapter_name].dtype in dtypes_to_convert_to_fp32:
|
||||
submodule[adapter_name] = submodule[adapter_name].to(torch.float32)
|
||||
continue
|
||||
|
||||
for param in submodule[adapter_name].parameters():
|
||||
if param.dtype in dtypes_to_convert_to_fp32:
|
||||
param.data = param.data.to(torch.float32)
|
||||
cast_adapter_dtype(self.model, adapter_name=adapter_name, autocast_adapter_dtype=autocast_adapter_dtype)
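The inline upcasting loop is thus replaced by a single call to a reusable helper that promotes float16/bfloat16 adapter weights to float32. A hedged usage sketch, assuming the helper is importable from `peft.tuners.tuners_utils`, where this hunk calls it:

```py
>>> import torch
>>> from transformers import AutoModelForCausalLM
>>> from peft import LoraConfig, get_peft_model
>>> from peft.tuners.tuners_utils import cast_adapter_dtype

>>> base_model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m", torch_dtype=torch.float16)
>>> model = get_peft_model(base_model, LoraConfig(), autocast_adapter_dtype=False)  # adapters stay in fp16
>>> cast_adapter_dtype(model.base_model.model, adapter_name="default")
>>> # the fp16 LoRA weights have been upcast to float32 in place
```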
|
||||
|
||||
def _check_merge_allowed(self):
|
||||
"""Helper method to check whether the adapter can be merged.
|
||||
@@ -435,6 +496,83 @@ class BaseTuner(nn.Module, ABC):
|
||||
+ example_code
|
||||
)
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
merge: bool = True,
|
||||
progressbar: bool = False,
|
||||
safe_merge: bool = False,
|
||||
adapter_names: Optional[list[str]] = None,
|
||||
) -> None:
|
||||
if merge:
|
||||
self._check_merge_allowed()
|
||||
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
desc = "Unloading " + ("and merging " if merge else "") + "model"
|
||||
for key in tqdm(key_list, disable=not progressbar, desc=desc):
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
with onload_layer(target):
|
||||
if hasattr(target, "unload_and_optionally_merge_module"):
|
||||
# if layers have special unloading method, like MultiheadAttention, use that
|
||||
unloaded_module = target.unload_and_optionally_merge_module(
|
||||
merge=merge, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
self._replace_module(parent, target_name, unloaded_module, target)
|
||||
elif hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
|
||||
return self.model
|
||||
|
||||
def merge_and_unload(
|
||||
self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
|
||||
) -> torch.nn.Module:
|
||||
r"""
|
||||
This method merges the adapter layers into the base model.
|
||||
|
||||
This is needed if someone wants to use the base model as a standalone model. The returned model has the same
|
||||
architecture as the original base model.
|
||||
|
||||
It is important to assign the returned model to a variable and use it, this is not an in-place operation!
|
||||
|
||||
Args:
|
||||
progressbar (`bool`):
|
||||
whether to show a progressbar indicating the unload and merge process (default: False).
|
||||
safe_merge (`bool`):
|
||||
whether to activate the safe merging check to check if there is any potential Nan in the adapter
|
||||
weights.
|
||||
adapter_names (`List[str]`, *optional*):
|
||||
The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
|
||||
to `None`.
|
||||
|
||||
Example:
|
||||
|
||||
```py
|
||||
>>> from transformers import AutoModelForCausalLM
|
||||
>>> from peft import PeftModel
|
||||
|
||||
>>> model_id = ...
|
||||
>>> base_model = AutoModelForCausalLM.from_pretrained(model_id)
|
||||
>>> peft_model_id = ...
|
||||
>>> model = PeftModel.from_pretrained(base_model, peft_model_id)
|
||||
>>> merged_model = model.merge_and_unload()
|
||||
```
|
||||
"""
|
||||
return self._unload_and_optionally_merge(
|
||||
progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
|
||||
def unload(self) -> torch.nn.Module:
|
||||
"""
|
||||
Return the base model by removing all the PEFT modules.
|
||||
|
||||
It is important to assign the returned model to a variable and use it, this is not an in-place operation!
|
||||
"""
|
||||
return self._unload_and_optionally_merge(merge=False)
|
||||
|
||||
def _check_target_module_compatiblity(self, peft_config: PeftConfig, model: nn.Module, target_name: str):
|
||||
"""
|
||||
Prevent applying LoRA to incompatible modules in specific architectures (e.g., Mamba).
|
||||
@ -530,10 +668,9 @@ class BaseTuner(nn.Module, ABC):
|
||||
and (len(peft_config.target_modules) >= MIN_TARGET_MODULES_FOR_OPTIMIZATION)
|
||||
and (peft_config.peft_type != PeftType.IA3)
|
||||
):
|
||||
suffixes = tuple("." + suffix for suffix in peft_config.target_modules)
|
||||
names_no_target = [
|
||||
name
|
||||
for name in key_list
|
||||
if not any((name == suffix) or name.endswith("." + suffix) for suffix in peft_config.target_modules)
|
||||
name for name in key_list if (name not in peft_config.target_modules) and not name.endswith(suffixes)
|
||||
]
|
||||
new_target_modules = _find_minimal_target_modules(peft_config.target_modules, names_no_target)
|
||||
if len(new_target_modules) < len(peft_config.target_modules):
|
||||
@ -543,10 +680,10 @@ class BaseTuner(nn.Module, ABC):
|
||||
# MATCHING & CREATING MODULES #
|
||||
###############################
|
||||
|
||||
existing_adapter_map = {}
|
||||
existing_adapter_prefixes = []
|
||||
for key, module in named_modules:
|
||||
if isinstance(module, BaseTunerLayer):
|
||||
existing_adapter_map[key] = module
|
||||
existing_adapter_prefixes.append(key + ".")
|
||||
|
||||
# TODO: check if this the most robust way
|
||||
module_names: set[str] = set()
|
||||
@ -560,8 +697,8 @@ class BaseTuner(nn.Module, ABC):
|
||||
|
||||
# It is possible that we're adding an additional adapter, so if we encounter a key that clearly belongs to a
|
||||
# previous adapter we can skip here since we don't want to interfere with adapter internals.
|
||||
for adapter_key in existing_adapter_map:
|
||||
if key.startswith(adapter_key + "."):
|
||||
for adapter_key in existing_adapter_prefixes:
|
||||
if key.startswith(adapter_key):
|
||||
excluded_modules.append(key)
|
||||
break
|
||||
|
||||
@ -801,6 +938,61 @@ class BaseTuner(nn.Module, ABC):
|
||||
create_and_replace_param(module_name, key, param_name)
|
||||
self.targeted_parameter_names.append(key)
|
||||
|
||||
def _replace_module(self, parent, child_name, new_module, child) -> None:
|
||||
"""
|
||||
Replace the sub-module of a given module with a new PEFT module.
|
||||
|
||||
This also deals with device placement of the new module to be in line with the child module.
|
||||
|
||||
Args:
|
||||
parent (`nn.Module`):
|
||||
The parent module on which the replacement should take place.
|
||||
child_name (`str`):
|
||||
The name of the child module to be replaced.
|
||||
new_module (`nn.Module`):
|
||||
The new PEFT module.
|
||||
child (`nn.Module`):
|
||||
The original child module that is being replaced.
|
||||
|
||||
"""
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
|
||||
# child layer wraps the original module, unpack it
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
if not hasattr(new_module, "base_layer"):
|
||||
new_module.weight = child.weight
|
||||
if hasattr(child, "bias"):
|
||||
new_module.bias = child.bias
|
||||
|
||||
if getattr(child, "state", None) is not None:
|
||||
if hasattr(new_module, "base_layer"):
|
||||
new_module.base_layer.state = child.state
|
||||
else:
|
||||
new_module.state = child.state
|
||||
new_module.to(child.weight.device)
|
||||
|
||||
meta = torch.device("meta")
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if self.prefix in name:
|
||||
if hasattr(child, "qweight"):
|
||||
weight = child.qweight
|
||||
elif hasattr(child, "W_q"):
|
||||
weight = child.W_q
|
||||
elif hasattr(child, "weight"):
|
||||
weight = child.weight
|
||||
elif getattr(child, "in_proj_weight", None) is not None: # MHA
|
||||
weight = child.in_proj_weight
|
||||
else:
|
||||
weight = next(child.parameters())
|
||||
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(weight.device)
|
||||
|
||||
def merge_adapter(self, adapter_names: Optional[list[str]] = None, safe_merge: bool = False) -> None:
|
||||
"""
|
||||
This method merges the adapter layers into the base model.
|
||||
@ -854,18 +1046,19 @@ class BaseTuner(nn.Module, ABC):
|
||||
"""
|
||||
_set_adapter(self, adapter_name, inference_mode=inference_mode)
|
||||
|
||||
def _delete_auxiliary_adapter(self, adapter_name: str, new_active_adapters: Optional[list[str]]) -> None:
|
||||
for module in self.modules():
|
||||
if isinstance(module, AuxiliaryTrainingWrapper):
|
||||
module.delete_adapter(adapter_name, new_active_adapters=new_active_adapters)
|
||||
def set_adapter(self, adapter_name: str | list[str], inference_mode: bool = False) -> None:
|
||||
"""Set the active adapter(s).
|
||||
|
||||
def _unloading_checks(self, adapter_names: Optional[list[str]]):
|
||||
adapters_to_consider = adapter_names or self.active_adapters
|
||||
is_modules_to_save_available = any(
|
||||
self.peft_config[adapter].modules_to_save for adapter in adapters_to_consider
|
||||
Args:
|
||||
adapter_name (str, list[str]):
|
||||
The name(s) of the adapter(s) to set as active
|
||||
inference_mode (bool, optional):
|
||||
Whether the activated adapter should be frozen (i.e. `requires_grad=False`). Default is False.
|
||||
"""
|
||||
set_adapter(
|
||||
self.model, adapter_name=adapter_name, inference_mode=inference_mode, layer_cls=self.tuner_layer_cls
|
||||
)
|
||||
if is_modules_to_save_available and len(adapters_to_consider) > 1:
|
||||
raise ValueError("Cannot unload multiple adapters that specify `modules_to_save`.")
|
||||
self.active_adapter = adapter_name
|
||||
|
||||
@staticmethod
|
||||
def get_model_config(model: nn.Module) -> dict:
|
||||
@ -898,6 +1091,15 @@ class BaseTuner(nn.Module, ABC):
|
||||
tied_target_modules.append(target_module)
|
||||
return tied_target_modules
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "model": # see #1892: prevent infinite recursion if class is not initialized
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
|
||||
class BaseTunerLayer(ABC):
|
||||
r"""
|
||||
@ -1246,12 +1448,14 @@ def check_target_module_exists(config, key: str) -> bool | re.Match[str] | None:
|
||||
"""A helper method to check if the passed module's key name matches any of the target modules in the adapter_config.
|
||||
|
||||
Args:
|
||||
config (`LoraConfig` | `LycorisConfig`): A config to match target modules from
|
||||
key (`str`): A key to search any matches in config
|
||||
config (`PeftConfig`):
|
||||
A config to match target modules from.
|
||||
key (`str`):
|
||||
A key to search any matches in config
|
||||
|
||||
Returns:
|
||||
`bool` | `re.Match[str]` | `None`: True of match object if key matches any target modules from config, False or
|
||||
None if no match found
|
||||
`bool` | `re.Match[str]` | `None`:
|
||||
True or re.Match object if key matches any target modules from config, False or None if no match found.
|
||||
"""
|
||||
if hasattr(config, "exclude_modules") and config.exclude_modules:
|
||||
if isinstance(config.exclude_modules, str):
|
||||
@ -1487,3 +1691,126 @@ def replicate_layers(model: nn.Module, layer_map: list[tuple[int, int]]):
|
||||
raise ValueError("Unexpected model type, need to handle post-processing of layers.")
|
||||
if hasattr(model.config, "num_hidden_layers"): # Common to Llama, Bert, Falcon.
|
||||
model.config.num_hidden_layers = len(new_layers)
|
||||
|
||||
|
||||
###############################
# FUNCTIONS FOR functional.py #
###############################


def set_adapter(
    model,
    adapter_name: str | list[str],
    inference_mode: bool = False,
    layer_cls: type[BaseTunerLayer] = BaseTunerLayer,
) -> None:
    """Set the active PEFT adapter(s) of the model.

    Active adapters are those adapters that participate in the forward pass. Use this function if you want to switch
    between multiple PEFT adapters.

    Args:
        model (`nn.Module`):
            The model on which the adapter(s) should be set.
        adapter_name (str, list[str]):
            The name(s) of the adapter(s) to set as active
        inference_mode (bool, optional):
            Whether the activated adapter should be frozen (i.e. `requires_grad=False`). Default is False.
        layer_cls (type, optional):
            The class of the adapter layer. Defaults to `BaseTunerLayer`.
    """
    _set_adapter(model, adapter_name, inference_mode=inference_mode)  # auxiliary modules
    for module in model.modules():
        if isinstance(module, layer_cls):
            if module.merged:
                warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
                module.unmerge()
            module.set_adapter(adapter_name, inference_mode=inference_mode)

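
A minimal usage sketch of how an integration might call this function on a plain transformers model, assuming the `peft.functional` re-exports introduced by this PR; the model id, adapter names, and target modules below are placeholders, not values from the diff:

```py
from transformers import AutoModelForCausalLM

from peft import LoraConfig
from peft.functional import inject_adapter_in_model, set_adapter

base = AutoModelForCausalLM.from_pretrained("some-org/some-model")  # placeholder model id
config = LoraConfig(target_modules=["q_proj", "v_proj"])  # assumes LLaMA-style projection names
inject_adapter_in_model(config, base, adapter_name="adapter_a")
inject_adapter_in_model(config, base, adapter_name="adapter_b")

# from here on only "adapter_b" participates in the forward pass; "adapter_a" stays loaded but inactive
set_adapter(base, adapter_name="adapter_b")
```
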
def _delete_auxiliary_adapter(model, adapter_name: str, new_active_adapters: Optional[list[str]]) -> None:
    for module in model.modules():
        if isinstance(module, AuxiliaryTrainingWrapper):
            module.delete_adapter(adapter_name, new_active_adapters=new_active_adapters)


def delete_adapter(
    model: nn.Module, adapter_name: str, prefix: str, layer_cls: type[BaseTunerLayer] = BaseTunerLayer
) -> list[str] | None:
    """
    Delete an existing PEFT adapter.

    Note: This function does not delete the PEFT config on the model, if there is one. It will also not completely
    purge the PEFT layers if the last PEFT adapter is deleted. For this, consider using `model.unload()` if using a
    PEFT model instance, or just reloading the base model.

    Args:
        model (`nn.Module`):
            The model from which the adapter should be deleted.
        adapter_name (str):
            The name of the adapter to be deleted.
        prefix (str):
            The prefix of the PEFT method, e.g. "lora_" for LoRA.
        layer_cls (type, optional):
            The class of the adapter layer. Defaults to `BaseTunerLayer`.

    Returns:
        new_adapter (list[str] | None):
            The name of remaining adapter(s) after deletion, or `None` if there are no active adapters left. Use this
            to set the new active adapter of the model if necessary.
    """
    key_list = [key for key, _ in model.named_modules() if prefix not in key]
    new_adapter = None

    for key in key_list:
        _, target, _ = _get_submodules(model, key)
        if isinstance(target, layer_cls):
            target.delete_adapter(adapter_name)
            if new_adapter is None:
                new_adapter = target.active_adapters[:]

    _delete_auxiliary_adapter(model, adapter_name=adapter_name, new_active_adapters=new_adapter)
    return new_adapter

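
A short sketch of how the return value can be consumed, mirroring the way `BaseTuner.delete_adapter` uses it earlier in this diff; the adapter names are placeholders and the example assumes LoRA adapters (hence `prefix="lora_"`):

```py
from peft.functional import delete_adapter

# `model` is assumed to already have LoRA adapters "adapter_a" and "adapter_b" injected
remaining = delete_adapter(model, adapter_name="adapter_a", prefix="lora_")
# `remaining` lists the adapters that are still active (e.g. ["adapter_b"]), or is None
# if there are no active adapters left; integrations can use it to update their own
# record of the active adapter(s)
```
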
def cast_adapter_dtype(model: nn.Module, adapter_name: str, autocast_adapter_dtype: bool = True) -> None:
    """
    A helper function to cast the adapter weights to the correct dtype.

    Currently, this only upcasts float16 and bfloat16 to float32.

    Args:
        model (`nn.Module`):
            The model containing the adapter weights to cast.
        adapter_name (`str`):
            The adapter name.
        autocast_adapter_dtype (`bool`, *optional*):
            Whether to autocast the adapter dtype. Defaults to `True`.
    """
    if not autocast_adapter_dtype:
        return

    dtypes_to_convert_to_fp32 = {torch.float16, torch.bfloat16}

    for module in model.modules():
        if not isinstance(module, BaseTunerLayer):
            continue

        for submodule in module.modules():
            if not isinstance(submodule, (nn.ModuleDict, nn.ParameterDict, BufferDict)):
                continue

            if adapter_name not in submodule:
                continue

            if isinstance(submodule[adapter_name], nn.Parameter):
                if submodule[adapter_name].dtype in dtypes_to_convert_to_fp32:
                    submodule[adapter_name].data = submodule[adapter_name].data.to(torch.float32)
                continue

            if isinstance(submodule[adapter_name], torch.Tensor):  # e.g. from a BufferDict
                if submodule[adapter_name].dtype in dtypes_to_convert_to_fp32:
                    submodule[adapter_name] = submodule[adapter_name].to(torch.float32)
                continue

            for param in submodule[adapter_name].parameters():
                if param.dtype in dtypes_to_convert_to_fp32:
                    param.data = param.data.to(torch.float32)
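
A brief sketch of the intended call pattern, e.g. right after injecting or loading adapter weights in half precision; `model` and the adapter name are placeholders:

```py
from peft.functional import cast_adapter_dtype

# upcast any float16/bfloat16 weights of the "default" adapter to float32 for more stable training
cast_adapter_dtype(model, adapter_name="default")

# passing autocast_adapter_dtype=False turns the call into a no-op and keeps the original dtypes
cast_adapter_dtype(model, adapter_name="default", autocast_adapter_dtype=False)
```
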
@ -14,17 +14,13 @@
|
||||
from __future__ import annotations
|
||||
|
||||
import warnings
|
||||
from dataclasses import asdict
|
||||
from enum import Enum
|
||||
from typing import Optional
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from tqdm import tqdm
|
||||
from transformers.pytorch_utils import Conv1D
|
||||
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists
|
||||
from peft.utils import TRANSFORMERS_MODELS_TO_VBLORA_TARGET_MODULES_MAPPING, ModulesToSaveWrapper, _get_submodules
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer
|
||||
from peft.utils import TRANSFORMERS_MODELS_TO_VBLORA_TARGET_MODULES_MAPPING
|
||||
|
||||
from .config import VBLoRAConfig
|
||||
from .layer import Linear, VBLoRALayer
|
||||
@ -70,6 +66,8 @@ class VBLoRAModel(BaseTuner):
|
||||
"""
|
||||
|
||||
prefix: str = "vblora_"
|
||||
tuner_layer_cls = VBLoRALayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_VBLORA_TARGET_MODULES_MAPPING
|
||||
|
||||
def _init_vblora_vector_bank(self, config: VBLoRAConfig, adapter_name: str) -> None:
|
||||
vblora_vector_bank = torch.zeros(config.num_vectors, config.vector_length)
|
||||
@ -79,26 +77,6 @@ class VBLoRAModel(BaseTuner):
|
||||
def _pre_injection_hook(self, model: nn.Module, config: VBLoRAConfig, adapter_name: str) -> None:
|
||||
self.vblora_vector_bank = nn.ParameterDict({})
|
||||
|
||||
def _check_new_adapter_config(self, config: VBLoRAConfig) -> None:
|
||||
"""
|
||||
A helper method to check the config when a new adapter is being added.
|
||||
|
||||
Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.
|
||||
|
||||
"""
|
||||
# the below todo is copied from LoRA
|
||||
# TODO: there should be a check if any of the existing adapters actually has bias != "none", or else the check
|
||||
# does not fully correspond to the error message.
|
||||
if (len(self.peft_config) > 1) and (config.bias != "none"):
|
||||
raise ValueError(
|
||||
f"{self.__class__.__name__} supports only 1 adapter with bias. When using multiple adapters, "
|
||||
"set bias to 'none' for all adapters."
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(vblora_config, key):
|
||||
return check_target_module_exists(vblora_config, key)
|
||||
|
||||
def _create_and_replace(
|
||||
self,
|
||||
vblora_config,
|
||||
@ -143,55 +121,6 @@ class VBLoRAModel(BaseTuner):
|
||||
new_module.requires_grad_(False)
|
||||
self._replace_module(parent, target_name, new_module, target)
|
||||
|
||||
@staticmethod
|
||||
def _replace_module(parent, child_name, new_module, child):
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
|
||||
# child layer wraps the original module, unpack it
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
if not hasattr(new_module, "base_layer"):
|
||||
new_module.weight = child.weight
|
||||
if hasattr(child, "bias"):
|
||||
new_module.bias = child.bias
|
||||
|
||||
if getattr(child, "state", None) is not None:
|
||||
if hasattr(new_module, "base_layer"):
|
||||
new_module.base_layer.state = child.state
|
||||
else:
|
||||
new_module.state = child.state
|
||||
new_module.to(child.weight.device)
|
||||
|
||||
meta = torch.device("meta")
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if "vblora_" in name:
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(child.weight.device)
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None:
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
for active_adapter in self.active_adapters:
|
||||
bias = self.peft_config[active_adapter].bias
|
||||
if bias == "none":
|
||||
continue
|
||||
if bias == "all":
|
||||
for n, p in model.named_parameters():
|
||||
if "bias" in n:
|
||||
p.requires_grad = True
|
||||
elif bias == "vblora_only":
|
||||
for m in model.modules():
|
||||
if isinstance(m, VBLoRALayer) and hasattr(m, "bias") and m.bias is not None:
|
||||
m.bias.requires_grad = True
|
||||
else:
|
||||
raise NotImplementedError(f"Requested bias: {bias}, is not implemented.")
|
||||
|
||||
@staticmethod
|
||||
def _create_new_module(vblora_config, vblora_vector_bank, adapter_name, target, **kwargs):
|
||||
if isinstance(target, BaseTunerLayer):
|
||||
@ -233,168 +162,6 @@ class VBLoRAModel(BaseTuner):
|
||||
|
||||
return new_module
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "model": # see #1892: prevent infinite recursion if class is not initialized
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
def get_peft_config_as_dict(self, inference: bool = False):
|
||||
config_dict = {}
|
||||
for key, value in self.peft_config.items():
|
||||
config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(value).items()}
|
||||
if inference:
|
||||
config["inference_mode"] = True
|
||||
config_dict[key] = config
|
||||
return config
|
||||
|
||||
def _set_adapter_layers(self, enabled: bool = True) -> None:
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self) -> None:
|
||||
"""Enable all adapters.
|
||||
|
||||
Call this if you have previously disabled all adapters and want to re-enable them.
|
||||
"""
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self) -> None:
|
||||
"""Disable all adapters.
|
||||
|
||||
When disabling all adapters, the model output corresponds to the output of the base model.
|
||||
"""
|
||||
for active_adapter in self.active_adapters:
|
||||
val = self.peft_config[active_adapter].bias
|
||||
if val != "none":
|
||||
msg = (
|
||||
f"Careful, disabling adapter layers with bias configured to be '{val}' does not produce the same "
|
||||
"output as the base model would without adaption."
|
||||
)
|
||||
warnings.warn(msg)
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def set_adapter(self, adapter_name: str | list[str], inference_mode: bool = False) -> None:
|
||||
"""Set the active adapter(s).
|
||||
|
||||
Args:
|
||||
adapter_name (`str` or `list[str]`):
|
||||
Name(s) of the adapter(s) to be activated.
|
||||
inference_mode (bool, optional):
|
||||
Whether the activated adapter should be frozen (i.e. `requires_grad=False`). Default is False.
|
||||
"""
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, VBLoRALayer):
|
||||
if module.merged:
|
||||
warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
|
||||
module.unmerge()
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
self.active_adapter = adapter_name
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_VBLORA_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
peft_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_VBLORA_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
return peft_config
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
merge=True,
|
||||
progressbar: bool = False,
|
||||
safe_merge: bool = False,
|
||||
adapter_names: Optional[list[str]] = None,
|
||||
):
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
desc = "Unloading " + ("and merging " if merge else "") + "model"
|
||||
for key in tqdm(key_list, disable=not progressbar, desc=desc):
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
|
||||
if hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
elif isinstance(target, ModulesToSaveWrapper):
|
||||
# save any additional trainable modules part of `modules_to_save`
|
||||
setattr(parent, target_name, target.modules_to_save[target.active_adapter])
|
||||
|
||||
return self.model
|
||||
|
||||
def delete_adapter(self, adapter_name: str) -> None:
|
||||
"""
|
||||
Deletes an existing adapter.
|
||||
|
||||
Args:
|
||||
adapter_name (str): Name of the adapter to be deleted.
|
||||
"""
|
||||
if adapter_name not in list(self.peft_config.keys()):
|
||||
raise ValueError(f"Adapter {adapter_name} does not exist")
|
||||
del self.peft_config[adapter_name]
|
||||
|
||||
key_list = [key for key, _ in self.model.named_modules() if self.prefix not in key]
|
||||
new_adapter = None
|
||||
for key in key_list:
|
||||
_, target, _ = _get_submodules(self.model, key)
|
||||
if isinstance(target, VBLoRALayer):
|
||||
target.delete_adapter(adapter_name)
|
||||
if new_adapter is None:
|
||||
new_adapter = target.active_adapter[:]
|
||||
|
||||
self.active_adapter = new_adapter or []
|
||||
self._delete_auxiliary_adapter(adapter_name, new_active_adapters=new_adapter)
|
||||
|
||||
def merge_and_unload(
|
||||
self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
|
||||
) -> torch.nn.Module:
|
||||
r"""
|
||||
This method merges the VBLoRA layers into the base model. This is needed if someone wants to use the base model
|
||||
as a standalone model.
|
||||
|
||||
Args:
|
||||
progressbar (`bool`):
|
||||
whether to show a progressbar indicating the unload and merge process
|
||||
safe_merge (`bool`):
|
||||
whether to activate the safe merging check to check if there is any potential Nan in the adapter
|
||||
weights
|
||||
adapter_names (`list[str]`, *optional*):
|
||||
The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
|
||||
to `None`.
|
||||
|
||||
Example:
|
||||
|
||||
```py
|
||||
>>> from transformers import AutoModelForCausalLM
|
||||
>>> from peft import PeftModel
|
||||
|
||||
>>> base_model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-40b")
|
||||
>>> peft_model_id = "smangrul/falcon-40B-int4-peft-lora-sfttrainer-sample"
|
||||
>>> model = PeftModel.from_pretrained(base_model, peft_model_id)
|
||||
>>> merged_model = model.merge_and_unload()
|
||||
```
|
||||
"""
|
||||
return self._unload_and_optionally_merge(
|
||||
progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
|
||||
def unload(self):
|
||||
"""
|
||||
Gets back the base model by removing all the VBLoRA modules without merging. This gives back the original base
|
||||
model.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(merge=False)
|
||||
|
||||
def get_nb_savable_parameters(self, adapter="default") -> tuple[int, int]:
|
||||
r"""
|
||||
Returns the number of savable VB-LoRA parameters and other savable parameters.
|
||||
|
@ -16,22 +16,17 @@ from __future__ import annotations
|
||||
|
||||
import math
|
||||
import warnings
|
||||
from dataclasses import asdict
|
||||
from enum import Enum
|
||||
from typing import Optional, Union
|
||||
from typing import Union
|
||||
|
||||
import torch
|
||||
import torch.nn as nn
|
||||
from torch.nn.init import _calculate_correct_fan
|
||||
from tqdm import tqdm
|
||||
from transformers.pytorch_utils import Conv1D
|
||||
|
||||
from peft.import_utils import is_bnb_4bit_available, is_bnb_available
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer, check_target_module_exists
|
||||
from peft.tuners.tuners_utils import BaseTuner, BaseTunerLayer
|
||||
from peft.utils import (
|
||||
TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING,
|
||||
ModulesToSaveWrapper,
|
||||
_get_submodules,
|
||||
)
|
||||
|
||||
from .._buffer_dict import BufferDict
|
||||
@ -100,6 +95,8 @@ class VeraModel(BaseTuner):
|
||||
"""
|
||||
|
||||
prefix: str = "vera_lambda_"
|
||||
tuner_layer_cls = VeraLayer
|
||||
target_module_mapping = TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING
|
||||
|
||||
def _find_dim(self, config) -> tuple[int, int]:
|
||||
"""
|
||||
@ -163,14 +160,7 @@ class VeraModel(BaseTuner):
|
||||
Raise a ValueError if there is something wrong with the config or if it conflicts with existing adapters.
|
||||
|
||||
"""
|
||||
# the below todo is copied from LoRA
|
||||
# TODO: there should be a check if any of the existing adapters actually has bias != "none", or else the check
|
||||
# does not fully correspond to the error message.
|
||||
if (len(self.peft_config) > 1) and (config.bias != "none"):
|
||||
raise ValueError(
|
||||
f"{self.__class__.__name__} supports only 1 adapter with bias. When using multiple adapters, "
|
||||
"set bias to 'none' for all adapters."
|
||||
)
|
||||
super()._check_new_adapter_config(config)
|
||||
|
||||
for existing_config in self.peft_config.values():
|
||||
if existing_config is config:
|
||||
@ -190,10 +180,6 @@ class VeraModel(BaseTuner):
|
||||
f"{save_project_unique_values}"
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
def _check_target_module_exists(vera_config, key):
|
||||
return check_target_module_exists(vera_config, key)
|
||||
|
||||
def _create_and_replace(
|
||||
self,
|
||||
vera_config,
|
||||
@ -236,56 +222,6 @@ class VeraModel(BaseTuner):
|
||||
new_module.requires_grad_(False)
|
||||
self._replace_module(parent, target_name, new_module, target)
|
||||
|
||||
@staticmethod
|
||||
def _replace_module(parent, child_name, new_module, child):
|
||||
setattr(parent, child_name, new_module)
|
||||
# It's not necessary to set requires_grad here, as that is handled by
|
||||
# _mark_only_adapters_as_trainable
|
||||
|
||||
# child layer wraps the original module, unpack it
|
||||
if hasattr(child, "base_layer"):
|
||||
child = child.base_layer
|
||||
|
||||
if not hasattr(new_module, "base_layer"):
|
||||
new_module.weight = child.weight
|
||||
if hasattr(child, "bias"):
|
||||
new_module.bias = child.bias
|
||||
|
||||
if getattr(child, "state", None) is not None:
|
||||
if hasattr(new_module, "base_layer"):
|
||||
new_module.base_layer.state = child.state
|
||||
else:
|
||||
new_module.state = child.state
|
||||
new_module.to(child.weight.device)
|
||||
|
||||
meta = torch.device("meta")
|
||||
# dispatch to correct device
|
||||
for name, module in new_module.named_modules():
|
||||
if "vera_" in name:
|
||||
if not any(p.device == meta for p in module.parameters()):
|
||||
module.to(child.weight.device)
|
||||
|
||||
def _mark_only_adapters_as_trainable(self, model: nn.Module) -> None:
|
||||
for n, p in model.named_parameters():
|
||||
if self.prefix not in n:
|
||||
p.requires_grad = False
|
||||
|
||||
for active_adapter in self.active_adapters:
|
||||
bias = self.peft_config[active_adapter].bias
|
||||
if bias == "none":
|
||||
continue
|
||||
|
||||
if bias == "all":
|
||||
for n, p in model.named_parameters():
|
||||
if "bias" in n:
|
||||
p.requires_grad = True
|
||||
elif bias == "vera_only":
|
||||
for m in model.modules():
|
||||
if isinstance(m, VeraLayer) and hasattr(m, "bias") and m.bias is not None:
|
||||
m.bias.requires_grad = True
|
||||
else:
|
||||
raise NotImplementedError(f"Requested bias: {bias}, is not implemented.")
|
||||
|
||||
@staticmethod
|
||||
def _create_new_module(vera_config, vera_A, vera_B, adapter_name, target, **kwargs):
|
||||
# avoid eager bnb import
|
||||
@ -356,151 +292,3 @@ class VeraModel(BaseTuner):
|
||||
)
|
||||
|
||||
return new_module
|
||||
|
||||
def __getattr__(self, name: str):
|
||||
"""Forward missing attributes to the wrapped module."""
|
||||
try:
|
||||
return super().__getattr__(name) # defer to nn.Module's logic
|
||||
except AttributeError:
|
||||
if name == "model": # see #1892: prevent infinite recursion if class is not initialized
|
||||
raise
|
||||
return getattr(self.model, name)
|
||||
|
||||
def get_peft_config_as_dict(self, inference: bool = False):
|
||||
config_dict = {}
|
||||
for key, value in self.peft_config.items():
|
||||
config = {k: v.value if isinstance(v, Enum) else v for k, v in asdict(value).items()}
|
||||
if inference:
|
||||
config["inference_mode"] = True
|
||||
config_dict[key] = config
|
||||
return config
|
||||
|
||||
def _set_adapter_layers(self, enabled=True):
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, (BaseTunerLayer, ModulesToSaveWrapper)):
|
||||
module.enable_adapters(enabled)
|
||||
|
||||
def enable_adapter_layers(self):
|
||||
self._set_adapter_layers(enabled=True)
|
||||
|
||||
def disable_adapter_layers(self):
|
||||
for active_adapter in self.active_adapters:
|
||||
val = self.peft_config[active_adapter].bias
|
||||
if val != "none":
|
||||
msg = (
|
||||
f"Careful, disabling adapter layers with bias configured to be '{val}' does not produce the same "
|
||||
"output as the base model would without adaption."
|
||||
)
|
||||
warnings.warn(msg)
|
||||
self._set_adapter_layers(enabled=False)
|
||||
|
||||
def set_adapter(self, adapter_name, inference_mode: bool = False):
|
||||
self.set_auxiliary_adapters(adapter_name, inference_mode=inference_mode)
|
||||
for module in self.model.modules():
|
||||
if isinstance(module, VeraLayer):
|
||||
if module.merged:
|
||||
warnings.warn("Adapter cannot be set when the model is merged. Unmerging the model first.")
|
||||
module.unmerge()
|
||||
module.set_adapter(adapter_name, inference_mode=inference_mode)
|
||||
self.active_adapter = adapter_name
|
||||
|
||||
@staticmethod
|
||||
def _prepare_adapter_config(peft_config, model_config):
|
||||
if peft_config.target_modules is None:
|
||||
if model_config["model_type"] not in TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING:
|
||||
raise ValueError("Please specify `target_modules` in `peft_config`")
|
||||
peft_config.target_modules = set(
|
||||
TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING[model_config["model_type"]]
|
||||
)
|
||||
return peft_config
|
||||
|
||||
def _unload_and_optionally_merge(
|
||||
self,
|
||||
merge=True,
|
||||
progressbar: bool = False,
|
||||
safe_merge: bool = False,
|
||||
adapter_names: Optional[list[str]] = None,
|
||||
):
|
||||
# we cannot use self.prefix as we want to include non-trainable vera parameters
|
||||
key_list = [key for key, _ in self.model.named_modules() if "vera" not in key]
|
||||
desc = "Unloading " + ("and merging " if merge else "") + "model"
|
||||
for key in tqdm(key_list, disable=not progressbar, desc=desc):
|
||||
try:
|
||||
parent, target, target_name = _get_submodules(self.model, key)
|
||||
except AttributeError:
|
||||
continue
|
||||
|
||||
if hasattr(target, "base_layer"):
|
||||
if merge:
|
||||
target.merge(safe_merge=safe_merge, adapter_names=adapter_names)
|
||||
|
||||
self._replace_module(parent, target_name, target.get_base_layer(), target)
|
||||
elif isinstance(target, ModulesToSaveWrapper):
|
||||
# save any additional trainable modules part of `modules_to_save`
|
||||
setattr(parent, target_name, target.modules_to_save[target.active_adapter])
|
||||
|
||||
return self.model
|
||||
|
||||
def delete_adapter(self, adapter_name: str):
|
||||
"""
|
||||
Deletes an existing adapter.
|
||||
|
||||
Args:
|
||||
adapter_name (str): Name of the adapter to be deleted.
|
||||
"""
|
||||
if adapter_name not in list(self.peft_config.keys()):
|
||||
raise ValueError(f"Adapter {adapter_name} does not exist")
|
||||
del self.peft_config[adapter_name]
|
||||
|
||||
# we cannot use self.prefix as we want to include non-trainable vera parameters
|
||||
key_list = [key for key, _ in self.model.named_modules() if "vera" not in key]
|
||||
new_adapter = None
|
||||
for key in key_list:
|
||||
_, target, _ = _get_submodules(self.model, key)
|
||||
if isinstance(target, VeraLayer):
|
||||
target.delete_adapter(adapter_name)
|
||||
if new_adapter is None:
|
||||
new_adapter = target.active_adapter[:]
|
||||
|
||||
self.active_adapter = new_adapter or []
|
||||
self._delete_auxiliary_adapter(adapter_name, new_active_adapters=new_adapter)
|
||||
|
||||
def merge_and_unload(
|
||||
self, progressbar: bool = False, safe_merge: bool = False, adapter_names: Optional[list[str]] = None
|
||||
):
|
||||
r"""
|
||||
This method merges the Vera layers into the base model. This is needed if someone wants to use the base model
|
||||
as a standalone model.
|
||||
|
||||
Args:
|
||||
progressbar (`bool`):
|
||||
whether to show a progressbar indicating the unload and merge process
|
||||
safe_merge (`bool`):
|
||||
whether to activate the safe merging check to check if there is any potential Nan in the adapter
|
||||
weights
|
||||
adapter_names (`list[str]`, *optional*):
|
||||
The list of adapter names that should be merged. If None, all active adapters will be merged. Defaults
|
||||
to `None`.
|
||||
|
||||
Example:
|
||||
|
||||
```py
|
||||
>>> from transformers import AutoModelForCausalLM
|
||||
>>> from peft import PeftModel
|
||||
|
||||
>>> base_model = AutoModelForCausalLM.from_pretrained("tiiuae/falcon-40b")
|
||||
>>> peft_model_id = "smangrul/falcon-40B-int4-peft-lora-sfttrainer-sample"
|
||||
>>> model = PeftModel.from_pretrained(base_model, peft_model_id)
|
||||
>>> merged_model = model.merge_and_unload()
|
||||
```
|
||||
"""
|
||||
return self._unload_and_optionally_merge(
|
||||
progressbar=progressbar, safe_merge=safe_merge, adapter_names=adapter_names
|
||||
)
|
||||
|
||||
def unload(self):
|
||||
"""
|
||||
Gets back the base model by removing all the Vera modules without merging. This gives back the original base
|
||||
model.
|
||||
"""
|
||||
return self._unload_and_optionally_merge(merge=False)
|
||||
|
@ -19,16 +19,23 @@ from .other import (
|
||||
INCLUDE_LINEAR_LAYERS_SHORTHAND,
|
||||
SAFETENSORS_WEIGHTS_NAME,
|
||||
TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_BOFT_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_BONE_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_HRA_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_LNTUNING_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_LOHA_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_LOKR_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_MISS_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_OFT_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_POLY_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_RANDLORA_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_ROAD_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_SHIRA_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_VBLORA_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING,
|
||||
@ -66,16 +73,23 @@ __all__ = [
|
||||
"INCLUDE_LINEAR_LAYERS_SHORTHAND",
|
||||
"SAFETENSORS_WEIGHTS_NAME",
|
||||
"TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_BOFT_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_BONE_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_HRA_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_LNTUNING_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_LOHA_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_LOKR_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_MISS_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_OFT_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_POLY_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_RANDLORA_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_ROAD_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_SHIRA_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_VBLORA_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING",
|
||||
|
@ -58,44 +58,10 @@ if hasattr(BloomPreTrainedModel, "_convert_to_standard_cache"):
|
||||
# the _convert_to_standard_cache method is removed in the PR and thus serves as an indicator
|
||||
TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING["bloom"] = bloom_model_postprocess_past_key_value
|
||||
|
||||
TRANSFORMERS_MODELS_TO_LNTUNING_TARGET_MODULES_MAPPING = {
|
||||
"llama": ["input_layernorm", "post_attention_layernorm", "norm"],
|
||||
"bloom": ["input_layernorm", "post_attention_layernorm", "ln_f"],
|
||||
"llava": [
|
||||
"multi_modal_projector",
|
||||
"input_layernorm",
|
||||
"post_attention_layernorm",
|
||||
"norm",
|
||||
"embed_tokens",
|
||||
"lm_head",
|
||||
],
|
||||
"t5": ["layer_norm", "final_layer_norm"],
|
||||
"mt5": ["layer_norm", "final_layer_norm"],
|
||||
"bart": ["self_attn_layer_norm", "encoder_attn_layer_norm", "final_layer_norm"],
|
||||
"gpt2": ["ln_1", "ln_2", "ln_f"],
|
||||
"blip-2": ["layernorm", "LayerNorm", "final_layer_norm", "self_attn_layer_norm"],
|
||||
"gptj": ["ln_1", "ln_f"],
|
||||
"falcon": ["input_layernorm", "post_attention_layernorm", "ln_f"],
|
||||
"mistral": ["input_layernorm", "post_attention_layernorm", "norm"],
|
||||
"phi": ["input_layernorm", "final_layernorm"],
|
||||
"gemma": ["input_layernorm", "post_attention_layernorm", "norm"],
|
||||
"gemma2": [
|
||||
"input_layernorm",
|
||||
"post_attention_layernorm",
|
||||
"pre_feedforward_layernorm",
|
||||
"post_feedforward_layernorm",
|
||||
"norm",
|
||||
],
|
||||
"gemma3_text": [
|
||||
"input_layernorm",
|
||||
"post_attention_layernorm",
|
||||
"pre_feedforward_layernorm",
|
||||
"post_feedforward_layernorm",
|
||||
"norm",
|
||||
],
|
||||
"qwen2": ["post_attention_layernorm"],
|
||||
"qwen3": ["post_attention_layernorm"],
|
||||
}
|
||||
|
||||
#######################################
|
||||
# DEFAULT MAPPINGS FOR TARGET_MODULES #
|
||||
#######################################
|
||||
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING = {
|
||||
"t5": ["q", "v"],
|
||||
@ -136,8 +102,73 @@ TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING = {
|
||||
"qwen3": ["q_proj", "v_proj"],
|
||||
}
|
||||
|
||||
TRANSFORMERS_MODELS_TO_LOKR_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
|
||||
# target module mappings that are identical to LORA
|
||||
TRANSFORMERS_MODELS_TO_BOFT_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
|
||||
TRANSFORMERS_MODELS_TO_BONE_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
|
||||
TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
|
||||
TRANSFORMERS_MODELS_TO_HRA_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
|
||||
TRANSFORMERS_MODELS_TO_LOHA_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
|
||||
TRANSFORMERS_MODELS_TO_LOKR_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
|
||||
TRANSFORMERS_MODELS_TO_MISS_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
|
||||
TRANSFORMERS_MODELS_TO_OFT_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
|
||||
TRANSFORMERS_MODELS_TO_POLY_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
|
||||
TRANSFORMERS_MODELS_TO_RANDLORA_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
|
||||
TRANSFORMERS_MODELS_TO_ROAD_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
|
||||
|
||||
# mappings that are similar to LORA with small changes
|
||||
TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
|
||||
TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING["gpt_bigcode"] = ["mlp.c_proj"]
|
||||
TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING["gpt2"] = ["mlp.c_proj"]
|
||||
|
||||
TRANSFORMERS_MODELS_TO_SHIRA_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
|
||||
TRANSFORMERS_MODELS_TO_SHIRA_TARGET_MODULES_MAPPING["phi"] = ["q_proj", "v_proj"]
|
||||
|
||||
TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
|
||||
TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING["phi"] = ["q_proj", "v_proj"]
|
||||
|
||||
TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING = TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING.copy()
|
||||
TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING["gpt_bigcode"] = ["mlp.c_proj"]
|
||||
TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING["gpt2"] = ["mlp.c_proj"]
|
||||
|
||||
# target module mappings that differ from LORA
|
||||
TRANSFORMERS_MODELS_TO_LNTUNING_TARGET_MODULES_MAPPING = {
|
||||
"llama": ["input_layernorm", "post_attention_layernorm", "norm"],
|
||||
"bloom": ["input_layernorm", "post_attention_layernorm", "ln_f"],
|
||||
"llava": [
|
||||
"multi_modal_projector",
|
||||
"input_layernorm",
|
||||
"post_attention_layernorm",
|
||||
"norm",
|
||||
"embed_tokens",
|
||||
"lm_head",
|
||||
],
|
||||
"t5": ["layer_norm", "final_layer_norm"],
|
||||
"mt5": ["layer_norm", "final_layer_norm"],
|
||||
"bart": ["self_attn_layer_norm", "encoder_attn_layer_norm", "final_layer_norm"],
|
||||
"gpt2": ["ln_1", "ln_2", "ln_f"],
|
||||
"blip-2": ["layernorm", "LayerNorm", "final_layer_norm", "self_attn_layer_norm"],
|
||||
"gptj": ["ln_1", "ln_f"],
|
||||
"falcon": ["input_layernorm", "post_attention_layernorm", "ln_f"],
|
||||
"mistral": ["input_layernorm", "post_attention_layernorm", "norm"],
|
||||
"phi": ["input_layernorm", "final_layernorm"],
|
||||
"gemma": ["input_layernorm", "post_attention_layernorm", "norm"],
|
||||
"gemma2": [
|
||||
"input_layernorm",
|
||||
"post_attention_layernorm",
|
||||
"pre_feedforward_layernorm",
|
||||
"post_feedforward_layernorm",
|
||||
"norm",
|
||||
],
|
||||
"gemma3_text": [
|
||||
"input_layernorm",
|
||||
"post_attention_layernorm",
|
||||
"pre_feedforward_layernorm",
|
||||
"post_feedforward_layernorm",
|
||||
"norm",
|
||||
],
|
||||
"qwen2": ["post_attention_layernorm"],
|
||||
"qwen3": ["post_attention_layernorm"],
|
||||
}
|
||||
|
||||
TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING = {
|
||||
"t5": ["k", "v", "wo"],
|
||||
@ -226,120 +257,6 @@ TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING = {
|
||||
"qwen3": ["q_proj", "v_proj"],
|
||||
}
|
||||
|
||||
TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING = {
|
||||
"t5": ["q", "v"],
|
||||
"mt5": ["q", "v"],
|
||||
"bart": ["q_proj", "v_proj"],
|
||||
"gpt2": ["c_attn"],
|
||||
"bloom": ["query_key_value"],
|
||||
"blip-2": ["q", "v", "q_proj", "v_proj"],
|
||||
"opt": ["q_proj", "v_proj"],
|
||||
"gptj": ["q_proj", "v_proj"],
|
||||
"gpt_neox": ["query_key_value"],
|
||||
"gpt_neo": ["q_proj", "v_proj"],
|
||||
"bert": ["query", "value"],
|
||||
"roberta": ["query", "value"],
|
||||
"xlm-roberta": ["query", "value"],
|
||||
"electra": ["query", "value"],
|
||||
"deberta-v2": ["query_proj", "value_proj"],
|
||||
"deberta": ["in_proj"],
|
||||
"layoutlm": ["query", "value"],
|
||||
"llama": ["q_proj", "v_proj"],
|
||||
"llama4": ["q_proj", "v_proj"],
|
||||
"chatglm": ["query_key_value"],
|
||||
"gpt_bigcode": ["c_attn"],
|
||||
"mpt": ["Wqkv"],
|
||||
"RefinedWebModel": ["query_key_value"],
|
||||
"RefinedWeb": ["query_key_value"],
|
||||
"falcon": ["query_key_value"],
|
||||
"btlm": ["c_proj", "c_attn"],
|
||||
"codegen": ["qkv_proj"],
|
||||
"mistral": ["q_proj", "v_proj"],
|
||||
"mixtral": ["q_proj", "v_proj"],
|
||||
"stablelm": ["q_proj", "v_proj"],
|
||||
"phi": ["q_proj", "v_proj"],
|
||||
"gemma": ["q_proj", "v_proj"],
|
||||
"gemma2": ["q_proj", "v_proj"],
|
||||
"gemma3_text": ["q_proj", "v_proj"],
|
||||
"qwen2": ["q_proj", "v_proj"],
|
||||
"qwen3": ["q_proj", "v_proj"],
|
||||
}
|
||||
|
||||
TRANSFORMERS_MODELS_TO_SHIRA_TARGET_MODULES_MAPPING = {
|
||||
"t5": ["q", "v"],
|
||||
"mt5": ["q", "v"],
|
||||
"bart": ["q_proj", "v_proj"],
|
||||
"gpt2": ["c_attn"],
|
||||
"bloom": ["query_key_value"],
|
||||
"blip-2": ["q", "v", "q_proj", "v_proj"],
|
||||
"opt": ["q_proj", "v_proj"],
|
||||
"gptj": ["q_proj", "v_proj"],
|
||||
"gpt_neox": ["query_key_value"],
|
||||
"gpt_neo": ["q_proj", "v_proj"],
|
||||
"bert": ["query", "value"],
|
||||
"roberta": ["query", "value"],
|
||||
"xlm-roberta": ["query", "value"],
|
||||
"electra": ["query", "value"],
|
||||
"deberta-v2": ["query_proj", "value_proj"],
|
||||
"deberta": ["in_proj"],
|
||||
"layoutlm": ["query", "value"],
|
||||
"llama": ["q_proj", "v_proj"],
|
||||
"chatglm": ["query_key_value"],
|
||||
"gpt_bigcode": ["c_attn"],
|
||||
"mpt": ["Wqkv"],
|
||||
"RefinedWebModel": ["query_key_value"],
|
||||
"RefinedWeb": ["query_key_value"],
|
||||
"falcon": ["query_key_value"],
|
||||
"btlm": ["c_proj", "c_attn"],
|
||||
"codegen": ["qkv_proj"],
|
||||
"mistral": ["q_proj", "v_proj"],
|
||||
"mixtral": ["q_proj", "v_proj"],
|
||||
"stablelm": ["q_proj", "v_proj"],
|
||||
"phi": ["q_proj", "v_proj"],
|
||||
"gemma": ["q_proj", "v_proj"],
|
||||
"gemma2": ["q_proj", "v_proj"],
|
||||
"gemma3_text": ["q_proj", "v_proj"],
|
||||
"qwen2": ["q_proj", "v_proj"],
|
||||
}
|
||||
|
||||
TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING = {
|
||||
"t5": ["q", "v"],
|
||||
"mt5": ["q", "v"],
|
||||
"bart": ["q_proj", "v_proj"],
|
||||
"gpt2": ["mlp.c_proj"],
|
||||
"bloom": ["query_key_value"],
|
||||
"blip-2": ["q", "v", "q_proj", "v_proj"],
|
||||
"opt": ["q_proj", "v_proj"],
|
||||
"gptj": ["q_proj", "v_proj"],
|
||||
"gpt_neox": ["query_key_value"],
|
||||
"gpt_neo": ["q_proj", "v_proj"],
|
||||
"bert": ["query", "value"],
|
||||
"roberta": ["query", "value"],
|
||||
"xlm-roberta": ["query", "value"],
|
||||
"electra": ["query", "value"],
|
||||
"deberta-v2": ["query_proj", "value_proj"],
|
||||
"deberta": ["in_proj"],
|
||||
"layoutlm": ["query", "value"],
|
||||
"llama": ["q_proj", "v_proj"],
|
||||
"llama4": ["q_proj", "v_proj"],
|
||||
"chatglm": ["query_key_value"],
|
||||
"gpt_bigcode": ["mlp.c_proj"],
|
||||
"mpt": ["Wqkv"],
|
||||
"RefinedWebModel": ["query_key_value"],
|
||||
"RefinedWeb": ["query_key_value"],
|
||||
"falcon": ["query_key_value"],
|
||||
"codegen": ["qkv_proj"],
|
||||
"mistral": ["q_proj", "v_proj"],
|
||||
"mixtral": ["q_proj", "v_proj"],
|
||||
"stablelm": ["q_proj", "v_proj"],
|
||||
"phi": ["q_proj", "v_proj", "fc1", "fc2"],
|
||||
"gemma": ["q_proj", "v_proj"],
|
||||
"gemma2": ["q_proj", "v_proj"],
|
||||
"gemma3_text": ["q_proj", "v_proj"],
|
||||
"qwen2": ["q_proj", "v_proj"],
|
||||
"qwen3": ["q_proj", "v_proj"],
|
||||
}
|
||||
|
||||
TRANSFORMERS_MODELS_TO_VBLORA_TARGET_MODULES_MAPPING = {
|
||||
"t5": ["q", "k", "v", "o", "wi", "wo"],
|
||||
"mt5": ["q", "k", "v", "o", "wi_0", "wi_1", "wo"],
|
||||
@ -364,47 +281,9 @@ TRANSFORMERS_MODELS_TO_VBLORA_TARGET_MODULES_MAPPING = {
|
||||
"qwen3": ["q_proj", "v_proj"],
|
||||
}
|
||||
|
||||
TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING = {
|
||||
"t5": ["q", "v"],
|
||||
"mt5": ["q", "v"],
|
||||
"bart": ["q_proj", "v_proj"],
|
||||
"gpt2": ["mlp.c_proj"],
|
||||
"bloom": ["query_key_value"],
|
||||
"blip-2": ["q", "v", "q_proj", "v_proj"],
|
||||
"opt": ["q_proj", "v_proj"],
|
||||
"gptj": ["q_proj", "v_proj"],
|
||||
"gpt_neox": ["query_key_value"],
|
||||
"gpt_neo": ["q_proj", "v_proj"],
|
||||
"bert": ["query", "value"],
|
||||
"roberta": ["query", "value"],
|
||||
"xlm-roberta": ["query", "value"],
|
||||
"electra": ["query", "value"],
|
||||
"deberta-v2": ["query_proj", "value_proj"],
|
||||
"deberta": ["in_proj"],
|
||||
"layoutlm": ["query", "value"],
|
||||
"llama": ["q_proj", "v_proj"],
|
||||
"llama4": ["q_proj", "v_proj"],
|
||||
"chatglm": ["query_key_value"],
|
||||
"gpt_bigcode": ["mlp.c_proj"],
|
||||
"mpt": ["Wqkv"],
|
||||
"RefinedWebModel": ["query_key_value"],
|
||||
"RefinedWeb": ["query_key_value"],
|
||||
"falcon": ["query_key_value"],
|
||||
"codegen": ["qkv_proj"],
|
||||
"mistral": ["q_proj", "v_proj"],
|
||||
"mixtral": ["q_proj", "v_proj"],
|
||||
"stablelm": ["q_proj", "v_proj"],
|
||||
"phi": ["q_proj", "v_proj", "fc1", "fc2"],
|
||||
"gemma": ["q_proj", "v_proj"],
|
||||
"gemma2": ["q_proj", "v_proj"],
|
||||
"gemma3_text": ["q_proj", "v_proj"],
|
||||
"qwen2": ["q_proj", "v_proj"],
|
||||
"qwen3": ["q_proj", "v_proj"],
|
||||
}
|
||||
|
||||
TRANSFORMERS_MODELS_TO_RANDLORA_TARGET_MODULES_MAPPING = (
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING # Leaving this for now but RandLoRA is flexible
|
||||
)
|
||||
##################
|
||||
# MISC CONSTANTS #
|
||||
##################
|
||||
|
||||
WEIGHTS_NAME = "adapter_model.bin"
|
||||
SAFETENSORS_WEIGHTS_NAME = "adapter_model.safetensors"
|
||||
|
@ -42,16 +42,23 @@ from .constants import (
|
||||
INCLUDE_LINEAR_LAYERS_SHORTHAND,
|
||||
SAFETENSORS_WEIGHTS_NAME,
|
||||
TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_BOFT_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_BONE_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_HRA_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_LNTUNING_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_LOHA_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_LOKR_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_MISS_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_OFT_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_POLY_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_RANDLORA_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_ROAD_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_SHIRA_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_VBLORA_TARGET_MODULES_MAPPING,
|
||||
TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING,
|
||||
@ -74,16 +81,23 @@ __all__ = [
|
||||
"INCLUDE_LINEAR_LAYERS_SHORTHAND",
|
||||
"SAFETENSORS_WEIGHTS_NAME",
|
||||
"TRANSFORMERS_MODELS_TO_ADALORA_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_BOFT_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_BONE_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_C3A_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_FOURIERFT_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_HRA_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_IA3_FEEDFORWARD_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_IA3_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_LNTUNING_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_LOHA_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_LOKR_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_MISS_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_OFT_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_POLY_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_PREFIX_TUNING_POSTPROCESS_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_RANDLORA_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_ROAD_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_SHIRA_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_VBLORA_TARGET_MODULES_MAPPING",
|
||||
"TRANSFORMERS_MODELS_TO_VERA_TARGET_MODULES_MAPPING",
|
||||
|
@ -58,7 +58,15 @@ def get_peft_model_state_dict(
|
||||
model, state_dict=None, adapter_name="default", unwrap_compiled=False, save_embedding_layers="auto"
|
||||
):
|
||||
"""
|
||||
Get the state dict of the Peft model.
|
||||
Get the state dict of the given adapter of the PEFT model.
|
||||
|
||||
This only includes the PEFT parameters, not the parameters of the base model. Thus the returned `state_dict` is
|
||||
generally small compared to the full model size. To retrieve the full `state_dict`, just call `model.state_dict()`.
|
||||
|
||||
Note that the adapter name is removed from the `state_dict`, as this is just an arbitrary name that can be changed
|
||||
when loading the adapter. So e.g. if the adapter name is `'default'` and the original key is
|
||||
`'model.q_proj.lora_A.default.weight'`, the returned key will be `'model.q_proj.lora_A.weight'`. Use this function
|
||||
in conjunction with [`set_peft_model_state_dict`] to take care of the adapter name when loading weights.
|
||||
|
||||
Args:
|
||||
model ([`PeftModel`]): The Peft model. When using torch.nn.DistributedDataParallel, DeepSpeed or FSDP,
|
||||
@ -73,6 +81,7 @@ def get_peft_model_state_dict(
|
||||
If `True`, save the embedding layers in addition to adapter weights. If `auto`, checks the common embedding
|
||||
layers `peft.utils.other.EMBEDDING_LAYER_NAMES` in config's `target_modules` when available. Based on it
|
||||
sets the boolean flag. This only works for 🤗 transformers models.
|
||||
|
||||
"""
|
||||
if unwrap_compiled:
|
||||
model = getattr(model, "_orig_mod", model)
|
||||
@ -373,9 +382,15 @@ def set_peft_model_state_dict(
|
||||
adapter_name="default",
|
||||
ignore_mismatched_sizes: bool = False,
|
||||
low_cpu_mem_usage: bool = False,
|
||||
):
|
||||
) -> None:
|
||||
"""
|
||||
Set the state dict of the Peft model.
|
||||
Set the state dict of the PEFT model.
|
||||
|
||||
Given a PEFT `state_dict` (as returned by [`get_peft_model_state_dict`]), insert the weights into the model. The
|
||||
model needs to have the PEFT adapters already in place (e.g. via [`inject_adapter_in_model`]).
|
||||
|
||||
Setting the adapter weights also takes care of re-inserting the adapter name. This name may be a different name
|
||||
than the one originally used to train the adapter.
|
||||
|
||||
Args:
|
||||
model ([`PeftModel`]):
|
||||
|
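
A hedged end-to-end sketch of how the two state dict helpers are meant to be combined with `inject_adapter_in_model` by an integration; `model`, `other_model`, `config`, and the adapter names are placeholders:

```py
from peft.functional import (
    get_peft_model_state_dict,
    inject_adapter_in_model,
    set_peft_model_state_dict,
)

# export: only the PEFT parameters are returned, and the adapter name is stripped from the keys,
# e.g. "model.q_proj.lora_A.default.weight" -> "model.q_proj.lora_A.weight"
peft_sd = get_peft_model_state_dict(model, adapter_name="default")

# import: the target model must already have an adapter injected; the (possibly different)
# adapter name is re-inserted into the keys when the weights are loaded
inject_adapter_in_model(config, other_model, adapter_name="restored")
set_peft_model_state_dict(other_model, peft_sd, adapter_name="restored")
```
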
@ -1482,6 +1482,26 @@ class TestLoraInitialization:
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
model.load_adapter(tmp_path, adapter_name="other")
|
||||
|
||||
def test_multiple_configs_with_bias_raises(self, tmp_path):
|
||||
# There cannot be more than one config with bias != "none".
|
||||
# Note: This would need to be tested for all PEFT methods that support the bias parameter, but as this method
|
||||
# comes from BaseTuner, it's fine to only check LoRA.
|
||||
model = self.get_model()
|
||||
config0 = LoraConfig(target_modules=["linear"], bias="all")
|
||||
model = get_peft_model(model, config0)
|
||||
|
||||
config1 = LoraConfig(target_modules=["linear"], bias="lora_only")
|
||||
msg = "supports only 1 adapter with bias. When using multiple adapters"
|
||||
with pytest.raises(ValueError, match=msg):
|
||||
model.add_adapter("other", config1)
|
||||
|
||||
# the invalid peft config was not added
|
||||
assert len(model.peft_config) == 1
|
||||
|
||||
# it's okay to add a config with bias="none" (the default)
|
||||
config2 = LoraConfig(target_modules=["linear"], bias="none")
|
||||
model.add_adapter("other", config2) # does not raise
|
||||
|
||||
|
||||
class TestLokrInitialization:
|
||||
torch_device = infer_device()
|
||||
|
Reference in New Issue
Block a user