mirror of https://github.com/huggingface/peft.git
make style
@@ -162,9 +162,9 @@ def rescale_adapter_scale(model, multiplier):
     transformers and diffusers models that have directly loaded LoRA adapters.
 
     For LoRA, applying this context manager with multiplier in [0, 1] is strictly equivalent to applying
-    [wise-ft](https://huggingface.co/papers/2109.01903) (see [#1940](https://github.com/huggingface/peft/issues/1940) for
-    details). It can improve the performance of the model if there is a distribution shift between the training data
-    used for fine-tuning, and the test data used during inference.
+    [wise-ft](https://huggingface.co/papers/2109.01903) (see [#1940](https://github.com/huggingface/peft/issues/1940)
+    for details). It can improve the performance of the model if there is a distribution shift between the training
+    data used for fine-tuning, and the test data used during inference.
 
     Warning: It has been reported that when using Apple's MPS backend for PyTorch, it is necessary to add a short sleep
     time after exiting the context before the scales are fully restored.
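For reference, a minimal sketch of using this context manager, assuming `rescale_adapter_scale` is importable from `peft.helpers`; the model name and prompt are placeholders:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import LoraConfig, get_peft_model
from peft.helpers import rescale_adapter_scale  # assumed import path

base = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
model = get_peft_model(base, LoraConfig(task_type="CAUSAL_LM"))

inputs = tokenizer("Hello", return_tensors="pt")
with torch.no_grad(), rescale_adapter_scale(model, multiplier=0.5):
    # Every LoRA scaling is multiplied by 0.5 inside the context; a
    # multiplier in [0, 1] gives the wise-ft interpolation described above.
    logits = model(**inputs).logits
# The original scales are restored on exit (on the MPS backend, a short
# sleep may be needed first, per the warning above).
```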
@@ -26,8 +26,8 @@ from .layer import Conv2d, Linear, LoKrLayer
 class LoKrModel(LycorisTuner):
     """
     Creates Low-Rank Kronecker Product model from a pretrained model. The original method is partially described in
-    https://huggingface.co/papers/2108.06098 and in https://huggingface.co/papers/2309.14859. Current implementation heavily borrows
-    from
+    https://huggingface.co/papers/2108.06098 and in https://huggingface.co/papers/2309.14859. Current implementation
+    heavily borrows from
     https://github.com/KohakuBlueleaf/LyCORIS/blob/eb460098187f752a5d66406d3affade6f0a07ece/lycoris/modules/lokr.py
 
     Args:
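As a usage note, `LoKrModel` is normally constructed indirectly through the high-level PEFT API rather than instantiated by hand. A minimal sketch, assuming a transformers base model; the model name and target module names are illustrative:

```python
from transformers import AutoModelForCausalLM
from peft import LoKrConfig, get_peft_model

base = AutoModelForCausalLM.from_pretrained("facebook/opt-125m")
config = LoKrConfig(
    r=8,                                  # rank of the low-rank Kronecker factors
    alpha=16,                             # scaling factor, analogous to lora_alpha
    target_modules=["q_proj", "v_proj"],  # attention projections in OPT
)
model = get_peft_model(base, config)      # wraps the base model in a LoKrModel
model.print_trainable_parameters()
```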
@@ -228,9 +228,9 @@ class LoraConfig(PeftConfig):
         will be updated during training. Be aware that this means that, even when disabling the adapters, the model
         will not produce the same output as the base model would have without adaptation.
     use_rslora (`bool`):
-        When set to True, uses [Rank-Stabilized LoRA](https://huggingface.co/papers/2312.03732) which
-        sets the adapter scaling factor to `lora_alpha/math.sqrt(r)`, since it was proven to work better.
-        Otherwise, it will use the original default value of `lora_alpha/r`.
+        When set to True, uses [Rank-Stabilized LoRA](https://huggingface.co/papers/2312.03732) which sets the
+        adapter scaling factor to `lora_alpha/math.sqrt(r)`, since it was proven to work better. Otherwise, it will
+        use the original default value of `lora_alpha/r`.
     modules_to_save (`List[str]`):
         List of modules apart from adapter layers to be set as trainable and saved in the final checkpoint.
     init_lora_weights (`bool` | `Literal["gaussian", "eva", "olora", "pissa", "pissa_niter_[number of iters]", "corda", "loftq"]`):
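The scaling difference is easy to see numerically; a small sketch with illustrative values:

```python
import math
from peft import LoraConfig

r, lora_alpha = 64, 16
print(lora_alpha / r)             # 0.25 -- default LoRA scaling
print(lora_alpha / math.sqrt(r))  # 2.0  -- rank-stabilized scaling

# Opting in via the config:
config = LoraConfig(r=r, lora_alpha=lora_alpha, use_rslora=True)
```

The rsLoRA paper argues that the default `lora_alpha/r` shrinks the adapter update too aggressively at higher ranks, which the `1/sqrt(r)` scaling corrects.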
@@ -240,19 +240,20 @@ class LoraConfig(PeftConfig):
         False leads to random initialization of LoRA A and B, meaning that LoRA is not a no-op before training;
         this setting is intended for debugging purposes. Passing 'gaussian' results in Gaussian initialization
         scaled by the LoRA rank for linear and layers. Pass `'loftq'` to use LoftQ initialization. Passing `'eva'`
-        results in a data-driven initialization of <a href='https://huggingface.co/papers/2410.07170' >Explained Variance
-        Adaptation</a>. EVA initializes LoRA based on the SVD of layer input activations and achieves SOTA
+        results in a data-driven initialization of <a href='https://huggingface.co/papers/2410.07170' >Explained
+        Variance Adaptation</a>. EVA initializes LoRA based on the SVD of layer input activations and achieves SOTA
         performance due to its ability to adapt to the finetuning data. Pass `'olora'` to use OLoRA initialization.
-        Passing `'pissa'` results in the initialization of <a href='https://huggingface.co/papers/2404.02948' >Principal
-        Singular values and Singular vectors Adaptation (PiSSA)</a>, which converges more rapidly than LoRA and
-        ultimately achieves superior performance. Moreover, PiSSA reduces the quantization error compared to QLoRA,
-        leading to further enhancements. Passing `'pissa_niter_[number of iters]'` initiates Fast-SVD-based PiSSA
-        initialization, where `[number of iters]` indicates the number of subspace iterations to perform FSVD, and
-        must be a nonnegative integer. When `[number of iters]` is set to 16, it can complete the initialization of
-        a 7B model within seconds, and the training effect is approximately equivalent to using SVD. Passing
-        `'corda'` results in the initialization of <a href='https://huggingface.co/papers/2406.05223' >Context-Oriented
-        Decomposition Adaptation</a>, which converges even more rapidly than PiSSA in Instruction-Previewed Mode,
-        and preserves world knowledge better than LoRA in Knowledge-Preserved Mode.
+        Passing `'pissa'` results in the initialization of <a href='https://huggingface.co/papers/2404.02948'
+        >Principal Singular values and Singular vectors Adaptation (PiSSA)</a>, which converges more rapidly than
+        LoRA and ultimately achieves superior performance. Moreover, PiSSA reduces the quantization error compared
+        to QLoRA, leading to further enhancements. Passing `'pissa_niter_[number of iters]'` initiates
+        Fast-SVD-based PiSSA initialization, where `[number of iters]` indicates the number of subspace iterations
+        to perform FSVD, and must be a nonnegative integer. When `[number of iters]` is set to 16, it can complete
+        the initialization of a 7B model within seconds, and the training effect is approximately equivalent to
+        using SVD. Passing `'corda'` results in the initialization of <a
+        href='https://huggingface.co/papers/2406.05223' >Context-Oriented Decomposition Adaptation</a>, which
+        converges even more rapidly than PiSSA in Instruction-Previewed Mode, and preserves world knowledge better
+        than LoRA in Knowledge-Preserved Mode.
     layers_to_transform (`Union[List[int], int]`):
         The layer indices to transform. If a list of ints is passed, it will apply the adapter to the layer indices
         that are specified in this list. If a single integer is passed, it will apply the transformations on the
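A sketch of selecting these schemes through `LoraConfig`. Note that, since EVA and CorDA are data-driven, they additionally require a preprocessing pass over calibration data that is omitted here:

```python
from peft import LoraConfig

gaussian_cfg = LoraConfig(init_lora_weights="gaussian")          # rank-scaled Gaussian init
pissa_cfg = LoraConfig(init_lora_weights="pissa")                # full-SVD PiSSA
fast_pissa_cfg = LoraConfig(init_lora_weights="pissa_niter_16")  # fast-SVD PiSSA, 16 subspace iters
olora_cfg = LoraConfig(init_lora_weights="olora")                # OLoRA
```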
@@ -44,9 +44,9 @@ class RandLoraConfig(PeftConfig):
         Whether to use sparse random bases as described in the RandLora paper. The bases are ternary sparse bases
         (only containing -1, 0 and 1) where the attribution probability is 1/6 for -1 and 1 and 2/3 for 0. These
         sparse matrices aim to be used for matmul free computation in the future, see
-        https://huggingface.co/papers/2406.02528v1. The current implementation is a proof of concept however where the
-        sparseness is not used to improve speed or memory usage. Using sparse matrices typically does not reduce
-        performance and can even help reduce overfitting. Defaults to `False`.
+        https://huggingface.co/papers/2406.02528v1. The current implementation is a proof of concept however where
+        the sparseness is not used to improve speed or memory usage. Using sparse matrices typically does not
+        reduce performance and can even help reduce overfitting. Defaults to `False`.
     very_sparse (`bool`):
         Whether to use highly sparse random bases as described in the RandLora paper. The very sparse bases are
         ternary sparse bases (only containing -1, 0 and 1) given a matrix with smallest dimension d, the
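To make the stated distribution concrete: each entry of a sparse basis is -1 or 1 with probability 1/6 each, and 0 with probability 2/3. An illustrative sampler (not the library's internal one), plus opting in via the config, assuming `RandLoraConfig` is exported at the package top level:

```python
import torch
from peft import RandLoraConfig

def sample_ternary_basis(rows: int, cols: int) -> torch.Tensor:
    # P(-1) = P(1) = 1/6, P(0) = 2/3, as described above.
    values = torch.tensor([-1.0, 0.0, 1.0])
    probs = torch.tensor([1 / 6, 2 / 3, 1 / 6])
    idx = torch.multinomial(probs, rows * cols, replacement=True)
    return values[idx].reshape(rows, cols)

basis = sample_ternary_basis(64, 64)
print((basis == 0).float().mean())  # ~0.67, i.e. about 2/3 zeros

config = RandLoraConfig(sparse=True)  # use sparse ternary bases
```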