Compare commits

...

3 Commits

Author SHA1 Message Date
8d8aa0b716 Method comparison: LoRA that targets MLP modules (#2845)
The "LoRA Without Regret" blog
post (https://thinkingmachines.ai/blog/lora/) mentions that targeting
the MLP part of the transformer is more effective than targeting the
attention modules. This experiment tests this by targeting:

["gate_proj", "up_proj", "down_proj"]

instead of the default layers (["q_proj", "v_proj"]).

I chose the rank so that the parameter count matches what we would get
when targeting the attention modules with rank 32; this works out to
rank 10. Testing on my machine, there is indeed a nice improvement in
the test score:

| metric               | target attention | target MLP |
|----------------------|------------------|------------|
| test accuracy        | 48.2%            | 51.3%      |
| # trainable params   | 9175040          | 9461760    |
| peak memory reserved | 20.74 GB         | 23.02 GB   |

There is, however, also a marked increase in memory usage, despite the
matched parameter count. Since the underlying operations differ, this may
not be surprising, but let's wait for the final verdict once this
experiment runs on our AWS instance.

Note: I also tested higher and lower ranks when targeting the MLP. The
rank had a negligible effect on memory usage, but raising it did improve the score:

| metric             | rank 8  | rank 10 | rank 12  | rank 32  |
|--------------------|---------|---------|----------|----------|
| test accuracy      | 50.3%   | 51.3%   | 52.2%    | 54.8%    |
| # trainable params | 7569408 | 9461760 | 11354112 | 30277632 |

In the end, I chose to add only the rank 10 experiment, so that the
number of trainable parameters stays matched.
2025-10-16 17:37:02 +02:00
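
As a sanity check on the rank choice described above, the trainable-parameter counts in both tables follow from the usual LoRA accounting: each adapted linear layer of shape `d_in × d_out` adds `r * (d_in + d_out)` parameters. A minimal sketch, assuming a Llama-3.2-3B-like geometry (the commit does not name the base model, but this geometry reproduces the numbers exactly):

```python
# Sketch: reproduce the trainable-parameter counts from the tables above.
# The model geometry is an assumption (Llama-3.2-3B-like: 28 layers, hidden size
# 3072, MLP intermediate size 8192, 8 KV heads of head dim 128).
n_layers, hidden, intermediate, kv_dim = 28, 3072, 8192, 8 * 128

def lora_params(rank, shapes):
    # Each adapted nn.Linear(d_in, d_out) gains lora_A (rank x d_in) and
    # lora_B (d_out x rank), i.e. rank * (d_in + d_out) parameters, per layer.
    return n_layers * sum(rank * (d_in + d_out) for d_in, d_out in shapes)

attn = [(hidden, hidden), (hidden, kv_dim)]  # q_proj, v_proj
mlp = [(hidden, intermediate), (hidden, intermediate), (intermediate, hidden)]  # gate, up, down

print(lora_params(32, attn))                           # 9175040 (attention, rank 32)
print([lora_params(r, mlp) for r in (8, 10, 12, 32)])  # [7569408, 9461760, 11354112, 30277632]
```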
Nir
182f4c945a ENH Add RWKV default target modules (#2810) 2025-10-16 16:30:51 +02:00
1a1f97263d CHORE Replace deprecated torch_dtype with dtype (#2837)
Note: Diffusers is left as is for now, might need an update later.
2025-10-16 14:59:09 +02:00
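
The change in this commit is a kwarg rename on `from_pretrained`; a minimal before/after sketch (the model id is just an example that also appears elsewhere in this diff):

```python
import torch
from transformers import AutoModelForCausalLM

# before (deprecated kwarg):
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", torch_dtype=torch.bfloat16)
# after:
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", dtype=torch.bfloat16)
```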
56 changed files with 241 additions and 215 deletions

View File

@ -263,11 +263,11 @@ model = AutoModelForCausalLM.from_pretrained(
quantization_config=bnb_config,
trust_remote_code=True,
attn_implementation="flash_attention_2" if args.use_flash_attn else "eager",
torch_dtype=quant_storage_dtype or torch.float32,
dtype=quant_storage_dtype or torch.float32,
)
```
Notice that `torch_dtype` for `AutoModelForCausalLM` is same as the `bnb_4bit_quant_storage` data type. That's it. Everything else is handled by Trainer and TRL.
Notice that `dtype` for `AutoModelForCausalLM` is same as the `bnb_4bit_quant_storage` data type. That's it. Everything else is handled by Trainer and TRL.
## Memory usage
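
To make the note above concrete, a minimal sketch (the model id is illustrative) of keeping the new `dtype` argument aligned with `bnb_4bit_quant_storage`:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

quant_storage_dtype = torch.bfloat16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_storage=quant_storage_dtype,
)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # illustrative
    quantization_config=bnb_config,
    dtype=quant_storage_dtype,   # keep in sync with bnb_4bit_quant_storage
)
```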

View File

@ -264,11 +264,11 @@ model = AutoModelForCausalLM.from_pretrained(
quantization_config=bnb_config,
trust_remote_code=True,
attn_implementation="flash_attention_2" if args.use_flash_attn else "eager",
torch_dtype=quant_storage_dtype or torch.float32,
dtype=quant_storage_dtype or torch.float32,
)
```
Notice that `torch_dtype` for `AutoModelForCausalLM` is same as the `bnb_4bit_quant_storage` data type. That's it. Everything else is handled by Trainer and TRL.
Notice that `dtype` for `AutoModelForCausalLM` is same as the `bnb_4bit_quant_storage` data type. That's it. Everything else is handled by Trainer and TRL.
## Memory usage

View File

@ -539,7 +539,7 @@ from peft import PeftModel
import torch
base_model = AutoModelForCausalLM.from_pretrained(
"mistralai/Mistral-7B-v0.1", torch_dtype=torch.float16, device_map="auto"
"mistralai/Mistral-7B-v0.1", dtype=torch.float16, device_map="auto"
)
```
@ -813,7 +813,7 @@ To encode general knowledge, GenKnowSub subtracts the average of the provided ge
> # Loading the model
> base_model = AutoModelForCausalLM.from_pretrained(
> "microsoft/Phi-3-mini-4k-instruct",
> torch_dtype=torch.bfloat16,
> dtype=torch.bfloat16,
> device_map="auto",
> quantization_config=bnb_config,
> )

View File

@ -144,7 +144,7 @@ The models support LoRA adapter tuning. To tune the quantized model you'll need
```py
quantized_model = AutoModelForCausalLM.from_pretrained(
"BlackSamorez/Mixtral-8x7b-AQLM-2Bit-1x16-hf-test-dispatch",
torch_dtype="auto", device_map="auto", low_cpu_mem_usage=True,
dtype="auto", device_map="auto", low_cpu_mem_usage=True,
)
peft_config = LoraConfig(...)

View File

@ -43,7 +43,7 @@ python -m pip install git+https://github.com/huggingface/peft
### ValueError: Attempting to unscale FP16 gradients
This error probably occurred because the model was loaded with `torch_dtype=torch.float16` and then used in an automatic mixed precision (AMP) context, e.g. by setting `fp16=True` in the [`~transformers.Trainer`] class from 🤗 Transformers. The reason is that when using AMP, trainable weights should never use fp16. To make this work without loading the whole model in fp32, add the following to your code:
This error probably occurred because the model was loaded with `dtype=torch.float16` and then used in an automatic mixed precision (AMP) context, e.g. by setting `fp16=True` in the [`~transformers.Trainer`] class from 🤗 Transformers. The reason is that when using AMP, trainable weights should never use fp16. To make this work without loading the whole model in fp32, add the following to your code:
```python
peft_model = get_peft_model(...)
@ -294,7 +294,7 @@ It is possible to get this information for non-PEFT models if they are using PEF
>>> path = "runwayml/stable-diffusion-v1-5"
>>> lora_id = "takuma104/lora-test-text-encoder-lora-target"
>>> pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
>>> pipe = StableDiffusionPipeline.from_pretrained(path, dtype=torch.float16)
>>> pipe.load_lora_weights(lora_id, adapter_name="adapter-1")
>>> pipe.load_lora_weights(lora_id, adapter_name="adapter-2")
>>> pipe.set_lora_device(["adapter-2"], "cuda")
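
As a hedged illustration of the AMP troubleshooting entry above (not the verbatim continuation of that doc's code block): keep the trainable adapter weights in float32 even when the base model was loaded in float16.

```python
from peft import get_peft_model

peft_model = get_peft_model(base_model, peft_config)  # base_model / peft_config assumed defined elsewhere
for param in peft_model.parameters():
    if param.requires_grad:
        param.data = param.data.float()  # upcast only the trainable (adapter) parameters
```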

View File

@ -303,7 +303,7 @@ if __name__ == "__main__":
# Loading the model
base_model = AutoModelForCausalLM.from_pretrained(
MODEL_NAME,
torch_dtype=torch.bfloat16,
dtype=torch.bfloat16,
device_map="auto",
quantization_config=bnb_config,
)

View File

@ -84,7 +84,7 @@ def main(args):
args.pretrained_model_name_or_path,
controlnet=controlnet,
unet=unet.model,
torch_dtype=torch.float32,
dtype=torch.float32,
requires_safety_checker=False,
).to(device)

View File

@ -139,16 +139,16 @@ def main(args):
cur_class_images = len(list(class_images_dir.iterdir()))
if cur_class_images < args.num_class_images:
torch_dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
if args.prior_generation_precision == "fp32":
torch_dtype = torch.float32
dtype = torch.float32
elif args.prior_generation_precision == "fp16":
torch_dtype = torch.float16
dtype = torch.float16
elif args.prior_generation_precision == "bf16":
torch_dtype = torch.bfloat16
dtype = torch.bfloat16
pipeline = DiffusionPipeline.from_pretrained(
args.pretrained_model_name_or_path,
torch_dtype=torch_dtype,
dtype=dtype,
safety_checker=None,
revision=args.revision,
)

View File

@ -11,7 +11,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token_id = tokenizer.eos_token_id
bone_config = BoneConfig(
@ -47,7 +47,7 @@ from peft import PeftModel
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto"
"meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto"
)
peft_model = PeftModel.from_pretrained(model, "bone-llama-2-7b")
```

View File

@ -57,7 +57,7 @@ elif script_args.base_model_name_or_path is not None:
print(f"No available pre-processed model, manually initialize a Bone using {script_args.base_model_name_or_path}.")
model = AutoModelForCausalLM.from_pretrained(
script_args.base_model_name_or_path,
torch_dtype=(
dtype=(
torch.float16
if script_args.bits == "fp16"
else (torch.bfloat16 if script_args.bits == "bf16" else torch.float32)

View File

@ -78,7 +78,7 @@ from peft.tuners.lora.corda import preprocess_corda
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token_id = tokenizer.eos_token_id
sampled_dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="train[:256]")
@ -236,7 +236,7 @@ from peft import PeftModel
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto"
"meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto"
)
# No SVD is performed during this step, and the base model remains unaltered.
peft_model = PeftModel.from_pretrained(model, "corda-llama-2-7b-lora")

View File

@ -229,7 +229,7 @@ def train():
print("Train in Full Finetuning mode")
model = transformers.AutoModelForCausalLM.from_pretrained(
script_args.model_name_or_path,
torch_dtype=torch.bfloat16,
dtype=torch.bfloat16,
device_map="auto",
)
trainable_params, all_param = get_nb_trainable_parameters(model)

View File

@ -49,7 +49,7 @@ def main(args):
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
model_id, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True
model_id, device_map="auto", dtype=torch.float16, trust_remote_code=True
)
# Collect data

View File

@ -553,7 +553,7 @@
"base_model = AutoModelForCausalLM.from_pretrained(\n",
" model_id,\n",
" cache_dir='.',\n",
" torch_dtype=torch.float16,\n",
" dtype=torch.float16,\n",
" device_map='auto'\n",
")\n",
"\n",

View File

@ -55,7 +55,7 @@ model = AutoModelForCausalLM.from_pretrained(
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
),
torch_dtype=torch.float16,
dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")

View File

@ -141,16 +141,16 @@ def main(args):
cur_class_images = len(list(class_images_dir.iterdir()))
if cur_class_images < args.num_class_images:
torch_dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
if args.prior_generation_precision == "fp32":
torch_dtype = torch.float32
dtype = torch.float32
elif args.prior_generation_precision == "fp16":
torch_dtype = torch.float16
dtype = torch.float16
elif args.prior_generation_precision == "bf16":
torch_dtype = torch.bfloat16
dtype = torch.bfloat16
pipeline = DiffusionPipeline.from_pretrained(
args.pretrained_model_name_or_path,
torch_dtype=torch_dtype,
dtype=dtype,
safety_checker=None,
revision=args.revision,
)

View File

@ -196,7 +196,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Overriding torch_dtype=None with `torch_dtype=torch.float16` due to requirements of `bitsandbytes` to enable model loading in mixed int8. Either pass torch_dtype=torch.float16 or don't pass this argument at all to remove this warning.\n"
"Overriding dtype=None with `dtype=torch.float16` due to requirements of `bitsandbytes` to enable model loading in mixed int8. Either pass dtype=torch.float16 or don't pass this argument at all to remove this warning.\n"
]
},
{
@ -1201,7 +1201,7 @@
"peft_model_id = \"ybelkada/flan-t5-large-financial-phrasebank-lora\"\n",
"config = PeftConfig.from_pretrained(peft_model_id)\n",
"\n",
"model = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path, torch_dtype=\"auto\", device_map=\"auto\")\n",
"model = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path, dtype=\"auto\", device_map=\"auto\")\n",
"tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)\n",
"\n",
"# Load the Lora model\n",

View File

@ -24,7 +24,7 @@ MODEL_ID = "LoftQ/Mistral-7B-v0.1-4bit-64rank"
base_model = AutoModelForCausalLM.from_pretrained(
MODEL_ID,
torch_dtype=torch.bfloat16, # you may change it with different models
dtype=torch.bfloat16, # you may change it with different models
quantization_config=BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.bfloat16, # bfloat16 is recommended
@ -81,7 +81,7 @@ MODEL_DIR = "model_zoo/loftq/Llama-2-7b-hf-4bit-16rank"
base_model = AutoModelForCausalLM.from_pretrained(
MODEL_DIR,
torch_dtype=torch.bfloat16,
dtype=torch.bfloat16,
quantization_config=BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.bfloat16,

View File

@ -454,7 +454,7 @@ def main():
load_in_4bit=True,
bnb_4bit_use_double_quant=False,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=config.torch_dtype,
bnb_4bit_compute_dtype=config.dtype,
),
)
else:

View File

@ -628,16 +628,16 @@ def main(args):
cur_class_images = len(list(class_images_dir.iterdir()))
if cur_class_images < args.num_class_images:
torch_dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
if args.prior_generation_precision == "fp32":
torch_dtype = torch.float32
dtype = torch.float32
elif args.prior_generation_precision == "fp16":
torch_dtype = torch.float16
dtype = torch.float16
elif args.prior_generation_precision == "bf16":
torch_dtype = torch.bfloat16
dtype = torch.bfloat16
pipeline = DiffusionPipeline.from_pretrained(
args.pretrained_model_name_or_path,
torch_dtype=torch_dtype,
dtype=dtype,
safety_checker=None,
revision=args.revision,
)

View File

@ -72,14 +72,14 @@ def train_model(
bnb_4bit_use_double_quant=False,
bnb_4bit_quant_type="nf4",
),
torch_dtype=compute_dtype,
dtype=compute_dtype,
device_map=device_map,
)
# setup for quantized training
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)
else:
model = AutoModelForCausalLM.from_pretrained(
base_model_name_or_path, torch_dtype=compute_dtype, device_map=device_map
base_model_name_or_path, dtype=compute_dtype, device_map=device_map
)
# LoRA config for the PEFT model

View File

@ -11,7 +11,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token_id = tokenizer.eos_token_id
@ -55,7 +55,7 @@ from peft import PeftModel
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto"
"meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto"
)
peft_model = PeftModel.from_pretrained(model, "miss-llama-2-7b")
```

View File

@ -59,7 +59,7 @@ elif script_args.base_model_name_or_path is not None:
print(f"No available pre-processed model, manually initialize a MiSS using {script_args.base_model_name_or_path}.")
model = AutoModelForCausalLM.from_pretrained(
script_args.base_model_name_or_path,
torch_dtype=(
dtype=(
torch.float16
if script_args.bits == "fp16"
else (torch.bfloat16 if script_args.bits == "bf16" else torch.float32)

View File

@ -689,7 +689,7 @@
}
],
"source": [
"model = model.to(dtype=torch.float16, device=device)\n",
"model = model.to(torch_dtype=torch.float16, device=device)\n",
"\n",
"pipe = DiffusionPipeline.from_pretrained(\n",
" model_id, unet=model, variant=\"fp16\", torch_dtype=torch.float16,\n",
@ -796,7 +796,7 @@
}
],
"source": [
"model = model.to(dtype=torch.float16, device=device)\n",
"model = model.to(torch_dtype=torch.float16, device=device)\n",
"\n",
"pipe = DiffusionPipeline.from_pretrained(\n",
" model_id, unet=model, variant=\"fp16\", torch_dtype=torch.float16,\n",
@ -868,7 +868,7 @@
"del pipe\n",
"\n",
"pipe = DiffusionPipeline.from_pretrained(\n",
" model_id, variant=\"fp16\", torch_dtype=torch.float16,\n",
" model_id, variant=\"fp16\", dtype=torch.float16,\n",
").to(device)\n",
"\n",
"prompt = \"toy_face of a hacker with a hoodie, pixel art\"\n",

View File

@ -638,16 +638,16 @@ def main(args):
cur_class_images = len(list(class_images_dir.iterdir()))
if cur_class_images < args.num_class_images:
torch_dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
if args.prior_generation_precision == "fp32":
torch_dtype = torch.float32
dtype = torch.float32
elif args.prior_generation_precision == "fp16":
torch_dtype = torch.float16
dtype = torch.float16
elif args.prior_generation_precision == "bf16":
torch_dtype = torch.bfloat16
dtype = torch.bfloat16
pipeline = DiffusionPipeline.from_pretrained(
args.pretrained_model_name_or_path,
torch_dtype=torch_dtype,
dtype=dtype,
safety_checker=None,
revision=args.revision,
)

View File

@ -11,7 +11,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", torch_dtype=torch.bfloat16, device_map="auto")
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
dataset = load_dataset("imdb", split="train[:1%]")
lora_config = LoraConfig(

View File

@ -44,7 +44,7 @@ def train(
lora_alpha: int = 16,
lora_dropout: float = 0.05,
lora_target_modules: list[str] = None,
torch_dtype: str = "float16",
dtype: str = "float16",
init_lora_weights="olora",
seed: Optional[int] = None,
):
@ -57,7 +57,7 @@ def train(
# Set seed
if seed is not None:
set_seed(seed)
model_kwargs = {"torch_dtype": getattr(torch, torch_dtype), "device_map": device_map}
model_kwargs = {"dtype": getattr(torch, dtype), "device_map": device_map}
if quantize:
model_kwargs["quantization_config"] = BitsAndBytesConfig(
load_in_4bit=True,
@ -170,7 +170,7 @@ if __name__ == "__main__":
parser.add_argument("--lora_alpha", type=int, default=16)
parser.add_argument("--lora_dropout", type=float, default=0.05)
parser.add_argument("--lora_target_modules", type=str, default=None)
parser.add_argument("--torch_dtype", type=str, default="float16")
parser.add_argument("--dtype", type=str, default="float16")
parser.add_argument("--init_lora_weights", type=str, default="olora")
parser.add_argument("--seed", type=int, default=None)
@ -193,7 +193,7 @@ if __name__ == "__main__":
lora_alpha=args.lora_alpha,
lora_dropout=args.lora_dropout,
lora_target_modules=args.lora_target_modules,
torch_dtype=args.torch_dtype,
dtype=args.dtype,
init_lora_weights=args.init_lora_weights,
seed=args.seed,
)

View File

@ -10,7 +10,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token_id = tokenizer.eos_token_id
lora_config = LoraConfig(
@ -43,7 +43,7 @@ from peft import PeftModel
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto"
"meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto"
)
# Performs SVD again to initialize the residual model and loads the state_dict of the fine-tuned PiSSA modules.
peft_model = PeftModel.from_pretrained(model, "pissa-llama-2-7b")
@ -83,7 +83,7 @@ from peft import PeftModel
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto"
"meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto"
)
# No SVD is performed during this step, and the base model remains unaltered.
peft_model = PeftModel.from_pretrained(model, "pissa-llama-2-7b-lora")

View File

@ -75,7 +75,7 @@ if script_args.bits in ["nf4", "fp4", "int8"]:
elif script_args.residual_model_name_or_path is not None:
res_model = AutoModelForCausalLM.from_pretrained(
script_args.residual_model_name_or_path,
torch_dtype=(
dtype=(
torch.float16
if script_args.bits == "fp16"
else (torch.bfloat16 if script_args.bits == "bf16" else torch.float32)
@ -94,7 +94,7 @@ elif script_args.base_model_name_or_path is not None:
)
model = AutoModelForCausalLM.from_pretrained(
script_args.base_model_name_or_path,
torch_dtype=(
dtype=(
torch.float16
if script_args.bits == "fp16"
else (torch.bfloat16 if script_args.bits == "bf16" else torch.float32)

View File

@ -39,7 +39,7 @@ print(script_args)
model = AutoModelForCausalLM.from_pretrained(
script_args.base_model_name_or_path,
torch_dtype=(
dtype=(
torch.float16
if script_args.bits == "fp16"
else (torch.bfloat16 if script_args.bits == "bf16" else torch.float32)

View File

@ -44,7 +44,7 @@ def load_or_quantize_model(
test_model = AutoModelForCausalLM.from_pretrained(
base_model,
device_map="auto",
torch_dtype=torch.float16,
dtype=torch.float16,
trust_remote_code=True, # Some GPTQ models might need this
)
@ -95,7 +95,7 @@ def load_or_quantize_model(
# Load and quantize the model
model = AutoModelForCausalLM.from_pretrained(
base_model, device_map="auto", quantization_config=gptq_config, torch_dtype=torch.float16
base_model, device_map="auto", quantization_config=gptq_config, dtype=torch.float16
)
# Save the quantized model to cache

View File

@ -52,7 +52,7 @@ def train_model(
device_type = device.type
device_module = getattr(torch, device_type, torch.cuda)
bf16_suppotrted = device_module.is_available() and device_module.is_bf16_supported()
torch_dtype = torch.bfloat16 if bf16_suppotrted else torch.float16
dtype = torch.bfloat16 if bf16_suppotrted else torch.float16
# QRandLora (quantized randlora): IF YOU WANNA QUANTIZE THE MODEL
if quantize:
@ -65,14 +65,14 @@ def train_model(
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
),
torch_dtype=torch_dtype,
dtype=dtype,
)
# setup for quantized training
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)
else:
model = AutoModelForCausalLM.from_pretrained(
base_model,
torch_dtype=torch_dtype,
dtype=dtype,
token=hf_token,
)
# LoRa config for the PEFT model

View File

@ -207,7 +207,7 @@
"source": [
"quant_config = TorchAoConfig(quant_type=\"int8_dynamic_activation_int8_weight\")\n",
"model = AutoModelForSequenceClassification.from_pretrained(\n",
" model_name_or_path, return_dict=True, device_map=0, torch_dtype=torch.bfloat16, quantization_config=quant_config\n",
" model_name_or_path, return_dict=True, device_map=0, dtype=torch.bfloat16, quantization_config=quant_config\n",
")"
]
},

View File

@ -207,7 +207,7 @@
"source": [
"quant_config = TorchAoConfig(quant_type=\"int8_weight_only\")\n",
"model = AutoModelForSequenceClassification.from_pretrained(\n",
" model_name_or_path, return_dict=True, device_map=0, torch_dtype=torch.bfloat16, quantization_config=quant_config\n",
" model_name_or_path, return_dict=True, device_map=0, dtype=torch.bfloat16, quantization_config=quant_config\n",
")"
]
},

View File

@ -129,14 +129,12 @@ def create_and_prepare_model(args, data_args, training_args):
load_in_4bit=args.use_4bit_quantization,
)
else:
torch_dtype = (
quant_storage_dtype if quant_storage_dtype and quant_storage_dtype.is_floating_point else torch.float32
)
dtype = quant_storage_dtype if quant_storage_dtype and quant_storage_dtype.is_floating_point else torch.float32
# Prepare model loading arguments
model_kwargs = {
"trust_remote_code": True,
"torch_dtype": torch_dtype,
"dtype": dtype,
}
if args.use_flash_attn:
if torch.xpu.is_available():

View File

@ -11,7 +11,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", torch_dtype=torch.bfloat16, device_map="auto")
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
dataset = load_dataset("imdb", split="train[:1%]")
shira_config = ShiraConfig(

View File

@ -42,7 +42,7 @@ def train(
device_map: str = "auto",
shira_r: int = 32,
shira_target_modules: list[str] = None,
torch_dtype: str = "float16",
dtype: str = "float16",
seed: Optional[int] = None,
use_custom_random_mask_function_with_custom_kwargs: Optional[bool] = False,
):
@ -55,7 +55,7 @@ def train(
# Set seed
if seed is not None:
set_seed(seed)
model_kwargs = {"torch_dtype": getattr(torch, torch_dtype), "device_map": device_map}
model_kwargs = {"dtype": getattr(torch, dtype), "device_map": device_map}
model = AutoModelForCausalLM.from_pretrained(base_model, **model_kwargs)
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
@ -191,7 +191,7 @@ if __name__ == "__main__":
parser.add_argument("--device_map", type=str, default="auto")
parser.add_argument("--shira_r", type=int, default=32)
parser.add_argument("--shira_target_modules", type=str, default=None)
parser.add_argument("--torch_dtype", type=str, default="float16")
parser.add_argument("--dtype", type=str, default="float16")
parser.add_argument("--seed", type=int, default=None)
parser.add_argument("--use_custom_random_mask_function_with_custom_kwargs", action="store_true")
@ -211,7 +211,7 @@ if __name__ == "__main__":
device_map=args.device_map,
shira_r=args.shira_r,
shira_target_modules=args.shira_target_modules,
torch_dtype=args.torch_dtype,
dtype=args.dtype,
seed=args.seed,
use_custom_random_mask_function_with_custom_kwargs=args.use_custom_random_mask_function_with_custom_kwargs,
)

View File

@ -802,16 +802,16 @@ def main(args):
cur_class_images = len(list(class_images_dir.iterdir()))
if cur_class_images < args.num_class_images:
torch_dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
if args.prior_generation_precision == "fp32":
torch_dtype = torch.float32
dtype = torch.float32
elif args.prior_generation_precision == "fp16":
torch_dtype = torch.float16
dtype = torch.float16
elif args.prior_generation_precision == "bf16":
torch_dtype = torch.bfloat16
dtype = torch.bfloat16
pipeline = DiffusionPipeline.from_pretrained(
args.pretrained_model_name_or_path,
torch_dtype=torch_dtype,
dtype=dtype,
safety_checker=None,
revision=args.revision,
)

View File

@ -44,7 +44,7 @@ def train(
waveft_scaling: float = 25.0,
waveft_wavelet_family: str = "db1",
waveft_use_idwt: bool = True,
torch_dtype: str = "float16",
dtype: str = "float16",
seed: Optional[int] = None,
):
# Set device_map to the right place when enabling DDP.
@ -56,7 +56,7 @@ def train(
# Set seed
if seed is not None:
set_seed(seed)
model_kwargs = {"dtype": getattr(torch, torch_dtype), "device_map": device_map}
model_kwargs = {"dtype": getattr(torch, dtype), "device_map": device_map}
model = AutoModelForCausalLM.from_pretrained(base_model, **model_kwargs)
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
@ -162,7 +162,7 @@ if __name__ == "__main__":
parser.add_argument("--waveft_scaling", type=float, default=25.0)
parser.add_argument("--waveft_wavelet_family", type=str, default="db1")
parser.add_argument("--waveft_use_idwt", action="store_true", default=True)
parser.add_argument("--torch_dtype", type=str, default="float16")
parser.add_argument("--dtype", type=str, default="float16")
parser.add_argument("--seed", type=int, default=None)
args = parser.parse_args()
@ -184,6 +184,6 @@ if __name__ == "__main__":
waveft_scaling=args.waveft_scaling,
waveft_wavelet_family=args.waveft_wavelet_family,
waveft_use_idwt=args.waveft_use_idwt,
torch_dtype=args.torch_dtype,
dtype=args.dtype,
seed=args.seed,
)

View File

@ -0,0 +1,30 @@
{
"alpha_pattern": {},
"auto_mapping": null,
"base_model_name_or_path": null,
"bias": "none",
"corda_config": null,
"eva_config": null,
"exclude_modules": null,
"fan_in_fan_out": false,
"inference_mode": false,
"init_lora_weights": true,
"layer_replication": null,
"layers_pattern": null,
"layers_to_transform": null,
"loftq_config": {},
"lora_alpha": 20,
"lora_bias": false,
"lora_dropout": 0.0,
"megatron_config": null,
"megatron_core": "megatron.core",
"modules_to_save": null,
"peft_type": "LORA",
"r": 10,
"rank_pattern": {},
"revision": null,
"target_modules": ["gate_proj", "up_proj", "down_proj"],
"task_type": "CAUSAL_LM",
"use_dora": false,
"use_rslora": false
}
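
For reference, a sketch of the `LoraConfig` that would produce an adapter config like the new file above (only the non-default fields spelled out):

```python
from peft import LoraConfig

config = LoraConfig(
    r=10,
    lora_alpha=20,
    lora_dropout=0.0,
    target_modules=["gate_proj", "up_proj", "down_proj"],  # MLP modules instead of q_proj/v_proj
    task_type="CAUSAL_LM",
)
```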

View File

@ -25,11 +25,12 @@ import random
import sys
import textwrap
import time
from contextlib import AbstractContextManager, nullcontext
from contextlib import nullcontext
from functools import partial
from typing import Any, Callable, Literal, Optional
import torch
from data import get_train_valid_test_datasets
from torch import nn
from torch.amp import GradScaler, autocast
from tqdm import tqdm
@ -53,9 +54,8 @@ from utils import (
validate_experiment_path,
)
from data import get_train_valid_test_datasets
from peft import AdaLoraConfig, PeftConfig
from peft.utils import infer_device, CONFIG_NAME
from peft.utils import CONFIG_NAME, infer_device
# # suppress all warnings

View File

@ -44,7 +44,8 @@ from transformers import (
import peft
from peft import PeftConfig, get_peft_model, prepare_model_for_kbit_training
from peft.optimizers import create_lorafa_optimizer, create_loraplus_optimizer
from peft.utils import infer_device, SAFETENSORS_WEIGHTS_NAME
from peft.utils import SAFETENSORS_WEIGHTS_NAME, infer_device
device = infer_device()

View File

@ -24,11 +24,12 @@ import subprocess
from dataclasses import asdict, dataclass, field
from enum import Enum
from typing import Any, Callable, Optional
from peft.utils import infer_device
import psutil
import torch
from peft.utils import infer_device
FILE_NAME_BENCHMARK_PARAMS = "benchmark_params.json"
FILE_NAME_DEFAULT_CONFIG = "default_benchmark_params.json"

View File

@ -100,6 +100,8 @@ TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING = {
"gemma3_text": ["q_proj", "v_proj"],
"qwen2": ["q_proj", "v_proj"],
"qwen3": ["q_proj", "v_proj"],
"rwkv": ["key", "value", "receptance", "output"],
"rwkv7": ["r_proj", "k_proj", "v_proj", "o_proj", "key", "value"],
}
# target module mappings that are identical to LORA
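
With these mapping entries in place, an RWKV model no longer needs explicit `target_modules`; a minimal sketch (the model id is illustrative, not taken from the commit):

```python
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

model = AutoModelForCausalLM.from_pretrained("RWKV/rwkv-4-169m-pile")  # illustrative rwkv checkpoint
peft_model = get_peft_model(model, LoraConfig(task_type="CAUSAL_LM"))  # target_modules resolved from the mapping
peft_model.print_trainable_parameters()
```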

View File

@ -60,7 +60,7 @@ def test_opt_350m_4bit():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@ -79,7 +79,7 @@ def test_opt_350m_8bit():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@ -102,7 +102,7 @@ def test_opt_350m_4bit_double_quant():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@ -125,7 +125,7 @@ def test_opt_350m_4bit_compute_dtype_float16():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@ -149,7 +149,7 @@ def test_opt_350m_4bit_quant_type_nf4():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@ -174,7 +174,7 @@ def test_opt_350m_4bit_quant_storage():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@ -196,7 +196,7 @@ def test_opt_350m_8bit_threshold():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@ -224,7 +224,7 @@ def test_flan_t5_4bit():
model = AutoModelForSeq2SeqLM.from_pretrained(
"google/flan-t5-base",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@ -245,7 +245,7 @@ def test_flan_t5_8bit():
model = AutoModelForSeq2SeqLM.from_pretrained(
"google/flan-t5-base",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)

View File

@ -623,7 +623,7 @@ class TestOpt4bitBnb(RegressionTester):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
return model

View File

@ -388,7 +388,7 @@ class TestAdaptionPrompt:
"""Test that AdaptionPrompt works when Llama using a half-precision model."""
input_ids = torch.LongTensor([[1, 1, 1], [2, 1, 2]]).to(self.torch_device)
original = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.bfloat16)
original = self.transformers_class.from_pretrained(model_id, dtype=torch.bfloat16)
adapted = get_peft_model(
original, AdaptionPromptConfig(adapter_layers=2, adapter_len=4, task_type="CAUSAL_LM")
)

View File

@ -343,7 +343,7 @@ class TestArrowRouting:
# Create base in fp16 (no manual assignment to .dtype)
with hub_online_once(model_id):
base = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
base = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16)
cfg = ArrowConfig(top_k=2)
@ -353,7 +353,7 @@ class TestArrowRouting:
task_specific_adapter_paths=ts_adapters,
arrow_config=cfg,
autocast_adapter_dtype=False,
torch_dtype=torch.float16,
dtype=torch.float16,
).eval()
X = {

View File

@ -52,14 +52,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForCausalLM)
# check if kwargs are passed correctly
model = AutoPeftModelForCausalLM.from_pretrained(model_id, torch_dtype=self.dtype)
model = AutoPeftModelForCausalLM.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForCausalLM)
assert model.base_model.lm_head.weight.dtype == self.dtype
adapter_name = "default"
is_trainable = False
# This should work
_ = AutoPeftModelForCausalLM.from_pretrained(model_id, adapter_name, is_trainable, torch_dtype=self.dtype)
_ = AutoPeftModelForCausalLM.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)
def test_peft_causal_lm_extended_vocab(self):
model_id = "peft-internal-testing/tiny-random-OPTForCausalLM-extended-vocab"
@ -67,14 +67,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForCausalLM)
# check if kwargs are passed correctly
model = AutoPeftModelForCausalLM.from_pretrained(model_id, torch_dtype=self.dtype)
model = AutoPeftModelForCausalLM.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForCausalLM)
assert model.base_model.lm_head.weight.dtype == self.dtype
adapter_name = "default"
is_trainable = False
# This should work
_ = AutoPeftModelForCausalLM.from_pretrained(model_id, adapter_name, is_trainable, torch_dtype=self.dtype)
_ = AutoPeftModelForCausalLM.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)
def test_peft_seq2seq_lm(self):
model_id = "peft-internal-testing/tiny_T5ForSeq2SeqLM-lora"
@ -88,14 +88,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForSeq2SeqLM)
# check if kwargs are passed correctly
model = AutoPeftModelForSeq2SeqLM.from_pretrained(model_id, torch_dtype=self.dtype)
model = AutoPeftModelForSeq2SeqLM.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForSeq2SeqLM)
assert model.base_model.lm_head.weight.dtype == self.dtype
adapter_name = "default"
is_trainable = False
# This should work
_ = AutoPeftModelForSeq2SeqLM.from_pretrained(model_id, adapter_name, is_trainable, torch_dtype=self.dtype)
_ = AutoPeftModelForSeq2SeqLM.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)
def test_peft_sequence_cls(self):
model_id = "peft-internal-testing/tiny_OPTForSequenceClassification-lora"
@ -109,7 +109,7 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForSequenceClassification)
# check if kwargs are passed correctly
model = AutoPeftModelForSequenceClassification.from_pretrained(model_id, torch_dtype=self.dtype)
model = AutoPeftModelForSequenceClassification.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForSequenceClassification)
assert model.score.original_module.weight.dtype == self.dtype
@ -117,7 +117,7 @@ class TestPeftAutoModel:
is_trainable = False
# This should work
_ = AutoPeftModelForSequenceClassification.from_pretrained(
model_id, adapter_name, is_trainable, torch_dtype=self.dtype
model_id, adapter_name, is_trainable, dtype=self.dtype
)
def test_peft_token_classification(self):
@ -132,16 +132,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForTokenClassification)
# check if kwargs are passed correctly
model = AutoPeftModelForTokenClassification.from_pretrained(model_id, torch_dtype=self.dtype)
model = AutoPeftModelForTokenClassification.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForTokenClassification)
assert model.base_model.classifier.original_module.weight.dtype == self.dtype
adapter_name = "default"
is_trainable = False
# This should work
_ = AutoPeftModelForTokenClassification.from_pretrained(
model_id, adapter_name, is_trainable, torch_dtype=self.dtype
)
_ = AutoPeftModelForTokenClassification.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)
def test_peft_question_answering(self):
model_id = "peft-internal-testing/tiny_OPTForQuestionAnswering-lora"
@ -155,16 +153,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForQuestionAnswering)
# check if kwargs are passed correctly
model = AutoPeftModelForQuestionAnswering.from_pretrained(model_id, torch_dtype=self.dtype)
model = AutoPeftModelForQuestionAnswering.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForQuestionAnswering)
assert model.base_model.qa_outputs.original_module.weight.dtype == self.dtype
adapter_name = "default"
is_trainable = False
# This should work
_ = AutoPeftModelForQuestionAnswering.from_pretrained(
model_id, adapter_name, is_trainable, torch_dtype=self.dtype
)
_ = AutoPeftModelForQuestionAnswering.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)
def test_peft_feature_extraction(self):
model_id = "peft-internal-testing/tiny_OPTForFeatureExtraction-lora"
@ -178,16 +174,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForFeatureExtraction)
# check if kwargs are passed correctly
model = AutoPeftModelForFeatureExtraction.from_pretrained(model_id, torch_dtype=self.dtype)
model = AutoPeftModelForFeatureExtraction.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForFeatureExtraction)
assert model.base_model.model.decoder.embed_tokens.weight.dtype == self.dtype
adapter_name = "default"
is_trainable = False
# This should work
_ = AutoPeftModelForFeatureExtraction.from_pretrained(
model_id, adapter_name, is_trainable, torch_dtype=self.dtype
)
_ = AutoPeftModelForFeatureExtraction.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)
def test_peft_whisper(self):
model_id = "peft-internal-testing/tiny_WhisperForConditionalGeneration-lora"
@ -201,14 +195,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModel)
# check if kwargs are passed correctly
model = AutoPeftModel.from_pretrained(model_id, torch_dtype=self.dtype)
model = AutoPeftModel.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModel)
assert model.base_model.model.model.encoder.embed_positions.weight.dtype == self.dtype
adapter_name = "default"
is_trainable = False
# This should work
_ = AutoPeftModel.from_pretrained(model_id, adapter_name, is_trainable, torch_dtype=self.dtype)
_ = AutoPeftModel.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)
def test_embedding_size_not_reduced_if_greater_vocab_size(self, tmp_path):
# See 2415

View File

@ -527,7 +527,7 @@ class PeftGPUCommonTests(unittest.TestCase):
quantization_config = GPTQConfig(bits=4, use_exllama=False)
kwargs = {
"pretrained_model_name_or_path": model_id,
"torch_dtype": torch.float16,
"dtype": torch.float16,
"device_map": "auto",
"quantization_config": quantization_config,
}
@ -850,7 +850,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = LlamaForCausalLM.from_pretrained(
"trl-internal-testing/tiny-random-LlamaForCausalLM",
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
)
@ -873,7 +873,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = LlamaForCausalLM.from_pretrained(
"trl-internal-testing/tiny-random-LlamaForCausalLM",
quantization_config=BitsAndBytesConfig(load_in_4bit=True),
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
)
@ -939,7 +939,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForSequenceClassification.from_pretrained(
model_id,
quantization_config=BitsAndBytesConfig(load_in_4bit=True),
torch_dtype=torch.float32,
dtype=torch.float32,
)
model = prepare_model_for_kbit_training(model)
@ -1080,7 +1080,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
# compare outputs in probability space, because logits can have outliers
@ -1122,7 +1122,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
# compare outputs in probability space, because logits can have outliers
@ -1165,7 +1165,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
# compare outputs in probability space, because logits can have outliers
@ -1206,7 +1206,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
# input with 9 samples
@ -1274,7 +1274,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
# input with 9 samples
@ -1359,7 +1359,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
torch.manual_seed(0)
@ -1372,7 +1372,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
torch.manual_seed(0)
config_dora = LoraConfig(r=8, init_lora_weights=False, use_dora=True)
@ -1394,7 +1394,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
torch.manual_seed(0)
@ -1407,7 +1407,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
torch_dtype=torch.float32,
dtype=torch.float32,
)
torch.manual_seed(0)
config_dora = LoraConfig(r=8, init_lora_weights=False, use_dora=True)
@ -1434,7 +1434,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"trl-internal-testing/tiny-random-LlamaForCausalLM",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
# compare outputs in probability space, because logits can have outliers
@ -1485,7 +1485,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
@ -1534,7 +1534,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
config = LoraConfig(
@ -1586,7 +1586,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
config = LoraConfig(
@ -1618,7 +1618,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
@ -1665,7 +1665,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"trl-internal-testing/tiny-random-LlamaForCausalLM",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
# compare outputs in probability space, because logits can have outliers
@ -1705,7 +1705,7 @@ class PeftGPUCommonTests(unittest.TestCase):
# check for different result with and without apply_GS
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
torch.manual_seed(0)
@ -1717,7 +1717,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
torch_dtype=torch.float32,
dtype=torch.float32,
)
torch.manual_seed(0)
config_hra_GS = HRAConfig(r=8, init_weights=True, apply_GS=True)
@ -1759,7 +1759,7 @@ class PeftGPUCommonTests(unittest.TestCase):
# when r is an odd number
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)

View File

@ -1576,57 +1576,57 @@ class MockTransformerWrapper:
"""
@classmethod
def from_pretrained(cls, model_id, torch_dtype=None):
def from_pretrained(cls, model_id, dtype=None):
# set the seed so that from_pretrained always returns the same model
torch.manual_seed(0)
if torch_dtype is None:
torch_dtype = torch.float32
if dtype is None:
dtype = torch.float32
if model_id == "MLP":
return MLP().to(torch_dtype)
return MLP().to(dtype)
if model_id == "EmbConv1D":
return ModelEmbConv1D().to(torch_dtype)
return ModelEmbConv1D().to(dtype)
if model_id == "Conv1d":
return ModelConv1D().to(torch_dtype)
return ModelConv1D().to(dtype)
if model_id == "Conv1dBigger":
return ModelConv1DBigger().to(torch_dtype)
return ModelConv1DBigger().to(dtype)
if model_id == "Conv2d":
return ModelConv2D().to(torch_dtype)
return ModelConv2D().to(dtype)
if model_id == "Conv2d1x1":
return ModelConv2D1x1().to(torch_dtype)
return ModelConv2D1x1().to(dtype)
if model_id == "Conv1dKernel1":
return ModelConv1DKernel1().to(torch_dtype)
return ModelConv1DKernel1().to(dtype)
if model_id == "Conv2dGroups":
return ModelConv2DGroups().to(torch_dtype)
return ModelConv2DGroups().to(dtype)
if model_id == "Conv2dGroups2":
return ModelConv2DGroups2().to(torch_dtype)
return ModelConv2DGroups2().to(dtype)
if model_id == "Conv3d":
return ModelConv3D().to(torch_dtype)
return ModelConv3D().to(dtype)
if model_id == "MLP_LayerNorm":
return MLP_LayerNorm().to(torch_dtype)
return MLP_LayerNorm().to(dtype)
if model_id == "MLP2":
return MLP2().to(torch_dtype)
return MLP2().to(dtype)
if model_id == "Conv2d2":
return ModelConv2D2().to(torch_dtype)
return ModelConv2D2().to(dtype)
if model_id == "MHA":
return ModelMha().to(torch_dtype)
return ModelMha().to(dtype)
if model_id == "MlpUsingParameters":
return MlpUsingParameters().to(torch_dtype)
return MlpUsingParameters().to(dtype)
raise ValueError(f"model_id {model_id} not implemented")
@ -1827,7 +1827,7 @@ class TestPeftCustomModel(PeftCommonTester):
pytest.skip(reason="MacOS does not support multiple ops in float16")
X = self.prepare_inputs_for_testing()
model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.float16).to(self.torch_device)
model = self.transformers_class.from_pretrained(model_id, dtype=torch.float16).to(self.torch_device)
model.dtype = torch.float16
config = config_cls(
base_model_name_or_path=model_id,
@ -1869,7 +1869,7 @@ class TestPeftCustomModel(PeftCommonTester):
pytest.skip(reason="MacOS does not support multiple ops in bfloat16")
X = self.prepare_inputs_for_testing()
model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(self.torch_device)
model = self.transformers_class.from_pretrained(model_id, dtype=torch.bfloat16).to(self.torch_device)
model.dtype = torch.bfloat16
config = config_cls(
base_model_name_or_path=model_id,
@ -1910,7 +1910,7 @@ class TestPeftCustomModel(PeftCommonTester):
pytest.skip(reason="MacOS does not support multiple ops in float16")
X = self.prepare_inputs_for_testing()
model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.float16).to(self.torch_device)
model = self.transformers_class.from_pretrained(model_id, dtype=torch.float16).to(self.torch_device)
model.dtype = torch.float16
config = config_cls(
base_model_name_or_path=model_id,
@ -1951,7 +1951,7 @@ class TestPeftCustomModel(PeftCommonTester):
pytest.skip(reason="MacOS does not support multiple ops in bfloat16")
X = self.prepare_inputs_for_testing()
model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(self.torch_device)
model = self.transformers_class.from_pretrained(model_id, dtype=torch.bfloat16).to(self.torch_device)
model.dtype = torch.bfloat16
config = config_cls(
base_model_name_or_path=model_id,

View File

@ -77,7 +77,7 @@ class PeftGPTQModelCommonTests(unittest.TestCase):
quantization_config = GPTQConfig(bits=4, use_exllama=False)
kwargs = {
"pretrained_model_name_or_path": model_id,
"torch_dtype": torch.float16,
"dtype": torch.float16,
"device_map": "auto",
"quantization_config": quantization_config,
}
@ -114,7 +114,7 @@ class PeftGPTQModelCommonTests(unittest.TestCase):
quantization_config = GPTQConfig(bits=4, use_exllama=False)
kwargs = {
"pretrained_model_name_or_path": model_id,
"torch_dtype": torch.float16,
"dtype": torch.float16,
"device_map": "auto",
"quantization_config": quantization_config,
}
@ -179,7 +179,7 @@ class PeftGPTQModelTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -232,7 +232,7 @@ class PeftGPTQModelTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -284,7 +284,7 @@ class PeftGPTQModelTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -353,7 +353,7 @@ class PeftGPTQModelTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -416,7 +416,7 @@ class PeftGPTQModelTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -478,7 +478,7 @@ class PeftGPTQModelTests(unittest.TestCase):
# default adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -489,7 +489,7 @@ class PeftGPTQModelTests(unittest.TestCase):
# other adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -514,7 +514,7 @@ class PeftGPTQModelTests(unittest.TestCase):
# default adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -525,7 +525,7 @@ class PeftGPTQModelTests(unittest.TestCase):
# other adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)

View File

@ -2075,7 +2075,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -2128,7 +2128,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -2201,7 +2201,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -2279,7 +2279,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map=device_map,
quantization_config=self.quantization_config,
)
@ -2344,7 +2344,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
# default adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -2355,7 +2355,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
# other adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -2918,7 +2918,7 @@ class TestLoftQ:
clear_device_cache(garbage_collection=True)
# now load quantized model and apply LoftQ-initialized weights on top
base_model = self.get_base_model(tmp_path / "base_model", device=device, **kwargs, torch_dtype=torch.float32)
base_model = self.get_base_model(tmp_path / "base_model", device=device, **kwargs, dtype=torch.float32)
loftq_model = PeftModel.from_pretrained(base_model, tmp_path / "loftq_model", is_trainable=True)
# TODO sanity check: model is quantized
@ -3226,7 +3226,7 @@ class MixedPrecisionTests(unittest.TestCase):
# which should not use fp16.
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
)
model = get_peft_model(model, self.config, autocast_adapter_dtype=False)
@ -3250,7 +3250,7 @@ class MixedPrecisionTests(unittest.TestCase):
# No exception should be raised.
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
)
model = get_peft_model(model, self.config, autocast_adapter_dtype=True)
@ -3272,7 +3272,7 @@ class MixedPrecisionTests(unittest.TestCase):
# Same test as above but containing the fix to make it work
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
)
model = get_peft_model(model, self.config, autocast_adapter_dtype=False)
@ -3284,7 +3284,7 @@ class MixedPrecisionTests(unittest.TestCase):
dtype_counts_before = Counter(p.dtype for p in model.parameters())
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
)
model = get_peft_model(model, self.config, autocast_adapter_dtype=True)
@ -3309,13 +3309,13 @@ class MixedPrecisionTests(unittest.TestCase):
# Same as previous tests, but loading the adapter with PeftModel.from_pretrained instead
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
)
model = get_peft_model(model, self.config, autocast_adapter_dtype=False)
with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir)
model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, torch_dtype=torch.float16)
model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, dtype=torch.float16)
model = PeftModel.from_pretrained(model, tmp_dir, autocast_adapter_dtype=False, is_trainable=True)
trainer = Trainer(
@ -3336,7 +3336,7 @@ class MixedPrecisionTests(unittest.TestCase):
# Same as previous tests, but loading the adapter with PeftModel.from_pretrained instead
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
)
# Below, we purposefully set autocast_adapter_dtype=False so that the saved adapter uses float16. We still want
# the loaded adapter to use float32 when we load it with autocast_adapter_dtype=True.
@ -3349,7 +3349,7 @@ class MixedPrecisionTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir)
model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, torch_dtype=torch.float16)
model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, dtype=torch.float16)
model = PeftModel.from_pretrained(model, tmp_dir, autocast_adapter_dtype=True, is_trainable=True)
# sanity check: this should NOT have float16 adapter weights:
assert (
@ -3376,7 +3376,7 @@ class MixedPrecisionTests(unittest.TestCase):
# load_model(..., autocast_adapter_dtype=True) (the default).
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
)
# Below, we purposefully set autocast_adapter_dtype=False so that the saved adapter uses float16. We still want
# the loaded adapter to use float32 when we load it with autocast_adapter_dtype=True.
@ -3389,7 +3389,7 @@ class MixedPrecisionTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir)
model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, torch_dtype=torch.float16)
model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, dtype=torch.float16)
# the default adapter is now in float16
model = get_peft_model(model, self.config, autocast_adapter_dtype=False)
# sanity check: this should NOT have float16 adapter weights:
@ -3498,7 +3498,7 @@ class PeftAqlmGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
device_map="cuda",
torch_dtype="auto",
dtype="auto",
)
model = prepare_model_for_kbit_training(model)
@ -3584,7 +3584,7 @@ class PeftHqqGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
device_map=device,
torch_dtype=compute_dtype,
dtype=compute_dtype,
quantization_config=quant_config,
)
@ -3642,7 +3642,7 @@ class PeftHqqGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
device_map=device,
torch_dtype=compute_dtype,
dtype=compute_dtype,
)
config = LoraConfig(
target_modules=["q_proj", "v_proj"],
@ -3665,7 +3665,7 @@ class PeftHqqGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
device_map=device,
torch_dtype=compute_dtype,
dtype=compute_dtype,
quantization_config=quant_config,
)
torch.manual_seed(0)
@ -3698,7 +3698,7 @@ class PeftHqqGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
device_map=device,
torch_dtype=compute_dtype,
dtype=compute_dtype,
quantization_config=quant_config,
)
model = PeftModel.from_pretrained(model, tmp_dir)
@ -4264,7 +4264,7 @@ class PeftTorchaoGPUTests(unittest.TestCase):
self.causal_lm_model_id,
device_map=device_map,
quantization_config=quantization_config,
torch_dtype=torch.bfloat16,
dtype=torch.bfloat16,
)
assert set(model.hf_device_map.values()) == set(range(device_count))
@ -4345,7 +4345,7 @@ class PeftTorchaoGPUTests(unittest.TestCase):
self.causal_lm_model_id,
device_map=device_map,
quantization_config=quantization_config,
torch_dtype=torch.bfloat16,
dtype=torch.bfloat16,
)
assert set(model.hf_device_map.values()) == set(range(device_count))
@ -4589,7 +4589,7 @@ class TestFSDPWrap:
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=quant_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
# model = prepare_model_for_kbit_training(model)
config = LoraConfig(
@ -5345,7 +5345,7 @@ class TestArrowQuantized:
# Load quantized base model
base_model = AutoModelForCausalLM.from_pretrained(
model_id,
torch_dtype=torch.bfloat16,
dtype=torch.bfloat16,
device_map="auto",
quantization_config=bnb_config,
)
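
Several of the MixedPrecisionTests hunks above pair the renamed `dtype` argument with PEFT's `autocast_adapter_dtype` flag. A hedged sketch of that interaction, again with an illustrative model id:

```python
import torch
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-125m",  # illustrative stand-in
    dtype=torch.float16,
)
config = LoraConfig(target_modules=["q_proj", "v_proj"])
# autocast_adapter_dtype=True (the default) upcasts the LoRA weights to float32 even
# though the base model is float16; pass False to keep the adapter in float16.
model = get_peft_model(model, config, autocast_adapter_dtype=True)
print({p.dtype for n, p in model.named_parameters() if "lora_" in n})  # expect {torch.float32}
```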

View File

@ -91,9 +91,9 @@ class MockTransformerWrapper:
# set the seed so that from_pretrained always returns the same model
torch.manual_seed(0)
torch_dtype = torch.float32
dtype = torch.float32
return DummyLM().to(torch_dtype)
return DummyLM().to(dtype)
VARIANT_MAP = {

View File

@ -217,7 +217,7 @@ class TestMultiTaskPromptTuning:
input_ids = torch.LongTensor([[1, 1, 1], [2, 1, 2]]).to(self.torch_device)
task_ids = torch.tensor([1, 2]).to(self.torch_device)
original = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
original = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16)
mpt = get_peft_model(original, config)
mpt = mpt.to(self.torch_device)
_ = mpt.generate(input_ids=input_ids, task_ids=task_ids)
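
The multitask prompt tuning hunk above passes `task_ids` next to `input_ids` at generation time. A rough sketch of that call path, assuming an illustrative model id and config values; float32 is used here so the snippet also runs on hardware without bfloat16 support:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import MultitaskPromptTuningConfig, get_peft_model

model_id = "facebook/opt-125m"  # stand-in for the test's model
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float32)
config = MultitaskPromptTuningConfig(
    task_type="CAUSAL_LM",
    num_virtual_tokens=8,
    num_tasks=2,
)
mpt = get_peft_model(model, config)

tokenizer = AutoTokenizer.from_pretrained(model_id)
inputs = tokenizer("Hello world", return_tensors="pt")
task_ids = torch.tensor([0])  # one task id per sample in the batch
_ = mpt.generate(input_ids=inputs["input_ids"], task_ids=task_ids, max_new_tokens=5)
```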

View File

@ -603,7 +603,7 @@ class PeftCommonTester:
self.skipTest("PyTorch 2.1 not supported for Half of addmm_impl_cpu_ ")
with hub_online_once(model_id):
model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.float16)
model = self.transformers_class.from_pretrained(model_id, dtype=torch.float16)
config = config_cls(
base_model_name_or_path=model_id,
**config_kwargs,
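
The surrounding helper hunks gate float16 and bfloat16 runs on what the host PyTorch build and device actually support. A hedged sketch of the same kind of guard when picking the `dtype` argument yourself (not code from the test suite; the model id is illustrative):

```python
import torch
from transformers import AutoModelForCausalLM

# Prefer bfloat16 where supported, fall back to float16 on other GPUs, and to float32 on CPU.
if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
    dtype = torch.bfloat16
elif torch.cuda.is_available():
    dtype = torch.float16
else:
    dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m", dtype=dtype)
```
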
@ -1142,7 +1142,7 @@ class PeftCommonTester:
return pytest.skip("BFloat16 is not supported on MPS")
with hub_online_once(model_id):
model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.bfloat16)
model = self.transformers_class.from_pretrained(model_id, dtype=torch.bfloat16)
config = config_cls(
base_model_name_or_path=model_id,
**config_kwargs,