Compare commits

...

3 Commits

Author SHA1 Message Date
8d8aa0b716 Method comparison: LoRA that targets MLP modules (#2845)
The "LoRA Without Regret" blog
post (https://thinkingmachines.ai/blog/lora/) mentions that targeting
the MLP part of the transformer is more effective than targeting the
attention modules. This experiment tests this by targeting:

["gate_proj", "up_proj", "down_proj"]

instead of the default layers (["q_proj", "v_proj"]).

I chose the rank so that the parameter count matches what we would get
when targeting the attention modules with rank 32; this works out to
rank 10. Testing on my machine, there is indeed a nice improvement in
the test score:

| metric               | target attention | target MLP |
|----------------------|------------------|------------|
| test accuracy        | 48.2%            | 51.3%      |
| # trainable params   | 9175040          | 9461760    |
| peak memory reserved | 20.74 GB         | 23.02 GB   |

There is, however, also a marked increase in memory usage, despite the
matched parameter count. Since the underlying operations differ, this may
not be surprising, but let's wait for the final verdict once this
experiment runs on our AWS instance.

Note: I also tested higher and lower ranks when targeting the MLP. The
rank had a negligible effect on memory usage, but raising it did improve the score:

| metric             | rank 8  | rank 10 | rank 12  | rank 32  |
|--------------------|---------|---------|----------|----------|
| test accuracy      | 50.3%   | 51.3%   | 52.2%    | 54.8%    |
| # trainable params | 7569408 | 9461760 | 11354112 | 30277632 |

In the end, I chose to add only the rank 10 experiment, so that the
number of trainable parameters stays matched.
2025-10-16 17:37:02 +02:00
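
As a sanity check on the rank choice described above, the trainable-parameter counts in both tables follow from the usual LoRA accounting: each adapted linear layer of shape `d_in × d_out` adds `r * (d_in + d_out)` parameters. A minimal sketch, assuming a Llama-3.2-3B-like geometry (the commit does not name the base model, but this geometry reproduces the numbers exactly):

```python
# Sketch: reproduce the trainable-parameter counts from the tables above.
# The model geometry is an assumption (Llama-3.2-3B-like: 28 layers, hidden size
# 3072, MLP intermediate size 8192, 8 KV heads of head dim 128).
n_layers, hidden, intermediate, kv_dim = 28, 3072, 8192, 8 * 128

def lora_params(rank, shapes):
    # Each adapted nn.Linear(d_in, d_out) gains lora_A (rank x d_in) and
    # lora_B (d_out x rank), i.e. rank * (d_in + d_out) parameters, per layer.
    return n_layers * sum(rank * (d_in + d_out) for d_in, d_out in shapes)

attn = [(hidden, hidden), (hidden, kv_dim)]  # q_proj, v_proj
mlp = [(hidden, intermediate), (hidden, intermediate), (intermediate, hidden)]  # gate, up, down

print(lora_params(32, attn))                           # 9175040 (attention, rank 32)
print([lora_params(r, mlp) for r in (8, 10, 12, 32)])  # [7569408, 9461760, 11354112, 30277632]
```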
Nir
182f4c945a ENH Add RWKV default target modules (#2810) 2025-10-16 16:30:51 +02:00
1a1f97263d CHORE Replace deprecated torch_dtype with dtype (#2837)
Note: Diffusers is left as is for now, might need an update later.
2025-10-16 14:59:09 +02:00
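
The change in this commit is a kwarg rename on `from_pretrained`; a minimal before/after sketch (the model id is just an example that also appears elsewhere in this diff):

```python
import torch
from transformers import AutoModelForCausalLM

# before (deprecated kwarg):
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", torch_dtype=torch.bfloat16)
# after:
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", dtype=torch.bfloat16)
```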
56 changed files with 241 additions and 215 deletions

View File

@ -263,11 +263,11 @@ model = AutoModelForCausalLM.from_pretrained(
quantization_config=bnb_config,
trust_remote_code=True,
attn_implementation="flash_attention_2" if args.use_flash_attn else "eager",
torch_dtype=quant_storage_dtype or torch.float32,
dtype=quant_storage_dtype or torch.float32,
)
```
Notice that `torch_dtype` for `AutoModelForCausalLM` is same as the `bnb_4bit_quant_storage` data type. That's it. Everything else is handled by Trainer and TRL.
Notice that `dtype` for `AutoModelForCausalLM` is same as the `bnb_4bit_quant_storage` data type. That's it. Everything else is handled by Trainer and TRL.
## Memory usage
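
To make the note above concrete, a minimal sketch (the model id is illustrative) of keeping the new `dtype` argument aligned with `bnb_4bit_quant_storage`:

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

quant_storage_dtype = torch.bfloat16
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_quant_storage=quant_storage_dtype,
)
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b-hf",  # illustrative
    quantization_config=bnb_config,
    dtype=quant_storage_dtype,   # keep in sync with bnb_4bit_quant_storage
)
```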

View File

@ -264,11 +264,11 @@ model = AutoModelForCausalLM.from_pretrained(
quantization_config=bnb_config,
trust_remote_code=True,
attn_implementation="flash_attention_2" if args.use_flash_attn else "eager",
torch_dtype=quant_storage_dtype or torch.float32,
dtype=quant_storage_dtype or torch.float32,
)
```
Notice that `torch_dtype` for `AutoModelForCausalLM` is same as the `bnb_4bit_quant_storage` data type. That's it. Everything else is handled by Trainer and TRL.
Notice that `dtype` for `AutoModelForCausalLM` is same as the `bnb_4bit_quant_storage` data type. That's it. Everything else is handled by Trainer and TRL.
## Memory usage

View File

@ -539,7 +539,7 @@ from peft import PeftModel
import torch
base_model = AutoModelForCausalLM.from_pretrained(
"mistralai/Mistral-7B-v0.1", torch_dtype=torch.float16, device_map="auto"
"mistralai/Mistral-7B-v0.1", dtype=torch.float16, device_map="auto"
)
```
@ -813,7 +813,7 @@ To encode general knowledge, GenKnowSub subtracts the average of the provided ge
> # Loading the model
> base_model = AutoModelForCausalLM.from_pretrained(
> "microsoft/Phi-3-mini-4k-instruct",
> torch_dtype=torch.bfloat16,
> dtype=torch.bfloat16,
> device_map="auto",
> quantization_config=bnb_config,
> )

View File

@ -144,7 +144,7 @@ The models support LoRA adapter tuning. To tune the quantized model you'll need
```py
quantized_model = AutoModelForCausalLM.from_pretrained(
"BlackSamorez/Mixtral-8x7b-AQLM-2Bit-1x16-hf-test-dispatch",
torch_dtype="auto", device_map="auto", low_cpu_mem_usage=True,
dtype="auto", device_map="auto", low_cpu_mem_usage=True,
)
peft_config = LoraConfig(...)

View File

@ -43,7 +43,7 @@ python -m pip install git+https://github.com/huggingface/peft
### ValueError: Attempting to unscale FP16 gradients
This error probably occurred because the model was loaded with `torch_dtype=torch.float16` and then used in an automatic mixed precision (AMP) context, e.g. by setting `fp16=True` in the [`~transformers.Trainer`] class from 🤗 Transformers. The reason is that when using AMP, trainable weights should never use fp16. To make this work without loading the whole model in fp32, add the following to your code:
This error probably occurred because the model was loaded with `dtype=torch.float16` and then used in an automatic mixed precision (AMP) context, e.g. by setting `fp16=True` in the [`~transformers.Trainer`] class from 🤗 Transformers. The reason is that when using AMP, trainable weights should never use fp16. To make this work without loading the whole model in fp32, add the following to your code:
```python
peft_model = get_peft_model(...)
@ -294,7 +294,7 @@ It is possible to get this information for non-PEFT models if they are using PEF
>>> path = "runwayml/stable-diffusion-v1-5"
>>> lora_id = "takuma104/lora-test-text-encoder-lora-target"
>>> pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
>>> pipe = StableDiffusionPipeline.from_pretrained(path, dtype=torch.float16)
>>> pipe.load_lora_weights(lora_id, adapter_name="adapter-1")
>>> pipe.load_lora_weights(lora_id, adapter_name="adapter-2")
>>> pipe.set_lora_device(["adapter-2"], "cuda")
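
As a hedged illustration of the AMP troubleshooting entry above (not the verbatim continuation of that doc's code block): keep the trainable adapter weights in float32 even when the base model was loaded in float16.

```python
from peft import get_peft_model

peft_model = get_peft_model(base_model, peft_config)  # base_model / peft_config assumed defined elsewhere
for param in peft_model.parameters():
    if param.requires_grad:
        param.data = param.data.float()  # upcast only the trainable (adapter) parameters
```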

View File

@ -303,7 +303,7 @@ if __name__ == "__main__":
# Loading the model
base_model = AutoModelForCausalLM.from_pretrained(
MODEL_NAME,
torch_dtype=torch.bfloat16,
dtype=torch.bfloat16,
device_map="auto",
quantization_config=bnb_config,
)

View File

@ -84,7 +84,7 @@ def main(args):
args.pretrained_model_name_or_path,
controlnet=controlnet,
unet=unet.model,
torch_dtype=torch.float32,
dtype=torch.float32,
requires_safety_checker=False,
).to(device)

View File

@ -139,16 +139,16 @@ def main(args):
cur_class_images = len(list(class_images_dir.iterdir()))
if cur_class_images < args.num_class_images:
torch_dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
if args.prior_generation_precision == "fp32":
torch_dtype = torch.float32
dtype = torch.float32
elif args.prior_generation_precision == "fp16":
torch_dtype = torch.float16
dtype = torch.float16
elif args.prior_generation_precision == "bf16":
torch_dtype = torch.bfloat16
dtype = torch.bfloat16
pipeline = DiffusionPipeline.from_pretrained(
args.pretrained_model_name_or_path,
torch_dtype=torch_dtype,
dtype=dtype,
safety_checker=None,
revision=args.revision,
)

View File

@ -11,7 +11,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token_id = tokenizer.eos_token_id
bone_config = BoneConfig(
@ -47,7 +47,7 @@ from peft import PeftModel
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto"
"meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto"
)
peft_model = PeftModel.from_pretrained(model, "bone-llama-2-7b")
```

View File

@ -57,7 +57,7 @@ elif script_args.base_model_name_or_path is not None:
print(f"No available pre-processed model, manually initialize a Bone using {script_args.base_model_name_or_path}.")
model = AutoModelForCausalLM.from_pretrained(
script_args.base_model_name_or_path,
torch_dtype=(
dtype=(
torch.float16
if script_args.bits == "fp16"
else (torch.bfloat16 if script_args.bits == "bf16" else torch.float32)

View File

@ -78,7 +78,7 @@ from peft.tuners.lora.corda import preprocess_corda
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token_id = tokenizer.eos_token_id
sampled_dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="train[:256]")
@ -236,7 +236,7 @@ from peft import PeftModel
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto"
"meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto"
)
# No SVD is performed during this step, and the base model remains unaltered.
peft_model = PeftModel.from_pretrained(model, "corda-llama-2-7b-lora")

View File

@ -229,7 +229,7 @@ def train():
print("Train in Full Finetuning mode")
model = transformers.AutoModelForCausalLM.from_pretrained(
script_args.model_name_or_path,
torch_dtype=torch.bfloat16,
dtype=torch.bfloat16,
device_map="auto",
)
trainable_params, all_param = get_nb_trainable_parameters(model)

View File

@ -49,7 +49,7 @@ def main(args):
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)
model = AutoModelForCausalLM.from_pretrained(
model_id, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True
model_id, device_map="auto", dtype=torch.float16, trust_remote_code=True
)
# Collect data

View File

@ -553,7 +553,7 @@
"base_model = AutoModelForCausalLM.from_pretrained(\n",
" model_id,\n",
" cache_dir='.',\n",
" torch_dtype=torch.float16,\n",
" dtype=torch.float16,\n",
" device_map='auto'\n",
")\n",
"\n",

View File

@ -55,7 +55,7 @@ model = AutoModelForCausalLM.from_pretrained(
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
),
torch_dtype=torch.float16,
dtype=torch.float16,
)
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")

View File

@ -141,16 +141,16 @@ def main(args):
cur_class_images = len(list(class_images_dir.iterdir()))
if cur_class_images < args.num_class_images:
torch_dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
if args.prior_generation_precision == "fp32":
torch_dtype = torch.float32
dtype = torch.float32
elif args.prior_generation_precision == "fp16":
torch_dtype = torch.float16
dtype = torch.float16
elif args.prior_generation_precision == "bf16":
torch_dtype = torch.bfloat16
dtype = torch.bfloat16
pipeline = DiffusionPipeline.from_pretrained(
args.pretrained_model_name_or_path,
torch_dtype=torch_dtype,
dtype=dtype,
safety_checker=None,
revision=args.revision,
)

View File

@ -196,7 +196,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"Overriding torch_dtype=None with `torch_dtype=torch.float16` due to requirements of `bitsandbytes` to enable model loading in mixed int8. Either pass torch_dtype=torch.float16 or don't pass this argument at all to remove this warning.\n"
"Overriding dtype=None with `dtype=torch.float16` due to requirements of `bitsandbytes` to enable model loading in mixed int8. Either pass dtype=torch.float16 or don't pass this argument at all to remove this warning.\n"
]
},
{
@ -1201,7 +1201,7 @@
"peft_model_id = \"ybelkada/flan-t5-large-financial-phrasebank-lora\"\n",
"config = PeftConfig.from_pretrained(peft_model_id)\n",
"\n",
"model = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path, torch_dtype=\"auto\", device_map=\"auto\")\n",
"model = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path, dtype=\"auto\", device_map=\"auto\")\n",
"tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)\n",
"\n",
"# Load the Lora model\n",

View File

@ -24,7 +24,7 @@ MODEL_ID = "LoftQ/Mistral-7B-v0.1-4bit-64rank"
base_model = AutoModelForCausalLM.from_pretrained(
MODEL_ID,
torch_dtype=torch.bfloat16, # you may change it with different models
dtype=torch.bfloat16, # you may change it with different models
quantization_config=BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.bfloat16, # bfloat16 is recommended
@ -81,7 +81,7 @@ MODEL_DIR = "model_zoo/loftq/Llama-2-7b-hf-4bit-16rank"
base_model = AutoModelForCausalLM.from_pretrained(
MODEL_DIR,
torch_dtype=torch.bfloat16,
dtype=torch.bfloat16,
quantization_config=BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.bfloat16,

View File

@ -454,7 +454,7 @@ def main():
load_in_4bit=True,
bnb_4bit_use_double_quant=False,
bnb_4bit_quant_type="nf4",
bnb_4bit_compute_dtype=config.torch_dtype,
bnb_4bit_compute_dtype=config.dtype,
),
)
else:

View File

@ -628,16 +628,16 @@ def main(args):
cur_class_images = len(list(class_images_dir.iterdir()))
if cur_class_images < args.num_class_images:
torch_dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
if args.prior_generation_precision == "fp32":
torch_dtype = torch.float32
dtype = torch.float32
elif args.prior_generation_precision == "fp16":
torch_dtype = torch.float16
dtype = torch.float16
elif args.prior_generation_precision == "bf16":
torch_dtype = torch.bfloat16
dtype = torch.bfloat16
pipeline = DiffusionPipeline.from_pretrained(
args.pretrained_model_name_or_path,
torch_dtype=torch_dtype,
dtype=dtype,
safety_checker=None,
revision=args.revision,
)

View File

@ -72,14 +72,14 @@ def train_model(
bnb_4bit_use_double_quant=False,
bnb_4bit_quant_type="nf4",
),
torch_dtype=compute_dtype,
dtype=compute_dtype,
device_map=device_map,
)
# setup for quantized training
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)
else:
model = AutoModelForCausalLM.from_pretrained(
base_model_name_or_path, torch_dtype=compute_dtype, device_map=device_map
base_model_name_or_path, dtype=compute_dtype, device_map=device_map
)
# LoRA config for the PEFT model

View File

@ -11,7 +11,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token_id = tokenizer.eos_token_id
@ -55,7 +55,7 @@ from peft import PeftModel
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto"
"meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto"
)
peft_model = PeftModel.from_pretrained(model, "miss-llama-2-7b")
```

View File

@ -59,7 +59,7 @@ elif script_args.base_model_name_or_path is not None:
print(f"No available pre-processed model, manually initialize a MiSS using {script_args.base_model_name_or_path}.")
model = AutoModelForCausalLM.from_pretrained(
script_args.base_model_name_or_path,
torch_dtype=(
dtype=(
torch.float16
if script_args.bits == "fp16"
else (torch.bfloat16 if script_args.bits == "bf16" else torch.float32)

View File

@ -689,7 +689,7 @@
}
],
"source": [
"model = model.to(dtype=torch.float16, device=device)\n",
"model = model.to(torch_dtype=torch.float16, device=device)\n",
"\n",
"pipe = DiffusionPipeline.from_pretrained(\n",
" model_id, unet=model, variant=\"fp16\", torch_dtype=torch.float16,\n",
@ -796,7 +796,7 @@
}
],
"source": [
"model = model.to(dtype=torch.float16, device=device)\n",
"model = model.to(torch_dtype=torch.float16, device=device)\n",
"\n",
"pipe = DiffusionPipeline.from_pretrained(\n",
" model_id, unet=model, variant=\"fp16\", torch_dtype=torch.float16,\n",
@ -868,7 +868,7 @@
"del pipe\n",
"\n",
"pipe = DiffusionPipeline.from_pretrained(\n",
" model_id, variant=\"fp16\", torch_dtype=torch.float16,\n",
" model_id, variant=\"fp16\", dtype=torch.float16,\n",
").to(device)\n",
"\n",
"prompt = \"toy_face of a hacker with a hoodie, pixel art\"\n",

View File

@ -638,16 +638,16 @@ def main(args):
cur_class_images = len(list(class_images_dir.iterdir()))
if cur_class_images < args.num_class_images:
torch_dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
if args.prior_generation_precision == "fp32":
torch_dtype = torch.float32
dtype = torch.float32
elif args.prior_generation_precision == "fp16":
torch_dtype = torch.float16
dtype = torch.float16
elif args.prior_generation_precision == "bf16":
torch_dtype = torch.bfloat16
dtype = torch.bfloat16
pipeline = DiffusionPipeline.from_pretrained(
args.pretrained_model_name_or_path,
torch_dtype=torch_dtype,
dtype=dtype,
safety_checker=None,
revision=args.revision,
)

View File

@ -11,7 +11,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", torch_dtype=torch.bfloat16, device_map="auto")
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
dataset = load_dataset("imdb", split="train[:1%]")
lora_config = LoraConfig(

View File

@ -44,7 +44,7 @@ def train(
lora_alpha: int = 16,
lora_dropout: float = 0.05,
lora_target_modules: list[str] = None,
torch_dtype: str = "float16",
dtype: str = "float16",
init_lora_weights="olora",
seed: Optional[int] = None,
):
@ -57,7 +57,7 @@ def train(
# Set seed
if seed is not None:
set_seed(seed)
model_kwargs = {"torch_dtype": getattr(torch, torch_dtype), "device_map": device_map}
model_kwargs = {"dtype": getattr(torch, dtype), "device_map": device_map}
if quantize:
model_kwargs["quantization_config"] = BitsAndBytesConfig(
load_in_4bit=True,
@ -170,7 +170,7 @@ if __name__ == "__main__":
parser.add_argument("--lora_alpha", type=int, default=16)
parser.add_argument("--lora_dropout", type=float, default=0.05)
parser.add_argument("--lora_target_modules", type=str, default=None)
parser.add_argument("--torch_dtype", type=str, default="float16")
parser.add_argument("--dtype", type=str, default="float16")
parser.add_argument("--init_lora_weights", type=str, default="olora")
parser.add_argument("--seed", type=int, default=None)
@ -193,7 +193,7 @@ if __name__ == "__main__":
lora_alpha=args.lora_alpha,
lora_dropout=args.lora_dropout,
lora_target_modules=args.lora_target_modules,
torch_dtype=args.torch_dtype,
dtype=args.dtype,
init_lora_weights=args.init_lora_weights,
seed=args.seed,
)

View File

@ -10,7 +10,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto")
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token_id = tokenizer.eos_token_id
lora_config = LoraConfig(
@ -43,7 +43,7 @@ from peft import PeftModel
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto"
"meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto"
)
# Performs SVD again to initialize the residual model and loads the state_dict of the fine-tuned PiSSA modules.
peft_model = PeftModel.from_pretrained(model, "pissa-llama-2-7b")
@ -83,7 +83,7 @@ from peft import PeftModel
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained(
"meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto"
"meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto"
)
# No SVD is performed during this step, and the base model remains unaltered.
peft_model = PeftModel.from_pretrained(model, "pissa-llama-2-7b-lora")

View File

@ -75,7 +75,7 @@ if script_args.bits in ["nf4", "fp4", "int8"]:
elif script_args.residual_model_name_or_path is not None:
res_model = AutoModelForCausalLM.from_pretrained(
script_args.residual_model_name_or_path,
torch_dtype=(
dtype=(
torch.float16
if script_args.bits == "fp16"
else (torch.bfloat16 if script_args.bits == "bf16" else torch.float32)
@ -94,7 +94,7 @@ elif script_args.base_model_name_or_path is not None:
)
model = AutoModelForCausalLM.from_pretrained(
script_args.base_model_name_or_path,
torch_dtype=(
dtype=(
torch.float16
if script_args.bits == "fp16"
else (torch.bfloat16 if script_args.bits == "bf16" else torch.float32)

View File

@ -39,7 +39,7 @@ print(script_args)
model = AutoModelForCausalLM.from_pretrained(
script_args.base_model_name_or_path,
torch_dtype=(
dtype=(
torch.float16
if script_args.bits == "fp16"
else (torch.bfloat16 if script_args.bits == "bf16" else torch.float32)

View File

@ -44,7 +44,7 @@ def load_or_quantize_model(
test_model = AutoModelForCausalLM.from_pretrained(
base_model,
device_map="auto",
torch_dtype=torch.float16,
dtype=torch.float16,
trust_remote_code=True, # Some GPTQ models might need this
)
@ -95,7 +95,7 @@ def load_or_quantize_model(
# Load and quantize the model
model = AutoModelForCausalLM.from_pretrained(
base_model, device_map="auto", quantization_config=gptq_config, torch_dtype=torch.float16
base_model, device_map="auto", quantization_config=gptq_config, dtype=torch.float16
)
# Save the quantized model to cache

View File

@ -52,7 +52,7 @@ def train_model(
device_type = device.type
device_module = getattr(torch, device_type, torch.cuda)
bf16_suppotrted = device_module.is_available() and device_module.is_bf16_supported()
torch_dtype = torch.bfloat16 if bf16_suppotrted else torch.float16
dtype = torch.bfloat16 if bf16_suppotrted else torch.float16
# QRandLora (quantized randlora): IF YOU WANNA QUANTIZE THE MODEL
if quantize:
@ -65,14 +65,14 @@ def train_model(
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
),
torch_dtype=torch_dtype,
dtype=dtype,
)
# setup for quantized training
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)
else:
model = AutoModelForCausalLM.from_pretrained(
base_model,
torch_dtype=torch_dtype,
dtype=dtype,
token=hf_token,
)
# LoRa config for the PEFT model

View File

@ -207,7 +207,7 @@
"source": [
"quant_config = TorchAoConfig(quant_type=\"int8_dynamic_activation_int8_weight\")\n",
"model = AutoModelForSequenceClassification.from_pretrained(\n",
" model_name_or_path, return_dict=True, device_map=0, torch_dtype=torch.bfloat16, quantization_config=quant_config\n",
" model_name_or_path, return_dict=True, device_map=0, dtype=torch.bfloat16, quantization_config=quant_config\n",
")"
]
},

View File

@ -207,7 +207,7 @@
"source": [
"quant_config = TorchAoConfig(quant_type=\"int8_weight_only\")\n",
"model = AutoModelForSequenceClassification.from_pretrained(\n",
" model_name_or_path, return_dict=True, device_map=0, torch_dtype=torch.bfloat16, quantization_config=quant_config\n",
" model_name_or_path, return_dict=True, device_map=0, dtype=torch.bfloat16, quantization_config=quant_config\n",
")"
]
},

View File

@ -129,14 +129,12 @@ def create_and_prepare_model(args, data_args, training_args):
load_in_4bit=args.use_4bit_quantization,
)
else:
torch_dtype = (
quant_storage_dtype if quant_storage_dtype and quant_storage_dtype.is_floating_point else torch.float32
)
dtype = quant_storage_dtype if quant_storage_dtype and quant_storage_dtype.is_floating_point else torch.float32
# Prepare model loading arguments
model_kwargs = {
"trust_remote_code": True,
"torch_dtype": torch_dtype,
"dtype": dtype,
}
if args.use_flash_attn:
if torch.xpu.is_available():

View File

@ -11,7 +11,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", torch_dtype=torch.bfloat16, device_map="auto")
model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
dataset = load_dataset("imdb", split="train[:1%]")
shira_config = ShiraConfig(

View File

@ -42,7 +42,7 @@ def train(
device_map: str = "auto",
shira_r: int = 32,
shira_target_modules: list[str] = None,
torch_dtype: str = "float16",
dtype: str = "float16",
seed: Optional[int] = None,
use_custom_random_mask_function_with_custom_kwargs: Optional[bool] = False,
):
@ -55,7 +55,7 @@ def train(
# Set seed
if seed is not None:
set_seed(seed)
model_kwargs = {"torch_dtype": getattr(torch, torch_dtype), "device_map": device_map}
model_kwargs = {"dtype": getattr(torch, dtype), "device_map": device_map}
model = AutoModelForCausalLM.from_pretrained(base_model, **model_kwargs)
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
@ -191,7 +191,7 @@ if __name__ == "__main__":
parser.add_argument("--device_map", type=str, default="auto")
parser.add_argument("--shira_r", type=int, default=32)
parser.add_argument("--shira_target_modules", type=str, default=None)
parser.add_argument("--torch_dtype", type=str, default="float16")
parser.add_argument("--dtype", type=str, default="float16")
parser.add_argument("--seed", type=int, default=None)
parser.add_argument("--use_custom_random_mask_function_with_custom_kwargs", action="store_true")
@ -211,7 +211,7 @@ if __name__ == "__main__":
device_map=args.device_map,
shira_r=args.shira_r,
shira_target_modules=args.shira_target_modules,
torch_dtype=args.torch_dtype,
dtype=args.dtype,
seed=args.seed,
use_custom_random_mask_function_with_custom_kwargs=args.use_custom_random_mask_function_with_custom_kwargs,
)

View File

@ -802,16 +802,16 @@ def main(args):
cur_class_images = len(list(class_images_dir.iterdir()))
if cur_class_images < args.num_class_images:
torch_dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
if args.prior_generation_precision == "fp32":
torch_dtype = torch.float32
dtype = torch.float32
elif args.prior_generation_precision == "fp16":
torch_dtype = torch.float16
dtype = torch.float16
elif args.prior_generation_precision == "bf16":
torch_dtype = torch.bfloat16
dtype = torch.bfloat16
pipeline = DiffusionPipeline.from_pretrained(
args.pretrained_model_name_or_path,
torch_dtype=torch_dtype,
dtype=dtype,
safety_checker=None,
revision=args.revision,
)

View File

@ -44,7 +44,7 @@ def train(
waveft_scaling: float = 25.0,
waveft_wavelet_family: str = "db1",
waveft_use_idwt: bool = True,
torch_dtype: str = "float16",
dtype: str = "float16",
seed: Optional[int] = None,
):
# Set device_map to the right place when enabling DDP.
@ -56,7 +56,7 @@ def train(
# Set seed
if seed is not None:
set_seed(seed)
model_kwargs = {"dtype": getattr(torch, torch_dtype), "device_map": device_map}
model_kwargs = {"dtype": getattr(torch, dtype), "device_map": device_map}
model = AutoModelForCausalLM.from_pretrained(base_model, **model_kwargs)
tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)
@ -162,7 +162,7 @@ if __name__ == "__main__":
parser.add_argument("--waveft_scaling", type=float, default=25.0)
parser.add_argument("--waveft_wavelet_family", type=str, default="db1")
parser.add_argument("--waveft_use_idwt", action="store_true", default=True)
parser.add_argument("--torch_dtype", type=str, default="float16")
parser.add_argument("--dtype", type=str, default="float16")
parser.add_argument("--seed", type=int, default=None)
args = parser.parse_args()
@ -184,6 +184,6 @@ if __name__ == "__main__":
waveft_scaling=args.waveft_scaling,
waveft_wavelet_family=args.waveft_wavelet_family,
waveft_use_idwt=args.waveft_use_idwt,
torch_dtype=args.torch_dtype,
dtype=args.dtype,
seed=args.seed,
)

View File

@ -0,0 +1,30 @@
{
"alpha_pattern": {},
"auto_mapping": null,
"base_model_name_or_path": null,
"bias": "none",
"corda_config": null,
"eva_config": null,
"exclude_modules": null,
"fan_in_fan_out": false,
"inference_mode": false,
"init_lora_weights": true,
"layer_replication": null,
"layers_pattern": null,
"layers_to_transform": null,
"loftq_config": {},
"lora_alpha": 20,
"lora_bias": false,
"lora_dropout": 0.0,
"megatron_config": null,
"megatron_core": "megatron.core",
"modules_to_save": null,
"peft_type": "LORA",
"r": 10,
"rank_pattern": {},
"revision": null,
"target_modules": ["gate_proj", "up_proj", "down_proj"],
"task_type": "CAUSAL_LM",
"use_dora": false,
"use_rslora": false
}
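
For reference, a sketch of the `LoraConfig` that would produce an adapter config like the new file above (only the non-default fields spelled out):

```python
from peft import LoraConfig

config = LoraConfig(
    r=10,
    lora_alpha=20,
    lora_dropout=0.0,
    target_modules=["gate_proj", "up_proj", "down_proj"],  # MLP modules instead of q_proj/v_proj
    task_type="CAUSAL_LM",
)
```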

View File

@ -25,11 +25,12 @@ import random
import sys
import textwrap
import time
from contextlib import AbstractContextManager, nullcontext
from contextlib import nullcontext
from functools import partial
from typing import Any, Callable, Literal, Optional
import torch
from data import get_train_valid_test_datasets
from torch import nn
from torch.amp import GradScaler, autocast
from tqdm import tqdm
@ -53,9 +54,8 @@ from utils import (
validate_experiment_path,
)
from data import get_train_valid_test_datasets
from peft import AdaLoraConfig, PeftConfig
from peft.utils import infer_device, CONFIG_NAME
from peft.utils import CONFIG_NAME, infer_device
# # suppress all warnings

View File

@ -44,7 +44,8 @@ from transformers import (
import peft
from peft import PeftConfig, get_peft_model, prepare_model_for_kbit_training
from peft.optimizers import create_lorafa_optimizer, create_loraplus_optimizer
from peft.utils import infer_device, SAFETENSORS_WEIGHTS_NAME
from peft.utils import SAFETENSORS_WEIGHTS_NAME, infer_device
device = infer_device()

View File

@ -24,11 +24,12 @@ import subprocess
from dataclasses import asdict, dataclass, field
from enum import Enum
from typing import Any, Callable, Optional
from peft.utils import infer_device
import psutil
import torch
from peft.utils import infer_device
FILE_NAME_BENCHMARK_PARAMS = "benchmark_params.json"
FILE_NAME_DEFAULT_CONFIG = "default_benchmark_params.json"

View File

@ -100,6 +100,8 @@ TRANSFORMERS_MODELS_TO_LORA_TARGET_MODULES_MAPPING = {
"gemma3_text": ["q_proj", "v_proj"],
"qwen2": ["q_proj", "v_proj"],
"qwen3": ["q_proj", "v_proj"],
"rwkv": ["key", "value", "receptance", "output"],
"rwkv7": ["r_proj", "k_proj", "v_proj", "o_proj", "key", "value"],
}
# target module mappings that are identical to LORA
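
With these mapping entries in place, an RWKV model no longer needs explicit `target_modules`; a minimal sketch (the model id is illustrative, not taken from the commit):

```python
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

model = AutoModelForCausalLM.from_pretrained("RWKV/rwkv-4-169m-pile")  # illustrative rwkv checkpoint
peft_model = get_peft_model(model, LoraConfig(task_type="CAUSAL_LM"))  # target_modules resolved from the mapping
peft_model.print_trainable_parameters()
```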

View File

@ -60,7 +60,7 @@ def test_opt_350m_4bit():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@ -79,7 +79,7 @@ def test_opt_350m_8bit():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@ -102,7 +102,7 @@ def test_opt_350m_4bit_double_quant():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@ -125,7 +125,7 @@ def test_opt_350m_4bit_compute_dtype_float16():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@ -149,7 +149,7 @@ def test_opt_350m_4bit_quant_type_nf4():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@ -174,7 +174,7 @@ def test_opt_350m_4bit_quant_storage():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@ -196,7 +196,7 @@ def test_opt_350m_8bit_threshold():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@ -224,7 +224,7 @@ def test_flan_t5_4bit():
model = AutoModelForSeq2SeqLM.from_pretrained(
"google/flan-t5-base",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@ -245,7 +245,7 @@ def test_flan_t5_8bit():
model = AutoModelForSeq2SeqLM.from_pretrained(
"google/flan-t5-base",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)

View File

@ -623,7 +623,7 @@ class TestOpt4bitBnb(RegressionTester):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
return model

View File

@ -388,7 +388,7 @@ class TestAdaptionPrompt:
"""Test that AdaptionPrompt works when Llama using a half-precision model."""
input_ids = torch.LongTensor([[1, 1, 1], [2, 1, 2]]).to(self.torch_device)
original = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.bfloat16)
original = self.transformers_class.from_pretrained(model_id, dtype=torch.bfloat16)
adapted = get_peft_model(
original, AdaptionPromptConfig(adapter_layers=2, adapter_len=4, task_type="CAUSAL_LM")
)

View File

@ -343,7 +343,7 @@ class TestArrowRouting:
# Create base in fp16 (no manual assignment to .dtype)
with hub_online_once(model_id):
base = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
base = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16)
cfg = ArrowConfig(top_k=2)
@ -353,7 +353,7 @@ class TestArrowRouting:
task_specific_adapter_paths=ts_adapters,
arrow_config=cfg,
autocast_adapter_dtype=False,
torch_dtype=torch.float16,
dtype=torch.float16,
).eval()
X = {

View File

@ -52,14 +52,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForCausalLM)
# check if kwargs are passed correctly
model = AutoPeftModelForCausalLM.from_pretrained(model_id, torch_dtype=self.dtype)
model = AutoPeftModelForCausalLM.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForCausalLM)
assert model.base_model.lm_head.weight.dtype == self.dtype
adapter_name = "default"
is_trainable = False
# This should work
_ = AutoPeftModelForCausalLM.from_pretrained(model_id, adapter_name, is_trainable, torch_dtype=self.dtype)
_ = AutoPeftModelForCausalLM.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)
def test_peft_causal_lm_extended_vocab(self):
model_id = "peft-internal-testing/tiny-random-OPTForCausalLM-extended-vocab"
@ -67,14 +67,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForCausalLM)
# check if kwargs are passed correctly
model = AutoPeftModelForCausalLM.from_pretrained(model_id, torch_dtype=self.dtype)
model = AutoPeftModelForCausalLM.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForCausalLM)
assert model.base_model.lm_head.weight.dtype == self.dtype
adapter_name = "default"
is_trainable = False
# This should work
_ = AutoPeftModelForCausalLM.from_pretrained(model_id, adapter_name, is_trainable, torch_dtype=self.dtype)
_ = AutoPeftModelForCausalLM.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)
def test_peft_seq2seq_lm(self):
model_id = "peft-internal-testing/tiny_T5ForSeq2SeqLM-lora"
@ -88,14 +88,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForSeq2SeqLM)
# check if kwargs are passed correctly
model = AutoPeftModelForSeq2SeqLM.from_pretrained(model_id, torch_dtype=self.dtype)
model = AutoPeftModelForSeq2SeqLM.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForSeq2SeqLM)
assert model.base_model.lm_head.weight.dtype == self.dtype
adapter_name = "default"
is_trainable = False
# This should work
_ = AutoPeftModelForSeq2SeqLM.from_pretrained(model_id, adapter_name, is_trainable, torch_dtype=self.dtype)
_ = AutoPeftModelForSeq2SeqLM.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)
def test_peft_sequence_cls(self):
model_id = "peft-internal-testing/tiny_OPTForSequenceClassification-lora"
@ -109,7 +109,7 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForSequenceClassification)
# check if kwargs are passed correctly
model = AutoPeftModelForSequenceClassification.from_pretrained(model_id, torch_dtype=self.dtype)
model = AutoPeftModelForSequenceClassification.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForSequenceClassification)
assert model.score.original_module.weight.dtype == self.dtype
@ -117,7 +117,7 @@ class TestPeftAutoModel:
is_trainable = False
# This should work
_ = AutoPeftModelForSequenceClassification.from_pretrained(
model_id, adapter_name, is_trainable, torch_dtype=self.dtype
model_id, adapter_name, is_trainable, dtype=self.dtype
)
def test_peft_token_classification(self):
@ -132,16 +132,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForTokenClassification)
# check if kwargs are passed correctly
model = AutoPeftModelForTokenClassification.from_pretrained(model_id, torch_dtype=self.dtype)
model = AutoPeftModelForTokenClassification.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForTokenClassification)
assert model.base_model.classifier.original_module.weight.dtype == self.dtype
adapter_name = "default"
is_trainable = False
# This should work
_ = AutoPeftModelForTokenClassification.from_pretrained(
model_id, adapter_name, is_trainable, torch_dtype=self.dtype
)
_ = AutoPeftModelForTokenClassification.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)
def test_peft_question_answering(self):
model_id = "peft-internal-testing/tiny_OPTForQuestionAnswering-lora"
@ -155,16 +153,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForQuestionAnswering)
# check if kwargs are passed correctly
model = AutoPeftModelForQuestionAnswering.from_pretrained(model_id, torch_dtype=self.dtype)
model = AutoPeftModelForQuestionAnswering.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForQuestionAnswering)
assert model.base_model.qa_outputs.original_module.weight.dtype == self.dtype
adapter_name = "default"
is_trainable = False
# This should work
_ = AutoPeftModelForQuestionAnswering.from_pretrained(
model_id, adapter_name, is_trainable, torch_dtype=self.dtype
)
_ = AutoPeftModelForQuestionAnswering.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)
def test_peft_feature_extraction(self):
model_id = "peft-internal-testing/tiny_OPTForFeatureExtraction-lora"
@ -178,16 +174,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForFeatureExtraction)
# check if kwargs are passed correctly
model = AutoPeftModelForFeatureExtraction.from_pretrained(model_id, torch_dtype=self.dtype)
model = AutoPeftModelForFeatureExtraction.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForFeatureExtraction)
assert model.base_model.model.decoder.embed_tokens.weight.dtype == self.dtype
adapter_name = "default"
is_trainable = False
# This should work
_ = AutoPeftModelForFeatureExtraction.from_pretrained(
model_id, adapter_name, is_trainable, torch_dtype=self.dtype
)
_ = AutoPeftModelForFeatureExtraction.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)
def test_peft_whisper(self):
model_id = "peft-internal-testing/tiny_WhisperForConditionalGeneration-lora"
@ -201,14 +195,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModel)
# check if kwargs are passed correctly
model = AutoPeftModel.from_pretrained(model_id, torch_dtype=self.dtype)
model = AutoPeftModel.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModel)
assert model.base_model.model.model.encoder.embed_positions.weight.dtype == self.dtype
adapter_name = "default"
is_trainable = False
# This should work
_ = AutoPeftModel.from_pretrained(model_id, adapter_name, is_trainable, torch_dtype=self.dtype)
_ = AutoPeftModel.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)
def test_embedding_size_not_reduced_if_greater_vocab_size(self, tmp_path):
# See 2415

View File

@ -527,7 +527,7 @@ class PeftGPUCommonTests(unittest.TestCase):
quantization_config = GPTQConfig(bits=4, use_exllama=False)
kwargs = {
"pretrained_model_name_or_path": model_id,
"torch_dtype": torch.float16,
"dtype": torch.float16,
"device_map": "auto",
"quantization_config": quantization_config,
}
@ -850,7 +850,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = LlamaForCausalLM.from_pretrained(
"trl-internal-testing/tiny-random-LlamaForCausalLM",
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
)
@ -873,7 +873,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = LlamaForCausalLM.from_pretrained(
"trl-internal-testing/tiny-random-LlamaForCausalLM",
quantization_config=BitsAndBytesConfig(load_in_4bit=True),
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
)
@ -939,7 +939,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForSequenceClassification.from_pretrained(
model_id,
quantization_config=BitsAndBytesConfig(load_in_4bit=True),
torch_dtype=torch.float32,
dtype=torch.float32,
)
model = prepare_model_for_kbit_training(model)
@ -1080,7 +1080,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
# compare outputs in probability space, because logits can have outliers
@ -1122,7 +1122,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
# compare outputs in probability space, because logits can have outliers
@ -1165,7 +1165,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
# compare outputs in probability space, because logits can have outliers
@ -1206,7 +1206,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
# input with 9 samples
@ -1274,7 +1274,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
# input with 9 samples
@ -1359,7 +1359,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
torch.manual_seed(0)
@ -1372,7 +1372,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
torch.manual_seed(0)
config_dora = LoraConfig(r=8, init_lora_weights=False, use_dora=True)
@ -1394,7 +1394,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
torch.manual_seed(0)
@ -1407,7 +1407,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
torch_dtype=torch.float32,
dtype=torch.float32,
)
torch.manual_seed(0)
config_dora = LoraConfig(r=8, init_lora_weights=False, use_dora=True)
@ -1434,7 +1434,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"trl-internal-testing/tiny-random-LlamaForCausalLM",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
# compare outputs in probability space, because logits can have outliers
@ -1485,7 +1485,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
@ -1534,7 +1534,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
config = LoraConfig(
@ -1586,7 +1586,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
config = LoraConfig(
@ -1618,7 +1618,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
@ -1665,7 +1665,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"trl-internal-testing/tiny-random-LlamaForCausalLM",
quantization_config=bnb_config,
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
# compare outputs in probability space, because logits can have outliers
@ -1705,7 +1705,7 @@ class PeftGPUCommonTests(unittest.TestCase):
# check for different result with and without apply_GS
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
torch.manual_seed(0)
@ -1717,7 +1717,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
torch_dtype=torch.float32,
dtype=torch.float32,
)
torch.manual_seed(0)
config_hra_GS = HRAConfig(r=8, init_weights=True, apply_GS=True)
@ -1759,7 +1759,7 @@ class PeftGPUCommonTests(unittest.TestCase):
# when r is an odd number
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
torch_dtype=torch.float32,
dtype=torch.float32,
).eval()
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)

View File

@ -1576,57 +1576,57 @@ class MockTransformerWrapper:
"""
@classmethod
def from_pretrained(cls, model_id, torch_dtype=None):
def from_pretrained(cls, model_id, dtype=None):
# set the seed so that from_pretrained always returns the same model
torch.manual_seed(0)
if torch_dtype is None:
torch_dtype = torch.float32
if dtype is None:
dtype = torch.float32
if model_id == "MLP":
return MLP().to(torch_dtype)
return MLP().to(dtype)
if model_id == "EmbConv1D":
return ModelEmbConv1D().to(torch_dtype)
return ModelEmbConv1D().to(dtype)
if model_id == "Conv1d":
return ModelConv1D().to(torch_dtype)
return ModelConv1D().to(dtype)
if model_id == "Conv1dBigger":
return ModelConv1DBigger().to(torch_dtype)
return ModelConv1DBigger().to(dtype)
if model_id == "Conv2d":
return ModelConv2D().to(torch_dtype)
return ModelConv2D().to(dtype)
if model_id == "Conv2d1x1":
return ModelConv2D1x1().to(torch_dtype)
return ModelConv2D1x1().to(dtype)
if model_id == "Conv1dKernel1":
return ModelConv1DKernel1().to(torch_dtype)
return ModelConv1DKernel1().to(dtype)
if model_id == "Conv2dGroups":
return ModelConv2DGroups().to(torch_dtype)
return ModelConv2DGroups().to(dtype)
if model_id == "Conv2dGroups2":
return ModelConv2DGroups2().to(torch_dtype)
return ModelConv2DGroups2().to(dtype)
if model_id == "Conv3d":
return ModelConv3D().to(torch_dtype)
return ModelConv3D().to(dtype)
if model_id == "MLP_LayerNorm":
return MLP_LayerNorm().to(torch_dtype)
return MLP_LayerNorm().to(dtype)
if model_id == "MLP2":
return MLP2().to(torch_dtype)
return MLP2().to(dtype)
if model_id == "Conv2d2":
return ModelConv2D2().to(torch_dtype)
return ModelConv2D2().to(dtype)
if model_id == "MHA":
return ModelMha().to(torch_dtype)
return ModelMha().to(dtype)
if model_id == "MlpUsingParameters":
return MlpUsingParameters().to(torch_dtype)
return MlpUsingParameters().to(dtype)
raise ValueError(f"model_id {model_id} not implemented")
@ -1827,7 +1827,7 @@ class TestPeftCustomModel(PeftCommonTester):
pytest.skip(reason="MacOS does not support multiple ops in float16")
X = self.prepare_inputs_for_testing()
model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.float16).to(self.torch_device)
model = self.transformers_class.from_pretrained(model_id, dtype=torch.float16).to(self.torch_device)
model.dtype = torch.float16
config = config_cls(
base_model_name_or_path=model_id,
@ -1869,7 +1869,7 @@ class TestPeftCustomModel(PeftCommonTester):
pytest.skip(reason="MacOS does not support multiple ops in bfloat16")
X = self.prepare_inputs_for_testing()
model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(self.torch_device)
model = self.transformers_class.from_pretrained(model_id, dtype=torch.bfloat16).to(self.torch_device)
model.dtype = torch.bfloat16
config = config_cls(
base_model_name_or_path=model_id,
@ -1910,7 +1910,7 @@ class TestPeftCustomModel(PeftCommonTester):
pytest.skip(reason="MacOS does not support multiple ops in float16")
X = self.prepare_inputs_for_testing()
model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.float16).to(self.torch_device)
model = self.transformers_class.from_pretrained(model_id, dtype=torch.float16).to(self.torch_device)
model.dtype = torch.float16
config = config_cls(
base_model_name_or_path=model_id,
@ -1951,7 +1951,7 @@ class TestPeftCustomModel(PeftCommonTester):
pytest.skip(reason="MacOS does not support multiple ops in bfloat16")
X = self.prepare_inputs_for_testing()
model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(self.torch_device)
model = self.transformers_class.from_pretrained(model_id, dtype=torch.bfloat16).to(self.torch_device)
model.dtype = torch.bfloat16
config = config_cls(
base_model_name_or_path=model_id,

View File

@ -77,7 +77,7 @@ class PeftGPTQModelCommonTests(unittest.TestCase):
quantization_config = GPTQConfig(bits=4, use_exllama=False)
kwargs = {
"pretrained_model_name_or_path": model_id,
"torch_dtype": torch.float16,
"dtype": torch.float16,
"device_map": "auto",
"quantization_config": quantization_config,
}
@ -114,7 +114,7 @@ class PeftGPTQModelCommonTests(unittest.TestCase):
quantization_config = GPTQConfig(bits=4, use_exllama=False)
kwargs = {
"pretrained_model_name_or_path": model_id,
"torch_dtype": torch.float16,
"dtype": torch.float16,
"device_map": "auto",
"quantization_config": quantization_config,
}
@ -179,7 +179,7 @@ class PeftGPTQModelTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -232,7 +232,7 @@ class PeftGPTQModelTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -284,7 +284,7 @@ class PeftGPTQModelTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -353,7 +353,7 @@ class PeftGPTQModelTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -416,7 +416,7 @@ class PeftGPTQModelTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -478,7 +478,7 @@ class PeftGPTQModelTests(unittest.TestCase):
# default adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -489,7 +489,7 @@ class PeftGPTQModelTests(unittest.TestCase):
# other adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -514,7 +514,7 @@ class PeftGPTQModelTests(unittest.TestCase):
# default adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -525,7 +525,7 @@ class PeftGPTQModelTests(unittest.TestCase):
# other adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)

View File

@ -2075,7 +2075,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -2128,7 +2128,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -2201,7 +2201,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -2279,7 +2279,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map=device_map,
quantization_config=self.quantization_config,
)
@ -2344,7 +2344,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
# default adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -2355,7 +2355,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
# other adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)
@ -2918,7 +2918,7 @@ class TestLoftQ:
clear_device_cache(garbage_collection=True)
# now load quantized model and apply LoftQ-initialized weights on top
base_model = self.get_base_model(tmp_path / "base_model", device=device, **kwargs, torch_dtype=torch.float32)
base_model = self.get_base_model(tmp_path / "base_model", device=device, **kwargs, dtype=torch.float32)
loftq_model = PeftModel.from_pretrained(base_model, tmp_path / "loftq_model", is_trainable=True)
# TODO sanity check: model is quantized
@ -3226,7 +3226,7 @@ class MixedPrecisionTests(unittest.TestCase):
# which should not use fp16.
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
)
model = get_peft_model(model, self.config, autocast_adapter_dtype=False)
@ -3250,7 +3250,7 @@ class MixedPrecisionTests(unittest.TestCase):
# No exception should be raised.
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
)
model = get_peft_model(model, self.config, autocast_adapter_dtype=True)
@ -3272,7 +3272,7 @@ class MixedPrecisionTests(unittest.TestCase):
# Same test as above but containing the fix to make it work
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
)
model = get_peft_model(model, self.config, autocast_adapter_dtype=False)
@ -3284,7 +3284,7 @@ class MixedPrecisionTests(unittest.TestCase):
dtype_counts_before = Counter(p.dtype for p in model.parameters())
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
)
model = get_peft_model(model, self.config, autocast_adapter_dtype=True)
@ -3309,13 +3309,13 @@ class MixedPrecisionTests(unittest.TestCase):
# Same as previous tests, but loading the adapter with PeftModel.from_pretrained instead
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
)
model = get_peft_model(model, self.config, autocast_adapter_dtype=False)
with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir)
model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, torch_dtype=torch.float16)
model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, dtype=torch.float16)
model = PeftModel.from_pretrained(model, tmp_dir, autocast_adapter_dtype=False, is_trainable=True)
trainer = Trainer(
@ -3336,7 +3336,7 @@ class MixedPrecisionTests(unittest.TestCase):
# Same as previous tests, but loading the adapter with PeftModel.from_pretrained instead
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
)
# Below, we purposefully set autocast_adapter_dtype=False so that the saved adapter uses float16. We still want
# the loaded adapter to use float32 when we load it with autocast_adapter_dtype=True.
@ -3349,7 +3349,7 @@ class MixedPrecisionTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir)
model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, torch_dtype=torch.float16)
model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, dtype=torch.float16)
model = PeftModel.from_pretrained(model, tmp_dir, autocast_adapter_dtype=True, is_trainable=True)
# sanity check: this should NOT have float16 adapter weights:
assert (
@ -3376,7 +3376,7 @@ class MixedPrecisionTests(unittest.TestCase):
# load_model(..., autocast_adapter_dtype=True) (the default).
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
torch_dtype=torch.float16,
dtype=torch.float16,
)
# Below, we purposefully set autocast_adapter_dtype=False so that the saved adapter uses float16. We still want
# the loaded adapter to use float32 when we load it with autocast_adapter_dtype=True.
@ -3389,7 +3389,7 @@ class MixedPrecisionTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir)
model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, torch_dtype=torch.float16)
model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, dtype=torch.float16)
# the default adapter is now in float16
model = get_peft_model(model, self.config, autocast_adapter_dtype=False)
# sanity check: this should NOT have float16 adapter weights:
@ -3498,7 +3498,7 @@ class PeftAqlmGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
device_map="cuda",
torch_dtype="auto",
dtype="auto",
)
model = prepare_model_for_kbit_training(model)
@ -3584,7 +3584,7 @@ class PeftHqqGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
device_map=device,
torch_dtype=compute_dtype,
dtype=compute_dtype,
quantization_config=quant_config,
)
@ -3642,7 +3642,7 @@ class PeftHqqGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
device_map=device,
torch_dtype=compute_dtype,
dtype=compute_dtype,
)
config = LoraConfig(
target_modules=["q_proj", "v_proj"],
@ -3665,7 +3665,7 @@ class PeftHqqGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
device_map=device,
torch_dtype=compute_dtype,
dtype=compute_dtype,
quantization_config=quant_config,
)
torch.manual_seed(0)
@ -3698,7 +3698,7 @@ class PeftHqqGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
device_map=device,
torch_dtype=compute_dtype,
dtype=compute_dtype,
quantization_config=quant_config,
)
model = PeftModel.from_pretrained(model, tmp_dir)
@ -4264,7 +4264,7 @@ class PeftTorchaoGPUTests(unittest.TestCase):
self.causal_lm_model_id,
device_map=device_map,
quantization_config=quantization_config,
torch_dtype=torch.bfloat16,
dtype=torch.bfloat16,
)
assert set(model.hf_device_map.values()) == set(range(device_count))
@ -4345,7 +4345,7 @@ class PeftTorchaoGPUTests(unittest.TestCase):
self.causal_lm_model_id,
device_map=device_map,
quantization_config=quantization_config,
torch_dtype=torch.bfloat16,
dtype=torch.bfloat16,
)
assert set(model.hf_device_map.values()) == set(range(device_count))
@ -4589,7 +4589,7 @@ class TestFSDPWrap:
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=quant_config,
torch_dtype=torch.float32,
dtype=torch.float32,
)
# model = prepare_model_for_kbit_training(model)
config = LoraConfig(
@ -5345,7 +5345,7 @@ class TestArrowQuantized:
# Load quantized base model
base_model = AutoModelForCausalLM.from_pretrained(
model_id,
torch_dtype=torch.bfloat16,
dtype=torch.bfloat16,
device_map="auto",
quantization_config=bnb_config,
)
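
Several of the MixedPrecisionTests hunks above pair the renamed `dtype` argument with PEFT's `autocast_adapter_dtype` flag. A hedged sketch of that interaction, again with an illustrative model id:

```python
import torch
from transformers import AutoModelForCausalLM
from peft import LoraConfig, get_peft_model

model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-125m",  # illustrative stand-in
    dtype=torch.float16,
)
config = LoraConfig(target_modules=["q_proj", "v_proj"])
# autocast_adapter_dtype=True (the default) upcasts the LoRA weights to float32 even
# though the base model is float16; pass False to keep the adapter in float16.
model = get_peft_model(model, config, autocast_adapter_dtype=True)
print({p.dtype for n, p in model.named_parameters() if "lora_" in n})  # expect {torch.float32}
```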

View File

@ -91,9 +91,9 @@ class MockTransformerWrapper:
# set the seed so that from_pretrained always returns the same model
torch.manual_seed(0)
torch_dtype = torch.float32
dtype = torch.float32
return DummyLM().to(torch_dtype)
return DummyLM().to(dtype)
VARIANT_MAP = {

View File

@ -217,7 +217,7 @@ class TestMultiTaskPromptTuning:
input_ids = torch.LongTensor([[1, 1, 1], [2, 1, 2]]).to(self.torch_device)
task_ids = torch.tensor([1, 2]).to(self.torch_device)
original = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
original = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16)
mpt = get_peft_model(original, config)
mpt = mpt.to(self.torch_device)
_ = mpt.generate(input_ids=input_ids, task_ids=task_ids)
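
The multitask prompt tuning hunk above passes `task_ids` next to `input_ids` at generation time. A rough sketch of that call path, assuming an illustrative model id and config values; float32 is used here so the snippet also runs on hardware without bfloat16 support:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import MultitaskPromptTuningConfig, get_peft_model

model_id = "facebook/opt-125m"  # stand-in for the test's model
model = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float32)
config = MultitaskPromptTuningConfig(
    task_type="CAUSAL_LM",
    num_virtual_tokens=8,
    num_tasks=2,
)
mpt = get_peft_model(model, config)

tokenizer = AutoTokenizer.from_pretrained(model_id)
inputs = tokenizer("Hello world", return_tensors="pt")
task_ids = torch.tensor([0])  # one task id per sample in the batch
_ = mpt.generate(input_ids=inputs["input_ids"], task_ids=task_ids, max_new_tokens=5)
```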

View File

@ -603,7 +603,7 @@ class PeftCommonTester:
self.skipTest("PyTorch 2.1 not supported for Half of addmm_impl_cpu_ ")
with hub_online_once(model_id):
model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.float16)
model = self.transformers_class.from_pretrained(model_id, dtype=torch.float16)
config = config_cls(
base_model_name_or_path=model_id,
**config_kwargs,
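
The surrounding helper hunks gate float16 and bfloat16 runs on what the host PyTorch build and device actually support. A hedged sketch of the same kind of guard when picking the `dtype` argument yourself (not code from the test suite; the model id is illustrative):

```python
import torch
from transformers import AutoModelForCausalLM

# Prefer bfloat16 where supported, fall back to float16 on other GPUs, and to float32 on CPU.
if torch.cuda.is_available() and torch.cuda.is_bf16_supported():
    dtype = torch.bfloat16
elif torch.cuda.is_available():
    dtype = torch.float16
else:
    dtype = torch.float32

model = AutoModelForCausalLM.from_pretrained("facebook/opt-125m", dtype=dtype)
```
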
@ -1142,7 +1142,7 @@ class PeftCommonTester:
return pytest.skip("BFloat16 is not supported on MPS")
with hub_online_once(model_id):
model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.bfloat16)
model = self.transformers_class.from_pretrained(model_id, dtype=torch.bfloat16)
config = config_cls(
base_model_name_or_path=model_id,
**config_kwargs,