Mirror of https://github.com/huggingface/peft.git (synced 2025-10-20 15:33:48 +08:00)
CHORE Replace deprecated torch_dtype with dtype (#2837)
Note: Diffusers is left as is for now, might need an update later.
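For context, a minimal sketch of what the rename looks like on the caller side (assuming a transformers release that already accepts the new `dtype` keyword; the checkpoint name is just an example taken from the files touched below):

```python
import torch
from transformers import AutoModelForCausalLM

# Deprecated spelling, still found in older examples:
# model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", torch_dtype=torch.float16)

# New spelling used throughout this commit:
model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-350m",
    dtype=torch.float16,
    device_map="auto",
)
```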
@@ -263,11 +263,11 @@ model = AutoModelForCausalLM.from_pretrained(
quantization_config=bnb_config,
trust_remote_code=True,
attn_implementation="flash_attention_2" if args.use_flash_attn else "eager",
-torch_dtype=quant_storage_dtype or torch.float32,
+dtype=quant_storage_dtype or torch.float32,
)
```

-Notice that `torch_dtype` for `AutoModelForCausalLM` is same as the `bnb_4bit_quant_storage` data type. That's it. Everything else is handled by Trainer and TRL.
+Notice that `dtype` for `AutoModelForCausalLM` is same as the `bnb_4bit_quant_storage` data type. That's it. Everything else is handled by Trainer and TRL.

## Memory usage

@@ -264,11 +264,11 @@ model = AutoModelForCausalLM.from_pretrained(
quantization_config=bnb_config,
trust_remote_code=True,
attn_implementation="flash_attention_2" if args.use_flash_attn else "eager",
-torch_dtype=quant_storage_dtype or torch.float32,
+dtype=quant_storage_dtype or torch.float32,
)
```

-Notice that `torch_dtype` for `AutoModelForCausalLM` is same as the `bnb_4bit_quant_storage` data type. That's it. Everything else is handled by Trainer and TRL.
+Notice that `dtype` for `AutoModelForCausalLM` is same as the `bnb_4bit_quant_storage` data type. That's it. Everything else is handled by Trainer and TRL.

## Memory usage

@@ -539,7 +539,7 @@ from peft import PeftModel
import torch

base_model = AutoModelForCausalLM.from_pretrained(
-"mistralai/Mistral-7B-v0.1", torch_dtype=torch.float16, device_map="auto"
+"mistralai/Mistral-7B-v0.1", dtype=torch.float16, device_map="auto"
)
```
@@ -813,7 +813,7 @@ To encode general knowledge, GenKnowSub subtracts the average of the provided ge
> # Loading the model
> base_model = AutoModelForCausalLM.from_pretrained(
> "microsoft/Phi-3-mini-4k-instruct",
-> torch_dtype=torch.bfloat16,
+> dtype=torch.bfloat16,
> device_map="auto",
> quantization_config=bnb_config,
> )

@@ -144,7 +144,7 @@ The models support LoRA adapter tuning. To tune the quantized model you'll need
```py
quantized_model = AutoModelForCausalLM.from_pretrained(
"BlackSamorez/Mixtral-8x7b-AQLM-2Bit-1x16-hf-test-dispatch",
-torch_dtype="auto", device_map="auto", low_cpu_mem_usage=True,
+dtype="auto", device_map="auto", low_cpu_mem_usage=True,
)

peft_config = LoraConfig(...)

@@ -43,7 +43,7 @@ python -m pip install git+https://github.com/huggingface/peft

### ValueError: Attempting to unscale FP16 gradients

-This error probably occurred because the model was loaded with `torch_dtype=torch.float16` and then used in an automatic mixed precision (AMP) context, e.g. by setting `fp16=True` in the [`~transformers.Trainer`] class from 🤗 Transformers. The reason is that when using AMP, trainable weights should never use fp16. To make this work without loading the whole model in fp32, add the following to your code:
+This error probably occurred because the model was loaded with `dtype=torch.float16` and then used in an automatic mixed precision (AMP) context, e.g. by setting `fp16=True` in the [`~transformers.Trainer`] class from 🤗 Transformers. The reason is that when using AMP, trainable weights should never use fp16. To make this work without loading the whole model in fp32, add the following to your code:

```python
peft_model = get_peft_model(...)
@@ -294,7 +294,7 @@ It is possible to get this information for non-PEFT models if they are using PEF

>>> path = "runwayml/stable-diffusion-v1-5"
>>> lora_id = "takuma104/lora-test-text-encoder-lora-target"
->>> pipe = StableDiffusionPipeline.from_pretrained(path, torch_dtype=torch.float16)
+>>> pipe = StableDiffusionPipeline.from_pretrained(path, dtype=torch.float16)
>>> pipe.load_lora_weights(lora_id, adapter_name="adapter-1")
>>> pipe.load_lora_weights(lora_id, adapter_name="adapter-2")
>>> pipe.set_lora_device(["adapter-2"], "cuda")

@@ -303,7 +303,7 @@ if __name__ == "__main__":
# Loading the model
base_model = AutoModelForCausalLM.from_pretrained(
MODEL_NAME,
-torch_dtype=torch.bfloat16,
+dtype=torch.bfloat16,
device_map="auto",
quantization_config=bnb_config,
)

@@ -84,7 +84,7 @@ def main(args):
args.pretrained_model_name_or_path,
controlnet=controlnet,
unet=unet.model,
-torch_dtype=torch.float32,
+dtype=torch.float32,
requires_safety_checker=False,
).to(device)

@@ -139,16 +139,16 @@ def main(args):
cur_class_images = len(list(class_images_dir.iterdir()))

if cur_class_images < args.num_class_images:
-torch_dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
+dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
if args.prior_generation_precision == "fp32":
-torch_dtype = torch.float32
+dtype = torch.float32
elif args.prior_generation_precision == "fp16":
-torch_dtype = torch.float16
+dtype = torch.float16
elif args.prior_generation_precision == "bf16":
-torch_dtype = torch.bfloat16
+dtype = torch.bfloat16
pipeline = DiffusionPipeline.from_pretrained(
args.pretrained_model_name_or_path,
-torch_dtype=torch_dtype,
+dtype=dtype,
safety_checker=None,
revision=args.revision,
)
@@ -11,7 +11,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset

-model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token_id = tokenizer.eos_token_id
bone_config = BoneConfig(

@@ -47,7 +47,7 @@ from peft import PeftModel
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
-"meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto"
+"meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto"
)
peft_model = PeftModel.from_pretrained(model, "bone-llama-2-7b")
```

@@ -57,7 +57,7 @@ elif script_args.base_model_name_or_path is not None:
print(f"No available pre-processed model, manually initialize a Bone using {script_args.base_model_name_or_path}.")
model = AutoModelForCausalLM.from_pretrained(
script_args.base_model_name_or_path,
-torch_dtype=(
+dtype=(
torch.float16
if script_args.bits == "fp16"
else (torch.bfloat16 if script_args.bits == "bf16" else torch.float32)

@@ -78,7 +78,7 @@ from peft.tuners.lora.corda import preprocess_corda
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset

-model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token_id = tokenizer.eos_token_id
sampled_dataset = load_dataset("wikitext", "wikitext-2-raw-v1", split="train[:256]")
@@ -236,7 +236,7 @@ from peft import PeftModel
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
-"meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto"
+"meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto"
)
# No SVD is performed during this step, and the base model remains unaltered.
peft_model = PeftModel.from_pretrained(model, "corda-llama-2-7b-lora")

@@ -229,7 +229,7 @@ def train():
print("Train in Full Finetuning mode")
model = transformers.AutoModelForCausalLM.from_pretrained(
script_args.model_name_or_path,
-torch_dtype=torch.bfloat16,
+dtype=torch.bfloat16,
device_map="auto",
)
trainable_params, all_param = get_nb_trainable_parameters(model)

@@ -49,7 +49,7 @@ def main(args):
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

model = AutoModelForCausalLM.from_pretrained(
-model_id, device_map="auto", torch_dtype=torch.float16, trust_remote_code=True
+model_id, device_map="auto", dtype=torch.float16, trust_remote_code=True
)

# Collect data
@@ -553,7 +553,7 @@
"base_model = AutoModelForCausalLM.from_pretrained(\n",
" model_id,\n",
" cache_dir='.',\n",
-" torch_dtype=torch.float16,\n",
+" dtype=torch.float16,\n",
" device_map='auto'\n",
")\n",
"\n",

@@ -55,7 +55,7 @@ model = AutoModelForCausalLM.from_pretrained(
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
),
-torch_dtype=torch.float16,
+dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
@@ -141,16 +141,16 @@ def main(args):
cur_class_images = len(list(class_images_dir.iterdir()))

if cur_class_images < args.num_class_images:
-torch_dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
+dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
if args.prior_generation_precision == "fp32":
-torch_dtype = torch.float32
+dtype = torch.float32
elif args.prior_generation_precision == "fp16":
-torch_dtype = torch.float16
+dtype = torch.float16
elif args.prior_generation_precision == "bf16":
-torch_dtype = torch.bfloat16
+dtype = torch.bfloat16
pipeline = DiffusionPipeline.from_pretrained(
args.pretrained_model_name_or_path,
-torch_dtype=torch_dtype,
+dtype=dtype,
safety_checker=None,
revision=args.revision,
)
@@ -196,7 +196,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
-"Overriding torch_dtype=None with `torch_dtype=torch.float16` due to requirements of `bitsandbytes` to enable model loading in mixed int8. Either pass torch_dtype=torch.float16 or don't pass this argument at all to remove this warning.\n"
+"Overriding dtype=None with `dtype=torch.float16` due to requirements of `bitsandbytes` to enable model loading in mixed int8. Either pass dtype=torch.float16 or don't pass this argument at all to remove this warning.\n"
]
},
{

@@ -1201,7 +1201,7 @@
"peft_model_id = \"ybelkada/flan-t5-large-financial-phrasebank-lora\"\n",
"config = PeftConfig.from_pretrained(peft_model_id)\n",
"\n",
-"model = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path, torch_dtype=\"auto\", device_map=\"auto\")\n",
+"model = AutoModelForSeq2SeqLM.from_pretrained(config.base_model_name_or_path, dtype=\"auto\", device_map=\"auto\")\n",
"tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)\n",
"\n",
"# Load the Lora model\n",

@@ -24,7 +24,7 @@ MODEL_ID = "LoftQ/Mistral-7B-v0.1-4bit-64rank"

base_model = AutoModelForCausalLM.from_pretrained(
MODEL_ID,
-torch_dtype=torch.bfloat16,  # you may change it with different models
+dtype=torch.bfloat16,  # you may change it with different models
quantization_config=BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.bfloat16,  # bfloat16 is recommended

@@ -81,7 +81,7 @@ MODEL_DIR = "model_zoo/loftq/Llama-2-7b-hf-4bit-16rank"

base_model = AutoModelForCausalLM.from_pretrained(
MODEL_DIR,
-torch_dtype=torch.bfloat16,
+dtype=torch.bfloat16,
quantization_config=BitsAndBytesConfig(
load_in_4bit=True,
bnb_4bit_compute_dtype=torch.bfloat16,
@@ -454,7 +454,7 @@ def main():
load_in_4bit=True,
bnb_4bit_use_double_quant=False,
bnb_4bit_quant_type="nf4",
-bnb_4bit_compute_dtype=config.torch_dtype,
+bnb_4bit_compute_dtype=config.dtype,
),
)
else:
@@ -628,16 +628,16 @@ def main(args):
cur_class_images = len(list(class_images_dir.iterdir()))

if cur_class_images < args.num_class_images:
-torch_dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
+dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
if args.prior_generation_precision == "fp32":
-torch_dtype = torch.float32
+dtype = torch.float32
elif args.prior_generation_precision == "fp16":
-torch_dtype = torch.float16
+dtype = torch.float16
elif args.prior_generation_precision == "bf16":
-torch_dtype = torch.bfloat16
+dtype = torch.bfloat16
pipeline = DiffusionPipeline.from_pretrained(
args.pretrained_model_name_or_path,
-torch_dtype=torch_dtype,
+dtype=dtype,
safety_checker=None,
revision=args.revision,
)
@@ -72,14 +72,14 @@ def train_model(
bnb_4bit_use_double_quant=False,
bnb_4bit_quant_type="nf4",
),
-torch_dtype=compute_dtype,
+dtype=compute_dtype,
device_map=device_map,
)
# setup for quantized training
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)
else:
model = AutoModelForCausalLM.from_pretrained(
-base_model_name_or_path, torch_dtype=compute_dtype, device_map=device_map
+base_model_name_or_path, dtype=compute_dtype, device_map=device_map
)

# LoRA config for the PEFT model
@@ -11,7 +11,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset

-model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token_id = tokenizer.eos_token_id

@@ -55,7 +55,7 @@ from peft import PeftModel
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
-"meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto"
+"meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto"
)
peft_model = PeftModel.from_pretrained(model, "miss-llama-2-7b")
```

@@ -59,7 +59,7 @@ elif script_args.base_model_name_or_path is not None:
print(f"No available pre-processed model, manually initialize a MiSS using {script_args.base_model_name_or_path}.")
model = AutoModelForCausalLM.from_pretrained(
script_args.base_model_name_or_path,
-torch_dtype=(
+dtype=(
torch.float16
if script_args.bits == "fp16"
else (torch.bfloat16 if script_args.bits == "bf16" else torch.float32)
@@ -689,7 +689,7 @@
}
],
"source": [
-"model = model.to(dtype=torch.float16, device=device)\n",
+"model = model.to(torch_dtype=torch.float16, device=device)\n",
"\n",
"pipe = DiffusionPipeline.from_pretrained(\n",
" model_id, unet=model, variant=\"fp16\", torch_dtype=torch.float16,\n",

@@ -796,7 +796,7 @@
}
],
"source": [
-"model = model.to(dtype=torch.float16, device=device)\n",
+"model = model.to(torch_dtype=torch.float16, device=device)\n",
"\n",
"pipe = DiffusionPipeline.from_pretrained(\n",
" model_id, unet=model, variant=\"fp16\", torch_dtype=torch.float16,\n",

@@ -868,7 +868,7 @@
"del pipe\n",
"\n",
"pipe = DiffusionPipeline.from_pretrained(\n",
-" model_id, variant=\"fp16\", torch_dtype=torch.float16,\n",
+" model_id, variant=\"fp16\", dtype=torch.float16,\n",
").to(device)\n",
"\n",
"prompt = \"toy_face of a hacker with a hoodie, pixel art\"\n",
@@ -638,16 +638,16 @@ def main(args):
cur_class_images = len(list(class_images_dir.iterdir()))

if cur_class_images < args.num_class_images:
-torch_dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
+dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
if args.prior_generation_precision == "fp32":
-torch_dtype = torch.float32
+dtype = torch.float32
elif args.prior_generation_precision == "fp16":
-torch_dtype = torch.float16
+dtype = torch.float16
elif args.prior_generation_precision == "bf16":
-torch_dtype = torch.bfloat16
+dtype = torch.bfloat16
pipeline = DiffusionPipeline.from_pretrained(
args.pretrained_model_name_or_path,
-torch_dtype=torch_dtype,
+dtype=dtype,
safety_checker=None,
revision=args.revision,
)
@@ -11,7 +11,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset

-model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", torch_dtype=torch.bfloat16, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
dataset = load_dataset("imdb", split="train[:1%]")
lora_config = LoraConfig(

@@ -44,7 +44,7 @@ def train(
lora_alpha: int = 16,
lora_dropout: float = 0.05,
lora_target_modules: list[str] = None,
-torch_dtype: str = "float16",
+dtype: str = "float16",
init_lora_weights="olora",
seed: Optional[int] = None,
):

@@ -57,7 +57,7 @@ def train(
# Set seed
if seed is not None:
set_seed(seed)
-model_kwargs = {"torch_dtype": getattr(torch, torch_dtype), "device_map": device_map}
+model_kwargs = {"dtype": getattr(torch, dtype), "device_map": device_map}
if quantize:
model_kwargs["quantization_config"] = BitsAndBytesConfig(
load_in_4bit=True,

@@ -170,7 +170,7 @@ if __name__ == "__main__":
parser.add_argument("--lora_alpha", type=int, default=16)
parser.add_argument("--lora_dropout", type=float, default=0.05)
parser.add_argument("--lora_target_modules", type=str, default=None)
-parser.add_argument("--torch_dtype", type=str, default="float16")
+parser.add_argument("--dtype", type=str, default="float16")
parser.add_argument("--init_lora_weights", type=str, default="olora")
parser.add_argument("--seed", type=int, default=None)

@@ -193,7 +193,7 @@ if __name__ == "__main__":
lora_alpha=args.lora_alpha,
lora_dropout=args.lora_dropout,
lora_target_modules=args.lora_target_modules,
-torch_dtype=args.torch_dtype,
+dtype=args.dtype,
init_lora_weights=args.init_lora_weights,
seed=args.seed,
)
@@ -10,7 +10,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset

-model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-2-7b-hf")
tokenizer.pad_token_id = tokenizer.eos_token_id
lora_config = LoraConfig(

@@ -43,7 +43,7 @@ from peft import PeftModel
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
-"meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto"
+"meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto"
)
# Performs SVD again to initialize the residual model and loads the state_dict of the fine-tuned PiSSA modules.
peft_model = PeftModel.from_pretrained(model, "pissa-llama-2-7b")

@@ -83,7 +83,7 @@ from peft import PeftModel
from transformers import AutoModelForCausalLM

model = AutoModelForCausalLM.from_pretrained(
-"meta-llama/Llama-2-7b-hf", torch_dtype=torch.bfloat16, device_map="auto"
+"meta-llama/Llama-2-7b-hf", dtype=torch.bfloat16, device_map="auto"
)
# No SVD is performed during this step, and the base model remains unaltered.
peft_model = PeftModel.from_pretrained(model, "pissa-llama-2-7b-lora")
@@ -75,7 +75,7 @@ if script_args.bits in ["nf4", "fp4", "int8"]:
elif script_args.residual_model_name_or_path is not None:
res_model = AutoModelForCausalLM.from_pretrained(
script_args.residual_model_name_or_path,
-torch_dtype=(
+dtype=(
torch.float16
if script_args.bits == "fp16"
else (torch.bfloat16 if script_args.bits == "bf16" else torch.float32)

@@ -94,7 +94,7 @@ elif script_args.base_model_name_or_path is not None:
)
model = AutoModelForCausalLM.from_pretrained(
script_args.base_model_name_or_path,
-torch_dtype=(
+dtype=(
torch.float16
if script_args.bits == "fp16"
else (torch.bfloat16 if script_args.bits == "bf16" else torch.float32)

@@ -39,7 +39,7 @@ print(script_args)

model = AutoModelForCausalLM.from_pretrained(
script_args.base_model_name_or_path,
-torch_dtype=(
+dtype=(
torch.float16
if script_args.bits == "fp16"
else (torch.bfloat16 if script_args.bits == "bf16" else torch.float32)
@@ -44,7 +44,7 @@ def load_or_quantize_model(
test_model = AutoModelForCausalLM.from_pretrained(
base_model,
device_map="auto",
-torch_dtype=torch.float16,
+dtype=torch.float16,
trust_remote_code=True,  # Some GPTQ models might need this
)

@@ -95,7 +95,7 @@ def load_or_quantize_model(

# Load and quantize the model
model = AutoModelForCausalLM.from_pretrained(
-base_model, device_map="auto", quantization_config=gptq_config, torch_dtype=torch.float16
+base_model, device_map="auto", quantization_config=gptq_config, dtype=torch.float16
)

# Save the quantized model to cache
@@ -52,7 +52,7 @@ def train_model(
device_type = device.type
device_module = getattr(torch, device_type, torch.cuda)
bf16_suppotrted = device_module.is_available() and device_module.is_bf16_supported()
-torch_dtype = torch.bfloat16 if bf16_suppotrted else torch.float16
+dtype = torch.bfloat16 if bf16_suppotrted else torch.float16

# QRandLora (quantized randlora): IF YOU WANNA QUANTIZE THE MODEL
if quantize:

@@ -65,14 +65,14 @@ def train_model(
bnb_4bit_use_double_quant=True,
bnb_4bit_quant_type="nf4",
),
-torch_dtype=torch_dtype,
+dtype=dtype,
)
# setup for quantized training
model = prepare_model_for_kbit_training(model, use_gradient_checkpointing=True)
else:
model = AutoModelForCausalLM.from_pretrained(
base_model,
-torch_dtype=torch_dtype,
+dtype=dtype,
token=hf_token,
)
# LoRa config for the PEFT model
@@ -207,7 +207,7 @@
"source": [
"quant_config = TorchAoConfig(quant_type=\"int8_dynamic_activation_int8_weight\")\n",
"model = AutoModelForSequenceClassification.from_pretrained(\n",
-" model_name_or_path, return_dict=True, device_map=0, torch_dtype=torch.bfloat16, quantization_config=quant_config\n",
+" model_name_or_path, return_dict=True, device_map=0, dtype=torch.bfloat16, quantization_config=quant_config\n",
")"
]
},

@@ -207,7 +207,7 @@
"source": [
"quant_config = TorchAoConfig(quant_type=\"int8_weight_only\")\n",
"model = AutoModelForSequenceClassification.from_pretrained(\n",
-" model_name_or_path, return_dict=True, device_map=0, torch_dtype=torch.bfloat16, quantization_config=quant_config\n",
+" model_name_or_path, return_dict=True, device_map=0, dtype=torch.bfloat16, quantization_config=quant_config\n",
")"
]
},
@@ -129,14 +129,12 @@ def create_and_prepare_model(args, data_args, training_args):
load_in_4bit=args.use_4bit_quantization,
)
else:
-torch_dtype = (
-quant_storage_dtype if quant_storage_dtype and quant_storage_dtype.is_floating_point else torch.float32
-)
+dtype = quant_storage_dtype if quant_storage_dtype and quant_storage_dtype.is_floating_point else torch.float32

# Prepare model loading arguments
model_kwargs = {
"trust_remote_code": True,
-"torch_dtype": torch_dtype,
+"dtype": dtype,
}
if args.use_flash_attn:
if torch.xpu.is_available():
@@ -11,7 +11,7 @@ from transformers import AutoTokenizer, AutoModelForCausalLM
from trl import SFTConfig, SFTTrainer
from datasets import load_dataset

-model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", torch_dtype=torch.bfloat16, device_map="auto")
+model = AutoModelForCausalLM.from_pretrained("facebook/opt-350m", dtype=torch.bfloat16, device_map="auto")
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-350m")
dataset = load_dataset("imdb", split="train[:1%]")
shira_config = ShiraConfig(

@@ -42,7 +42,7 @@ def train(
device_map: str = "auto",
shira_r: int = 32,
shira_target_modules: list[str] = None,
-torch_dtype: str = "float16",
+dtype: str = "float16",
seed: Optional[int] = None,
use_custom_random_mask_function_with_custom_kwargs: Optional[bool] = False,
):

@@ -55,7 +55,7 @@ def train(
# Set seed
if seed is not None:
set_seed(seed)
-model_kwargs = {"torch_dtype": getattr(torch, torch_dtype), "device_map": device_map}
+model_kwargs = {"dtype": getattr(torch, dtype), "device_map": device_map}
model = AutoModelForCausalLM.from_pretrained(base_model, **model_kwargs)

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

@@ -191,7 +191,7 @@ if __name__ == "__main__":
parser.add_argument("--device_map", type=str, default="auto")
parser.add_argument("--shira_r", type=int, default=32)
parser.add_argument("--shira_target_modules", type=str, default=None)
-parser.add_argument("--torch_dtype", type=str, default="float16")
+parser.add_argument("--dtype", type=str, default="float16")
parser.add_argument("--seed", type=int, default=None)
parser.add_argument("--use_custom_random_mask_function_with_custom_kwargs", action="store_true")

@@ -211,7 +211,7 @@ if __name__ == "__main__":
device_map=args.device_map,
shira_r=args.shira_r,
shira_target_modules=args.shira_target_modules,
-torch_dtype=args.torch_dtype,
+dtype=args.dtype,
seed=args.seed,
use_custom_random_mask_function_with_custom_kwargs=args.use_custom_random_mask_function_with_custom_kwargs,
)
@@ -802,16 +802,16 @@ def main(args):
cur_class_images = len(list(class_images_dir.iterdir()))

if cur_class_images < args.num_class_images:
-torch_dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
+dtype = torch.float16 if accelerator.device.type in ["cuda", "xpu"] else torch.float32
if args.prior_generation_precision == "fp32":
-torch_dtype = torch.float32
+dtype = torch.float32
elif args.prior_generation_precision == "fp16":
-torch_dtype = torch.float16
+dtype = torch.float16
elif args.prior_generation_precision == "bf16":
-torch_dtype = torch.bfloat16
+dtype = torch.bfloat16
pipeline = DiffusionPipeline.from_pretrained(
args.pretrained_model_name_or_path,
-torch_dtype=torch_dtype,
+dtype=dtype,
safety_checker=None,
revision=args.revision,
)
@@ -44,7 +44,7 @@ def train(
waveft_scaling: float = 25.0,
waveft_wavelet_family: str = "db1",
waveft_use_idwt: bool = True,
-torch_dtype: str = "float16",
+dtype: str = "float16",
seed: Optional[int] = None,
):
# Set device_map to the right place when enabling DDP.

@@ -56,7 +56,7 @@ def train(
# Set seed
if seed is not None:
set_seed(seed)
-model_kwargs = {"dtype": getattr(torch, torch_dtype), "device_map": device_map}
+model_kwargs = {"dtype": getattr(torch, dtype), "device_map": device_map}
model = AutoModelForCausalLM.from_pretrained(base_model, **model_kwargs)

tokenizer = AutoTokenizer.from_pretrained(base_model, trust_remote_code=True)

@@ -162,7 +162,7 @@ if __name__ == "__main__":
parser.add_argument("--waveft_scaling", type=float, default=25.0)
parser.add_argument("--waveft_wavelet_family", type=str, default="db1")
parser.add_argument("--waveft_use_idwt", action="store_true", default=True)
-parser.add_argument("--torch_dtype", type=str, default="float16")
+parser.add_argument("--dtype", type=str, default="float16")
parser.add_argument("--seed", type=int, default=None)

args = parser.parse_args()

@@ -184,6 +184,6 @@ if __name__ == "__main__":
waveft_scaling=args.waveft_scaling,
waveft_wavelet_family=args.waveft_wavelet_family,
waveft_use_idwt=args.waveft_use_idwt,
-torch_dtype=args.torch_dtype,
+dtype=args.dtype,
seed=args.seed,
)
@@ -25,11 +25,12 @@ import random
import sys
import textwrap
import time
-from contextlib import AbstractContextManager, nullcontext
+from contextlib import nullcontext
from functools import partial
from typing import Any, Callable, Literal, Optional

import torch
+from data import get_train_valid_test_datasets
from torch import nn
from torch.amp import GradScaler, autocast
from tqdm import tqdm

@@ -53,9 +54,8 @@ from utils import (
validate_experiment_path,
)

-from data import get_train_valid_test_datasets
from peft import AdaLoraConfig, PeftConfig
-from peft.utils import infer_device, CONFIG_NAME
+from peft.utils import CONFIG_NAME, infer_device


# # suppress all warnings

@@ -44,7 +44,8 @@ from transformers import (
import peft
from peft import PeftConfig, get_peft_model, prepare_model_for_kbit_training
from peft.optimizers import create_lorafa_optimizer, create_loraplus_optimizer
-from peft.utils import infer_device, SAFETENSORS_WEIGHTS_NAME
+from peft.utils import SAFETENSORS_WEIGHTS_NAME, infer_device


device = infer_device()

@@ -24,11 +24,12 @@ import subprocess
from dataclasses import asdict, dataclass, field
from enum import Enum
from typing import Any, Callable, Optional
-from peft.utils import infer_device

import psutil
import torch

+from peft.utils import infer_device


FILE_NAME_BENCHMARK_PARAMS = "benchmark_params.json"
FILE_NAME_DEFAULT_CONFIG = "default_benchmark_params.json"
@@ -60,7 +60,7 @@ def test_opt_350m_4bit():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
-torch_dtype=torch.float32,
+dtype=torch.float32,
)

input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)

@@ -79,7 +79,7 @@ def test_opt_350m_8bit():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
-torch_dtype=torch.float32,
+dtype=torch.float32,
)

input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)

@@ -102,7 +102,7 @@ def test_opt_350m_4bit_double_quant():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
-torch_dtype=torch.float32,
+dtype=torch.float32,
)

input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)

@@ -125,7 +125,7 @@ def test_opt_350m_4bit_compute_dtype_float16():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
-torch_dtype=torch.float32,
+dtype=torch.float32,
)

input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)

@@ -149,7 +149,7 @@ def test_opt_350m_4bit_quant_type_nf4():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
-torch_dtype=torch.float32,
+dtype=torch.float32,
)

input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)

@@ -174,7 +174,7 @@ def test_opt_350m_4bit_quant_storage():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
-torch_dtype=torch.float32,
+dtype=torch.float32,
)

input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)

@@ -196,7 +196,7 @@ def test_opt_350m_8bit_threshold():
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
-torch_dtype=torch.float32,
+dtype=torch.float32,
)

input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)

@@ -224,7 +224,7 @@ def test_flan_t5_4bit():
model = AutoModelForSeq2SeqLM.from_pretrained(
"google/flan-t5-base",
quantization_config=bnb_config,
-torch_dtype=torch.float32,
+dtype=torch.float32,
)

input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)

@@ -245,7 +245,7 @@ def test_flan_t5_8bit():
model = AutoModelForSeq2SeqLM.from_pretrained(
"google/flan-t5-base",
quantization_config=bnb_config,
-torch_dtype=torch.float32,
+dtype=torch.float32,
)

input = torch.LongTensor([[1, 0, 1, 0, 1, 2]]).to(device)
@@ -623,7 +623,7 @@ class TestOpt4bitBnb(RegressionTester):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-350m",
quantization_config=bnb_config,
-torch_dtype=torch.float32,
+dtype=torch.float32,
)
return model

@@ -388,7 +388,7 @@ class TestAdaptionPrompt:

"""Test that AdaptionPrompt works when Llama using a half-precision model."""
input_ids = torch.LongTensor([[1, 1, 1], [2, 1, 2]]).to(self.torch_device)
-original = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+original = self.transformers_class.from_pretrained(model_id, dtype=torch.bfloat16)
adapted = get_peft_model(
original, AdaptionPromptConfig(adapter_layers=2, adapter_len=4, task_type="CAUSAL_LM")
)

@@ -343,7 +343,7 @@ class TestArrowRouting:

# Create base in fp16 (no manual assignment to .dtype)
with hub_online_once(model_id):
-base = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.float16)
+base = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.float16)

cfg = ArrowConfig(top_k=2)

@@ -353,7 +353,7 @@ class TestArrowRouting:
task_specific_adapter_paths=ts_adapters,
arrow_config=cfg,
autocast_adapter_dtype=False,
-torch_dtype=torch.float16,
+dtype=torch.float16,
).eval()

X = {
@@ -52,14 +52,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForCausalLM)

# check if kwargs are passed correctly
-model = AutoPeftModelForCausalLM.from_pretrained(model_id, torch_dtype=self.dtype)
+model = AutoPeftModelForCausalLM.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForCausalLM)
assert model.base_model.lm_head.weight.dtype == self.dtype

adapter_name = "default"
is_trainable = False
# This should work
-_ = AutoPeftModelForCausalLM.from_pretrained(model_id, adapter_name, is_trainable, torch_dtype=self.dtype)
+_ = AutoPeftModelForCausalLM.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)

def test_peft_causal_lm_extended_vocab(self):
model_id = "peft-internal-testing/tiny-random-OPTForCausalLM-extended-vocab"

@@ -67,14 +67,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForCausalLM)

# check if kwargs are passed correctly
-model = AutoPeftModelForCausalLM.from_pretrained(model_id, torch_dtype=self.dtype)
+model = AutoPeftModelForCausalLM.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForCausalLM)
assert model.base_model.lm_head.weight.dtype == self.dtype

adapter_name = "default"
is_trainable = False
# This should work
-_ = AutoPeftModelForCausalLM.from_pretrained(model_id, adapter_name, is_trainable, torch_dtype=self.dtype)
+_ = AutoPeftModelForCausalLM.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)

def test_peft_seq2seq_lm(self):
model_id = "peft-internal-testing/tiny_T5ForSeq2SeqLM-lora"

@@ -88,14 +88,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForSeq2SeqLM)

# check if kwargs are passed correctly
-model = AutoPeftModelForSeq2SeqLM.from_pretrained(model_id, torch_dtype=self.dtype)
+model = AutoPeftModelForSeq2SeqLM.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForSeq2SeqLM)
assert model.base_model.lm_head.weight.dtype == self.dtype

adapter_name = "default"
is_trainable = False
# This should work
-_ = AutoPeftModelForSeq2SeqLM.from_pretrained(model_id, adapter_name, is_trainable, torch_dtype=self.dtype)
+_ = AutoPeftModelForSeq2SeqLM.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)

def test_peft_sequence_cls(self):
model_id = "peft-internal-testing/tiny_OPTForSequenceClassification-lora"

@@ -109,7 +109,7 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForSequenceClassification)

# check if kwargs are passed correctly
-model = AutoPeftModelForSequenceClassification.from_pretrained(model_id, torch_dtype=self.dtype)
+model = AutoPeftModelForSequenceClassification.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForSequenceClassification)
assert model.score.original_module.weight.dtype == self.dtype

@@ -117,7 +117,7 @@ class TestPeftAutoModel:
is_trainable = False
# This should work
_ = AutoPeftModelForSequenceClassification.from_pretrained(
-model_id, adapter_name, is_trainable, torch_dtype=self.dtype
+model_id, adapter_name, is_trainable, dtype=self.dtype
)

def test_peft_token_classification(self):

@@ -132,16 +132,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForTokenClassification)

# check if kwargs are passed correctly
-model = AutoPeftModelForTokenClassification.from_pretrained(model_id, torch_dtype=self.dtype)
+model = AutoPeftModelForTokenClassification.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForTokenClassification)
assert model.base_model.classifier.original_module.weight.dtype == self.dtype

adapter_name = "default"
is_trainable = False
# This should work
-_ = AutoPeftModelForTokenClassification.from_pretrained(
-model_id, adapter_name, is_trainable, torch_dtype=self.dtype
-)
+_ = AutoPeftModelForTokenClassification.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)

def test_peft_question_answering(self):
model_id = "peft-internal-testing/tiny_OPTForQuestionAnswering-lora"

@@ -155,16 +153,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForQuestionAnswering)

# check if kwargs are passed correctly
-model = AutoPeftModelForQuestionAnswering.from_pretrained(model_id, torch_dtype=self.dtype)
+model = AutoPeftModelForQuestionAnswering.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForQuestionAnswering)
assert model.base_model.qa_outputs.original_module.weight.dtype == self.dtype

adapter_name = "default"
is_trainable = False
# This should work
-_ = AutoPeftModelForQuestionAnswering.from_pretrained(
-model_id, adapter_name, is_trainable, torch_dtype=self.dtype
-)
+_ = AutoPeftModelForQuestionAnswering.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)

def test_peft_feature_extraction(self):
model_id = "peft-internal-testing/tiny_OPTForFeatureExtraction-lora"

@@ -178,16 +174,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModelForFeatureExtraction)

# check if kwargs are passed correctly
-model = AutoPeftModelForFeatureExtraction.from_pretrained(model_id, torch_dtype=self.dtype)
+model = AutoPeftModelForFeatureExtraction.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModelForFeatureExtraction)
assert model.base_model.model.decoder.embed_tokens.weight.dtype == self.dtype

adapter_name = "default"
is_trainable = False
# This should work
-_ = AutoPeftModelForFeatureExtraction.from_pretrained(
-model_id, adapter_name, is_trainable, torch_dtype=self.dtype
-)
+_ = AutoPeftModelForFeatureExtraction.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)

def test_peft_whisper(self):
model_id = "peft-internal-testing/tiny_WhisperForConditionalGeneration-lora"

@@ -201,14 +195,14 @@ class TestPeftAutoModel:
assert isinstance(model, PeftModel)

# check if kwargs are passed correctly
-model = AutoPeftModel.from_pretrained(model_id, torch_dtype=self.dtype)
+model = AutoPeftModel.from_pretrained(model_id, dtype=self.dtype)
assert isinstance(model, PeftModel)
assert model.base_model.model.model.encoder.embed_positions.weight.dtype == self.dtype

adapter_name = "default"
is_trainable = False
# This should work
-_ = AutoPeftModel.from_pretrained(model_id, adapter_name, is_trainable, torch_dtype=self.dtype)
+_ = AutoPeftModel.from_pretrained(model_id, adapter_name, is_trainable, dtype=self.dtype)

def test_embedding_size_not_reduced_if_greater_vocab_size(self, tmp_path):
# See 2415
|
@ -527,7 +527,7 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
quantization_config = GPTQConfig(bits=4, use_exllama=False)
|
||||
kwargs = {
|
||||
"pretrained_model_name_or_path": model_id,
|
||||
"torch_dtype": torch.float16,
|
||||
"dtype": torch.float16,
|
||||
"device_map": "auto",
|
||||
"quantization_config": quantization_config,
|
||||
}
|
||||
@ -850,7 +850,7 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
model = LlamaForCausalLM.from_pretrained(
|
||||
"trl-internal-testing/tiny-random-LlamaForCausalLM",
|
||||
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
|
||||
torch_dtype=torch.float16,
|
||||
dtype=torch.float16,
|
||||
device_map="auto",
|
||||
)
|
||||
|
||||
@ -873,7 +873,7 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
model = LlamaForCausalLM.from_pretrained(
|
||||
"trl-internal-testing/tiny-random-LlamaForCausalLM",
|
||||
quantization_config=BitsAndBytesConfig(load_in_4bit=True),
|
||||
torch_dtype=torch.float16,
|
||||
dtype=torch.float16,
|
||||
device_map="auto",
|
||||
)
|
||||
|
||||
@ -939,7 +939,7 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
model = AutoModelForSequenceClassification.from_pretrained(
|
||||
model_id,
|
||||
quantization_config=BitsAndBytesConfig(load_in_4bit=True),
|
||||
torch_dtype=torch.float32,
|
||||
dtype=torch.float32,
|
||||
)
|
||||
|
||||
model = prepare_model_for_kbit_training(model)
|
||||
@ -1080,7 +1080,7 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
"facebook/opt-125m",
|
||||
quantization_config=bnb_config,
|
||||
torch_dtype=torch.float32,
|
||||
dtype=torch.float32,
|
||||
)
|
||||
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
|
||||
# compare outputs in probability space, because logits can have outliers
|
||||
@ -1122,7 +1122,7 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
"facebook/opt-125m",
|
||||
quantization_config=bnb_config,
|
||||
torch_dtype=torch.float32,
|
||||
dtype=torch.float32,
|
||||
)
|
||||
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
|
||||
# compare outputs in probability space, because logits can have outliers
|
||||
@ -1165,7 +1165,7 @@ class PeftGPUCommonTests(unittest.TestCase):
|
||||
model = AutoModelForCausalLM.from_pretrained(
|
||||
"facebook/opt-125m",
|
||||
quantization_config=bnb_config,
|
||||
torch_dtype=torch.float32,
|
||||
dtype=torch.float32,
|
||||
)
|
||||
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
|
||||
# compare outputs in probability space, because logits can have outliers
|
||||
@@ -1206,7 +1206,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
-torch_dtype=torch.float32,
+dtype=torch.float32,
).eval()
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
# input with 9 samples

@@ -1274,7 +1274,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
-torch_dtype=torch.float32,
+dtype=torch.float32,
).eval()
tokenizer = AutoTokenizer.from_pretrained("facebook/opt-125m")
# input with 9 samples

@@ -1359,7 +1359,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
-torch_dtype=torch.float32,
+dtype=torch.float32,
)

torch.manual_seed(0)

@@ -1372,7 +1372,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=bnb_config,
-torch_dtype=torch.float32,
+dtype=torch.float32,
)
torch.manual_seed(0)
config_dora = LoraConfig(r=8, init_lora_weights=False, use_dora=True)

@@ -1394,7 +1394,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
-torch_dtype=torch.float32,
+dtype=torch.float32,
).eval()

torch.manual_seed(0)

@@ -1407,7 +1407,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
-torch_dtype=torch.float32,
+dtype=torch.float32,
)
torch.manual_seed(0)
config_dora = LoraConfig(r=8, init_lora_weights=False, use_dora=True)

@@ -1434,7 +1434,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"trl-internal-testing/tiny-random-LlamaForCausalLM",
quantization_config=bnb_config,
-torch_dtype=torch.float32,
+dtype=torch.float32,
).eval()
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
# compare outputs in probability space, because logits can have outliers

@@ -1485,7 +1485,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
-torch_dtype=torch.float32,
+dtype=torch.float32,
).eval()

random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
@@ -1534,7 +1534,7 @@ class PeftGPUCommonTests(unittest.TestCase):

model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
-torch_dtype=torch.float32,
+dtype=torch.float32,
).eval()

config = LoraConfig(

@@ -1586,7 +1586,7 @@ class PeftGPUCommonTests(unittest.TestCase):

model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
-torch_dtype=torch.float32,
+dtype=torch.float32,
).eval()

config = LoraConfig(

@@ -1618,7 +1618,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=BitsAndBytesConfig(load_in_8bit=True),
-torch_dtype=torch.float32,
+dtype=torch.float32,
).eval()

random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)

@@ -1665,7 +1665,7 @@ class PeftGPUCommonTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
"trl-internal-testing/tiny-random-LlamaForCausalLM",
quantization_config=bnb_config,
-torch_dtype=torch.float32,
+dtype=torch.float32,
).eval()
random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
# compare outputs in probability space, because logits can have outliers

@@ -1705,7 +1705,7 @@ class PeftGPUCommonTests(unittest.TestCase):
# check for different result with and without apply_GS
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
-torch_dtype=torch.float32,
+dtype=torch.float32,
).eval()

torch.manual_seed(0)

@@ -1717,7 +1717,7 @@ class PeftGPUCommonTests(unittest.TestCase):

model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
-torch_dtype=torch.float32,
+dtype=torch.float32,
)
torch.manual_seed(0)
config_hra_GS = HRAConfig(r=8, init_weights=True, apply_GS=True)

@@ -1759,7 +1759,7 @@ class PeftGPUCommonTests(unittest.TestCase):
# when r is an odd number
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
-torch_dtype=torch.float32,
+dtype=torch.float32,
).eval()

random_input = torch.LongTensor([[1, 0, 1, 0, 1, 0]]).to(model.device)
@@ -1576,57 +1576,57 @@ class MockTransformerWrapper:
"""

@classmethod
-def from_pretrained(cls, model_id, torch_dtype=None):
+def from_pretrained(cls, model_id, dtype=None):
# set the seed so that from_pretrained always returns the same model
torch.manual_seed(0)

-if torch_dtype is None:
-torch_dtype = torch.float32
+if dtype is None:
+dtype = torch.float32

if model_id == "MLP":
-return MLP().to(torch_dtype)
+return MLP().to(dtype)

if model_id == "EmbConv1D":
-return ModelEmbConv1D().to(torch_dtype)
+return ModelEmbConv1D().to(dtype)

if model_id == "Conv1d":
-return ModelConv1D().to(torch_dtype)
+return ModelConv1D().to(dtype)

if model_id == "Conv1dBigger":
-return ModelConv1DBigger().to(torch_dtype)
+return ModelConv1DBigger().to(dtype)

if model_id == "Conv2d":
-return ModelConv2D().to(torch_dtype)
+return ModelConv2D().to(dtype)

if model_id == "Conv2d1x1":
-return ModelConv2D1x1().to(torch_dtype)
+return ModelConv2D1x1().to(dtype)

if model_id == "Conv1dKernel1":
-return ModelConv1DKernel1().to(torch_dtype)
+return ModelConv1DKernel1().to(dtype)

if model_id == "Conv2dGroups":
-return ModelConv2DGroups().to(torch_dtype)
+return ModelConv2DGroups().to(dtype)

if model_id == "Conv2dGroups2":
-return ModelConv2DGroups2().to(torch_dtype)
+return ModelConv2DGroups2().to(dtype)

if model_id == "Conv3d":
-return ModelConv3D().to(torch_dtype)
+return ModelConv3D().to(dtype)

if model_id == "MLP_LayerNorm":
-return MLP_LayerNorm().to(torch_dtype)
+return MLP_LayerNorm().to(dtype)

if model_id == "MLP2":
-return MLP2().to(torch_dtype)
+return MLP2().to(dtype)

if model_id == "Conv2d2":
-return ModelConv2D2().to(torch_dtype)
+return ModelConv2D2().to(dtype)

if model_id == "MHA":
-return ModelMha().to(torch_dtype)
+return ModelMha().to(dtype)

if model_id == "MlpUsingParameters":
-return MlpUsingParameters().to(torch_dtype)
+return MlpUsingParameters().to(dtype)

raise ValueError(f"model_id {model_id} not implemented")
@@ -1827,7 +1827,7 @@ class TestPeftCustomModel(PeftCommonTester):
pytest.skip(reason="MacOS does not support multiple ops in float16")

X = self.prepare_inputs_for_testing()
-model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.float16).to(self.torch_device)
+model = self.transformers_class.from_pretrained(model_id, dtype=torch.float16).to(self.torch_device)
model.dtype = torch.float16
config = config_cls(
base_model_name_or_path=model_id,

@@ -1869,7 +1869,7 @@ class TestPeftCustomModel(PeftCommonTester):
pytest.skip(reason="MacOS does not support multiple ops in bfloat16")

X = self.prepare_inputs_for_testing()
-model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(self.torch_device)
+model = self.transformers_class.from_pretrained(model_id, dtype=torch.bfloat16).to(self.torch_device)
model.dtype = torch.bfloat16
config = config_cls(
base_model_name_or_path=model_id,

@@ -1910,7 +1910,7 @@ class TestPeftCustomModel(PeftCommonTester):
pytest.skip(reason="MacOS does not support multiple ops in float16")

X = self.prepare_inputs_for_testing()
-model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.float16).to(self.torch_device)
+model = self.transformers_class.from_pretrained(model_id, dtype=torch.float16).to(self.torch_device)
model.dtype = torch.float16
config = config_cls(
base_model_name_or_path=model_id,

@@ -1951,7 +1951,7 @@ class TestPeftCustomModel(PeftCommonTester):
pytest.skip(reason="MacOS does not support multiple ops in bfloat16")

X = self.prepare_inputs_for_testing()
-model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.bfloat16).to(self.torch_device)
+model = self.transformers_class.from_pretrained(model_id, dtype=torch.bfloat16).to(self.torch_device)
model.dtype = torch.bfloat16
config = config_cls(
base_model_name_or_path=model_id,
@ -77,7 +77,7 @@ class PeftGPTQModelCommonTests(unittest.TestCase):
quantization_config = GPTQConfig(bits=4, use_exllama=False)
kwargs = {
"pretrained_model_name_or_path": model_id,
- "torch_dtype": torch.float16,
+ "dtype": torch.float16,
"device_map": "auto",
"quantization_config": quantization_config,
}

@ -114,7 +114,7 @@ class PeftGPTQModelCommonTests(unittest.TestCase):
quantization_config = GPTQConfig(bits=4, use_exllama=False)
kwargs = {
"pretrained_model_name_or_path": model_id,
- "torch_dtype": torch.float16,
+ "dtype": torch.float16,
"device_map": "auto",
"quantization_config": quantization_config,
}

@ -179,7 +179,7 @@ class PeftGPTQModelTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)

@ -232,7 +232,7 @@ class PeftGPTQModelTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)

@ -284,7 +284,7 @@ class PeftGPTQModelTests(unittest.TestCase):

model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)

@ -353,7 +353,7 @@ class PeftGPTQModelTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)

@ -416,7 +416,7 @@ class PeftGPTQModelTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)

@ -478,7 +478,7 @@ class PeftGPTQModelTests(unittest.TestCase):
# default adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)

@ -489,7 +489,7 @@ class PeftGPTQModelTests(unittest.TestCase):
# other adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)

@ -514,7 +514,7 @@ class PeftGPTQModelTests(unittest.TestCase):
# default adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)

@ -525,7 +525,7 @@ class PeftGPTQModelTests(unittest.TestCase):
# other adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)

@ -2075,7 +2075,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)

@ -2128,7 +2128,7 @@ class PeftGPTQGPUTests(unittest.TestCase):

model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)

@ -2201,7 +2201,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)

@ -2279,7 +2279,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
with tempfile.TemporaryDirectory() as tmp_dir:
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
device_map=device_map,
quantization_config=self.quantization_config,
)

@ -2344,7 +2344,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
# default adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)

@ -2355,7 +2355,7 @@ class PeftGPTQGPUTests(unittest.TestCase):
# other adapter name
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
device_map="auto",
quantization_config=self.quantization_config,
)

@ -2918,7 +2918,7 @@ class TestLoftQ:
clear_device_cache(garbage_collection=True)

# now load quantized model and apply LoftQ-initialized weights on top
- base_model = self.get_base_model(tmp_path / "base_model", device=device, **kwargs, torch_dtype=torch.float32)
+ base_model = self.get_base_model(tmp_path / "base_model", device=device, **kwargs, dtype=torch.float32)
loftq_model = PeftModel.from_pretrained(base_model, tmp_path / "loftq_model", is_trainable=True)

# TODO sanity check: model is quantized

@ -3226,7 +3226,7 @@ class MixedPrecisionTests(unittest.TestCase):
# which should not use fp16.
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
)
model = get_peft_model(model, self.config, autocast_adapter_dtype=False)

@ -3250,7 +3250,7 @@ class MixedPrecisionTests(unittest.TestCase):
# No exception should be raised.
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
)
model = get_peft_model(model, self.config, autocast_adapter_dtype=True)

@ -3272,7 +3272,7 @@ class MixedPrecisionTests(unittest.TestCase):
# Same test as above but containing the fix to make it work
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
)
model = get_peft_model(model, self.config, autocast_adapter_dtype=False)

@ -3284,7 +3284,7 @@ class MixedPrecisionTests(unittest.TestCase):
dtype_counts_before = Counter(p.dtype for p in model.parameters())
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
)

model = get_peft_model(model, self.config, autocast_adapter_dtype=True)

@ -3309,13 +3309,13 @@ class MixedPrecisionTests(unittest.TestCase):
# Same as previous tests, but loading the adapter with PeftModel.from_pretrained instead
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
)
model = get_peft_model(model, self.config, autocast_adapter_dtype=False)

with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir)
- model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, torch_dtype=torch.float16)
+ model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, dtype=torch.float16)
model = PeftModel.from_pretrained(model, tmp_dir, autocast_adapter_dtype=False, is_trainable=True)

trainer = Trainer(

@ -3336,7 +3336,7 @@ class MixedPrecisionTests(unittest.TestCase):
# Same as previous tests, but loading the adapter with PeftModel.from_pretrained instead
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
)
# Below, we purposefully set autocast_adapter_dtype=False so that the saved adapter uses float16. We still want
# the loaded adapter to use float32 when we load it with autocast_adapter_dtype=True.

@ -3349,7 +3349,7 @@ class MixedPrecisionTests(unittest.TestCase):

with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir)
- model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, torch_dtype=torch.float16)
+ model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, dtype=torch.float16)
model = PeftModel.from_pretrained(model, tmp_dir, autocast_adapter_dtype=True, is_trainable=True)
# sanity check: this should NOT have float16 adapter weights:
assert (

@ -3376,7 +3376,7 @@ class MixedPrecisionTests(unittest.TestCase):
# load_model(..., autocast_adapter_dtype=True) (the default).
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
- torch_dtype=torch.float16,
+ dtype=torch.float16,
)
# Below, we purposefully set autocast_adapter_dtype=False so that the saved adapter uses float16. We still want
# the loaded adapter to use float32 when we load it with autocast_adapter_dtype=True.

@ -3389,7 +3389,7 @@ class MixedPrecisionTests(unittest.TestCase):

with tempfile.TemporaryDirectory() as tmp_dir:
model.save_pretrained(tmp_dir)
- model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, torch_dtype=torch.float16)
+ model = AutoModelForCausalLM.from_pretrained(self.causal_lm_model_id, dtype=torch.float16)
# the default adapter is now in float16
model = get_peft_model(model, self.config, autocast_adapter_dtype=False)
# sanity check: this should NOT have float16 adapter weights:

@ -3498,7 +3498,7 @@ class PeftAqlmGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
device_map="cuda",
- torch_dtype="auto",
+ dtype="auto",
)

model = prepare_model_for_kbit_training(model)

@ -3584,7 +3584,7 @@ class PeftHqqGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
device_map=device,
- torch_dtype=compute_dtype,
+ dtype=compute_dtype,
quantization_config=quant_config,
)

@ -3642,7 +3642,7 @@ class PeftHqqGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
device_map=device,
- torch_dtype=compute_dtype,
+ dtype=compute_dtype,
)
config = LoraConfig(
target_modules=["q_proj", "v_proj"],

@ -3665,7 +3665,7 @@ class PeftHqqGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
device_map=device,
- torch_dtype=compute_dtype,
+ dtype=compute_dtype,
quantization_config=quant_config,
)
torch.manual_seed(0)

@ -3698,7 +3698,7 @@ class PeftHqqGPUTests(unittest.TestCase):
model = AutoModelForCausalLM.from_pretrained(
self.causal_lm_model_id,
device_map=device,
- torch_dtype=compute_dtype,
+ dtype=compute_dtype,
quantization_config=quant_config,
)
model = PeftModel.from_pretrained(model, tmp_dir)

@ -4264,7 +4264,7 @@ class PeftTorchaoGPUTests(unittest.TestCase):
self.causal_lm_model_id,
device_map=device_map,
quantization_config=quantization_config,
- torch_dtype=torch.bfloat16,
+ dtype=torch.bfloat16,
)

assert set(model.hf_device_map.values()) == set(range(device_count))

@ -4345,7 +4345,7 @@ class PeftTorchaoGPUTests(unittest.TestCase):
self.causal_lm_model_id,
device_map=device_map,
quantization_config=quantization_config,
- torch_dtype=torch.bfloat16,
+ dtype=torch.bfloat16,
)

assert set(model.hf_device_map.values()) == set(range(device_count))

@ -4589,7 +4589,7 @@ class TestFSDPWrap:
model = AutoModelForCausalLM.from_pretrained(
"facebook/opt-125m",
quantization_config=quant_config,
- torch_dtype=torch.float32,
+ dtype=torch.float32,
)
# model = prepare_model_for_kbit_training(model)
config = LoraConfig(

@ -5345,7 +5345,7 @@ class TestArrowQuantized:
# Load quantized base model
base_model = AutoModelForCausalLM.from_pretrained(
model_id,
- torch_dtype=torch.bfloat16,
+ dtype=torch.bfloat16,
device_map="auto",
quantization_config=bnb_config,
)

@ -91,9 +91,9 @@ class MockTransformerWrapper:
# set the seed so that from_pretrained always returns the same model
torch.manual_seed(0)

- torch_dtype = torch.float32
+ dtype = torch.float32

- return DummyLM().to(torch_dtype)
+ return DummyLM().to(dtype)


VARIANT_MAP = {

@ -217,7 +217,7 @@ class TestMultiTaskPromptTuning:
input_ids = torch.LongTensor([[1, 1, 1], [2, 1, 2]]).to(self.torch_device)
task_ids = torch.tensor([1, 2]).to(self.torch_device)

- original = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+ original = AutoModelForCausalLM.from_pretrained(model_id, dtype=torch.bfloat16)
mpt = get_peft_model(original, config)
mpt = mpt.to(self.torch_device)
_ = mpt.generate(input_ids=input_ids, task_ids=task_ids)

@ -603,7 +603,7 @@ class PeftCommonTester:
self.skipTest("PyTorch 2.1 not supported for Half of addmm_impl_cpu_ ")

with hub_online_once(model_id):
- model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.float16)
+ model = self.transformers_class.from_pretrained(model_id, dtype=torch.float16)
config = config_cls(
base_model_name_or_path=model_id,
**config_kwargs,

@ -1142,7 +1142,7 @@ class PeftCommonTester:
return pytest.skip("BFloat16 is not supported on MPS")

with hub_online_once(model_id):
- model = self.transformers_class.from_pretrained(model_id, torch_dtype=torch.bfloat16)
+ model = self.transformers_class.from_pretrained(model_id, dtype=torch.bfloat16)
config = config_cls(
base_model_name_or_path=model_id,
**config_kwargs,