ENH Support XPU for CPT, EVA, GPU offload (#2694)
---------

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
@@ -1129,7 +1129,7 @@
     "# Convert the test dataset to a CPT-compatible format\n",
     "cpt_test_dataset = CPTDataset(test_dataset, tokenizer, templates)\n",
     "\n",
-    "# Get the device where the model is loaded (CPU or GPU)\n",
+    "# Get the device where the model is loaded (CPU, GPU or XPU)\n",
     "device = model.device\n",
     "list_bool_predictions = []\n",
     "\n",
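The notebook hunk above only touches a comment, but it documents the pattern the code already uses: read the device back from the loaded model via `model.device` instead of hardcoding one, so the same evaluation loop runs on CPU, CUDA, or XPU. A minimal sketch of that pattern ("gpt2" is a stand-in model, not the notebook's):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

model_name = "gpt2"  # illustrative placeholder
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
if torch.cuda.is_available():
    model = model.to("cuda")
elif hasattr(torch, "xpu") and torch.xpu.is_available():
    model = model.to("xpu")

device = model.device  # wherever the model actually lives: cpu, cuda, or xpu
inputs = tokenizer("Hello", return_tensors="pt").to(device)
with torch.no_grad():
    logits = model(**inputs).logits
```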
@@ -1552,4 +1552,4 @@
  },
  "nbformat": 4,
  "nbformat_minor": 5
-}
+}
@@ -59,7 +59,7 @@ def main():
     )
     parser.add_argument("--ephemeral_gpu_offload", action="store_true", help="Use ephemeral GPU offloading")
     parser.add_argument(
-        "--merge_model_path", type="str", help="Merge the model with the DoRA model and save to the given path"
+        "--merge_model_path", type=str, help="Merge the model with the DoRA model and save to the given path"
     )
     args = parser.parse_args()
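This hunk is a genuine bug fix rather than an XPU change: argparse's `type` must be a callable that converts the raw command-line string, and `type="str"` passes the literal string `"str"` instead of the builtin `str`. CPython rejects a non-callable `type` when the argument is registered, so the broken line fails as soon as the parser is built. A small demonstration (the path is illustrative):

```python
import argparse

# Correct: the builtin `str` is a callable converter.
parser = argparse.ArgumentParser()
parser.add_argument("--merge_model_path", type=str)
args = parser.parse_args(["--merge_model_path", "/tmp/merged"])
print(args.merge_model_path)  # /tmp/merged

# The bug: type expects a callable, not a string naming one.
broken = argparse.ArgumentParser()
try:
    broken.add_argument("--merge_model_path", type="str")
except ValueError as err:
    print(err)  # 'str' is not callable
```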
@@ -60,8 +60,9 @@ peft_config = LoraConfig(
     eva_config=eva_config
 )
 
-# move model to GPU
-model = model.cuda()
+# move model to accelerator
+device = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda"
+model = model.to(device)
 
 # to optimize memory usage during EVA initialization, set low_cpu_mem_usage=True
 peft_model = get_peft_model(model, peft_config, low_cpu_mem_usage=True)
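The replacement idiom relies on `torch.accelerator`, which landed in PyTorch 2.6: `current_accelerator()` returns the active accelerator's `torch.device` (CUDA, XPU, MPS, ...), and the `hasattr` guard keeps older PyTorch working by falling back to `"cuda"`. A slightly more defensive variant of the same idea, where the explicit CPU fallback is an addition for illustration, not part of the docs:

```python
import torch

# Pick a device string that works across backends. torch.accelerator exists
# from PyTorch 2.6 onward; on older releases, fall back to a CUDA check.
if hasattr(torch, "accelerator") and torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type  # "cuda", "xpu", ...
else:
    device = "cuda" if torch.cuda.is_available() else "cpu"

x = torch.ones(2, 2, device=device)
print(x.device)
```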
@@ -90,7 +91,7 @@ In some cases you might just want to get the state_dict after EVA initialization
 - you want to precompute and store the state_dict for different downstream tasks.
 - you need to quantize the model for finetuning but want to perform EVA initialization with model weights in full/half precision.
 - you do not intend to use a peft model for LoRA finetuning.
-- you would like to leverage multiple GPUs for EVA initialization. (At the moment this is not directly supported by `initialize_lora_eva_weights`)
+- you would like to leverage multiple accelerators for EVA initialization. (At the moment this is not directly supported by `initialize_lora_eva_weights`)
 
 You can do this by calling `get_eva_state_dict` directly (you only need to pass `peft_config` if `model` is not a PeftModel):
 ```python
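For reference, the decoupled workflow this list describes looks roughly like the sketch below; `model`, `dataloader`, and `peft_config` are assumed to be set up as in the earlier sections of the guide:

```python
from peft import get_eva_state_dict, get_peft_model, initialize_lora_eva_weights

# Compute the EVA state_dict without creating a PeftModel first.
eva_state_dict = get_eva_state_dict(model, dataloader, peft_config)

# ... later (possibly after quantizing the model), wrap it and load the weights.
peft_model = get_peft_model(model, peft_config, low_cpu_mem_usage=True)
initialize_lora_eva_weights(peft_model, eva_state_dict=eva_state_dict)
```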
@@ -103,9 +104,9 @@ Later you can load the state_dict into a `PeftModel` by using the `eva_state_dic
 initialize_lora_eva_weights(peft_model, eva_state_dict=eva_state_dict)
 ```
 
-## Leveraging multiple GPUs
+## Leveraging multiple accelerators
 
-EVA initialization can be parallelized across multiple GPUs. In this case inputs from multiple GPUs are gathered before computing the SVD for the batch. This requires that the model is wrapped in a `torch.nn.DataParallel` or `torch.nn.DistributedDataParallel` class. An example of how to use this can be found in [eva_finetuning_multi_gpu.py](https://github.com/huggingface/peft/blob/main/examples/eva_finetuning/eva_finetuning_multi_gpu.py).
+EVA initialization can be parallelized across multiple accelerators. In this case inputs from multiple accelerators are gathered before computing the SVD for the batch. This requires that the model is wrapped in a `torch.nn.DataParallel` or `torch.nn.DistributedDataParallel` class. An example of how to use this can be found in [eva_finetuning_multi_accelerator.py](https://github.com/huggingface/peft/blob/main/examples/eva_finetuning/eva_finetuning_multi_accelerator.py).
 
 ## Customizing EVA
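The renamed paragraph states the key constraint: per-rank inputs are only gathered before the SVD when the model is wrapped in `DataParallel` or `DistributedDataParallel`. A sketch of the wrapping step, assuming a `torchrun` launch, an already-initialized process group, and the `model`/`dataloader`/`peft_config` objects from earlier:

```python
import os

from torch.nn.parallel import DistributedDataParallel as DDP

from peft import get_eva_state_dict

# torchrun sets LOCAL_RANK for every process; the matching device (CUDA or
# XPU) is assumed to have been selected during process-group setup.
local_rank = int(os.environ["LOCAL_RANK"])
ddp_model = DDP(model, device_ids=[local_rank])

# Inputs from all ranks are gathered before each SVD step.
eva_state_dict = get_eva_state_dict(ddp_model, dataloader, peft_config)
```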
@@ -21,8 +21,7 @@ from utils import DataCollator, TokenizerMetaMath
 from peft import EvaConfig, LoraConfig, get_peft_model, initialize_lora_eva_weights
 
 
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
-
+DEVICE = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda"
 
 # config
 model_name = "meta-llama/Llama-3.1-8B"
@@ -69,7 +68,7 @@ peft_config = LoraConfig(
     r=rank, lora_alpha=alpha, target_modules=target_modules, init_lora_weights="eva", eva_config=eva_config
 )
 
-# move model to GPU
+# move model to accelerator
 model = model.to(DEVICE)
 
 # to optimize memory usage during eva initialization, set low_cpu_mem_usage=True
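For context, the `LoraConfig` this script builds follows the pattern below; the rank, alpha, and target modules shown here are illustrative placeholders, not the script's actual values:

```python
from peft import EvaConfig, LoraConfig

eva_config = EvaConfig(rho=2.0)  # rho > 1 permits rank redistribution across layers
peft_config = LoraConfig(
    r=16,                       # illustrative rank
    lora_alpha=1,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    init_lora_weights="eva",    # triggers EVA initialization of the LoRA weights
    eva_config=eva_config,
)
```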
@@ -50,6 +50,11 @@ if torch.cuda.is_available():
     torch.cuda.set_device(local_rank)
     dist.init_process_group("nccl")
     world_size = dist.get_world_size()
+elif torch.xpu.is_available():
+    local_rank = int(os.environ.get("LOCAL_RANK", -1))
+    torch.xpu.set_device(local_rank)
+    dist.init_process_group("xccl")
+    world_size = dist.get_world_size()
 else:
     local_rank = -1
     world_size = 1
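The new branch gives XPU the same treatment as CUDA: pin each process to its local device, then initialize the matching collective backend (NCCL for CUDA, XCCL for XPU). A generalized sketch of the same setup; the Gloo CPU fallback is an addition for illustration, not part of the example script:

```python
import os

import torch
import torch.distributed as dist

# Assumes a torchrun launch, which sets LOCAL_RANK for every process.
local_rank = int(os.environ.get("LOCAL_RANK", -1))

if torch.cuda.is_available():
    torch.cuda.set_device(local_rank)
    dist.init_process_group("nccl")   # CUDA collectives
elif hasattr(torch, "xpu") and torch.xpu.is_available():
    torch.xpu.set_device(local_rank)
    dist.init_process_group("xccl")   # XPU (oneCCL) collectives
else:
    dist.init_process_group("gloo")   # CPU fallback (illustrative)

world_size = dist.get_world_size()
```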