ENH Support XPU for CPT, EVA, GPU offload (#2694)

---------

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
Author: Yao Matrix
Date: 2025-08-05 02:43:53 -07:00
Committed by: GitHub
Parent: daee6367aa
Commit: 86feb8c4f9

5 changed files with 16 additions and 11 deletions

View File

@@ -1129,7 +1129,7 @@
 "# Convert the test dataset to a CPT-compatible format\n",
 "cpt_test_dataset = CPTDataset(test_dataset, tokenizer, templates)\n",
 "\n",
-"# Get the device where the model is loaded (CPU or GPU)\n",
+"# Get the device where the model is loaded (CPU, GPU or XPU)\n",
 "device = model.device\n",
 "list_bool_predictions = []\n",
 "\n",
@@ -1552,4 +1552,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}
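The notebook diff above boils down to a device-agnostic inference pattern: read the device off the model instead of hard-coding `"cuda"`. A minimal sketch of the same pattern, using a small placeholder model rather than the notebook's CPT model:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder model; the notebook uses its CPT-tuned model instead.
model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Works unchanged whether the model sits on CPU, a CUDA GPU, or an Intel XPU.
device = model.device
inputs = tokenizer("2 + 2 =", return_tensors="pt").to(device)
with torch.no_grad():
    logits = model(**inputs).logits
```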

View File

@@ -59,7 +59,7 @@ def main():
     )
     parser.add_argument("--ephemeral_gpu_offload", action="store_true", help="Use ephemeral GPU offloading")
     parser.add_argument(
-        "--merge_model_path", type="str", help="Merge the model with the DoRA model and save to the given path"
+        "--merge_model_path", type=str, help="Merge the model with the DoRA model and save to the given path"
     )
     args = parser.parse_args()
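The one-character fix in this hunk matters because argparse's `type` must be a callable that converts the raw command-line string; the string `"str"` is not callable, so defining the argument with `type="str"` makes argparse reject it ("'str' is not callable") before any parsing happens. A quick illustration:

```python
import argparse

parser = argparse.ArgumentParser()

# Correct: `type` is the built-in str callable (the default, shown explicitly).
parser.add_argument("--merge_model_path", type=str)

# Buggy (the pre-fix version): the *string* "str" is not callable, so
# argparse errors out as soon as the argument is defined.
# parser.add_argument("--merge_model_path", type="str")

args = parser.parse_args(["--merge_model_path", "/tmp/merged-model"])
print(args.merge_model_path)  # /tmp/merged-model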

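For context on the `--ephemeral_gpu_offload` flag in the same hunk: in PEFT, ephemeral GPU offloading keeps offloaded weights on CPU and moves them to the accelerator only for brief, compute-heavy steps such as materializing DoRA weights. A hedged sketch of how such a flag is typically wired into the config, assuming PEFT's `LoraRuntimeConfig` and that `PeftModel.from_pretrained` accepts the flag; the base model and adapter path are placeholders:

```python
from peft import LoraConfig, LoraRuntimeConfig, PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("gpt2")  # placeholder base model

# Route 1: bake the flag into the config.
config = LoraConfig(runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=True))

# Route 2: pass it when loading a trained adapter (hypothetical path).
model = PeftModel.from_pretrained(base, "path/to/dora-adapter", ephemeral_gpu_offload=True)
```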
View File

@@ -60,8 +60,9 @@ peft_config = LoraConfig(
     eva_config=eva_config
 )
-# move model to GPU
-model = model.cuda()
+# move model to accelerator
+device = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda"
+model = model.to(device)
 # to optimize memory usage during EVA initialization, set low_cpu_mem_usage=True
 peft_model = get_peft_model(model, peft_config, low_cpu_mem_usage=True)
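The replacement line leans on the `torch.accelerator` API (available in recent PyTorch releases), which reports the active accelerator type regardless of vendor; the `hasattr` guard keeps older PyTorch versions on the previous CUDA path. A minimal sketch of how the expression resolves, with an extra `is_available()`/CPU fallback added here for defensiveness beyond what the diff shows:

```python
import torch

# On PyTorch builds with the accelerator API, this yields "cuda" on NVIDIA,
# "xpu" on Intel GPUs, etc.; older builds fall back to the CUDA/CPU check.
if hasattr(torch, "accelerator") and torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type
else:
    device = "cuda" if torch.cuda.is_available() else "cpu"

x = torch.ones(2, 2, device=device)
print(x.device)
```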
@@ -90,7 +91,7 @@ In some cases you might just want to get the state_dict after EVA initialization
 - you want to precompute and store the state_dict for different downstream tasks.
 - you need to quantize the model for finetuning but want to perform EVA initialization with model weights in full/half precision.
 - you do not intend to use a peft model for LoRA finetuning.
-- you would like to leverage multiple GPUs for EVA initialization. (At the moment this is not directly supported by `initialize_lora_eva_weights`)
+- you would like to leverage multiple accelerators for EVA initialization. (At the moment this is not directly supported by `initialize_lora_eva_weights`)
 
 You can do this by calling `get_eva_state_dict` directly (you only need to pass `peft_config` if `model` is not a PeftModel):
 
 ```python
@@ -103,9 +104,9 @@ Later you can load the state_dict into a `PeftModel` by using the `eva_state_dict`
 initialize_lora_eva_weights(peft_model, eva_state_dict=eva_state_dict)
 ```
 
-## Leveraging multiple GPUs
+## Leveraging multiple accelerators
 
-EVA initialization can be parallelized across multiple GPUs. In this case inputs from multiple GPUs are gathered before computing the SVD for the batch. This requires that the model is wrapped in a `torch.nn.DataParallel` or `torch.nn.DistributedDataParallel` class. An example of how to use this can be found in [eva_finetuning_multi_gpu.py](https://github.com/huggingface/peft/blob/main/examples/eva_finetuning/eva_finetuning_multi_gpu.py).
+EVA initialization can be parallelized across multiple accelerators. In this case inputs from multiple accelerators are gathered before computing the SVD for the batch. This requires that the model is wrapped in a `torch.nn.DataParallel` or `torch.nn.DistributedDataParallel` class. An example of how to use this can be found in [eva_finetuning_multi_accelerator.py](https://github.com/huggingface/peft/blob/main/examples/eva_finetuning/eva_finetuning_multi_accelerator.py).
 
 ## Customizing EVA
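Following the updated docs, here is a minimal sketch of the multi-accelerator path. Assumptions: the process was started by a distributed launcher that sets `LOCAL_RANK`, and `model`, `dataloader`, and `peft_config` are built as in the single-device example; see the linked script for the full version.

```python
import os
import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from peft import get_eva_state_dict

local_rank = int(os.environ["LOCAL_RANK"])
if torch.xpu.is_available():
    device = torch.device("xpu", local_rank)
    dist.init_process_group("xccl")
else:
    device = torch.device("cuda", local_rank)
    dist.init_process_group("nccl")

# model / dataloader / peft_config: as in the single-device example.
model = model.to(device)
ddp_model = DDP(model, device_ids=[local_rank])

# Inputs from all ranks are gathered before each per-batch SVD.
eva_state_dict = get_eva_state_dict(ddp_model, dataloader, peft_config=peft_config)
```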

View File

@@ -21,8 +21,7 @@ from utils import DataCollator, TokenizerMetaMath
 from peft import EvaConfig, LoraConfig, get_peft_model, initialize_lora_eva_weights
 
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda"
 
 # config
 model_name = "meta-llama/Llama-3.1-8B"
@@ -69,7 +68,7 @@ peft_config = LoraConfig(
     r=rank, lora_alpha=alpha, target_modules=target_modules, init_lora_weights="eva", eva_config=eva_config
 )
 
-# move model to GPU
+# move model to accelerator
 model = model.to(DEVICE)
 
 # to optimize memory usage during eva initialization, set low_cpu_mem_usage=True

View File

@@ -50,6 +50,11 @@ if torch.cuda.is_available():
     torch.cuda.set_device(local_rank)
     dist.init_process_group("nccl")
     world_size = dist.get_world_size()
+elif torch.xpu.is_available():
+    local_rank = int(os.environ.get("LOCAL_RANK", -1))
+    torch.xpu.set_device(local_rank)
+    dist.init_process_group("xccl")
+    world_size = dist.get_world_size()
 else:
     local_rank = -1
     world_size = 1
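As a usage note: this branch structure assumes a launcher such as torchrun, which sets `LOCAL_RANK` for every process. Once the process group is up, a one-line all-reduce is a handy smoke test that the chosen backend (`nccl` on CUDA, `xccl` on XPU) actually works. A hedged sketch, not part of the script itself; the script name in the comment is a placeholder:

```python
import torch
import torch.distributed as dist

# After the initialization shown above, e.g. launched via:
#   torchrun --nproc_per_node=2 your_script.py
t = torch.ones(1, device="xpu" if torch.xpu.is_available() else "cuda")
dist.all_reduce(t)  # sums the tensor across all ranks
assert t.item() == dist.get_world_size()
```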