ENH Support XPU for CPT, EVA, GPU offload (#2694)

---------

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
Author: Yao Matrix
Date: 2025-08-05 02:43:53 -07:00
Committed by: GitHub
Parent: daee6367aa
Commit: 86feb8c4f9

5 changed files with 16 additions and 11 deletions

View File

@@ -1129,7 +1129,7 @@
 "# Convert the test dataset to a CPT-compatible format\n",
 "cpt_test_dataset = CPTDataset(test_dataset, tokenizer, templates)\n",
 "\n",
-"# Get the device where the model is loaded (CPU or GPU)\n",
+"# Get the device where the model is loaded (CPU, GPU or XPU)\n",
 "device = model.device\n",
 "list_bool_predictions = []\n",
 "\n",
@@ -1552,4 +1552,4 @@
 },
 "nbformat": 4,
 "nbformat_minor": 5
-}
+}
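The notebook diff above boils down to a device-agnostic inference pattern: read the device off the model instead of hard-coding `"cuda"`. A minimal sketch of the same pattern, using a small placeholder model rather than the notebook's CPT model:

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

# Placeholder model; the notebook uses its CPT-tuned model instead.
model = AutoModelForCausalLM.from_pretrained("gpt2")
tokenizer = AutoTokenizer.from_pretrained("gpt2")

# Works unchanged whether the model sits on CPU, a CUDA GPU, or an Intel XPU.
device = model.device
inputs = tokenizer("2 + 2 =", return_tensors="pt").to(device)
with torch.no_grad():
    logits = model(**inputs).logits
```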

View File

@@ -59,7 +59,7 @@ def main():
     )
     parser.add_argument("--ephemeral_gpu_offload", action="store_true", help="Use ephemeral GPU offloading")
     parser.add_argument(
-        "--merge_model_path", type="str", help="Merge the model with the DoRA model and save to the given path"
+        "--merge_model_path", type=str, help="Merge the model with the DoRA model and save to the given path"
     )
     args = parser.parse_args()
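The one-character fix in this hunk matters because argparse's `type` must be a callable that converts the raw command-line string; the string `"str"` is not callable, so defining the argument with `type="str"` makes argparse reject it ("'str' is not callable") before any parsing happens. A quick illustration:

```python
import argparse

parser = argparse.ArgumentParser()

# Correct: `type` is the built-in str callable (the default, shown explicitly).
parser.add_argument("--merge_model_path", type=str)

# Buggy (the pre-fix version): the *string* "str" is not callable, so
# argparse errors out as soon as the argument is defined.
# parser.add_argument("--merge_model_path", type="str")

args = parser.parse_args(["--merge_model_path", "/tmp/merged-model"])
print(args.merge_model_path)  # /tmp/merged-model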

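For context on the `--ephemeral_gpu_offload` flag in the same hunk: in PEFT, ephemeral GPU offloading keeps offloaded weights on CPU and moves them to the accelerator only for brief, compute-heavy steps such as materializing DoRA weights. A hedged sketch of how such a flag is typically wired into the config, assuming PEFT's `LoraRuntimeConfig` and that `PeftModel.from_pretrained` accepts the flag; the base model and adapter path are placeholders:

```python
from peft import LoraConfig, LoraRuntimeConfig, PeftModel
from transformers import AutoModelForCausalLM

base = AutoModelForCausalLM.from_pretrained("gpt2")  # placeholder base model

# Route 1: bake the flag into the config.
config = LoraConfig(runtime_config=LoraRuntimeConfig(ephemeral_gpu_offload=True))

# Route 2: pass it when loading a trained adapter (hypothetical path).
model = PeftModel.from_pretrained(base, "path/to/dora-adapter", ephemeral_gpu_offload=True)
```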
View File

@@ -60,8 +60,9 @@ peft_config = LoraConfig(
     eva_config=eva_config
 )
-# move model to GPU
-model = model.cuda()
+# move model to accelerator
+device = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda"
+model = model.to(device)
 # to optimize memory usage during EVA initialization, set low_cpu_mem_usage=True
 peft_model = get_peft_model(model, peft_config, low_cpu_mem_usage=True)
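The replacement line leans on the `torch.accelerator` API (available in recent PyTorch releases), which reports the active accelerator type regardless of vendor; the `hasattr` guard keeps older PyTorch versions on the previous CUDA path. A minimal sketch of how the expression resolves, with an extra `is_available()`/CPU fallback added here for defensiveness beyond what the diff shows:

```python
import torch

# On PyTorch builds with the accelerator API, this yields "cuda" on NVIDIA,
# "xpu" on Intel GPUs, etc.; older builds fall back to the CUDA/CPU check.
if hasattr(torch, "accelerator") and torch.accelerator.is_available():
    device = torch.accelerator.current_accelerator().type
else:
    device = "cuda" if torch.cuda.is_available() else "cpu"

x = torch.ones(2, 2, device=device)
print(x.device)
```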
@@ -90,7 +91,7 @@ In some cases you might just want to get the state_dict after EVA initialization
 - you want to precompute and store the state_dict for different downstream tasks.
 - you need to quantize the model for finetuning but want to perform EVA initialization with model weights in full/half precision.
 - you do not intend to use a peft model for LoRA finetuning.
-- you would like to leverage multiple GPUs for EVA initialization. (At the moment this is not directly supported by `initialize_lora_eva_weights`)
+- you would like to leverage multiple accelerators for EVA initialization. (At the moment this is not directly supported by `initialize_lora_eva_weights`)
 
 You can do this by calling `get_eva_state_dict` directly (you only need to pass `peft_config` if `model` is not a PeftModel):
 
 ```python
@@ -103,9 +104,9 @@ Later you can load the state_dict into a `PeftModel` by using the `eva_state_dict`
 initialize_lora_eva_weights(peft_model, eva_state_dict=eva_state_dict)
 ```
 
-## Leveraging multiple GPUs
+## Leveraging multiple accelerators
 
-EVA initialization can be parallelized across multiple GPUs. In this case inputs from multiple GPUs are gathered before computing the SVD for the batch. This requires that the model is wrapped in a `torch.nn.DataParallel` or `torch.nn.DistributedDataParallel` class. An example of how to use this can be found in [eva_finetuning_multi_gpu.py](https://github.com/huggingface/peft/blob/main/examples/eva_finetuning/eva_finetuning_multi_gpu.py).
+EVA initialization can be parallelized across multiple accelerators. In this case inputs from multiple accelerators are gathered before computing the SVD for the batch. This requires that the model is wrapped in a `torch.nn.DataParallel` or `torch.nn.DistributedDataParallel` class. An example of how to use this can be found in [eva_finetuning_multi_accelerator.py](https://github.com/huggingface/peft/blob/main/examples/eva_finetuning/eva_finetuning_multi_accelerator.py).
 
 ## Customizing EVA
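Following the updated docs, here is a minimal sketch of the multi-accelerator path. Assumptions: the process was started by a distributed launcher that sets `LOCAL_RANK`, and `model`, `dataloader`, and `peft_config` are built as in the single-device example; see the linked script for the full version.

```python
import os
import torch
import torch.distributed as dist
from torch.nn.parallel import DistributedDataParallel as DDP
from peft import get_eva_state_dict

local_rank = int(os.environ["LOCAL_RANK"])
if torch.xpu.is_available():
    device = torch.device("xpu", local_rank)
    dist.init_process_group("xccl")
else:
    device = torch.device("cuda", local_rank)
    dist.init_process_group("nccl")

# model / dataloader / peft_config: as in the single-device example.
model = model.to(device)
ddp_model = DDP(model, device_ids=[local_rank])

# Inputs from all ranks are gathered before each per-batch SVD.
eva_state_dict = get_eva_state_dict(ddp_model, dataloader, peft_config=peft_config)
```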

View File

@@ -21,8 +21,7 @@ from utils import DataCollator, TokenizerMetaMath
 from peft import EvaConfig, LoraConfig, get_peft_model, initialize_lora_eva_weights
 
-DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
+DEVICE = torch.accelerator.current_accelerator().type if hasattr(torch, "accelerator") else "cuda"
 
 # config
 model_name = "meta-llama/Llama-3.1-8B"
@@ -69,7 +68,7 @@ peft_config = LoraConfig(
     r=rank, lora_alpha=alpha, target_modules=target_modules, init_lora_weights="eva", eva_config=eva_config
 )
 
-# move model to GPU
+# move model to accelerator
 model = model.to(DEVICE)
 
 # to optimize memory usage during eva initialization, set low_cpu_mem_usage=True

View File

@@ -50,6 +50,11 @@ if torch.cuda.is_available():
     torch.cuda.set_device(local_rank)
     dist.init_process_group("nccl")
     world_size = dist.get_world_size()
+elif torch.xpu.is_available():
+    local_rank = int(os.environ.get("LOCAL_RANK", -1))
+    torch.xpu.set_device(local_rank)
+    dist.init_process_group("xccl")
+    world_size = dist.get_world_size()
 else:
     local_rank = -1
     world_size = 1
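As a usage note: this branch structure assumes a launcher such as torchrun, which sets `LOCAL_RANK` for every process. Once the process group is up, a one-line all-reduce is a handy smoke test that the chosen backend (`nccl` on CUDA, `xccl` on XPU) actually works. A hedged sketch, not part of the script itself; the script name in the comment is a placeholder:

```python
import torch
import torch.distributed as dist

# After the initialization shown above, e.g. launched via:
#   torchrun --nproc_per_node=2 your_script.py
t = torch.ones(1, device="xpu" if torch.xpu.is_available() else "cuda")
dist.all_reduce(t)  # sums the tensor across all ranks
assert t.item() == dist.get_world_size()
```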