remove use_xpu to fix ut issues, we don't need this since XPU is OOB … (#3460)

* remove use_xpu to fix ut issues, we don't need this since XPU is OOB supported now

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

* fix style

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>

* add deprecate warnings

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

* fix

Signed-off-by: YAO Matrix <matrix.yao@intel.com>

---------

Signed-off-by: Yao, Matrix <matrix.yao@intel.com>
Signed-off-by: YAO Matrix <matrix.yao@intel.com>
Author: Yao Matrix
Date: 2025-04-01 17:55:37 +08:00
Committer: GitHub
parent 531643436e
commit 67a768be07

7 changed files with 25 additions and 46 deletions


@@ -158,7 +158,7 @@ The following arguments are useful for selecting which training paradigm to use.
 * `--use_deepspeed` (`bool`) -- Whether or not to use DeepSpeed for training.
 * `--use_fsdp` (`bool`) -- Whether or not to use FullyShardedDataParallel for training.
 * `--use_megatron_lm` (`bool`) -- Whether or not to use Megatron-LM for training.
-* `--use_xpu` (`bool`) -- Whether to use IPEX plugin to speed up training on XPU specifically.
+* `--use_xpu` (`bool`) -- Whether to use IPEX plugin to speed up training on XPU specifically. **This argument is deprecated and ignored, will be removed in Accelerate v1.20**
 **Distributed GPU Arguments**:
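
For reference (not part of the diff): with XPU supported out of the box, selecting the device in user code needs no Accelerate-specific flag either. A minimal sketch, assuming stock PyTorch >= 2.4 with its native XPU backend:

import torch

# torch.xpu ships with stock PyTorch >= 2.4; guard for older builds that lack the module
use_xpu = hasattr(torch, "xpu") and torch.xpu.is_available()
device = torch.device("xpu" if use_xpu else "cpu")
x = torch.ones(2, 2, device=device)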


@@ -1430,10 +1430,8 @@ class Accelerator:
                     param_group["params"][i].data_ptr = p.data_ptr()
         if self.distributed_type in [DistributedType.MULTI_CPU, DistributedType.MULTI_XPU, DistributedType.NO]:
-            if self.device.type == "cpu" and self.state.use_ipex:
-                args = self._prepare_ipex_or_xpu(*args)
-            elif self.device.type == "xpu" and is_xpu_available():
-                args = self._prepare_ipex_or_xpu(*args)
+            if (self.device.type == "cpu" or self.device.type == "xpu") and self.state.use_ipex:
+                args = self._prepare_ipex(*args)
         if self.fp8_backend == "TE":
             args = self._prepare_te(*args)
         elif self.fp8_backend == "AO":
@@ -2178,11 +2176,11 @@ class Accelerator:
         return tuple(result)
-    def _prepare_ipex_or_xpu(self, *args):
+    def _prepare_ipex(self, *args):
         """
-        Prepares model and optimizer for training with IPEX or XPU acceleration. This covers 3 cases, IPEX compiled
-        with CPU only support, IPEX compiled with XPU support and training with XPU pytorch backend available in stock
-        pytorch starting from version 2.4.
+        Prepares model and optimizer for training with IPEX on CPU/XPU. This covers 3 cases, IPEX compiled with CPU
+        only support, IPEX compiled with XPU support and training with XPU pytorch backend available in stock pytorch
+        starting from version 2.4.
         """
         if self.state.use_ipex:
             if not is_ipex_available():
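
For context on what the renamed helper wraps: IPEX preparation boils down to an ipex.optimize call that serves both CPU and XPU targets. A minimal sketch, assuming intel_extension_for_pytorch is installed (not the exact body of _prepare_ipex):

import torch
import intel_extension_for_pytorch as ipex  # assumed installed

model = torch.nn.Linear(8, 2)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)

model.train()
# when an optimizer is passed, ipex.optimize returns the tuned (model, optimizer) pair
model, optimizer = ipex.optimize(model, optimizer=optimizer, dtype=torch.bfloat16)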


@@ -138,13 +138,15 @@ def get_cluster_input():
     ipex_config = {}
     mpirun_config = {}
-    if use_cpu:
+    if use_cpu or is_xpu_available():
         ipex_config["ipex"] = _ask_field(
-            "Do you want to use Intel PyTorch Extension (IPEX) to speed up training on CPU? [yes/NO]:",
+            "Do you want to use Intel PyTorch Extension (IPEX) to speed up training on CPU/XPU? [yes/NO]:",
             _convert_yes_no_to_bool,
             default=False,
             error_message="Please enter yes or no.",
         )
+    if use_cpu:
         if distributed_type == DistributedType.MULTI_CPU:
             use_mpirun = _ask_field(
                 "Do you want accelerate to launch mpirun? [yes/NO]: ",
@@ -160,25 +162,6 @@ def get_cluster_input():
                 )
                 mpirun_config["mpirun_hostfile"] = os.path.expanduser(mpirun_hostfile.strip())
                 mpirun_config["mpirun_ccl"] = _ask_field("Enter the number of oneCCL worker threads [1]: ", default=1)
-    if (
-        not use_cpu
-        and is_xpu_available()
-        and distributed_type
-        not in [
-            DistributedType.MULTI_GPU,
-            DistributedType.MULTI_NPU,
-            DistributedType.MULTI_MLU,
-            DistributedType.MULTI_SDAA,
-            DistributedType.XLA,
-            DistributedType.MULTI_MUSA,
-        ]
-    ):
-        ipex_config["use_xpu"] = _ask_field(
-            "Do you want to use XPU plugin to speed up training on XPU? [yes/NO]:",
-            _convert_yes_no_to_bool,
-            default=False,
-            error_message="Please enter yes or no.",
-        )
     dynamo_config = {}
     use_dynamo = _ask_field(


@@ -33,7 +33,7 @@ from .config_utils import SubcommandHelpFormatter
 description = "Create a default config file for Accelerate with only a few flags set."
-def write_basic_config(mixed_precision="no", save_location: str = default_json_config_file, use_xpu: bool = False):
+def write_basic_config(mixed_precision="no", save_location: str = default_json_config_file):
     """
     Creates and saves a basic cluster config to be used on a local machine with potentially multiple GPUs. Will also
     set CPU if it is a CPU-only machine.
@@ -45,8 +45,6 @@ def write_basic_config(mixed_precision="no", save_location: str = default_json_c
             Optional custom save location. Should be passed to `--config_file` when using `accelerate launch`. Default
             location is inside the huggingface cache folder (`~/.cache/huggingface`) but can be overriden by setting
             the `HF_HOME` environmental variable, followed by `accelerate/default_config.yaml`.
-        use_xpu (`bool`, *optional*, defaults to `False`):
-            Whether to use XPU if available.
     """
     path = Path(save_location)
     path.parent.mkdir(parents=True, exist_ok=True)
@@ -104,7 +102,7 @@ def write_basic_config(mixed_precision="no", save_location: str = default_json_c
             config["distributed_type"] = "MULTI_GPU"
         else:
             config["distributed_type"] = "NO"
-    elif is_xpu_available() and use_xpu:
+    elif is_xpu_available():
         num_xpus = torch.xpu.device_count()
         config["num_processes"] = num_xpus
         config["use_cpu"] = False
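
With use_xpu gone from the signature, generating a default config from Python stays a one-liner; XPU machines are now picked up automatically through is_xpu_available(). A minimal usage sketch:

from accelerate.utils import write_basic_config

# writes accelerate/default_config.yaml under the Hugging Face cache directory;
# on an XPU machine, num_processes is filled from torch.xpu.device_count()
write_basic_config(mixed_precision="bf16")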


@@ -276,11 +276,12 @@ def launch_command_parser(subparsers=None):
         action="store_true",
         help="Whether to use Megatron-LM.",
     )
     paradigm_args.add_argument(
         "--use_xpu",
-        default=False,
+        default=None,
         action="store_true",
-        help="Whether to use IPEX plugin to speed up training on XPU specifically.",
+        help="Whether to use IPEX plugin to speed up training on XPU specifically. This argument is deprecated and ignored, will be removed in Accelerate v1.20.",
     )
     # distributed GPU training arguments
@@ -1086,10 +1087,7 @@ def _validate_launch_command(args):
                 args.mixed_precision = defaults.mixed_precision
                 mp_from_config_flag = True
     else:
-        if args.use_cpu or (args.use_xpu and torch.xpu.is_available()):
-            native_amp = True
-        else:
-            native_amp = is_bf16_available(True)
+        native_amp = is_bf16_available(True)
         if (
             args.mixed_precision == "bf16"
             and not native_amp
@@ -1104,7 +1102,7 @@ def _validate_launch_command(args):
            raise ValueError("You need to manually pass in `--num_processes` using this config yaml.")
     else:
         if args.num_processes is None:
-            if args.use_xpu and is_xpu_available():
+            if is_xpu_available():
                 args.num_processes = torch.xpu.device_count()
             elif is_mlu_available():
                 args.num_processes = torch.mlu.device_count()
@@ -1170,6 +1168,13 @@ def _validate_launch_command(args):
                f"\t`--num_cpu_threads_per_process` was set to `{args.num_cpu_threads_per_process}` to improve out-of-box performance when training on CPUs"
            )
+    if args.use_xpu is not None:
+        logger.warning(
+            "use_xpu is deprecated and ignored, will be removed in Accelerate v1.20. "
+            "XPU is a PyTorch native citizen now, we don't need extra argument to enable it any more.",
+            FutureWarning,
+        )
     if any(warned):
         message = "The following values were not passed to `accelerate launch` and had defaults used instead:\n"
         message += "\n".join(warned)
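
For reference, a standalone equivalent of this deprecation notice using Python's warnings module would look roughly as follows (a sketch with a hypothetical helper name; the commit itself routes the message through the module logger as shown above):

import warnings

def warn_if_use_xpu_passed(use_xpu):
    # hypothetical helper for illustration; fires only when the flag was passed explicitly
    if use_xpu is not None:
        warnings.warn(
            "use_xpu is deprecated and ignored, will be removed in Accelerate v1.20.",
            FutureWarning,
            stacklevel=2,
        )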


@@ -465,10 +465,6 @@ def is_xpu_available(check_device=False):
     potentially if a XPU is in the environment
     """
-    "check if user disables it explicitly"
-    if not parse_flag_from_env("ACCELERATE_USE_XPU", default=True):
-        return False
     if is_ipex_available():
         import intel_extension_for_pytorch  # noqa: F401
     else:
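
Conceptually, the trimmed-down check now reduces to the sketch below (hypothetical helper name; the real function also takes the check_device argument and applies version checks not shown here):

import importlib.util

import torch

def xpu_present() -> bool:  # illustrative only, not the Accelerate function
    # import IPEX first when it is installed so its XPU hooks get registered
    if importlib.util.find_spec("intel_extension_for_pytorch") is not None:
        import intel_extension_for_pytorch  # noqa: F401
    return hasattr(torch, "xpu") and torch.xpu.is_available()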


@@ -186,7 +186,6 @@ def prepare_simple_launcher_cmd_env(args: argparse.Namespace) -> tuple[list[str]
     current_env["OMP_NUM_THREADS"] = str(args.num_cpu_threads_per_process)
     if is_ipex_available():
         current_env["ACCELERATE_USE_IPEX"] = str(args.ipex).lower()
-        current_env["ACCELERATE_USE_XPU"] = str(args.use_xpu).lower()
     if args.enable_cpu_affinity:
         current_env["ACCELERATE_CPU_AFFINITY"] = "1"
     return cmd, current_env