mirror of
https://github.com/deepspeedai/DeepSpeed.git
synced 2025-10-21 08:43:50 +08:00
This is a WIP PR that makes op builder detection adapt to accelerator changes. This is a follow-up to https://github.com/microsoft/DeepSpeed/issues/5173 Currently, DeepSpeed generates `installed_ops` and `compatible_ops` at setup time. If the system changes to a different accelerator at DeepSpeed launch time, these two lists would contain incorrect information. This PR intends to solve this problem with more flexible ops detection. * For `installed_ops`, DeepSpeed should disable all installed ops if the accelerator detected at setup time is different from the one detected at launch time. * For `compatible_ops`, DeepSpeed should refresh the list for each launch to avoid the impact of an accelerator change. In the first step, the nv-inference workflow is temporarily changed to emulate the scenario in which the system is set up with CPU_Accelerator, then launched with CUDA_Accelerator. CPU_Accelerator is also modified to make Intel Extension for PyTorch and oneCCL binding for PyTorch not mandatory. Starting from here we can reconstruct installed_ops and compatible_ops to follow the design above. --------- Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com> Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
32 lines
1.0 KiB
Python
32 lines
1.0 KiB
Python
# Copyright (c) Microsoft Corporation.
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
# DeepSpeed Team
|
|
|
|
try:
    # This is populated by setup.py
    from .git_version_info_installed import *  # noqa: F401 # type: ignore
except ModuleNotFoundError:
    # The generated module is absent, so fall back to defaults defined here.
    import os

    # 'version.txt' is a relative path, so it is resolved against the
    # current working directory.
    if os.path.isfile('version.txt'):
        # Will be missing from checkouts that haven't been installed (e.g., readthedocs)
        # Use a context manager so the file handle is closed deterministically.
        with open('version.txt', 'r') as version_file:
            version = version_file.read().strip()
    else:
        version = "0.0.0"
    git_hash = '[none]'
    git_branch = '[none]'

    from .ops.op_builder.all_ops import ALL_OPS

    # Without the generated module, every known op is reported as not installed.
    installed_ops = dict.fromkeys(ALL_OPS.keys(), False)
    accelerator_name = ""
    torch_info = {'version': "0.0", "cuda_version": "0.0", "hip_version": "0.0"}
|
|
|
|
# compatible_ops is rebuilt on every import of this module (i.e. for each
# launch) by asking each op builder whether it is compatible.
from .ops.op_builder.all_ops import ALL_OPS

compatible_ops = {name: op_builder.is_compatible() for name, op_builder in ALL_OPS.items()}
# This extra key is always recorded as not compatible.
compatible_ops["deepspeed_not_implemented"] = False
|