mirror of
https://github.com/deepspeedai/DeepSpeed.git
synced 2025-10-20 23:53:48 +08:00
This is a WIP PR that makes op builder detection adapt to accelerator changes. It is a follow-up to https://github.com/microsoft/DeepSpeed/issues/5173. Currently, DeepSpeed generates `installed_ops` and `compatible_ops` at setup time. If the system changes to a different accelerator at DeepSpeed launch time, these two lists will contain incorrect information. This PR intends to solve this problem with more flexible ops detection. * For `installed_ops`, DeepSpeed should disable all installed ops if the accelerator detected at setup time is different from the one detected at launch time. * For `compatible_ops`, DeepSpeed should refresh the list on each launch to avoid the impact of an accelerator change. As a first step, the nv-inference workflow is temporarily changed to emulate the scenario in which the system is set up with CPU_Accelerator and then launched with CUDA_Accelerator. In addition, CPU_Accelerator is modified so that Intel Extension for PyTorch and the oneCCL binding for PyTorch are no longer mandatory. Starting from here, we can reconstruct installed_ops and compatible_ops to follow the design above. --------- Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com> Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
34 lines
1.2 KiB
Python
34 lines
1.2 KiB
Python
# Copyright (c) Microsoft Corporation.
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
# DeepSpeed Team
|
|
|
|
import os
|
|
import pkgutil
|
|
import importlib
|
|
try:
    # At installation time the `accelerator` package is visible directly;
    # otherwise fall back to `deepspeed.accelerator`.
    from accelerator import get_accelerator
except ImportError:
    from deepspeed.accelerator import get_accelerator

# Registry of all available ops.
#
# Walk the modules that sit next to the accelerator's op-builder package and
# collect one builder for every attribute whose name ends in 'Builder'.
op_builder_dir = get_accelerator().op_builder_dir()
op_builder_module = importlib.import_module(op_builder_dir)
__op_builders__ = []

for _, module_name, _ in pkgutil.iter_modules([os.path.dirname(op_builder_module.__file__)]):
    # Skip `all_ops` and `builder` to avoid self references.
    if module_name in ('all_ops', 'builder'):
        continue
    module = importlib.import_module(f"{op_builder_dir}.{module_name}")
    for member_name in module.__dir__():
        if not member_name.endswith('Builder'):
            continue
        # The accelerator creates the builder from the member name; the result
        # is recorded even when it is None (filtered out below).
        builder = get_accelerator().create_op_builder(member_name)
        __op_builders__.append(builder)

# Map op name -> builder, dropping entries for which no builder was created.
ALL_OPS = {op.name: op for op in __op_builders__ if op is not None}
accelerator_name = get_accelerator()._name
|