mirror of
https://github.com/deepspeedai/DeepSpeed.git
synced 2025-10-20 15:33:51 +08:00
* Abstract accelerator (step 2) * more flex op_builder path for both installation and runtime * add SpatialInferenceBuilder into cuda_accelerator.py * use reflection to make cuda_accelerator adapt to CUDA op builder change automatically * clean up deepspeed/__init__.py * add comments in cuda_accelerator for no torch path * Update deepspeed/env_report.py Change env_report.py according to suggestion Co-authored-by: Michael Wyatt <mrwyattii@gmail.com> * reduce the range of try...except for better code clarity * Add porting for deepspeed/ops/random_ltd/dropping_utils.py * move accelerator to top directory and create symlink under deepspeed Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com> Co-authored-by: Michael Wyatt <mrwyattii@gmail.com> Co-authored-by: Jeff Rasley <jerasley@microsoft.com>
144 lines
4.9 KiB
Python
144 lines
4.9 KiB
Python
import torch
|
|
import deepspeed
|
|
import subprocess
|
|
import argparse
|
|
from .ops.op_builder.all_ops import ALL_OPS
|
|
from .git_version_info import installed_ops, torch_info
|
|
from deepspeed.accelerator import get_accelerator
|
|
|
|
# ANSI escape sequences used to colorize terminal output.
GREEN = '\033[92m'
RED = '\033[91m'
YELLOW = '\033[93m'
END = '\033[0m'  # emitted after a colored span

# Pre-rendered status tags built from the color codes above.
SUCCESS = GREEN + " [SUCCESS] " + END
OKAY = GREEN + "[OKAY]" + END
WARNING = YELLOW + "[WARNING]" + END
FAIL = RED + "[FAIL]" + END
INFO = '[INFO]'

# Combined length of one opening color code plus the END code; subtracted
# from len() of a colored tag to get the tag's visible width.
color_len = len(GREEN) + len(END)

# Lower-case names carrying the same strings as OKAY / WARNING.
okay = GREEN + "[OKAY]" + END
warning = YELLOW + "[WARNING]" + END
|
|
|
|
|
|
def op_report(verbose=True):
    """Print the DeepSpeed op report table to stdout.

    The report has a header, a note about JIT compilation, a row showing
    whether ninja is importable, and one row per entry in ``ALL_OPS`` with
    its "installed" and "compatible" status.

    Args:
        verbose: Passed through to each builder's ``is_compatible`` call.
    """
    max_dots = 23
    max_dots2 = 11
    h = ["op name", "installed", "compatible"]
    # The same horizontal rule is printed between every section.
    separator = "-" * (max_dots + max_dots2 + len(h[0]) + len(h[1]))

    print(separator)
    print("DeepSpeed C++/CUDA extension op report")
    print(separator)

    print("NOTE: Ops not installed will be just-in-time (JIT) compiled at\n"
          " runtime if needed. Op compatibility means that your system\n"
          " meet the required dependencies to JIT install the op.")

    print(separator)
    print("JIT compiled ops requires ninja")
    ninja_status = OKAY if ninja_installed() else FAIL
    print('ninja', "." * (max_dots - 5), ninja_status)
    print(separator)
    print(h[0], "." * (max_dots - len(h[0])), h[1], "." * (max_dots2 - len(h[1])), h[2])
    print(separator)

    installed = f"{GREEN}[YES]{END}"
    no = f"{YELLOW}[NO]{END}"
    for op_name, builder in ALL_OPS.items():
        dots = "." * (max_dots - len(op_name))
        is_compatible = OKAY if builder.is_compatible(verbose) else no
        # An op listed in ALL_OPS may be absent from the recorded install
        # map; report it as not installed instead of raising KeyError.
        is_installed = installed if installed_ops.get(op_name, False) else no
        # color_len removes the invisible ANSI codes so the padding is
        # computed from the visible width of the tag.
        dots2 = '.' * (max_dots2 - (len(is_installed) - color_len))
        print(op_name, dots, is_installed, dots2, is_compatible)
    print(separator)
|
|
|
|
|
|
def ninja_installed():
    """Return True when the ``ninja`` package can be imported, else False."""
    try:
        import ninja  # noqa: F401
    except ImportError:
        available = False
    else:
        available = True
    return available
|
|
|
|
|
|
def nvcc_version():
    """Return the release string printed by ``nvcc -V``.

    The compiler is looked up under ``torch.utils.cpp_extension.CUDA_HOME``.

    Returns:
        The token following the word "release" in nvcc's output with commas
        removed, or a red "[FAIL]" message when CUDA_HOME is None or the
        nvcc executable does not exist.
    """
    import torch.utils.cpp_extension

    cuda_home = torch.utils.cpp_extension.CUDA_HOME
    if cuda_home is None:
        return f"{RED} [FAIL] cannot find CUDA_HOME via torch.utils.cpp_extension.CUDA_HOME={torch.utils.cpp_extension.CUDA_HOME} {END}"

    try:
        nvcc_cmd = [cuda_home + "/bin/nvcc", "-V"]
        output = subprocess.check_output(nvcc_cmd, universal_newlines=True)
    except FileNotFoundError:
        return f"{RED} [FAIL] nvcc missing {END}"

    tokens = output.split()
    # The version is the whitespace-separated token right after "release";
    # it may carry a comma, which is stripped.
    release_token = tokens[tokens.index("release") + 1]
    return release_token.replace(',', '')
|
|
|
|
|
|
def debug_report():
    """Print general environment information as dotted "name ... value" rows.

    Always lists the torch and deepspeed install paths and versions. When the
    accelerator's device name is 'cuda', also lists the torch CUDA/HIP
    versions, the nvcc version (None when a HIP version is present) and the
    toolkit the deepspeed wheel was compiled with.
    """
    max_dots = 33

    rows = []
    rows.append(("torch install path", torch.__path__))
    rows.append(("torch version", torch.__version__))
    rows.append(("deepspeed install path", deepspeed.__path__))
    rows.append((
        "deepspeed info",
        f"{deepspeed.__version__}, {deepspeed.__git_hash__}, {deepspeed.__git_branch__}",
    ))

    if get_accelerator().device_name() == 'cuda':
        hip_version = getattr(torch.version, "hip", None)
        rows.append(("torch cuda version", torch.version.cuda))
        rows.append(("torch hip version", hip_version))

        # nvcc is not queried when torch reports a HIP version.
        nvcc = None if hip_version else nvcc_version()
        rows.append(("nvcc version", nvcc))

        compiled_with = f"torch {torch_info['version']}, "
        if hip_version:
            compiled_with += f"hip {torch_info['hip_version']}"
        else:
            compiled_with += f"cuda {torch_info['cuda_version']}"
        rows.append(("deepspeed wheel compiled w.", compiled_with))
    else:
        rows.append(("deepspeed wheel compiled w.", f"torch {torch_info['version']} "))

    print("DeepSpeed general environment info:")
    for label, value in rows:
        padding = "." * (max_dots - len(label))
        print(label, padding, value)
|
|
|
|
|
|
def parse_arguments():
    """Parse the report's command-line flags.

    Returns:
        argparse.Namespace with two boolean attributes,
        ``hide_operator_status`` and ``hide_errors_and_warnings``; each is
        False unless its flag is given.
    """
    flag_specs = (
        ('--hide_operator_status',
         'Suppress display of installation and compatibility statuses of DeepSpeed operators. '),
        ('--hide_errors_and_warnings',
         'Suppress warning and error messages.'),
    )

    parser = argparse.ArgumentParser()
    for flag, description in flag_specs:
        parser.add_argument(flag, action='store_true', help=description)
    return parser.parse_args()
|
|
|
|
|
|
def main(hide_operator_status=False, hide_errors_and_warnings=False):
    """Print the op report (unless hidden) followed by the environment report.

    Args:
        hide_operator_status: When true, the op report is skipped.
        hide_errors_and_warnings: When true, the op report is run with
            ``verbose=False``.
    """
    show_operator_status = not hide_operator_status
    if show_operator_status:
        verbose = not hide_errors_and_warnings
        op_report(verbose=verbose)
    debug_report()
|
|
|
|
|
|
def cli_main():
    """Run ``main`` using the flags parsed from the command line."""
    options = parse_arguments()
    hide_ops = options.hide_operator_status
    hide_msgs = options.hide_errors_and_warnings
    main(hide_operator_status=hide_ops, hide_errors_and_warnings=hide_msgs)
|
|
|
|
|
|
if __name__ == "__main__":
    # Go through cli_main() so that flags given on the command line are
    # parsed and honored; calling main() directly would always use its
    # defaults and silently ignore them.
    cli_main()
|