Files
DeepSpeed/op_builder/transformer_inference.py
Fabien Dupont f4caed6d4f Allow NVIDIA Blackwell (#6991)
NVIDIA Blackwell GPU generation has number 10. The SM code and
architecture should be `100`, but the current code generates `1.`,
because it expects a 2 characters string.

This change modifies the logic to consider it as a string that contains
a `.`, hence splits the string and uses the array of strings.

Signed-off-by: Fabien Dupont <fdupont@redhat.com>
2025-02-04 17:47:44 +00:00

78 lines
2.8 KiB
Python
Executable File

# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
from .builder import CUDAOpBuilder, installed_cuda_version
class InferenceBuilder(CUDAOpBuilder):
BUILD_VAR = "DS_BUILD_TRANSFORMER_INFERENCE"
NAME = "transformer_inference"
def __init__(self, name=None):
name = self.NAME if name is None else name
super().__init__(name=name)
def absolute_name(self):
return f'deepspeed.ops.transformer.inference.{self.NAME}_op'
def is_compatible(self, verbose=False):
try:
import torch
except ImportError:
if verbose:
self.warning("Please install torch if trying to pre-compile inference kernels")
return False
cuda_okay = True
if not self.is_rocm_pytorch() and torch.cuda.is_available():
sys_cuda_major, _ = installed_cuda_version()
torch_cuda_major = int(torch.version.cuda.split('.')[0])
cuda_capability = torch.cuda.get_device_properties(0).major
if cuda_capability < 6:
if verbose:
self.warning("NVIDIA Inference is only supported on Pascal and newer architectures")
cuda_okay = False
if cuda_capability >= 8:
if torch_cuda_major < 11 or sys_cuda_major < 11:
if verbose:
self.warning("On Ampere and higher architectures please use CUDA 11+")
cuda_okay = False
return super().is_compatible(verbose) and cuda_okay
def filter_ccs(self, ccs):
ccs_retained = []
ccs_pruned = []
for cc in [cc.split('.') for cc in ccs]:
if int(cc[0]) >= 6:
ccs_retained.append(cc)
else:
ccs_pruned.append(cc)
if len(ccs_pruned) > 0:
self.warning(f"Filtered compute capabilities {ccs_pruned}")
return ccs_retained
def sources(self):
return [
'csrc/transformer/inference/csrc/pt_binding.cpp',
'csrc/transformer/inference/csrc/gelu.cu',
'csrc/transformer/inference/csrc/relu.cu',
'csrc/transformer/inference/csrc/layer_norm.cu',
'csrc/transformer/inference/csrc/rms_norm.cu',
'csrc/transformer/inference/csrc/softmax.cu',
'csrc/transformer/inference/csrc/dequantize.cu',
'csrc/transformer/inference/csrc/apply_rotary_pos_emb.cu',
'csrc/transformer/inference/csrc/transform.cu',
'csrc/transformer/inference/csrc/pointwise_ops.cu',
]
def extra_ldflags(self):
if not self.is_rocm_pytorch():
return ['-lcurand']
else:
return []
def include_paths(self):
return ['csrc/transformer/inference/includes', 'csrc/includes']