mirror of https://github.com/deepspeedai/DeepSpeed.git
This is a WIP PR that makes op builder detection adapt to accelerator changes. It is a follow-up to https://github.com/microsoft/DeepSpeed/issues/5173

Currently, DeepSpeed generates `installed_ops` and `compatible_ops` at setup time. If the system changes to a different accelerator by DeepSpeed launch time, these two lists contain incorrect information. This PR intends to solve this problem with more flexible op detection:

* For `installed_ops`, DeepSpeed should disable all installed ops if the accelerator detected at setup time differs from the one detected at launch time.
* For `compatible_ops`, DeepSpeed should refresh the list on each launch to avoid the impact of an accelerator change.

As a first step, the nv-inference workflow is temporarily changed to emulate the scenario where the system is set up with CPU_Accelerator and then launched with CUDA_Accelerator, and CPU_Accelerator is modified so that Intel Extension for PyTorch and oneCCL binding for PyTorch are no longer mandatory. Starting from here, `installed_ops` and `compatible_ops` can be reconstructed to follow the design above.

---------

Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com>
Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
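The design described above can be pictured with a minimal sketch. Everything here is illustrative: `SETUP_ACCELERATOR_NAME` and both helper functions are hypothetical placeholders, not DeepSpeed's actual internals; only `get_accelerator()`, `device_name()`, `NAME`, and `is_compatible()` come from the real API.

# Illustrative sketch only. SETUP_ACCELERATOR_NAME is a hypothetical record of
# the accelerator name seen at setup time; the real mechanism may differ.
from deepspeed.accelerator import get_accelerator

SETUP_ACCELERATOR_NAME = "cpu"  # hypothetical: value captured when setup ran

def effective_installed_ops(installed_ops):
    # First bullet: disable every installed op when the launch-time
    # accelerator differs from the setup-time one.
    if get_accelerator().device_name() != SETUP_ACCELERATOR_NAME:
        return {name: False for name in installed_ops}
    return installed_ops

def refresh_compatible_ops(builders):
    # Second bullet: recompute compatibility on every launch instead of
    # trusting a list frozen at setup time.
    return {builder.NAME: builder().is_compatible() for builder in builders}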
44 lines
1.5 KiB
Python
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

# Copyright (c) 2023, Oracle and/or its affiliates.

import os
import torch
import pytest

from unit.common import DistributedTest

import deepspeed
from deepspeed.accelerator import get_accelerator

class TestDequantization(DistributedTest):

    def init(self):
        local_rank = int(os.getenv("LOCAL_RANK", "0"))
        self.device = torch.device(get_accelerator().device_name(local_rank))

        # Skip when the inference kernels are not compatible with the current accelerator.
        from deepspeed.ops.op_builder import InferenceBuilder
        if not deepspeed.ops.__compatible_ops__[InferenceBuilder.NAME]:
            pytest.skip("InferenceBuilder is not implemented")
        else:
            self.dequantize_func = InferenceBuilder().load().dequantize_fp16

    def run_dequantize_test(self, M, N, num_groups):
        # Random int8 weights; one scale per quantization group.
        weight = torch.randint(-255, 255, (M, N)).to(dtype=torch.int8, device=self.device)
        scale = torch.rand(num_groups, 1).to(device=self.device)

        # Framework-side reference dequantization, compared against the backend op.
        weight_deq = (weight.reshape(num_groups, -1) * scale).reshape(M, N).to(torch.float16).contiguous()
        weight_deq_backend = self.dequantize_func(weight, scale, num_groups)

        assert torch.allclose(weight_deq, weight_deq_backend)

    def test_dequantize(self):
        self.init()

        self.run_dequantize_test(14336, 7168, 32)
        self.run_dequantize_test(14336, 1792, 32)
        self.run_dequantize_test(768, 768, 32)
        self.run_dequantize_test(768, 768, 48)
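For reference, the group-wise dequantization the test verifies can be reproduced standalone. The sizes below are made-up toy values, and `deq` mirrors the test's framework-side reference computation, not the backend op:

# Standalone illustration of the group-wise scheme the test checks:
# the M rows are viewed as `num_groups` equal blocks, each sharing one scale.
import torch

M, N, num_groups = 4, 8, 2  # hypothetical toy sizes
weight = torch.randint(-255, 255, (M, N)).to(torch.int8)
scale = torch.rand(num_groups, 1)

deq = (weight.reshape(num_groups, -1) * scale).reshape(M, N).to(torch.float16)
assert deq.shape == (M, N)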