DeepSpeed/deepspeed/inference/v2/config_v2.py
Michael Wyatt 0a4457cc48 Pydantic v2 migration (#5167)
Pydantic v2 has been out for some time now. Until now we have been relying on
the v1 API that remains available within v2. This is a refresh of #3902
to bring proper v2 support to DeepSpeed.
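
For context, a minimal sketch of what moving from the v1-style API to native v2 looks like. This is illustrative only and not code from this PR; `TPConfig` and its validator are hypothetical, but the renamed Pydantic calls (`field_validator`, `model_validate`, `model_dump`) are the real v2 equivalents of `validator`, `parse_obj`, and `dict`.

```python
from pydantic import BaseModel, field_validator


class TPConfig(BaseModel):
    """Hypothetical example model, not part of DeepSpeed."""
    tp_size: int = 1

    # Pydantic v1 spelled this @validator("tp_size"); v2 renames it.
    @field_validator("tp_size")
    @classmethod
    def _check_tp_size(cls, v: int) -> int:
        if v < 1:
            raise ValueError("tp_size must be >= 1")
        return v


cfg = TPConfig.model_validate({"tp_size": 2})  # v1: TPConfig.parse_obj(...)
print(cfg.model_dump())                        # v1: cfg.dict() -> {'tp_size': 2}
```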

Corresponding DeepSpeed-MII PR
[here](https://github.com/microsoft/DeepSpeed-MII/pull/423).

@loadams

---------

Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
Co-authored-by: Logan Adams <loadams@microsoft.com>
Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com>
Co-authored-by: Abhishek Kulkarni <11399+adk9@users.noreply.github.com>
Co-authored-by: Abhishek Kulkarni <abkulkarni@microsoft.com>
Co-authored-by: Lev Kurilenko <113481193+lekurile@users.noreply.github.com>
2024-08-22 15:38:13 -07:00

45 lines
1.4 KiB
Python

# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

from pydantic import Field
from typing import Optional

from deepspeed.runtime.config_utils import DeepSpeedConfigModel
from .ragged import DSStateManagerConfig


class DeepSpeedTPConfig(DeepSpeedConfigModel):
    """ Configure tensor parallelism settings """

    tp_size: int = 1
    """ Number of devices to split the model across using tensor parallelism. """


class QuantizationConfig(DeepSpeedConfigModel):
    """ Configure quantization settings """

    quantization_mode: Optional[str] = None
    """ The quantization mode in string format. The supported modes are as follows:
        - 'wf6af16', weight-only quantization with FP6 weight and FP16 activation.
    """
    # TODO: may reuse the constants in deepspeed/compression/constants.py


class RaggedInferenceEngineConfig(DeepSpeedConfigModel):
    """ Sets parameters for DeepSpeed Inference Engine. """

    tensor_parallel: DeepSpeedTPConfig = Field({}, alias="tp")
    """
    Configuration for tensor parallelism used to split the model across several
    GPUs. Expects a dictionary containing values for :any:`DeepSpeedTPConfig`.
    """

    state_manager: DSStateManagerConfig = Field({}, alias="manager")
    """
    Configuration for managing persistent state
    """

    quantization: QuantizationConfig = {}
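
A hedged usage sketch, not part of the file above: assuming `DeepSpeedConfigModel` behaves like a standard Pydantic v2 `BaseModel` subclass, the engine config can be built from plain dictionaries, and the `tp` and `manager` field aliases are accepted on input.

```python
from deepspeed.inference.v2.config_v2 import RaggedInferenceEngineConfig

# Nested dicts are validated into the corresponding sub-config models;
# "tp" is the alias for the tensor_parallel field.
config = RaggedInferenceEngineConfig(
    tp={"tp_size": 2},
    quantization={"quantization_mode": "wf6af16"},
)

print(config.tensor_parallel.tp_size)         # 2
print(config.quantization.quantization_mode)  # 'wf6af16'
```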