mirror of
https://github.com/deepspeedai/DeepSpeed.git
synced 2025-10-20 15:33:51 +08:00
Signed-off-by: Tian, Feng <feng.tian@intel.com> Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com>
189 lines
5.4 KiB
Python
189 lines
5.4 KiB
Python
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

#########################################
# Compression Methods
# It has several sub-components
#########################################
# String names shared by the compression techniques defined further down.
# NOTE(review): these look like the keys read from the user's compression
# config -- confirm against the code that parses it.
COMPRESSION_TRAINING = "compression_training"
SHARED_PARAMETERS = "shared_parameters"
DIFFERENT_GROUPS = "different_groups"
# Generic per-technique names; the technique sections below alias these
# instead of repeating the string literals.
TECHNIQUE_ENABLED = "enabled"
TECHNIQUE_SCHEDULE_OFFSET = "schedule_offset"
TECHNIQUE_SCHEDULE_OFFSET_END = "schedule_offset_end"
DIFFERENT_GROUPS_PARAMETERS = "params"
DIFFERENT_GROUPS_MODULE_SCOPE = "modules"
DIFFERENT_GROUPS_MODULE_SCOPE_DEFAULT = "*"
DIFFERENT_GROUPS_RELATED_MODULE_SCOPE = "related_modules"
DIFFERENT_GROUPS_RELATED_MODULE_SCOPE_DEFAULT = None
# The two names below are intentionally left commented out (not defined).
# COMPRESSION_TRAINING_ENABLED = "enabled"
# COMPRESSION_TRAINING_ENABLED_DEFAULT = False

####
# Layer Reduction
####
# Names (and one fallback value) for the layer-reduction section.
# NOTE(review): presumably the *_DEFAULT value is what is used when the
# option is not supplied -- confirm against the config parser.
LAYER_REDUCTION = "layer_reduction"
LAYER_REDUCTION_ENABLED = "enabled"
LAYER_REDUCTION_ENABLED_DEFAULT = False
KEEP_NUMBER_LAYER = "keep_number_layer"
MODULE_NAME_PREFIX = "module_name_prefix"
TEACHER_LAYER = "teacher_layer"
OTHER_MODULE_NAME = "other_module_name"

####
# Weight Quantization
####
# Names and fallback values for the weight-quantization section.
# NOTE(review): presumably each *_DEFAULT is what is used when the matching
# option is not supplied -- confirm against the config parser.
WEIGHT_QUANTIZATION = "weight_quantization"

WEIGHT_QUANTIZATION_PERIOD = "quantization_period"
WEIGHT_QUANTIZATION_PERIOD_DEFAULT = 1

WEIGHT_QUANTIZE_IN_FORWARD_ENABLED = "quantize_weight_in_forward"
WEIGHT_QUANTIZE_IN_FORWARD_ENABLED_DEFAULT = False

# Reuses the generic per-technique name defined earlier in this module.
WEIGHT_QUANTIZE_ENABLED = TECHNIQUE_ENABLED
WEIGHT_QUANTIZE_ENABLED_DEFAULT = False

WEIGHT_QUANTIZE_KERNEL = "quantizer_kernel"
WEIGHT_QUANTIZE_KERNEL_DEFAULT = False

# Reuses the generic per-technique name defined earlier in this module.
WEIGHT_QUANTIZE_SCHEDULE_OFFSET = TECHNIQUE_SCHEDULE_OFFSET
WEIGHT_QUANTIZE_SCHEDULE_OFFSET_DEFAULT = 0

WEIGHT_QUANTIZE_GROUPS = "quantize_groups"
WEIGHT_QUANTIZE_GROUPS_DEFAULT = 1

WEIGHT_QUANTIZE_VERBOSE = "quantize_verbose"
WEIGHT_QUANTIZE_VERBOSE_DEFAULT = False

WEIGHT_QUANTIZE_TYPE = "quantization_type"
WEIGHT_QUANTIZE_SYMMETRIC = "symmetric"
WEIGHT_QUANTIZE_ASYMMETRIC = "asymmetric"
# Aliased to the named choice so the default cannot drift from it.
WEIGHT_QUANTIZE_TYPE_DEFAULT = WEIGHT_QUANTIZE_SYMMETRIC

WEIGHT_QUANTIZE_ROUNDING = "rounding"
WEIGHT_QUANTIZE_STOCHASTIC_ROUNDING = "stochastic"
WEIGHT_QUANTIZE_NEAREST_ROUNDING = "nearest"
# Aliased to the named choice so the default cannot drift from it.
WEIGHT_QUANTIZE_ROUNDING_DEFAULT = WEIGHT_QUANTIZE_NEAREST_ROUNDING
# maybe deleted for a cleaner version
WEIGHT_QUANTIZE_FP16_MIXED_QUANTIZE = "fp16_mixed_quantize"

WEIGHT_QUANTIZE_FP16_MIXED_QUANTIZE_ENABLED = "enabled"
WEIGHT_QUANTIZE_FP16_MIXED_QUANTIZE_ENABLED_DEFAULT = False

WEIGHT_QUANTIZE_CHANGE_RATIO = "quantize_change_ratio"
WEIGHT_QUANTIZE_CHANGE_RATIO_DEFAULT = 0.001

WEIGHT_QUANTIZE_START_BITS = "start_bits"
WEIGHT_QUANTIZE_TARGET_BITS = "target_bits"
###
# Activation Quantization
###
# Names and fallback values for the activation-quantization section.
# NOTE(review): presumably each *_DEFAULT is what is used when the matching
# option is not supplied -- confirm against the config parser.
ACTIVATION_QUANTIZATION = "activation_quantization"

# Reuses the generic per-technique name defined earlier in this module.
ACTIVATION_QUANTIZATION_ENABLED = TECHNIQUE_ENABLED
ACTIVATION_QUANTIZATION_ENABLED_DEFAULT = False

# Reuses the generic per-technique name defined earlier in this module.
ACTIVATION_QUANTIZE_SCHEDULE_OFFSET = TECHNIQUE_SCHEDULE_OFFSET
ACTIVATION_QUANTIZE_SCHEDULE_OFFSET_DEFAULT = 1000

ACTIVATION_QUANTIZE_TYPE = "quantization_type"
ACTIVATION_QUANTIZE_SYMMETRIC = "symmetric"
ACTIVATION_QUANTIZE_ASYMMETRIC = "asymmetric"
# Aliased to the named choice so the default cannot drift from it.
ACTIVATION_QUANTIZE_TYPE_DEFAULT = ACTIVATION_QUANTIZE_SYMMETRIC

ACTIVATION_QUANTIZE_RANGE = "range_calibration"
ACTIVATION_QUANTIZE_RANGE_STATIC = "static"
ACTIVATION_QUANTIZE_RANGE_DYNAMIC = "dynamic"
# Aliased to the named choice so the default cannot drift from it.
ACTIVATION_QUANTIZE_RANGE_DEFAULT = ACTIVATION_QUANTIZE_RANGE_DYNAMIC

ACTIVATION_QUANTIZE_BITS = "bits"
###
# Sparse Pruning
###
# Names and fallback values for the sparse-pruning section.
# NOTE(review): presumably each *_DEFAULT is what is used when the matching
# option is not supplied -- confirm against the config parser.
SPARSE_PRUNING = "sparse_pruning"

# Reuses the generic per-technique name defined earlier in this module.
SPARSE_PRUNING_ENABLED = TECHNIQUE_ENABLED
SPARSE_PRUNING_ENABLED_DEFAULT = False

SPARSE_PRUNING_METHOD = "method"
SPARSE_PRUNING_METHOD_L1 = "l1"
SPARSE_PRUNING_METHOD_TOPK = "topk"
SPARSE_PRUNING_METHOD_SNIP_MOMENTUM = "snip_momentum"
# Aliased to the named choice so the default cannot drift from it.
SPARSE_PRUNING_METHOD_DEFAULT = SPARSE_PRUNING_METHOD_L1

SPARSE_PRUNING_BLOCK_PATTERN = "block_pattern"
SPARSE_PRUNING_BLOCK_PATTERN_DEFAULT = "4x1"

SPARSE_PRUNING_SCHEDULE_OFFSET_STRIDE = "schedule_offset_stride"
SPARSE_PRUNING_SCHEDULE_OFFSET_STRIDE_DEFAULT = 1

# Reuses the generic per-technique name defined earlier in this module.
SPARSE_PRUNING_SCHEDULE_OFFSET = TECHNIQUE_SCHEDULE_OFFSET
SPARSE_PRUNING_SCHEDULE_OFFSET_DEFAULT = 1000

# Reuses the generic per-technique name defined earlier in this module.
SPARSE_PRUNING_SCHEDULE_OFFSET_END = TECHNIQUE_SCHEDULE_OFFSET_END
# The end-offset fallback is the same value as the start-offset fallback.
SPARSE_PRUNING_SCHEDULE_OFFSET_END_DEFAULT = SPARSE_PRUNING_SCHEDULE_OFFSET_DEFAULT

SPARSE_PRUNING_DENSE_RATIO = "dense_ratio"
SPARSE_PRUNING_DENSE_RATIO_DEFAULT = 0.1

SPARSE_PRUNING_EXCLUDED_MODULES = "excluded_modules"
# NOTE: this is one module-level list object shared by every importer;
# consumers should copy it rather than mutate it in place.
SPARSE_PRUNING_EXCLUDED_MODULES_DEFAULT = []
###
# Row Pruning
###
# Names and fallback values for the row-pruning section.
# NOTE(review): presumably each *_DEFAULT is what is used when the matching
# option is not supplied -- confirm against the config parser.
ROW_PRUNING = "row_pruning"

# Reuses the generic per-technique name defined earlier in this module.
ROW_PRUNING_ENABLED = TECHNIQUE_ENABLED
ROW_PRUNING_ENABLED_DEFAULT = False

ROW_PRUNING_METHOD = "method"
ROW_PRUNING_METHOD_L1 = "l1"
ROW_PRUNING_METHOD_TOPK = "topk"
# Aliased to the named choice so the default cannot drift from it.
ROW_PRUNING_METHOD_DEFAULT = ROW_PRUNING_METHOD_L1

# Reuses the generic per-technique name defined earlier in this module.
ROW_PRUNING_SCHEDULE_OFFSET = TECHNIQUE_SCHEDULE_OFFSET
ROW_PRUNING_SCHEDULE_OFFSET_DEFAULT = 1000

# No *_DEFAULT is defined for this name in this section.
ROW_PRUNING_DENSE_RATIO = "dense_ratio"

###
# Head Pruning
###
# Names and fallback values for the head-pruning section.
# NOTE(review): presumably each *_DEFAULT is what is used when the matching
# option is not supplied -- confirm against the config parser.
HEAD_PRUNING = "head_pruning"

# Reuses the generic per-technique name defined earlier in this module.
HEAD_PRUNING_ENABLED = TECHNIQUE_ENABLED
HEAD_PRUNING_ENABLED_DEFAULT = False

HEAD_PRUNING_METHOD = "method"
HEAD_PRUNING_METHOD_L1 = "l1"
HEAD_PRUNING_METHOD_TOPK = "topk"
# Aliased to the named choice so the default cannot drift from it.
# Unlike the other pruning sections in this file, the default here is "topk".
HEAD_PRUNING_METHOD_DEFAULT = HEAD_PRUNING_METHOD_TOPK

# Reuses the generic per-technique name defined earlier in this module.
HEAD_PRUNING_SCHEDULE_OFFSET = TECHNIQUE_SCHEDULE_OFFSET
HEAD_PRUNING_SCHEDULE_OFFSET_DEFAULT = 1000

# No *_DEFAULT is defined for the two names below in this section.
HEAD_PRUNING_NUM_HEADS = "num_heads"

HEAD_PRUNING_DENSE_RATIO = "dense_ratio"

###
# Channel Pruning
###
# Names and fallback values for the channel-pruning section.
# NOTE(review): presumably each *_DEFAULT is what is used when the matching
# option is not supplied -- confirm against the config parser.
CHANNEL_PRUNING = "channel_pruning"

# Reuses the generic per-technique name defined earlier in this module.
CHANNEL_PRUNING_ENABLED = TECHNIQUE_ENABLED
CHANNEL_PRUNING_ENABLED_DEFAULT = False

CHANNEL_PRUNING_METHOD = "method"
CHANNEL_PRUNING_METHOD_L1 = "l1"
CHANNEL_PRUNING_METHOD_TOPK = "topk"
# Aliased to the named choice so the default cannot drift from it.
CHANNEL_PRUNING_METHOD_DEFAULT = CHANNEL_PRUNING_METHOD_L1

# Reuses the generic per-technique name defined earlier in this module.
CHANNEL_PRUNING_SCHEDULE_OFFSET = TECHNIQUE_SCHEDULE_OFFSET
CHANNEL_PRUNING_SCHEDULE_OFFSET_DEFAULT = 1000

# No *_DEFAULT is defined for this name in this section.
CHANNEL_PRUNING_DENSE_RATIO = "dense_ratio"