add missing license info to top of all source code (#2889)

Co-authored-by: Michael Wyatt <michaelwyatt@microsoft.com>
Co-authored-by: Conglong Li <conglong.li@gmail.com>
Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com>
2025-11-11 16:50:33 +08:00 · 2023-02-27 11:20:41 -08:00
parent 8710f0514e
commit da84e60d98
300 changed files with 658 additions and 5 deletions
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@ -42,6 +42,14 @@ repos:
        exclude: ^(deepspeed/comm/|docs/|benchmarks/|scripts/check-torchdist.py|deepspeed/moe/sharded_moe.py|deepspeed/runtime/comm/coalesced_collectives.py|deepspeed/elasticity/elastic_agent.py|deepspeed/launcher/launch.py|tests/unit/comm/test_dist.py)
        # Specific deepspeed/ files are excluded for now until we wrap ProcessGroup in deepspeed.comm

+-   repo: local
+    hooks:
+    -   id: check-license
+        name: check-license
+        entry: ./scripts/check-license.py
+        language: script
+        files: \.(py|cc|cu|h|cuh|hip)$
+
 -   repo: https://github.com/codespell-project/codespell
    rev: v2.1.0
    hooks:
--- a/accelerator/__init__.py
+++ b/accelerator/__init__.py
@ -1,2 +1,4 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .abstract_accelerator import DeepSpeedAccelerator
 from .real_accelerator import get_accelerator, set_accelerator
--- a/accelerator/abstract_accelerator.py
+++ b/accelerator/abstract_accelerator.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import abc
 from abc import ABC

--- a/accelerator/cuda_accelerator.py
+++ b/accelerator/cuda_accelerator.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import os
 import pkgutil
 import importlib
--- a/accelerator/real_accelerator.py
+++ b/accelerator/real_accelerator.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 try:
    from accelerator.abstract_accelerator import DeepSpeedAccelerator as dsa1
 except ImportError as e:
--- a/benchmarks/__init__.py
+++ b/benchmarks/__init__.py
@ -0,0 +1 @@
+'''Copyright The Microsoft DeepSpeed Team'''
--- a/benchmarks/communication/__init__.py
+++ b/benchmarks/communication/__init__.py
@ -0,0 +1 @@
+'''Copyright The Microsoft DeepSpeed Team'''
--- a/benchmarks/communication/all_gather.py
+++ b/benchmarks/communication/all_gather.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from benchmarks.communication.utils import *
 from benchmarks.communication.constants import *

--- a/benchmarks/communication/all_reduce.py
+++ b/benchmarks/communication/all_reduce.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from benchmarks.communication.utils import *
 from benchmarks.communication.constants import *

--- a/benchmarks/communication/all_to_all.py
+++ b/benchmarks/communication/all_to_all.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from benchmarks.communication.utils import *
 from benchmarks.communication.constants import *

--- a/benchmarks/communication/broadcast.py
+++ b/benchmarks/communication/broadcast.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import torch
 from benchmarks.communication.utils import *
 from benchmarks.communication.constants import *
--- a/benchmarks/communication/constants.py
+++ b/benchmarks/communication/constants.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 DEFAULT_WARMUPS = 5
 DEFAULT_TRIALS = 50
 DEFAULT_TYPE = 'float'
--- a/benchmarks/communication/pt2pt.py
+++ b/benchmarks/communication/pt2pt.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from benchmarks.communication.utils import *
 from benchmarks.communication.constants import *

--- a/benchmarks/communication/run_all.py
+++ b/benchmarks/communication/run_all.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from benchmarks.communication.utils import *
 from benchmarks.communication.all_reduce import run_all_reduce
 from benchmarks.communication.all_gather import run_all_gather
--- a/benchmarks/communication/utils.py
+++ b/benchmarks/communication/utils.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import torch
 import os
 import math
--- a/benchmarks/inference/bert-bench.py
+++ b/benchmarks/inference/bert-bench.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import torch
 import time
 import deepspeed
--- a/benchmarks/inference/collect_results.py
+++ b/benchmarks/inference/collect_results.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import os
 import re
 import argparse
--- a/benchmarks/inference/gpt-bench.py
+++ b/benchmarks/inference/gpt-bench.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import os
 import torch
 import time
--- a/csrc/aio/py_test/perf_sweep_utils.py
+++ b/csrc/aio/py_test/perf_sweep_utils.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 SCRIPT_PREFIX = '_aio_bench'
 WRITE_OP_DESC = 'write'
 READ_OP_DESC = 'read'
--- a/csrc/common/custom_cuda_kernel.cu
+++ b/csrc/common/custom_cuda_kernel.cu
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #include "custom_cuda_layers.h"

 __global__ void param_update_kernel(const float* input, __half* output, int size)
--- a/csrc/includes/StopWatch.h
+++ b/csrc/includes/StopWatch.h
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #pragma once
 #ifdef _WIN32
 #include <windows.h>
--- a/csrc/includes/Timer.h
+++ b/csrc/includes/Timer.h
@ -1,3 +1,6 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/

 #ifndef __TIMER_H__
 #define __TIMER_H__
--- a/csrc/includes/context.h
+++ b/csrc/includes/context.h
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #pragma once

 #include <ATen/cuda/CUDAContext.h>
--- a/csrc/includes/cpu_adagrad.h
+++ b/csrc/includes/cpu_adagrad.h
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #pragma once

 #define NOMINMAX  // Windows idiosyncrasy
--- a/csrc/includes/cpu_adam.h
+++ b/csrc/includes/cpu_adam.h
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #pragma once

 #define NOMINMAX  // Windows idiosyncrasy
--- a/csrc/includes/cublas_wrappers.h
+++ b/csrc/includes/cublas_wrappers.h
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #pragma once

 #include <assert.h>
--- a/csrc/includes/dropout.h
+++ b/csrc/includes/dropout.h
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #pragma once

 #include <cuda.h>
--- a/csrc/includes/ds_transformer_cuda.h
+++ b/csrc/includes/ds_transformer_cuda.h
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #pragma once

 #include <cuda_runtime_api.h>
--- a/csrc/includes/feed_forward.h
+++ b/csrc/includes/feed_forward.h
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #ifndef __FEEDFORWARD_H__
 #define __FEEDFORWARD_H__

--- a/csrc/includes/gelu.h
+++ b/csrc/includes/gelu.h
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #pragma once

 #include <cuda.h>
--- a/csrc/includes/gemm_test.h
+++ b/csrc/includes/gemm_test.h
@ -1,3 +1,6 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/

 #pragma once

--- a/csrc/includes/general_kernels.h
+++ b/csrc/includes/general_kernels.h
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #include <cuda.h>
 #include <cuda_fp16.h>
 #include <stdio.h>
--- a/csrc/includes/normalize_layer.h
+++ b/csrc/includes/normalize_layer.h
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #pragma once

 #include <cuda.h>
--- a/csrc/includes/quantization.h
+++ b/csrc/includes/quantization.h
@ -1,3 +1,6 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/

 #pragma once

--- a/csrc/includes/quantizer.h
+++ b/csrc/includes/quantizer.h
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #pragma once

 #include <cooperative_groups.h>
--- a/csrc/includes/simd.h
+++ b/csrc/includes/simd.h
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #pragma once

 #if (__x86_64__ || __i386__)
--- a/csrc/includes/softmax.h
+++ b/csrc/includes/softmax.h
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #pragma once

 #include <cuda.h>
--- a/csrc/includes/strided_batch_gemm.h
+++ b/csrc/includes/strided_batch_gemm.h
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #pragma once

 #include <cuda.h>
--- a/csrc/includes/type_shim.h
+++ b/csrc/includes/type_shim.h
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 /* Taken from NVIDIA/apex commit 855808f3fc268e9715d613f3c2e56469d8c986d8 */
 #include <ATen/ATen.h>

--- a/csrc/quantization/fake_quantizer.cu
+++ b/csrc/quantization/fake_quantizer.cu
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #include <math.h>
 #include "custom_cuda_layers.h"
 #include "memory_access_utils.h"
--- a/csrc/transformer/cublas_wrappers.cu
+++ b/csrc/transformer/cublas_wrappers.cu
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #include "cublas_wrappers.h"

 #ifdef __HIP_PLATFORM_HCC__
--- a/csrc/transformer/dropout_kernels.cu
+++ b/csrc/transformer/dropout_kernels.cu
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #include "custom_cuda_layers.h"

 const int unroll_factor = 4;
--- a/csrc/transformer/gelu_kernels.cu
+++ b/csrc/transformer/gelu_kernels.cu
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #include "custom_cuda_layers.h"

 inline __device__ float gelu(const float x)
--- a/csrc/transformer/general_kernels.cu
+++ b/csrc/transformer/general_kernels.cu
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #include "general_kernels.h"

 namespace cg = cooperative_groups;
--- a/csrc/transformer/normalize_kernels.cu
+++ b/csrc/transformer/normalize_kernels.cu
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #include "custom_cuda_layers.h"

 namespace cg = cooperative_groups;
--- a/csrc/transformer/softmax_kernels.cu
+++ b/csrc/transformer/softmax_kernels.cu
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #include <math.h>
 #include "custom_cuda_layers.h"
 #include "general_kernels.h"
--- a/csrc/transformer/transform_kernels.cu
+++ b/csrc/transformer/transform_kernels.cu
@ -1,3 +1,7 @@
+/*
+Copyright The Microsoft DeepSpeed Team
+*/
+
 #include "custom_cuda_layers.h"

 #define rows_trans 16
--- a/deepspeed/autotuning/__init__.py
+++ b/deepspeed/autotuning/__init__.py
@ -1 +1,3 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .autotuner import Autotuner
--- a/deepspeed/autotuning/autotuner.py
+++ b/deepspeed/autotuning/autotuner.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import shutil
 import subprocess
 import time
--- a/deepspeed/autotuning/config.py
+++ b/deepspeed/autotuning/config.py
@ -1,3 +1,4 @@
+'''Copyright The Microsoft DeepSpeed Team'''
 """
 Copyright (c) Microsoft Corporation
 Licensed under the MIT license.
--- a/deepspeed/autotuning/constants.py
+++ b/deepspeed/autotuning/constants.py
@ -1,3 +1,4 @@
+'''Copyright The Microsoft DeepSpeed Team'''
 """
 Copyright (c) Microsoft Corporation
 Licensed under the MIT license.
--- a/deepspeed/autotuning/scheduler.py
+++ b/deepspeed/autotuning/scheduler.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import copy

 from numpy import BUFSIZE
--- a/deepspeed/autotuning/tuner/__init__.py
+++ b/deepspeed/autotuning/tuner/__init__.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .index_based_tuner import RandomTuner, GridSearchTuner
 # from .ga_tuner import GATuner
 from .model_based_tuner import ModelBasedTuner
--- a/deepspeed/autotuning/tuner/base_tuner.py
+++ b/deepspeed/autotuning/tuner/base_tuner.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import sys

 from deepspeed.autotuning.constants import *
--- a/deepspeed/autotuning/tuner/cost_model.py
+++ b/deepspeed/autotuning/tuner/cost_model.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .utils import *

 try:
--- a/deepspeed/autotuning/tuner/index_based_tuner.py
+++ b/deepspeed/autotuning/tuner/index_based_tuner.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import random

 from .base_tuner import BaseTuner
--- a/deepspeed/autotuning/tuner/model_based_tuner.py
+++ b/deepspeed/autotuning/tuner/model_based_tuner.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import hjson

 from ..constants import AUTOTUNING, AUTOTUNING_METRIC_PATH
--- a/deepspeed/autotuning/tuner/utils.py
+++ b/deepspeed/autotuning/tuner/utils.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import numpy as np
 import itertools
 from ..utils import *
--- a/deepspeed/autotuning/utils.py
+++ b/deepspeed/autotuning/utils.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import re
 import collections.abc
 import os
--- a/deepspeed/checkpoint/__init__.py
+++ b/deepspeed/checkpoint/__init__.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .reshape_meg_2d import reshape_meg_2d_parallel

 from .deepspeed_checkpoint import DeepSpeedCheckpoint
--- a/deepspeed/checkpoint/constants.py
+++ b/deepspeed/checkpoint/constants.py
@ -1,3 +1,4 @@
+'''Copyright The Microsoft DeepSpeed Team'''
 '''
    Various symbolic constants used for model checkpointing
 '''
--- a/deepspeed/checkpoint/deepspeed_checkpoint.py
+++ b/deepspeed/checkpoint/deepspeed_checkpoint.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import os
 from typing import Dict
 import torch
--- a/deepspeed/checkpoint/reshape_3d_utils.py
+++ b/deepspeed/checkpoint/reshape_3d_utils.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .reshape_utils import (get_files,
                            get_files_with_prefix,
                            partition_data,
--- a/deepspeed/checkpoint/reshape_meg_2d.py
+++ b/deepspeed/checkpoint/reshape_meg_2d.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .reshape_utils import partition_data


--- a/deepspeed/checkpoint/reshape_utils.py
+++ b/deepspeed/checkpoint/reshape_utils.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import os
 import torch
 from collections import OrderedDict
--- a/deepspeed/checkpoint/utils.py
+++ b/deepspeed/checkpoint/utils.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import os
 from .constants import (MODEL_FILE_PREFIX,
                        MODEL_FILE_SUFFIX,
--- a/deepspeed/checkpoint/zero_checkpoint.py
+++ b/deepspeed/checkpoint/zero_checkpoint.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import torch

 from .constants import (BASE_OPTIMIZER_STATE,
--- a/deepspeed/comm/__init__.py
+++ b/deepspeed/comm/__init__.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import torch
 from .utils import *
 from deepspeed import utils
--- a/deepspeed/comm/backend.py
+++ b/deepspeed/comm/backend.py
@ -1,3 +1,4 @@
+'''Copyright The Microsoft DeepSpeed Team'''
 ''' DeepSpeed Communication Backend.

 In the future, directly use NCCL/MPI/Gloo/etc without requiring torch.distributed. Simply wrap torch.distributed for now.
--- a/deepspeed/comm/config.py
+++ b/deepspeed/comm/config.py
@ -1,3 +1,4 @@
+'''Copyright The Microsoft DeepSpeed Team'''
 """
 Copyright (c) Microsoft Corporation
 Licensed under the MIT license.
--- a/deepspeed/comm/constants.py
+++ b/deepspeed/comm/constants.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 NCCL_BACKEND = 'nccl'
 MPI_BACKEND = 'mpi'
 GLOO_BACKEND = 'gloo'
--- a/deepspeed/comm/utils.py
+++ b/deepspeed/comm/utils.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import os
 import torch
 import inspect
--- a/deepspeed/compression/__init__.py
+++ b/deepspeed/compression/__init__.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .compress import init_compression, redundancy_clean
 from .scheduler import compression_scheduler
 from .helper import convert_conv1d_to_linear
--- a/deepspeed/compression/basic_layer.py
+++ b/deepspeed/compression/basic_layer.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import torch
 import math
 from torch import nn
--- a/deepspeed/compression/compress.py
+++ b/deepspeed/compression/compress.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import re
 from .helper import compression_preparation, fix_compression, recursive_getattr, is_module_compressible
 from .config import get_compression_config
--- a/deepspeed/compression/config.py
+++ b/deepspeed/compression/config.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .constants import *
 import copy
 from ..runtime.config_utils import get_scalar_param
--- a/deepspeed/compression/constants.py
+++ b/deepspeed/compression/constants.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 #########################################
 # Compression Methods
 # It has several sub-components
--- a/deepspeed/compression/helper.py
+++ b/deepspeed/compression/helper.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import torch
 from .basic_layer import Embedding_Compress, LinearLayer_Compress, Conv2dLayer_Compress, BNLayer_Compress, ColumnParallelLinear_Compress, RowParallelLinear_Compress
 from .constants import *
--- a/deepspeed/compression/scheduler.py
+++ b/deepspeed/compression/scheduler.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .compress import get_module_name
 from .constants import *
 from .helper import recursive_getattr
--- a/deepspeed/compression/utils.py
+++ b/deepspeed/compression/utils.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import torch
 from torch import autograd
 import math
--- a/deepspeed/elasticity/__init__.py
+++ b/deepspeed/elasticity/__init__.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .elasticity import compute_elastic_config, elasticity_enabled, ensure_immutable_elastic_config
 from .utils import is_torch_elastic_compatible
 from .constants import ENABLED, ENABLED_DEFAULT, ELASTICITY
--- a/deepspeed/elasticity/elastic_agent.py
+++ b/deepspeed/elasticity/elastic_agent.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from torch.distributed.elastic.agent.server.local_elastic_agent import LocalElasticAgent
 from typing import Any, Dict, Optional, Tuple
 from datetime import datetime
--- a/deepspeed/elasticity/utils.py
+++ b/deepspeed/elasticity/utils.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import torch


--- a/deepspeed/env_report.py
+++ b/deepspeed/env_report.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import torch
 import deepspeed
 import subprocess
--- a/deepspeed/git_version_info.py
+++ b/deepspeed/git_version_info.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 try:
    #  This is populated by setup.py
    from .git_version_info_installed import *  # noqa: F401
--- a/deepspeed/inference/__init__.py
+++ b/deepspeed/inference/__init__.py
@ -1 +1,3 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .engine import InferenceEngine
--- a/deepspeed/inference/config.py
+++ b/deepspeed/inference/config.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import torch
 from deepspeed.runtime.config_utils import DeepSpeedConfigModel
 from deepspeed.runtime.zero.config import DeepSpeedZeroConfig
--- a/deepspeed/launcher/__init__.py
+++ b/deepspeed/launcher/__init__.py
@ -0,0 +1 @@
+'''Copyright The Microsoft DeepSpeed Team'''
--- a/deepspeed/launcher/multinode_runner.py
+++ b/deepspeed/launcher/multinode_runner.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import os
 import sys
 import shutil
--- a/deepspeed/model_implementations/__init__.py
+++ b/deepspeed/model_implementations/__init__.py
@ -1,2 +1,4 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .transformers.ds_transformer import DeepSpeedTransformerInference
 from .transformers.clip_encoder import DSClipEncoder
--- a/deepspeed/model_implementations/diffusers/__init__.py
+++ b/deepspeed/model_implementations/diffusers/__init__.py
@ -0,0 +1 @@
+'''Copyright The Microsoft DeepSpeed Team'''
--- a/deepspeed/model_implementations/transformers/__init__.py
+++ b/deepspeed/model_implementations/transformers/__init__.py
@ -0,0 +1 @@
+'''Copyright The Microsoft DeepSpeed Team'''
--- a/deepspeed/model_implementations/transformers/ds_base.py
+++ b/deepspeed/model_implementations/transformers/ds_base.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 import torch.nn as nn


--- a/deepspeed/module_inject/__init__.py
+++ b/deepspeed/module_inject/__init__.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .replace_module import replace_transformer_layer, revert_transformer_layer, ReplaceWithTensorSlicing, GroupQuantizer, generic_injection
 from .module_quantize import quantize_transformer_layer
 from .replace_policy import HFBertLayerPolicy
--- a/deepspeed/module_inject/auto_tp.py
+++ b/deepspeed/module_inject/auto_tp.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 # Automatic Tensor Parallelism
 import re

--- a/deepspeed/module_inject/containers/__init__.py
+++ b/deepspeed/module_inject/containers/__init__.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .bert import DS_BERTContainer, HFBertLayerPolicy
 from .bloom import DS_BloomContainer, BLOOMLayerPolicy, supported_models
 from .distil_bert import DS_DistilBERTContainer, HFDistilBertLayerPolicy
--- a/deepspeed/module_inject/containers/base.py
+++ b/deepspeed/module_inject/containers/base.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 # Create a container object to save model-specific tensors using the policy file above.
 from abc import ABC
 import torch
--- a/deepspeed/module_inject/containers/base_moe.py
+++ b/deepspeed/module_inject/containers/base_moe.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 # Create a container object to save model-specific tensors using the policy file above.
 from .base import *
 import torch
--- a/deepspeed/module_inject/containers/bert.py
+++ b/deepspeed/module_inject/containers/bert.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .base import *
 from deepspeed.model_implementations.transformers.ds_bert import DeepSpeedBERTInference
 import torch
--- a/deepspeed/module_inject/containers/bloom.py
+++ b/deepspeed/module_inject/containers/bloom.py
@ -1,3 +1,5 @@
+'''Copyright The Microsoft DeepSpeed Team'''
+
 from .base import *
 from .features.meta_tensor import MetaTensorContainer
 from deepspeed.model_implementations.transformers.ds_bloom import DeepSpeedBloomInference
--- a/Show More
+++ b/Show More
@ -0,0 +1 @@
+'''Copyright The Microsoft DeepSpeed Team'''