Files
DeepSpeed/deepspeed/compile/profilers/__init__.py
Masahiro Tanaka 227a60c0c4 DeepCompile for enhanced compiler integration (#7154)
This PR introduces *DeepCompile*, a new feature that efficiently
integrates compiler optimizations with other DeepSpeed features.
DeepCompile utilizes torch's dynamo to capture the computation graph and
modifies it to incorporate DeepSpeed’s optimizations seamlessly.

Currently, DeepCompile supports ZeRO-1 and ZeRO-3, with enhancements
such as proactive prefetching and selective unsharding to improve
performance.
(More details will be added later.)

---------

Signed-off-by: Masahiro Tanaka <mtanaka@microsoft.com>
Signed-off-by: Olatunji Ruwase <olruwase@microsoft.com>
Co-authored-by: zafarsadiq <zafarsadiq120@gmail.com>
Co-authored-by: Logan Adams <114770087+loadams@users.noreply.github.com>
Co-authored-by: Olatunji Ruwase <olruwase@microsoft.com>
2025-04-16 04:33:53 +00:00

24 lines
957 B
Python

# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0
# DeepSpeed Team
from dataclasses import dataclass, field
from typing import List, Optional, Tuple

from torch.fx import Graph
@dataclass
class ProfilingResult:
    """Per-graph profiling record for a forward graph and its backward graph.

    Every field has a default, so an empty record can be created with
    ``ProfilingResult()`` and filled in later. List fields use
    ``default_factory`` so that instances never share the same list object.

    NOTE(review): the units of the memory, time and size entries are not
    visible in this file -- confirm against the code that fills them in.
    """

    # Captured graphs; ``None`` until a graph has been recorded.
    fwd_graph: Optional[Graph] = None
    bwd_graph: Optional[Graph] = None

    # Presumably set when a backward graph is expected -- verify with callers.
    needs_backward: bool = False

    # Memory entries: (name, current_alloc, delta, peak)
    fwd_mem: List[Tuple[str, int, int, int]] = field(default_factory=list)
    bwd_mem: List[Tuple[str, int, int, int]] = field(default_factory=list)

    # Timing entries: (name, device_time, wall_time)
    fwd_time: List[Tuple[str, int, int]] = field(default_factory=list)
    bwd_time: List[Tuple[str, int, int]] = field(default_factory=list)

    # Tensor size entries: (name, size)
    fwd_tensor_sizes: List[Tuple[str, int]] = field(default_factory=list)
    bwd_tensor_sizes: List[Tuple[str, int]] = field(default_factory=list)

    # Parameter entries: (index, ds_id, ds_shape)
    param_indices: List[Tuple[int, int, Tuple[int, ...]]] = field(default_factory=list)