mirror of
https://github.com/deepspeedai/DeepSpeed.git
synced 2025-10-20 23:53:48 +08:00
This is the Deepspeed counterpart of https://github.com/snowflakedb/ArcticTraining/pull/45 - as the new feature(s) require changes on both sides. For PR reviewers: Readiness status: - [x] Code - [x] Tests - [ ] Docs - working on it Features: - [x] add support for delaying grad addition via `param.ds_grad_is_ready` flag (used when performing tiled compute in an autograd function) - [x] add light sp-only mpu version (Jeff Rasley) - [x] improved debug - [x] added `all_gather_object` to `dist` - [x] `UlyssesSPAttentionHF` (port of UlyssesAttention from Megatron-Deepspeed plus modern MHA-variations) - [x] `UlyssesSPDataLoaderAdapter` - DL adapter to shard the normal DL batches to be used by `UlyssesSPAttentionHF` - [x] `SequenceTiledCompute` - generic autograd function to perform compute after tiling on the sequence dimension - [x] `TiledMLP` - a specific autograd function to perform tiled MLP (it's much easier to understand before trying to grok `SequenceTiledCompute`) - [x] added a differentiable `_DimZeroAllToAll` (Samyam Rajbhandari) - [x] torch-dist-check now allows `torch.distributed.nn` (which is needed since deepspeed's dist is not up to date with `torch.distributed.nn`) --------- Signed-off-by: Stas Bekman <stas.bekman@snowflake.com> Signed-off-by: Stas Bekman <stas@stason.org> Co-authored-by: Stas Bekman <stas.bekman@snowflake.com> Co-authored-by: Jeff Rasley <jerasley@microsoft.com> Co-authored-by: Olatunji Ruwase <tunji.ruwase@snowflake.com>
42 lines
1.3 KiB
Python
Executable File
42 lines
1.3 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# Copyright (c) Microsoft Corporation.
|
|
# SPDX-License-Identifier: Apache-2.0
|
|
|
|
# DeepSpeed Team
|
|
|
|
from __future__ import annotations
|
|
'''Copyright The Microsoft DeepSpeed Team'''
|
|
"""
|
|
Checks each file in sys.argv for the string "torch.distributed" (uses of "torch.distributed.nn" are allowed).
|
|
Modified from https://github.com/jlebar/pre-commit-hooks/blob/master/check_do_not_submit.py
|
|
"""
|
|
|
|
import subprocess
|
|
import sys
|
|
|
|
|
|
def err(s: str) -> None:
    """Write ``s`` followed by a newline to standard error."""
    sys.stderr.write(f"{s}\n")
|
|
|
|
|
|
# There are many ways we could search for the string "torch.distributed", but
# `git grep --no-index` is nice because
#   - it's very fast (as compared to iterating over the file in Python)
#   - we can reasonably assume it's available on all machines
#   - unlike plain grep, which is slower and has different flags on MacOS versus
#     Linux, git grep is always the same.
#
# `torch.distributed.nn` is allowed: the negative lookahead `(?!\.nn)` in the
# pattern keeps it from matching.
res = subprocess.run(
    [
        "git",
        "grep",
        "-Hn",
        "--no-index",
        "-P",
        # `-e` marks the next argument as the pattern explicitly.
        "-e",
        r"torch\.distributed |torch\.distributed(?!\.nn)",
        # `--` ends option parsing, so a path that starts with `-` is treated
        # as a file to search rather than as a git-grep flag.
        "--",
        *sys.argv[1:],
    ],
    capture_output=True,
)
if res.returncode == 0:
    # Exit status 0 means at least one line matched the forbidden pattern;
    # stdout holds the `file:line:text` hits (from -Hn).
    err('Error: The string "torch.distributed" was found. Please replace all calls to torch.distributed with "deepspeed.comm"'
        )
    # errors="replace" so undecodable bytes in a matched line cannot crash the
    # report with a UnicodeDecodeError.
    err(res.stdout.decode("utf-8", errors="replace"))
    sys.exit(1)
elif res.returncode != 1:
    # Exit status 1 means "no match" (the passing case, falls through). Any
    # other status means the search itself failed. git signals fatal errors
    # with statuses other than 2 (typically 128), so checking only for 2 would
    # let a broken check pass silently. The script still exits with 2 here.
    err(f"Error invoking grep on {', '.join(sys.argv[1:])}:")
    err(res.stderr.decode("utf-8", errors="replace"))
    sys.exit(2)
|