# Mirror of https://github.com/pytorch/pytorch.git (synced 2025-11-05 00:14:54 +08:00).
#
# Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/58003
#   - adds trainer class DdpTrainer
#   - adds trainer class DdpSparseRpcTrainer
#   - adds server class ParameterServerBase
#   - adds server class AverageParameterServer
#   - adds experiment ddp_cpu_sparse_rpc_nccl_allreduce
#   - adds experiment ddp_cuda_sparse_rpc_nccl_allreduce
#   - quip document https://fb.quip.com/iQUtAeKIxWpF
# Test Plan: Imported from OSS
# Reviewed By: albanD
# Differential Revision: D29379696
# Pulled By: gcramer23
# fbshipit-source-id: 9cf5fb7398ba2fa3eb694afbddc4ed00d97f205f
#
# File: 32 lines, 733 B, Python
import subprocess
|
|
from os.path import join
|
|
from pathlib import Path
|
|
|
|
# Directory holding the experiment shell scripts; it sits next to this file.
# os.path.join accepts the Path object and yields a plain str.
script_dir = join(Path(__file__).parent, "experiment_scripts")

# Encoding used to decode the bytes captured from a script's stderr.
encoding = "utf-8"
|
|
|
|
|
|
def run_script(script_name):
    """Run an experiment shell script with bash and assert that it succeeded.

    Args:
        script_name: File name of the script, resolved relative to
            ``script_dir``.

    Raises:
        AssertionError: If the script exits with a non-zero status or
            writes anything to stderr.
    """
    script_path = join(script_dir, script_name)
    p = subprocess.run(
        ["bash", script_path],
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
    )
    # Decode leniently so that undecodable bytes in stderr surface as part of
    # the failure message instead of raising UnicodeDecodeError here.
    error = p.stderr.decode(encoding, errors="replace")
    # A script can fail without printing to stderr, so the exit status is
    # checked in addition to the stderr output.
    assert p.returncode == 0, (
        f"{script_name} exited with status {p.returncode}:\n{error}"
    )
    assert not error, f"{script_name} wrote to stderr:\n{error}"
|
|
|
|
|
|
def test_ddp_nccl_allreduce():
    """Run the ``ddp_nccl_allreduce.sh`` script through ``run_script``."""
    script = "ddp_nccl_allreduce.sh"
    run_script(script)
|
|
|
|
|
|
def test_ddp_cpu_sparse_rpc_nccl_allreduce():
    """Run ``ddp_cpu_sparse_rpc_nccl_allreduce.sh`` through ``run_script``."""
    script = "ddp_cpu_sparse_rpc_nccl_allreduce.sh"
    run_script(script)
|
|
|
|
|
|
def test_ddp_cuda_sparse_rpc_nccl_allreduce():
    """Run ``ddp_cuda_sparse_rpc_nccl_allreduce.sh`` through ``run_script``."""
    script = "ddp_cuda_sparse_rpc_nccl_allreduce.sh"
    run_script(script)
|