Refactor multigpu tests to test_cuda_multigpu (#104059)

Mostly a refactor that moves all the tests from `test_cuda` that benefit from a multi-GPU environment into their own file:
- Add a `TestCudaMallocAsync` class for the async tests (to separate them from `TestCudaComm`)
- Move individual tests from `TestCuda` to `TestCudaMultiGPU`
- Move `_create_scaling_models_optimizers` and `_create_scaling_case` to `torch.testing._internal.common_cuda`
- Add the newly created `test_cuda_multigpu` to the periodic multigpu test job

### 🤖 Generated by Copilot at f4d46fa

This pull request fixes a flaky test and improves the testing of gradient scaling on multiple GPUs. It adds verbose output for two CUDA tests and refactors some common code into helper functions in `torch/testing/_internal/common_cuda.py`.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/104059
Approved by: https://github.com/huydhn
Committed by: PyTorch MergeBot
Parent: 572ff2779b
Commit: c3e4a67905
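Since `_create_scaling_models_optimizers` and `_create_scaling_case` now live in `torch.testing._internal.common_cuda`, tests in the new `test_cuda_multigpu.py` can import them directly instead of redefining them. Below is a minimal illustrative sketch; the class and test names are hypothetical and not taken from the PR, only the import path and helper signature come from this change.

```python
# Hypothetical sketch: importing the relocated helper in a multi-GPU test.
# The test body is illustrative, not copied from the PR.
import torch
from torch.testing._internal.common_cuda import _create_scaling_models_optimizers
from torch.testing._internal.common_utils import TestCase, run_tests


class TestScalingHelpersExample(TestCase):
    def test_control_and_scaling_models_start_identical(self):
        mod_control, mod_scaling, opt_control, opt_scaling = \
            _create_scaling_models_optimizers(device="cuda:0")
        # The helper copies the control weights into the scaling model,
        # so both models begin from the same parameters.
        for c, s in zip(mod_control.parameters(), mod_scaling.parameters()):
            self.assertTrue(torch.equal(c, s))


if __name__ == "__main__":
    run_tests()
```

The first hunk below shows a CI test script (the periodic multigpu job, per the summary above) picking up the new test file: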
@@ -8,6 +8,7 @@

source "$(dirname "${BASH_SOURCE[0]}")/common.sh"

echo "Testing pytorch"

time python test/run_test.py --include test_cuda_multigpu test_cuda_primary_ctx --verbose

# Disabling tests to see if they solve timeout issues; see https://github.com/pytorch/pytorch/issues/70015
# python tools/download_mnist.py --quiet -d test/cpp/api/mnist
test/test_cuda.py (1742 changed lines): diff suppressed because it is too large
test/test_cuda_multigpu.py (new file, 1721 lines): diff suppressed because it is too large
@@ -199,6 +199,40 @@ def _check_hipsparse_generic_available():

TEST_CUSPARSE_GENERIC = _check_cusparse_generic_available()
TEST_HIPSPARSE_GENERIC = _check_hipsparse_generic_available()


# Shared by test_cuda.py and test_multigpu.py
def _create_scaling_models_optimizers(device="cuda", optimizer_ctor=torch.optim.SGD, optimizer_kwargs=None):
    # Create a module+optimizer that will use scaling, and a control module+optimizer
    # that will not use scaling, against which the scaling-enabled module+optimizer can be compared.
    mod_control = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 8)).to(device=device)
    mod_scaling = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 8)).to(device=device)
    with torch.no_grad():
        for c, s in zip(mod_control.parameters(), mod_scaling.parameters()):
            s.copy_(c)

    kwargs = {"lr": 1.0}
    if optimizer_kwargs is not None:
        kwargs.update(optimizer_kwargs)
    opt_control = optimizer_ctor(mod_control.parameters(), **kwargs)
    opt_scaling = optimizer_ctor(mod_scaling.parameters(), **kwargs)

    return mod_control, mod_scaling, opt_control, opt_scaling


def _create_scaling_case(device="cuda", dtype=torch.float, optimizer_ctor=torch.optim.SGD, optimizer_kwargs=None):
    data = [(torch.randn((8, 8), dtype=dtype, device=device), torch.randn((8, 8), dtype=dtype, device=device)),
            (torch.randn((8, 8), dtype=dtype, device=device), torch.randn((8, 8), dtype=dtype, device=device)),
            (torch.randn((8, 8), dtype=dtype, device=device), torch.randn((8, 8), dtype=dtype, device=device)),
            (torch.randn((8, 8), dtype=dtype, device=device), torch.randn((8, 8), dtype=dtype, device=device))]

    loss_fn = torch.nn.MSELoss().cuda()

    skip_iter = 2

    return _create_scaling_models_optimizers(
        device=device, optimizer_ctor=optimizer_ctor, optimizer_kwargs=optimizer_kwargs,
    ) + (data, loss_fn, skip_iter)


# Importing this module should NOT eagerly initialize CUDA
if not CUDA_ALREADY_INITIALIZED_ON_IMPORT:
    assert not torch.cuda.is_initialized()