mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[c10d] Fix extra CUDA context created by barrier (#149144)
Fixes #149119. In ProcessGroup.hpp, we create a dummy tensor for dispatching. This requires a correct device index. This PR uses the `device_id` given by the user when calling `init_process_group`. This PR also uses `torch._C._get_accelerator()` to determine the device type. Pull Request resolved: https://github.com/pytorch/pytorch/pull/149144 Approved by: https://github.com/XilunWu, https://github.com/fduwjj, https://github.com/cyyever
This commit is contained in:
@@ -3516,17 +3516,6 @@ class CommTest(test_c10d_common.AbstractCommTest, MultiProcessTestCase):
|
||||
|
||||
c10d.barrier(device_ids=[self.rank])
|
||||
|
||||
@requires_nccl()
@skip_if_lt_x_gpu(2)
def test_nccl_barrier_device_ids_function_argument(self):
    """Expect ``c10d.barrier`` to raise ``TypeError`` for a non-list ``device_ids``.

    A file-backed store is used to initialise an NCCL process group, after
    which ``barrier`` is called with ``self.rank`` itself as ``device_ids``
    instead of a list wrapping it.
    """
    file_store = c10d.FileStore(self.file_name, self.world_size)
    c10d.init_process_group(
        backend="nccl",
        rank=self.rank,
        world_size=self.world_size,
        store=file_store,
    )

    # The call must fail with the "Invalid function argument" message.
    with self.assertRaisesRegex(TypeError, "Invalid function argument"):
        c10d.barrier(device_ids=self.rank)
|
||||
|
||||
@requires_nccl()
|
||||
@skip_if_lt_x_gpu(2)
|
||||
def test_unwaited(self) -> None:
|
||||
|
Reference in New Issue
Block a user