[BE][6/6] fix typos in test/ (test/distributed/) (#157640)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/157640
Approved by: https://github.com/yewentao256, https://github.com/malfet
Committed by: PyTorch MergeBot
Parent: 4283d96bcd
Commit: 0d17029fea
@@ -1169,7 +1169,6 @@ exclude_patterns = [
'aten/src/ATen/[a-mA-M]*/**',
'test/**',
'test/[a-hA-h]*/**',
- 'test/distributed/**',
'torch/_*/**',
'torch/distributed/tensor/**',
]
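Two mechanisms are at work in this commit: dropping a directory from lintrunner's exclude_patterns (the hunk above appears to remove the 'test/distributed/**' entry, so the spell checker now scans those files), and an inline `# codespell:ignore` comment on lines whose misspellings are intentional test inputs, as the hunks below do for "Flse" and "Ture". A minimal sketch of the inline suppression follows; the variable name is illustrative, not taken from the diff:

# Deliberately misspelled strings used as invalid inputs in a test;
# the inline directive keeps codespell from flagging these lines.
invalid_bool_strings = [
    "Flse",  # codespell:ignore
    "Ture",  # codespell:ignore
]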
@@ -69,7 +69,7 @@ class ReplicateStateDictTest(MultiProcessTestCase):

def test_replicate_non_root_multiple_save_load(self):
"""
- Tests tha replicate() on multiple submodules matches
+ Tests the replicate() on multiple submodules matches
local module state_dict.
"""
self._init_pg()
@@ -1733,7 +1733,7 @@ class TestShardedTensorEnumerable(ShardedTensorTestBase):
self.assertEqual(remote_device_before.rank(), remote_device_after.rank())
self.assertEqual(str(remote_device_after.device()), "cpu")

- # ensure metdata also get changed to CPU
+ # ensure metadata also get changed to CPU
metas = new_st.metadata().shards_metadata
for meta in metas:
self.assertEqual(str(meta.placement.device()), "cpu")
@@ -1764,7 +1764,7 @@ class TestShardedTensorEnumerable(ShardedTensorTestBase):
self.assertEqual(remote_device_before.rank(), remote_device_after.rank())
self.assertEqual(str(remote_device_after.device()), "cpu")

- # ensure metdata also get changed to CPU
+ # ensure metadata also get changed to CPU
metas = new_st.metadata().shards_metadata
for meta in metas:
self.assertEqual(str(meta.placement.device()), "cpu")
@@ -1820,7 +1820,7 @@ class TestShardedTensorEnumerable(ShardedTensorTestBase):
self.assertEqual(str(remote_device_before.device().type), "cpu")
self.assertEqual(str(remote_device_after.device().type), "cuda")

- # ensure metdata also get changed to GPU
+ # ensure metadata also get changed to GPU
metas = new_st_gpu.metadata().shards_metadata
for meta in metas:
self.assertEqual(str(meta.placement.device().type), "cuda")
@@ -129,7 +129,7 @@ class TestTrackerFullyShard1DTrainingCore(FSDPTest):
@skip_if_lt_x_gpu(2)
def test_tracker_non_root_forward_backward(self):
"""
- Tests tracker accracy when running forward/backward through a non-root.
+ Tests tracker accuracy when running forward/backward through a non-root.
"""
debug = False
dev = torch.device(torch.cuda.current_device())
@@ -211,7 +211,7 @@ class TestSACILP(TestCase):


class TestOptimalCheckpointingPolicy(TestCase):
- # tests are adpated from tests in xformers
+ # tests are adapted from tests in xformers
# https://github.com/facebookresearch/xformers/blob/c6c0ac31f1b08542a0bc27278c6ed10f825f6963/tests/test_checkpoint.py#L222
def setUp(self):
super().setUp()
@@ -72,7 +72,7 @@ class TestFSDPWithEP(DTensorTestBase, VerifyStateDictMixin):
mesh_fsdp_tp = init_device_mesh(
self.device_type, (2, 4), mesh_dim_names=("dp", "tp")
)
- # TODO: we are using an internal API atm. Change to a publich API once it is ready.
+ # TODO: we are using an internal API atm. Change to a public API once it is ready.
mesh_fsdp_ep = _mesh_resources.create_child_mesh(mesh_fsdp_tp, ("dp",))
del _mesh_resources.child_to_parent_mapping[mesh_fsdp_ep]
@@ -109,7 +109,7 @@ class TestStateDict(DTensorTestBase, VerifyStateDictMixin):
for d_optim in _dist_optim:
d_optim.step()

- # We need to ensure gradients don't exist, this the invarient of using DSD.
+ # We need to ensure gradients don't exist, this the invariant of using DSD.
optim.zero_grad()

# Get the state_dict, and compare the result
@@ -135,7 +135,7 @@ class TestStateDict(DTensorTestBase, VerifyStateDictMixin):
# We won't be able to load the partial state_dict back.
return
# Since we already have the state_dict saved before, no need to call DCP.
- # We can directly load them back. This asser is to ensure that optimizer
+ # We can directly load them back. This assert is to ensure that optimizer
# state storage are initialized.
# self.assertEqual(len(curr_dist_osd[STATE]), len(dist_osd[STATE]))
set_model_state_dict(
@@ -140,7 +140,17 @@ class RendezvousParametersTest(TestCase):
self.assertFalse(params.get_as_bool("dummy_param"))

def test_get_as_bool_raises_error_if_value_is_invalid(self) -> None:
- for value in ["01", "Flse", "Ture", "g", "4", "_", "truefalse", 2, -1]:
+ for value in [
+ "01",
+ "Flse",  # codespell:ignore
+ "Ture",  # codespell:ignore
+ "g",
+ "4",
+ "_",
+ "truefalse",
+ 2,
+ -1,
+ ]:
with self.subTest(value=value):
self._kwargs["dummy_param"] = value
@@ -71,9 +71,9 @@ class WorkerServerTest(TestCase):
self.assertEqual(resp.status, 200)
self.assertIn("ping", json.loads(resp.data))

- resp = pool.request("POST", "/handler/nonexistant")
+ resp = pool.request("POST", "/handler/nonexistent")
self.assertEqual(resp.status, 404)
- self.assertIn(b"Handler nonexistant not found:", resp.data)
+ self.assertIn(b"Handler nonexistent not found:", resp.data)

@requires_cuda
def test_dump_nccl_trace_pickle(self) -> None:
@@ -207,8 +207,8 @@ class WorkerServerTest(TestCase):
def test_get_handler_nonexistant(self) -> None:
from torch._C._distributed_c10d import _get_handler

- with self.assertRaisesRegex(ValueError, "Failed to find handler nonexistant"):
- _get_handler("nonexistant")
+ with self.assertRaisesRegex(ValueError, "Failed to find handler nonexistent"):
+ _get_handler("nonexistent")

def test_get_handler_names(self) -> None:
from torch._C._distributed_c10d import _get_handler_names
@@ -158,7 +158,7 @@ class TestFSDPMemory(FSDPTest):
output = cmp(results, expected)
self.assertEqual(output, "")

- @unittest.skipIf(TEST_HPU, "Memory will be differnt for CUDA and HPU, skipping")
+ @unittest.skipIf(TEST_HPU, "Memory will be different for CUDA and HPU, skipping")
@skip_if_lt_x_gpu(2)
@parametrize("ckpt", ["no_ckpt", "ckpt"])
def test_fsdp_memory(self, ckpt):
@@ -45,7 +45,7 @@ class ScheduleVShaped(PipelineScheduleMulti):
)

# Go through one microbatch
- # Note(whc) - it might be easier to work with thes schedules by writing them as a list of
+ # Note(whc) - it might be easier to work with this schedules by writing them as a list of
# ["0F0", ...] and then parsing them in the test infra to turn them into actions.
self.pipeline_order = {
0: [
@@ -22,7 +22,7 @@ from torch.testing._internal.distributed.rpc_utils import (


# On CircleCI these tests are already run on CPU jobs, thus to save resources do
- # not run them on GPU jobs, since thet wouldn't provide additional test signal.
+ # not run them on GPU jobs, since they wouldn't provide additional test signal.
if not (IS_CI and torch.cuda.is_available()):
globals().update(
generate_tests(
@@ -23,7 +23,7 @@ from torch.testing._internal.distributed.rpc_utils import (


# On CircleCI these tests are already run on CPU jobs, thus to save resources do
- # not run them on GPU jobs, since thet wouldn't provide additional test signal.
+ # not run them on GPU jobs, since they wouldn't provide additional test signal.
if not (IS_CI and torch.cuda.is_available()):
globals().update(
generate_tests(
@@ -90,7 +90,7 @@ class TestLocalMap(DTensorTestBase):
) # row-wisely sharded W tensor

# Test 1: use the function returned from calling local_map
- # get the function wrapped with DTensor/Tensor convertion
+ # get the function wrapped with DTensor/Tensor conversion
# mm_allreduce_forward is a function that applies to Tensors with manual collective
# local_mm_allreduce_forward is the function that does the same but applies to
# DTensors' `_local_tensor`.
@@ -85,7 +85,7 @@ class TensorParallelTest(DTensorTestBase):
with torch.no_grad():
tp_res = tp_model(*inputs)
self.assertEqual(res, tp_res)
- # Expect all_gather to be inserted to distributed sharded fc resutls
+ # Expect all_gather to be inserted to distributed sharded fc results
self.assert_has_c10d_ops(
tp_exported_program.graph_module,
{
@@ -438,7 +438,7 @@ class DTensorTest(DTensorTestBase):
self.assertEqual(type(out_view), AsyncCollectiveTensor)
self.assertFalse(out.completed)

- # Use the daa, requiring a sync
+ # Use the data, requiring a sync
ref = torch.ones((4, 2), device=self.device_type) + 1
ref = ref.view(-1)
out_data = out_view + 1
@@ -220,7 +220,7 @@ def forward(self, b_parametrizations_buffer_original0, x):
group1 = x.get_group(mesh_dim=1)
return size, coord, group0, group1

- # Cant be fullgraph=True because ProcessGroup is not reconstructible in dynamo
+ # Can't be fullgraph=True because ProcessGroup is not reconstructible in dynamo
compiled_fn = torch.compile(backend="aot_eager")(fn)

mesh = DeviceMesh(self.device_type, torch.arange(self.world_size).unsqueeze(1))
@@ -193,7 +193,7 @@ class TestEmbeddingOp(DTensorTestBase):

from torch.distributed.tensor._ops._embedding_ops import _MaskPartial

- # case 1: two embeddings with the same shape, thus sharing the underying _MaskPartial
+ # case 1: two embeddings with the same shape, thus sharing the underlying _MaskPartial
# and MaskBuffer, because of cache hit from sharding propagation

emb1 = torch.nn.Embedding(10, 23, device=self.device_type)
@@ -65,7 +65,7 @@ class TestEinsumDims(TestCase):
self.assertEqual(edims.lhs_out_only_dims, ["c"])
self.assertEqual(edims.rhs_out_only_dims, [])

- equation = "abd,bf->abfd"
+ equation = "abd,bf->abfd"  # codespell:ignore
input_dims, output_dim = EinsumDims.parse_equation(equation)
edims = EinsumDims.parse_dims(input_dims, output_dim)
@@ -635,7 +635,7 @@ class MultiDimRedistributeTest(DTensorTestBase):
dt = distribute_tensor(full_tensor, device_mesh, repl_inputs)

if repl_inputs != inputs:
- # create a new DTensor reinterpreting some of the replicated entires as "Partial"
+ # create a new DTensor reinterpreting some of the replicated entries as "Partial"
dt = DTensor.from_local(
dt.to_local(), device_mesh, inputs, run_check=False
)
@@ -150,7 +150,7 @@ class DTensorXLAIntegrationTest(TestCase):

def shard_params(mod_name, mod, mesh):
shard_spec = [Shard(0)]
- # annoate fc1 and fc2
+ # annotate fc1 and fc2
if isinstance(mod, nn.Linear):
for _, param in mod.named_parameters():
# annotate the parameter tensors directly
@@ -601,7 +601,7 @@ class ProcessGroupNCCLGroupTest(MultiProcessTestCase):

def _helper_test_extra_cuda_context_by_nvml(self):
"""
- A helper for `test_extra_cuda_context`, if pynvml is avaiable.
+ A helper for `test_extra_cuda_context`, if pynvml is available.
pynvml provides python bindings for NVIDIA NVML functionalities.
Here we are interested in: nvmlDeviceGetComputeRunningProcesses
"""
@@ -634,7 +634,7 @@ class ProcessGroupNCCLGroupTest(MultiProcessTestCase):

def _helper_test_extra_cuda_context_by_memory(self):
"""
- A helper for `test_extra_cuda_context`, if pynvml is NOT avaiable.
+ A helper for `test_extra_cuda_context`, if pynvml is NOT available.
If extra context is created, it would manifest into device 0's memory usage.
"""
device = torch.device(f"cuda:{self.rank:d}")
@@ -1112,7 +1112,7 @@ class ProcessGroupNCCLGroupTest(MultiProcessTestCase):
os.environ["TORCH_NCCL_NONBLOCKING_TIMEOUT"] = "100"
store = c10d.FileStore(self.file_name, self.world_size)
device = torch.device(f"cuda:{self.rank}")
- # bound device to triger eager init mode
+ # bound device to trigger eager init mode
pg = self._create_process_group_nccl(store, self.opts(), device_id=device)
backend = pg._get_backend(torch.device(device))
self.assertEqual(backend.comm_split_count(), 0)
@@ -2995,7 +2995,7 @@ class NcclErrorHandlingTest(MultiProcessTestCase):
time.sleep(4)
self.assertEqual(process_group.get_error(), ErrorType.REMOTE_ERROR)

- # Mimicing all ranks sensing the timeout, abort
+ # Mimicking all ranks sensing the timeout, abort
process_group.abort()

if prev_nccl_async_error_handling is not None:
@@ -4291,7 +4291,7 @@ class NCCLTraceTestBase(MultiProcessTestCase):

def _join_processes(self, fn):
# We need to patch sys.exit() as skip_if will use sys.exit() and
- # the exit code from the this process will not be catched.
+ # the exit code from the this process will not be caught.
with mock.patch("sys.exit"):
fn()
super()._join_processes(fn)
@@ -1814,7 +1814,7 @@ class TestSingleProc(DynamoDistributedSingleProcTestCase):

Note: comptime prints the guards before the time they get installed or not installed, so in both cases
(skip or no skip) the same guards get printed. The difference is that in the skip case, they show up
- with a special 'guard source' which will cuase them to not be installed. So all we check for is the expected
+ with a special 'guard source' which will cause them to not be installed. So all we check for is the expected
guard source 'local_fsdp_module'.
"""
global GUARDS_FILE
@@ -1871,7 +1871,7 @@ class TestSingleProc(DynamoDistributedSingleProcTestCase):

def test_fsdp_skip_register_attr_or_module(self):
"""
- ensure FSDP module is not registered as attrbutes
+ ensure FSDP module is not registered as attributes
in the fx graph
see `not source.guard_source().is_fsdp_module()`
before calling `register_attr_or_module`
@@ -824,7 +824,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
compiled = torch.compile(func)
out = compiled(inputs, **self.get_world_trs())
code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs())
- # NOTE: Make sure we are not unneccessarily copying the outputs of
+ # NOTE: Make sure we are not unnecessarily copying the outputs of
# wait_tensors before they are returned from the graph.
(
FileCheck()
@@ -891,7 +891,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):

compiled = torch.compile(func)
code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs())
- # NOTE: Make sure we are not unneccessarily copying the outputs of
+ # NOTE: Make sure we are not unnecessarily copying the outputs of
# wait_tensors before they are returned from the graph.
(
FileCheck()
@@ -1356,7 +1356,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):

compiled = torch.compile(func)
code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs())
- # NOTE: Make sure we are not unneccessarily copying the outputs of
+ # NOTE: Make sure we are not unnecessarily copying the outputs of
# wait_tensors before they are returned from the graph.
(
FileCheck()
@@ -1403,7 +1403,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
compiled = torch.compile(func)
code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs())
# NOTE: The first return value should be the output of the first wait_tensor.
- # We want to make sure no unneccessary copy is made.
+ # We want to make sure no unnecessary copy is made.
(
FileCheck()
.check("buf0 = empty_strided")
@@ -1474,7 +1474,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
compiled = torch.compile(func)
code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs())
# NOTE: The first return value should be the output of the first wait_tensor.
- # We want to make sure no unneccessary copy is made.
+ # We want to make sure no unnecessary copy is made.
(
FileCheck()
.check("buf0 = empty_strided")
@@ -1548,7 +1548,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
compiled = torch.compile(func)
code = run_and_get_triton_code(compiled, *inputs, **self.get_world_trs())
# NOTE: The first return value should be the output of the first wait_tensor.
- # We want to make sure no unneccessary copy is made.
+ # We want to make sure no unnecessary copy is made.
(FileCheck().check("all_gather_into_tensor_out").run(code))
out = compiled(*inputs, **self.get_world_trs())
correct = func(*inputs, **self.get_world_trs())
@@ -1598,7 +1598,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
compiled = torch.compile(func)
code = run_and_get_triton_code(compiled, *inputs, **self.get_world_trs())
# NOTE: The first return value should be the output of the first wait_tensor.
- # We want to make sure no unneccessary copy is made.
+ # We want to make sure no unnecessary copy is made.
(
FileCheck()
.check_count(
@@ -1689,7 +1689,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
compiled = torch.compile(func)
code = run_and_get_triton_code(compiled, *inputs, **self.get_world_trs())
# NOTE: The first return value should be the output of the first wait_tensor.
- # We want to make sure no unneccessary copy is made.
+ # We want to make sure no unnecessary copy is made.
(
FileCheck()
.check_count(
@@ -1785,7 +1785,7 @@ class TestCollectivesInductor(DynamoDistributedSingleProcTestCase):
compiled = torch.compile(func)
code = run_and_get_triton_code(compiled, inputs, **self.get_world_trs())
# NOTE: The first return value should be the output of the first wait_tensor.
- # We want to make sure no unneccessary copy is made.
+ # We want to make sure no unnecessary copy is made.
(
FileCheck()
.check("all_gather")
@@ -232,7 +232,7 @@ class NVSHMEMSymmetricMemoryTest(MultiProcContinousTest):
)
out = symm_mem.empty(max_out_numel, dtype=dtype, device=self.device).fill_(-1)
# 3 rows: input splits, output splits, output offsets
- # Initiallizing all values to -1 to check if they are updated
+ # Initializing all values to -1 to check if they are updated
in_out_splits = symm_mem.empty(
(3, nsplits), dtype=torch.int64, device=self.device
).fill_(-1)
@@ -376,7 +376,7 @@ if not TEST_WITH_DEV_DBG_ASAN:
):
self._create_wrapper_pg(with_new_group=True)
# nothing to assert, isinstance(pg, _ProcessGroupWrapper)
- # should never be invoked since it is preceeded by
+ # should never be invoked since it is proceeded by
# _GLOO_AVAILABLE check, this test will fail on
# an unexpected NameError if not.
@@ -837,9 +837,9 @@ class RendezvousTCPTest(TestCase):
# not respected, it will take much longer to timeout.
start = time.time()
with self.assertRaisesRegex(
- DistStoreError, "wait timeout after 100ms, keys: /nonexistant key"
+ DistStoreError, "wait timeout after 100ms, keys: /nonexistent key"
):
- store0.get("nonexistant key")
+ store0.get("nonexistent key")

end = time.time()
time_diff = end - start
@@ -1066,7 +1066,7 @@ class TimeoutTest(TestCase):
wait_for_workers=False,
)

- ths = []
+ threads = []
for i in range(2):
t = threading.Thread(
target=run,
@@ -1076,16 +1076,16 @@ class TimeoutTest(TestCase):
),
)
t.start()
- ths.append(t)
+ threads.append(t)

def handler(a, b):
pass

signal.signal(signal.SIGUSR1, handler)
time.sleep(1)
- signal.pthread_kill(ths[1].ident, signal.SIGUSR1)
+ signal.pthread_kill(threads[1].ident, signal.SIGUSR1)

- for t in ths:
+ for t in threads:
t.join()
self.assertTrue(rank_res[0], "rank0")
self.assertTrue(rank_res[1], "rank1")
@@ -664,7 +664,7 @@ class SymmMemNegativeTest(MultiProcessTestCase):

# These timeout tests are skipped on ROCm because timeout calls trap(), which
# is handled differently inside hip runtime. It collects gpu coredump and causes
- # the linux kernel to create a core dump of the host application. The funcitonality
+ # the linux kernel to create a core dump of the host application. The functionality
# is there, meaning timeout is happening correctly. However, there isn't a nice way
# to test it as the current executing thread will coredump and exit.
@skipIfRocm
@@ -690,7 +690,7 @@ class SymmMemNegativeTest(MultiProcessTestCase):

# These timeout tests are skipped on ROCm because timeout calls trap(), which
# is handled differently inside hip runtime. It collects gpu coredump and causes
- # the linux kernel to create a core dump of the host application. The funcitonality
+ # the linux kernel to create a core dump of the host application. The functionality
# is there, meaning timeout is happening correctly. However, there isn't a nice way
# to test it as the current executing thread will coredump and exit.
@skipIfRocm
@@ -719,7 +719,7 @@ class SymmMemNegativeTest(MultiProcessTestCase):

# These timeout tests are skipped on ROCm because timeout calls trap(), which
# is handled differently inside hip runtime. It collects gpu coredump and causes
- # the linux kernel to create a core dump of the host application. The funcitonality
+ # the linux kernel to create a core dump of the host application. The functionality
# is there, meaning timeout is happening correctly. However, there isn't a nice way
# to test it as the current executing thread will coredump and exit.
@skipIfRocm