Revert "Optimize multi_tensor_apply (take 2) (#119764)"
This reverts commit 0b68a28c87df2c6eb2cf530be4659b5a2f8a95b0.
Reverted https://github.com/pytorch/pytorch/pull/119764 on behalf of https://github.com/huydhn due to: Sorry for reverting your change, but it is failing a ROCm job in trunk (0b68a28c87). Please help take a look and reland the change ([comment](https://github.com/pytorch/pytorch/pull/119764#issuecomment-2014190124))
@@ -649,26 +649,14 @@ class TestForeach(TestCase):
     @onlyCUDA
     @ops(foreach_reduce_op_db, allowed_dtypes=floating_types())
-    @parametrize("use_cuda_graph", (False, True))
-    def test_big_num_tensors(self, device, dtype, op, use_cuda_graph):
+    def test_big_num_tensors(self, device, dtype, op):
         N = 600
         tensorlist = [make_tensor((2, 3), dtype=dtype, device=device, noncontiguous=False) for _ in range(N)]
         fn, ref_fn, *_ = self._get_funcs(op)

         import math
         for ord in (1, 2, math.inf):
-            if not use_cuda_graph:
-                actual = fn(inputs=[tensorlist], is_cuda=True, expect_fastpath=True, ord=ord, zero_size=False)
-            else:
-                # When using CUDA graphs and the tensor metadata doesn't fit in
-                # the static kernel argument space, multi_tensor_apply creates
-                # the launch arguments once, uses cudaUserObject_t to tie its
-                # lifetime to the graph, and reuses it throughout replays. This
-                # test verifies multi_tensor_apply's behavior in the scenario.
-                g = torch.cuda.CUDAGraph()
-                with torch.cuda.graph(g):
-                    actual = fn.func(tensorlist, ord=ord)
-                g.replay()
+            actual = fn(inputs=[tensorlist], is_cuda=True, expect_fastpath=True, ord=ord, zero_size=False)
             expect = ref_fn(inputs=[tensorlist], ord=ord)

             self.assertEqual(expect, actual, equal_nan=True)

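For context, the removed else-branch exercised multi_tensor_apply under CUDA graph capture. Below is a minimal standalone sketch of that scenario, assuming a CUDA device and torch._foreach_norm as a representative op from foreach_reduce_op_db; the warm-up step and the reference computation are assumptions of this sketch, not part of the original test harness.

    import math
    import torch

    # Sketch of the scenario the removed branch covered. With 600 tensors,
    # the tensor metadata does not fit in the static kernel argument space,
    # which is the code path the reverted change managed via cudaUserObject_t.
    device = "cuda"
    tensorlist = [torch.randn(2, 3, device=device) for _ in range(600)]

    for ord in (1, 2, math.inf):
        # Warm up outside capture so lazy kernel loading is not captured
        # (an assumption of this sketch, not from the original test).
        torch._foreach_norm(tensorlist, ord=ord)
        torch.cuda.synchronize()

        g = torch.cuda.CUDAGraph()
        with torch.cuda.graph(g):
            actual = torch._foreach_norm(tensorlist, ord=ord)
        g.replay()
        torch.cuda.synchronize()

        # Per-tensor reference: vector norm over each whole tensor.
        expect = [torch.linalg.vector_norm(t, ord) for t in tensorlist]
        for a, e in zip(actual, expect):
            torch.testing.assert_close(a, e, equal_nan=True)

The point of the optimization being reverted, per the removed comment, is that the launch arguments built during capture must outlive the capture itself; tying their lifetime to the graph with cudaUserObject_t lets every g.replay() reuse them safely.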