mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[ROCm] HIP Lazy Streams (#119996)
For ROCm/HIP, each stream is lazily initialized on first use, rather than all streams being created when the first stream is requested. HIP streams are not as lightweight as CUDA streams, so the pooling strategy can affect performance. Pull Request resolved: https://github.com/pytorch/pytorch/pull/119996 Approved by: https://github.com/ezyang
This commit is contained in:
committed by
PyTorch MergeBot
parent
26fbbc3e84
commit
d3839b624b
@ -74,7 +74,14 @@ class TestCudaTrace(TestCase):
|
||||
def test_stream_creation_callback(self):
|
||||
cuda_trace.register_callback_for_cuda_stream_creation(self.mock)
|
||||
|
||||
torch.cuda.Stream()
|
||||
# see Note [HIP Lazy Streams]
|
||||
if torch.version.hip:
|
||||
user_stream = torch.cuda.Stream()
|
||||
with torch.cuda.stream(user_stream):
|
||||
tensor = torch.ones(5, device="cuda")
|
||||
else:
|
||||
torch.cuda.Stream()
|
||||
|
||||
self.mock.assert_called()
|
||||
|
||||
def test_device_synchronization_callback(self):
|
||||
|
Reference in New Issue
Block a user