[Mem Snapshot] Add Metadata Field (#165490)

Summary:
The implementation adds the ability to:

- Set custom metadata strings that will be attached to all subsequent allocations
- Clear or change the metadata at any point
- View the metadata in memory snapshots via _dump_snapshot()

Test Plan: Added a test in test_cuda.py and manually checked a snapshot to confirm that the metadata was added.

Differential Revision: D84654933

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165490
Approved by: https://github.com/yushangdi
This commit is contained in:
Shivam Raikundalia
2025-10-17 23:46:02 +00:00
committed by PyTorch MergeBot
parent 69c33898fa
commit a25a649e70
7 changed files with 103 additions and 3 deletions

View File

@ -4378,6 +4378,28 @@ class TestCudaMallocAsync(TestCase):
finally:
torch.cuda.memory._record_memory_history(None)
@unittest.skipIf(
    TEST_CUDAMALLOCASYNC, "setContextRecorder not supported by CUDAMallocAsync"
)
@requiresCppContext
def test_memory_plots_metadata(self):
    """Verify that user metadata is attached to recorded memory trace events.

    For each history-recording ``context`` mode, the test sets a metadata
    string, records a small CUDA allocation and its release, takes a
    snapshot, and checks that every event in the first device trace carries
    that string under the ``"user_metadata"`` key.
    """
    expected_metadata = "metadata test"
    for context in ["alloc", "all", "state"]:
        try:
            # Start from a clean allocator state so the trace only contains
            # events produced by this iteration.
            torch._C._cuda_clearCublasWorkspaces()
            torch.cuda.memory.empty_cache()
            torch.cuda.memory._set_memory_metadata(expected_metadata)
            # Use the loop variable so each context mode is actually
            # exercised (previously "all" was hard-coded for every pass).
            torch.cuda.memory._record_memory_history(context=context)
            x = torch.rand(3, 4, device="cuda")
            del x
            torch.cuda.memory.empty_cache()
            # Clear the metadata before snapshotting; the events recorded
            # above are still expected to carry the earlier value.
            torch.cuda.memory._set_memory_metadata("")

            ss = torch.cuda.memory._snapshot()
            trace = ss["device_traces"][0]
            # Guard against a vacuous pass when nothing was recorded.
            self.assertTrue(
                trace, f"no trace events recorded for context={context!r}"
            )
            for event in trace:
                self.assertEqual(
                    event["user_metadata"],
                    expected_metadata,
                    f"unexpected metadata for context={context!r}",
                )
        finally:
            # Always restore global allocator state, even when an assertion
            # or allocation above fails, so later tests are not affected.
            torch.cuda.memory._set_memory_metadata("")
            torch.cuda.memory._record_memory_history(None)
@unittest.skipIf(
TEST_CUDAMALLOCASYNC, "setContextRecorder not supported by CUDAMallocAsync"
)