mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[MPS] Chunk fillBuffer into 4Gb slices (#164108)
To avoid a regression on MacOS 26, which one can observe by running the following script:

```swift
import Metal

let bufferSize = 1<<32 + 4
guard let device = MTLCreateSystemDefaultDevice() else { fatalError("No Metal device found") }
guard let buffer = device.makeBuffer(length: bufferSize, options: .storageModeShared) else { fatalError("Failed to create buffer") }
guard let cmdQueue = device.makeCommandQueue() else { fatalError("Failed to create command queue") }
guard let cmdBuffer = cmdQueue.makeCommandBuffer() else { fatalError("Failed to create command buffer") }
guard let blitEncoder = cmdBuffer.makeBlitCommandEncoder() else { fatalError("Failed to create blit encoder") }
blitEncoder.fill(buffer: buffer, range: 0..<bufferSize, value: 0x42)
blitEncoder.endEncoding()
cmdBuffer.commit()
cmdBuffer.waitUntilCompleted()

let tailOffs = 8
let hostPtr = buffer.contents().bindMemory(to: UInt8.self, capacity: bufferSize)
let tail = Array(UnsafeBufferPointer(start: hostPtr + (bufferSize - tailOffs), count: tailOffs))
for (idx, val) in tail.enumerated() {
    print("Offs 0x\(String(bufferSize - tailOffs + idx, radix: 16)): 0x\(String(val, radix: 16))")
}
```

Test plan: run `test_indexing.py` on MacOS-26

Fixes https://github.com/pytorch/pytorch/issues/161265

Pull Request resolved: https://github.com/pytorch/pytorch/pull/164108

Approved by: https://github.com/Skylion007
This commit is contained in:
committed by
PyTorch MergeBot
parent
9e792f583a
commit
6db1b9dd21
@ -158,7 +158,18 @@ void MPSStream::fill(id<MTLBuffer> buffer, uint8_t value, size_t length, size_t
|
||||
endKernelCoalescing();
|
||||
id<MTLBlitCommandEncoder> blitEncoder = [commandBuffer() blitCommandEncoder];
|
||||
|
||||
[blitEncoder fillBuffer:buffer range:NSMakeRange(offset, length) value:value];
|
||||
// For some reason fillBuffer stopped working for length > 4Gb on MacOS 26
|
||||
// See https://github.com/pytorch/pytorch/issues/163962
|
||||
// Work around it by batching fill commands into 4Gb chunks
|
||||
constexpr size_t max_copy_size = 0x100000000; // 4GB
|
||||
size_t bytes_filled = 0;
|
||||
size_t bytes_remains = length;
|
||||
while (bytes_remains > 0) {
|
||||
NSUInteger bytes_to_copy = std::min(max_copy_size, bytes_remains);
|
||||
[blitEncoder fillBuffer:buffer range:NSMakeRange(offset + bytes_filled, bytes_to_copy) value:value];
|
||||
bytes_filled += bytes_to_copy;
|
||||
bytes_remains -= bytes_to_copy;
|
||||
}
|
||||
[blitEncoder endEncoding];
|
||||
synchronize(syncType);
|
||||
}
|
||||
|
Reference in New Issue
Block a user