Use type-erased union for Buffer. (#322)

Summary:
Pull Request resolved: https://github.com/pytorch/tensorpipe/pull/322

Pull Request resolved: https://github.com/pytorch/pytorch/pull/54145

In order to merge the channel hierarchies, we need a generic `Buffer` type that can wrap either a `CpuBuffer` or a `CudaBuffer`.
The constraint is that, since this type is used by the channels, it cannot explicitly refer to `CudaBuffer`. We propose a type-erasure-based solution, with a small-buffer optimization that avoids heap-allocating the wrapped concrete buffer.
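For illustration, here is a minimal sketch of that idea, not the actual TensorPipe implementation: the concrete buffer is constructed in place inside inline storage held by `Buffer`, and recovered through a templated `unwrap<T>()` accessor (the accessor name matches the one used in the diff below). The `CpuBuffer` stand-in, the 32-byte storage size, the trivially-copyable restriction, and the error handling are illustrative assumptions only.

#include <cstddef>
#include <new>
#include <stdexcept>
#include <type_traits>
#include <typeinfo>

// Stand-in for tensorpipe::CpuBuffer (the real one lives in
// tensorpipe/common/cpu_buffer.h); shown here only so the sketch compiles.
struct CpuBuffer {
  void* ptr{nullptr};
  size_t length{0};
};

// Sketch of a type-erased Buffer with a small-buffer optimization: the
// concrete buffer lives in inline storage, so wrapping never heap-allocates,
// and nothing in this class refers to any CUDA type. Restricted to trivially
// copyable payloads to keep the sketch short.
class Buffer {
 public:
  Buffer() = default;

  template <typename TBuffer>
  /* implicit */ Buffer(TBuffer buffer) {
    static_assert(
        std::is_trivially_copyable<TBuffer>::value,
        "sketch only handles trivially copyable buffers");
    static_assert(sizeof(TBuffer) <= sizeof(storage_), "payload too large");
    static_assert(alignof(TBuffer) <= alignof(std::max_align_t), "");
    // Construct the concrete buffer in place inside the inline storage.
    new (&storage_) TBuffer(buffer);
    typeId_ = &typeid(TBuffer);
  }

  // Recover the wrapped concrete buffer; the caller names the expected type.
  template <typename TBuffer>
  TBuffer& unwrap() {
    if (typeId_ != &typeid(TBuffer)) {
      throw std::runtime_error("Buffer holds a different buffer type");
    }
    return *std::launder(reinterpret_cast<TBuffer*>(&storage_));
  }

 private:
  alignas(std::max_align_t) unsigned char storage_[32] = {};
  const std::type_info* typeId_{nullptr};
};

// Usage, mirroring the test changes below:
//   Buffer b = CpuBuffer{somePtr, someLength};
//   size_t len = b.unwrap<CpuBuffer>().length;

The point of the inline storage is that wrapping a buffer never allocates, while the templated accessor keeps any mention of `CudaBuffer` out of the generic type itself.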
ghstack-source-id: 124131499

Test Plan: CI

Reviewed By: lw

Differential Revision: D27001339

fbshipit-source-id: 26d7dc19d69d7e3336df6fd4ff6ec118dc17c5b6
Author: Lucas Hosseini
Date: 2021-03-18 02:19:11 -07:00
Committed by: Facebook GitHub Bot
Parent: 133000fe7a
Commit: c618dc13d2
2 changed files with 41 additions and 23 deletions


@@ -1,5 +1,6 @@
 #include <gtest/gtest.h>
+#include <tensorpipe/common/cpu_buffer.h>
 #include <tensorpipe/core/message.h>
 #include <torch/csrc/distributed/rpc/tensorpipe_utils.h>
 #include <torch/torch.h>
@@ -42,7 +43,8 @@ TEST(TensorpipeSerialize, Base) {
   recvingTpMessage.tensors.reserve(sendingTpMessage.tensors.size());
   for (auto& tpTensor : sendingTpMessage.tensors) {
     tensorpipe::Message::Tensor t;
-    t.buffer = tensorpipe::CpuBuffer{nullptr, tpTensor.buffer.cpu.length};
+    t.buffer = tensorpipe::CpuBuffer{
+        nullptr, tpTensor.buffer.unwrap<tensorpipe::CpuBuffer>().length};
     t.metadata = tpTensor.metadata;
     recvingTpMessage.tensors.push_back(std::move(t));
   }
@@ -68,9 +70,9 @@ TEST(TensorpipeSerialize, Base) {
     tensorpipe::Message::Tensor& srcTensor = sendingTpMessage.tensors[i];
     tensorpipe::Message::Tensor& dstTensor = recvingTpMessage.tensors[i];
     memcpy(
-        dstTensor.buffer.cpu.ptr,
-        srcTensor.buffer.cpu.ptr,
-        srcTensor.buffer.cpu.length);
+        dstTensor.buffer.unwrap<tensorpipe::CpuBuffer>().ptr,
+        srcTensor.buffer.unwrap<tensorpipe::CpuBuffer>().ptr,
+        srcTensor.buffer.unwrap<tensorpipe::CpuBuffer>().length);
   }
 
   // Mimic read() callback:
@@ -113,10 +115,17 @@ TEST(TensorpipeSerialize, RecopySparseTensors) {
   EXPECT_TRUE(torch::equal(main, tpBuffers.tensors[0]));
   EXPECT_TRUE(torch::equal(tiny, tpBuffers.tensors[1]));
   // Test cloned storage
-  EXPECT_EQ(main.storage().data(), sendingTpMessage.tensors[0].buffer.cpu.ptr);
-  EXPECT_NE(tiny.storage().data(), sendingTpMessage.tensors[1].buffer.cpu.ptr);
-  EXPECT_EQ(
-      tiny.element_size() * k1K, sendingTpMessage.tensors[1].buffer.cpu.length);
+  EXPECT_EQ(
+      main.storage().data(),
+      sendingTpMessage.tensors[0].buffer.unwrap<tensorpipe::CpuBuffer>().ptr);
+  EXPECT_NE(
+      tiny.storage().data(),
+      sendingTpMessage.tensors[1].buffer.unwrap<tensorpipe::CpuBuffer>().ptr);
+  EXPECT_EQ(
+      tiny.element_size() * k1K,
+      sendingTpMessage.tensors[1]
+          .buffer.unwrap<tensorpipe::CpuBuffer>()
+          .length);
 }
 
 TEST(TensorpipeSerialize, NoDeleterTensors) {
@@ -141,24 +150,32 @@ TEST(TensorpipeSerialize, NoDeleterTensors) {
   EXPECT_EQ(sendingTpMessage.tensors.size(), 2);
   EXPECT_EQ(
       tpBuffers.copiedTensors[0].size(),
-      sendingTpMessage.tensors[0].buffer.cpu.length);
+      sendingTpMessage.tensors[0]
+          .buffer.unwrap<tensorpipe::CpuBuffer>()
+          .length);
   EXPECT_EQ(
       tpBuffers.copiedTensors[1].size(),
-      sendingTpMessage.tensors[1].buffer.cpu.length);
+      sendingTpMessage.tensors[1]
+          .buffer.unwrap<tensorpipe::CpuBuffer>()
+          .length);
   EXPECT_EQ(
       tpBuffers.copiedTensors[0].data(),
-      sendingTpMessage.tensors[0].buffer.cpu.ptr);
+      sendingTpMessage.tensors[0].buffer.unwrap<tensorpipe::CpuBuffer>().ptr);
   EXPECT_EQ(
       tpBuffers.copiedTensors[1].data(),
-      sendingTpMessage.tensors[1].buffer.cpu.ptr);
+      sendingTpMessage.tensors[1].buffer.unwrap<tensorpipe::CpuBuffer>().ptr);
   EXPECT_TRUE(
       memcmp(
           tpBuffers.copiedTensors[0].data(),
           t1.storage().data(),
-          sendingTpMessage.tensors[0].buffer.cpu.length) == 0);
+          sendingTpMessage.tensors[0]
+              .buffer.unwrap<tensorpipe::CpuBuffer>()
+              .length) == 0);
   EXPECT_TRUE(
       memcmp(
           tpBuffers.copiedTensors[1].data(),
           t2.storage().data(),
-          sendingTpMessage.tensors[1].buffer.cpu.length) == 0);
+          sendingTpMessage.tensors[1]
+              .buffer.unwrap<tensorpipe::CpuBuffer>()
+              .length) == 0);
 }