mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
This reverts commit a0bc865123dba047aa1507e281bf2462780cf271.
Reverted https://github.com/pytorch/pytorch/pull/158928 on behalf of https://github.com/clee2000 due to broke cpp static runtime test? [GH job link](https://github.com/pytorch/pytorch/actions/runs/16517697273/job/46715871457) [HUD commit link](a0bc865123) ([comment](https://github.com/pytorch/pytorch/pull/158928#issuecomment-3118554478))
235 lines
6.7 KiB
C++
235 lines
6.7 KiB
C++
#include <gtest/gtest.h>
|
|
#include <torch/csrc/jit/tensorexpr/ir_simplifier.h>
|
|
#include <torch/csrc/jit/tensorexpr/llvm_codegen.h>
|
|
#include <torch/csrc/jit/tensorexpr/loopnest.h>
|
|
#include <torch/csrc/jit/tensorexpr/operators/conv2d.h>
|
|
#include <torch/csrc/jit/tensorexpr/tensor.h>
|
|
#include <torch/torch.h>
|
|
|
|
namespace torch {
|
|
namespace jit {
|
|
|
|
namespace te = torch::jit::tensorexpr;
|
|
namespace F = torch::nn::functional;
|
|
|
|
#ifdef TORCH_ENABLE_LLVM
|
|
|
|
// Generate test data with few bits of precision, to minimize error
|
|
// accumulation from floating-point reordering.
|
|
static at::Tensor genTestData(c10::IntArrayRef args) {
|
|
return at::trunc(at::randn(args) * 256.0f) / 256.0f;
|
|
}
|
|
|
|
// Lowers the fixed-shape depthwise conv2d (bias variant) through LLVMCodeGen
// and compares the generated kernel's output with at::conv2d.
TEST(Conv, DepthwiseConv2D) {
  // Input dimensions.
  constexpr int N = 1;
  constexpr int C = 72;
  constexpr int H = 56;
  constexpr int W = 56;
  // Filter dimensions.
  constexpr int K = 72;
  constexpr int R = 3;
  constexpr int S = 3;
  // Convolution parameters; there is one group per input channel.
  constexpr int kPad = 1;
  constexpr int kStride = 2;
  constexpr int kGroups = C;
  constexpr int CperG = C / kGroups;

  // Symbolic buffers and the tensor expression under test.
  te::BufHandle inputBuf("input", {N, C, H, W}, te::kFloat);
  te::BufHandle weightBuf("weight", {K, CperG, R, S}, te::kFloat);
  te::BufHandle biasBuf("bias", {K}, te::kFloat);
  te::Tensor convOut = te::conv2d_depthwise(
      inputBuf, weightBuf, biasBuf, kStride, kPad, kGroups);

  // Simplify and compile the loop nest.
  te::LoopNest nest({convOut});
  nest.simplify();
  nest.prepareForCodegen();
  const std::vector<te::CodeGen::BufferArg> bufferArgs = {
      inputBuf, weightBuf, biasBuf, convOut};
  te::LLVMCodeGen codegen(nest.root_stmt(), bufferArgs);

  // Concrete data plus the reference result.
  auto inputData = genTestData({N, C, H, W});
  auto weightData = genTestData({K, CperG, R, S});
  auto biasData = genTestData({K});
  auto expected = at::conv2d(
      inputData, weightData, biasData, kStride, kPad, /*dilation=*/1, kGroups);
  auto actual = at::zeros_like(expected);

  // Argument order must match the buffer arguments given to the codegen.
  const std::vector<te::CodeGen::CallArg> callArgs = {
      inputData.data_ptr<float>(),
      weightData.data_ptr<float>(),
      biasData.data_ptr<float>(),
      actual.data_ptr<float>()};
  codegen.call(callArgs);

  ASSERT_TRUE(at::allclose(expected, actual));
}
|
|
|
|
// Same check as the bias variant, but exercises the conv2d_depthwise overload
// that takes no bias buffer; the reference passes an undefined bias tensor.
TEST(Conv, DepthwiseConv2DNoBias) {
  // Input dimensions.
  constexpr int N = 1;
  constexpr int C = 72;
  constexpr int H = 56;
  constexpr int W = 56;
  // Filter dimensions.
  constexpr int K = 72;
  constexpr int R = 3;
  constexpr int S = 3;
  // Convolution parameters; there is one group per input channel.
  constexpr int kPad = 1;
  constexpr int kStride = 2;
  constexpr int kGroups = C;
  constexpr int CperG = C / kGroups;

  // Symbolic buffers and the tensor expression under test.
  te::BufHandle inputBuf("input", {N, C, H, W}, te::kFloat);
  te::BufHandle weightBuf("weight", {K, CperG, R, S}, te::kFloat);
  te::Tensor convOut =
      te::conv2d_depthwise(inputBuf, weightBuf, kStride, kPad, kGroups);

  // Simplify and compile the loop nest.
  te::LoopNest nest({convOut});
  nest.simplify();
  nest.prepareForCodegen();
  const std::vector<te::CodeGen::BufferArg> bufferArgs = {
      inputBuf, weightBuf, convOut};
  te::LLVMCodeGen codegen(nest.root_stmt(), bufferArgs);

  // Concrete data plus the reference result.
  auto inputData = genTestData({N, C, H, W});
  auto weightData = genTestData({K, CperG, R, S});
  auto expected = at::conv2d(
      inputData,
      weightData,
      at::Tensor(),
      kStride,
      kPad,
      /*dilation=*/1,
      kGroups);
  auto actual = at::zeros_like(expected);

  // Argument order must match the buffer arguments given to the codegen.
  const std::vector<te::CodeGen::CallArg> callArgs = {
      inputData.data_ptr<float>(),
      weightData.data_ptr<float>(),
      actual.data_ptr<float>()};
  codegen.call(callArgs);

  ASSERT_TRUE(at::allclose(expected, actual));
}
|
|
|
|
// Compiles the depthwise conv2d with every dimension and convolution
// parameter supplied as a runtime integer variable, then invokes the kernel
// with concrete sizes and compares against at::conv2d (no bias).
TEST(Conv, DepthwiseConv2DDynamicShapes) {
  // Symbolic sizes for the input tensor.
  te::VarHandle batchVar("N", te::kInt);
  te::VarHandle chanVar("C", te::kInt);
  te::VarHandle heightVar("H", te::kInt);
  te::VarHandle widthVar("W", te::kInt);
  // Symbolic sizes for the filter tensor.
  te::VarHandle outChanVar("K", te::kInt);
  te::VarHandle chanPerGroupVar("CperG", te::kInt);
  te::VarHandle kernelHVar("R", te::kInt);
  te::VarHandle kernelWVar("S", te::kInt);
  // Symbolic convolution parameters.
  te::VarHandle padVar("kPad", te::kInt);
  te::VarHandle strideVar("kStride", te::kInt);
  te::VarHandle groupsVar("kGroups", te::kInt);

  te::BufHandle inputBuf(
      "input", {batchVar, chanVar, heightVar, widthVar}, te::kFloat);
  te::BufHandle weightBuf(
      "weight",
      {outChanVar, chanPerGroupVar, kernelHVar, kernelWVar},
      te::kFloat);
  te::Tensor convOut = te::conv2d_depthwise(
      inputBuf,
      weightBuf,
      batchVar,
      chanVar,
      heightVar,
      widthVar,
      outChanVar,
      chanPerGroupVar,
      kernelHVar,
      kernelWVar,
      strideVar,
      padVar,
      groupsVar);

  te::LoopNest nest({convOut});
  nest.simplify();
  nest.prepareForCodegen();

  // Kernel signature: buffers first, then the scalar sizes, then the output.
  // Note that pad precedes stride here; the call arguments below follow the
  // same order.
  std::vector<te::CodeGen::BufferArg> kernelParams = {
      inputBuf,
      weightBuf,
      batchVar,
      chanVar,
      heightVar,
      widthVar,
      outChanVar,
      chanPerGroupVar,
      kernelHVar,
      kernelWVar,
      padVar,
      strideVar,
      groupsVar,
      convOut};
  te::LLVMCodeGen codegen(nest.root_stmt(), kernelParams);

  // Concrete sizes bound to the symbolic variables at call time.
  constexpr int N = 1;
  constexpr int C = 72;
  constexpr int H = 56;
  constexpr int W = 56;
  constexpr int K = 72;
  constexpr int R = 3;
  constexpr int S = 3;
  constexpr int kPad = 1;
  constexpr int kStride = 2;
  constexpr int kGroups = C;
  constexpr int CperG = C / kGroups;

  auto inputData = genTestData({N, C, H, W});
  auto weightData = genTestData({K, CperG, R, S});
  auto expected = at::conv2d(
      inputData,
      weightData,
      at::Tensor(),
      kStride,
      kPad,
      /*dilation=*/1,
      kGroups);
  auto actual = at::zeros_like(expected);

  std::vector<te::CodeGen::CallArg> kernelArgs = {
      inputData.data_ptr<float>(),
      weightData.data_ptr<float>(),
      N,
      C,
      H,
      W,
      K,
      CperG,
      R,
      S,
      kPad,
      kStride,
      kGroups,
      actual.data_ptr<float>()};
  codegen.call(kernelArgs);

  ASSERT_TRUE(at::allclose(expected, actual));
}
|
|
|
|
#endif
|
|
|
|
// Expresses a direct 2D convolution as a tensor-expression sum reduction,
// evaluates it with SimpleIREvaluator, and compares the result with
// F::conv2d within a 1e-3 tolerance.
TEST(Conv, Conv2D) {
  // Input dimensions.
  constexpr int N = 1, C = 3, H = 11, W = 11;

  // Filter dimensions.
  constexpr int K = 8, R = 3, S = 3;

  // Output dims.
  constexpr int OH = H - R + 1;
  constexpr int OW = W - S + 1;

  // Compute reference result.
  at::Tensor inputData = torch::randn({N, C, H, W});
  at::Tensor filterData = torch::randn({K, C, R, S});
  at::Tensor expected = F::conv2d(inputData, filterData);

  // Double check the output size is as expected.
  ASSERT_EQ(expected.size(0), N);
  ASSERT_EQ(expected.size(1), K);
  ASSERT_EQ(expected.size(2), OH);
  ASSERT_EQ(expected.size(3), OW);

  te::BufHandle inputB("input", {N, C, H, W}, te::kFloat);
  te::BufHandle filterB("filter", {K, C, R, S}, te::kFloat);

  te::Tensor conv = te::Reduce(
      "conv",
      {N, K, OH, OW},
      te::Sum(),
      // FIXME: The body takes a `std::vector` of axes and unpacks it by hand
      // because there is no overload accepting an arbitrary number of
      // ExprHandle/VarHandle parameters.
      [&](const std::vector<te::VarHandle>& axes) {
        // The four output axes come first, followed by the three reduction
        // axes.
        const auto& batch = axes[0];
        const auto& outChan = axes[1];
        const auto& outRow = axes[2];
        const auto& outCol = axes[3];
        const auto& inChan = axes[4];
        const auto& kRow = axes[5];
        const auto& kCol = axes[6];
        // One product term of the reduction: the input element under the
        // filter tap times that filter weight.
        auto inputElem =
            inputB.load(batch, inChan, outRow + kRow, outCol + kCol);
        auto filterElem = filterB.load(outChan, inChan, kRow, kCol);
        return inputElem * filterElem;
      },
      // FIXME: If you forget one of the reduction dims, you get a segfault.
      // Could that be caught by a verifier?
      {C, R, S});

  // FIXME: It'd be nice to have a single header that pulls in things like
  // LoopNest, IRSimplifier, etc.
  te::LoopNest nest({conv});
  nest.prepareForCodegen();
  te::StmtPtr stmt = te::IRSimplifier::simplify(nest.root_stmt());

  at::Tensor actual = at::empty_like(expected);
  te::SimpleIREvaluator evaluator(stmt, {inputB, filterB, conv});
  evaluator.call(
      {inputData.data_ptr<float>(),
       filterData.data_ptr<float>(),
       actual.data_ptr<float>()});

  ASSERT_TRUE(at::allclose(expected, actual, 1e-3, 1e-3));
}
|
|
|
|
} // namespace jit
|
|
} // namespace torch
|