Integrate XNNPACK with custom class for packing weights. (#34047)

Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/34047

This PR integrates the previously added XNNPACK conv2d and linear ops via
custom class registration for packed weights. The packed struct
is serializable.
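
For orientation, a minimal usage sketch of the new custom-class ops from
Python (a sketch, not part of the commit; it assumes a build with
USE_XNNPACK=1, and the example shapes are made up. The op names and schemas
come from the _xnnpack registrations added in this PR):

import torch

# Pack FP32 weights into an XNNPackLinearOpContext, then run the packed op.
weight = torch.rand(8, 4)
bias = torch.rand(8)
packed = torch.ops._xnnpack.linear_prepack(weight, bias)
out = torch.ops._xnnpack.linear_packed(torch.rand(2, 4), packed)

# Conv2d follows the same pattern; stride, padding, dilation and groups
# are captured at prepack time inside the XNNPackConv2dOpContext.
conv_weight = torch.rand(16, 4, 3, 3)
conv_packed = torch.ops._xnnpack.conv2d_prepack(
    conv_weight, None, [1, 1], [0, 0], [1, 1], 1)
conv_out = torch.ops._xnnpack.conv2d_packed(
    torch.rand(2, 4, 32, 32), conv_packed)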

Test Plan:
python test/test_xnnpack_integration.py

Imported from OSS

Differential Revision: D20185657

fbshipit-source-id: fc7e692d8f913e493b293b02d92f4e78536d7698
Kimish Patel
2020-03-14 12:48:24 -07:00
committed by Facebook GitHub Bot
parent e23a9dc140
commit 4c30fc7238
17 changed files with 1029 additions and 300 deletions

View File

@ -141,6 +141,14 @@ const std::vector<at::QEngine>& Context::supportedQEngines() const {
return supported_qengines;
}
bool Context::isXNNPACKAvailable() const {
#ifdef USE_XNNPACK
return true;
#else
return false;
#endif
}
bool Context::setFlushDenormal(bool on) {
return at::cpu::set_flush_denormal(on);
}

View File

@ -109,6 +109,7 @@ class CAFFE2_API Context {
at::QEngine qEngine() const;
void setQEngine(at::QEngine e);
const std::vector<at::QEngine>& supportedQEngines() const;
bool isXNNPACKAvailable() const;
private:
void initCUDAIfNeeded(DeviceType p) {

View File

@ -778,10 +778,6 @@
- func: conv2d(Tensor input, Tensor weight, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1, int groups=1) -> Tensor
- func: _conv2d_prepack(Tensor weight, Tensor? bias=None, int[2] stride=1, int[2] padding=0, int[2] dilation=1, int groups=1, float? output_min=None, float? output_max=None) -> Tensor
- func: _conv2d_packed(Tensor packed_weight, Tensor input) -> Tensor
- func: conv3d(Tensor input, Tensor weight, Tensor? bias=None, int[3] stride=1, int[3] padding=0, int[3] dilation=1, int groups=1) -> Tensor
- func: conv_tbc(Tensor self, Tensor weight, Tensor bias, int pad=0) -> Tensor
@ -1577,10 +1573,6 @@
- func: linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
python_module: nn
- func: _linear_prepack(Tensor weight, Tensor? bias=None, float? output_min=None, float? output_max=None) -> Tensor
- func: _linear_packed(Tensor packed_weight, Tensor input) -> Tensor
- func: mkldnn_linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
python_module: nn
dispatch:

View File

@ -9,6 +9,53 @@
namespace at {
namespace native {
namespace xnnpack {
struct Deleter final {
void operator()(const xnn_operator_t op) const {
xnn_delete_operator(op);
}
};
using Operator = std::unique_ptr<xnn_operator, Deleter>;
struct ContextLinear final {
Operator op;
int64_t output_channels;
ContextLinear() = delete;
ContextLinear(Operator&& o, int64_t o_channels) {
op = std::move(o);
output_channels = o_channels;
}
static constexpr float kMin = -std::numeric_limits<float>::infinity();
static constexpr float kMax = std::numeric_limits<float>::infinity();
};
struct ContextConv2D final {
Operator op;
std::array<int64_t, 4> weight_size_;
std::array<int64_t, 2> padding_;
std::array<int64_t, 2> stride_;
std::array<int64_t, 2> dilation_;
ContextConv2D() = delete;
ContextConv2D(
Operator&& o,
std::array<int64_t, 4> weight_size,
std::array<int64_t, 2> padding,
std::array<int64_t, 2> stride,
std::array<int64_t, 2> dilation)
: op(std::move(o)),
weight_size_(weight_size),
padding_(padding),
stride_(stride),
dilation_(dilation) {}
static constexpr float kMin = -std::numeric_limits<float>::infinity();
static constexpr float kMax = std::numeric_limits<float>::infinity();
};
namespace internal {
struct Layout final {
@ -64,14 +111,6 @@ struct Layout final {
};
};
struct Deleter final {
void operator()(const xnn_operator_t op) const {
xnn_delete_operator(op);
}
};
using Operator = std::unique_ptr<xnn_operator, Deleter>;
bool available();
} // namespace internal

View File

@ -1,10 +1,10 @@
#ifdef USE_XNNPACK
#include <ATen/cpp_custom_type_hack.h>
#include <ATen/native/ConvUtils.h>
#include <ATen/native/utils/ParamUtils.h>
#include <ATen/native/xnnpack/Common.h>
#include <ATen/native/xnnpack/Factory.h>
#include <ATen/native/xnnpack/Convolution.h>
namespace at {
namespace native {
@ -12,18 +12,6 @@ namespace xnnpack {
namespace internal {
namespace convolution2d {
struct Context final {
Operator convolution_op;
std::vector<int64_t> weight_size;
std::vector<int64_t> padding;
std::vector<int64_t> stride;
std::vector<int64_t> dilation;
static constexpr float kMin = -std::numeric_limits<float>::infinity();
static constexpr float kMax = std::numeric_limits<float>::infinity();
};
namespace {
// Supports NHWC and NCHW FP32 convolutions with any valid
@ -79,72 +67,6 @@ bool available(
true;
}
Context create(
const Tensor& weight,
const c10::optional<Tensor>& bias,
const IntArrayRef padding_,
const IntArrayRef stride_,
const IntArrayRef dilation_,
const int64_t groups,
const float output_min,
const float output_max) {
const auto padding = expand_param_if_needed(padding_, "padding", 2);
const auto stride = expand_param_if_needed(stride_, "stride", 2);
const auto dilation = expand_param_if_needed(dilation_, "dilation", 2);
const Tensor weight_nhwc = weight.contiguous(MemoryFormat::ChannelsLast);
TORCH_CHECK(
available(
weight_nhwc,
bias,
padding,
stride,
dilation,
groups,
output_min,
output_max),
"XNNPACK Convolution not available! "
"Reason: The provided (weight, bias, padding, stride, dilation, groups, output_min, output_max) "
"parameters are either invalid individually or their combination is not supported by XNNPACK.");
xnn_operator_t convolution_op{};
const xnn_status create_status = xnn_create_convolution2d_nhwc_f32(
padding[Layout::Parameter::height], // input_padding_top
padding[Layout::Parameter::width], // input_padding_right
padding[Layout::Parameter::height], // input_padding_bottom
padding[Layout::Parameter::width], // input_padding_left
weight_nhwc.size(Layout::Filter::height), // kernel_height
weight_nhwc.size(Layout::Filter::width), // kernel_width
stride[Layout::Parameter::height], // subsampling_height
stride[Layout::Parameter::width], // subsampling_width
dilation[Layout::Parameter::height], // dilation_height
dilation[Layout::Parameter::width], // dilation_width
groups, // groups
weight_nhwc.size(Layout::Filter::input), // group_input_channels
weight_nhwc.size(Layout::Filter::output) / groups, // group_output_channels
weight_nhwc.size(Layout::Filter::input) * groups, // input_pixel_stride
weight_nhwc.size(Layout::Filter::output), // output_pixel_stride
weight_nhwc.data_ptr<float>(), // kernel
(bias && bias->defined()) ? bias->data_ptr<float>() : nullptr, // bias
output_min, // output_min
output_max, // output_max
0u, // flags
&convolution_op); // operator
TORCH_CHECK(
xnn_status_success == create_status,
"xnn_create_convolution2d_nhwc_f32 failed!");
return Context{
Operator(convolution_op),
weight_nhwc.sizes().vec(),
padding,
stride,
dilation,
};
}
// TODO: Decouple and improve error handling and messages.
bool usable(const Tensor& input) {
// Input
@ -159,42 +81,43 @@ bool usable(const Tensor& input) {
}
Tensor run(
const Context& context,
const ContextConv2D& context,
const Tensor& input) {
using namespace internal;
const Tensor input_nhwc = input.contiguous(MemoryFormat::ChannelsLast);
const Tensor padded_input_nhwc = allocate_padded_if_needed(input_nhwc);
TORCH_CHECK(
usable(input_nhwc),
usable(padded_input_nhwc),
"XNNPACK Convolution not usable! "
"Reason: The provided input tensor is either invalid or unsupported by XNNPACK.");
Tensor output = empty_with_tail_padding(
conv_output_size(
input_nhwc.sizes(),
context.weight_size,
context.padding,
context.stride,
context.dilation),
input_nhwc.options().dtype(),
padded_input_nhwc.sizes(),
context.weight_size_,
context.padding_,
context.stride_,
context.dilation_),
padded_input_nhwc.options().dtype(),
MemoryFormat::ChannelsLast);
const xnn_status setup_status = xnn_setup_convolution2d_nhwc_f32(
context.convolution_op.get(), // operator
input_nhwc.size(Layout::Activation4D::batch), // batch_size
input_nhwc.size(Layout::Activation4D::height), // input_height
input_nhwc.size(Layout::Activation4D::width), // input_width
input_nhwc.data_ptr<float>(), // input
output.data_ptr<float>(), // output
nullptr); // threadpool
context.op.get(), // operator
padded_input_nhwc.size(Layout::Activation4D::batch), // batch_size
padded_input_nhwc.size(Layout::Activation4D::height), // input_height
padded_input_nhwc.size(Layout::Activation4D::width), // input_width
padded_input_nhwc.data_ptr<float>(), // input
output.data_ptr<float>(), // output
nullptr); // threadpool
TORCH_CHECK(
xnn_status_success == setup_status,
"xnn_setup_convolution2d_nhwc_f32 failed!");
const xnn_status run_status = xnn_run_operator(
context.convolution_op.get(), // operator
context.op.get(), // operator
nullptr); // threadpool
TORCH_INTERNAL_ASSERT(
@ -228,6 +151,101 @@ Tensor create_and_run(
}
} // namespace
ContextConv2D create(
const Tensor& weight,
const c10::optional<Tensor>& bias,
const IntArrayRef padding,
const IntArrayRef stride,
const IntArrayRef dilation,
const int64_t groups,
const float output_min,
const float output_max) {
const auto padding_expanded = expand_param_if_needed(padding, "padding", 2);
const auto stride_expanded = expand_param_if_needed(stride, "stride", 2);
const auto dilation_expanded = expand_param_if_needed(dilation, "dilation", 2);
const Tensor weight_nhwc = weight.contiguous(MemoryFormat::ChannelsLast);
TORCH_CHECK(
available(
weight_nhwc,
bias,
padding_expanded,
stride_expanded,
dilation_expanded,
groups,
output_min,
output_max),
"xnnpack::convolution not available! "
"Reason: The provided (weight, bias, padding, stride, dilation, groups, output_min, output_max) "
"parameters are either invalid individually or their combination is not supported by XNNPACK.");
xnn_operator_t convolution_op{};
const xnn_status create_status = xnn_create_convolution2d_nhwc_f32(
padding_expanded[Layout::Parameter::height], // input_padding_top
padding_expanded[Layout::Parameter::width], // input_padding_right
padding_expanded[Layout::Parameter::height], // input_padding_bottom
padding_expanded[Layout::Parameter::width], // input_padding_left
weight_nhwc.size(Layout::Filter::height), // kernel_height
weight_nhwc.size(Layout::Filter::width), // kernel_width
stride_expanded[Layout::Parameter::height], // subsampling_height
stride_expanded[Layout::Parameter::width], // subsampling_width
dilation_expanded[Layout::Parameter::height], // dilation_height
dilation_expanded[Layout::Parameter::width], // dilation_width
groups, // groups
weight_nhwc.size(Layout::Filter::input), // group_input_channels
weight_nhwc.size(Layout::Filter::output) / groups, // group_output_channels
weight_nhwc.size(Layout::Filter::input) * groups, // input_pixel_stride
weight_nhwc.size(Layout::Filter::output), // output_pixel_stride
weight_nhwc.data_ptr<float>(), // kernel
(bias && bias->defined()) ? bias->data_ptr<float>() : nullptr, // bias
output_min, // output_min
output_max, // output_max
0u, // flags
&convolution_op); // operator
TORCH_CHECK(
xnn_status_success == create_status,
"xnn_create_convolution2d_nhwc_f32 failed!");
return ContextConv2D{
Operator(convolution_op),
{weight_nhwc.sizes()[0], weight_nhwc.sizes()[1],
weight_nhwc.sizes()[2], weight_nhwc.sizes()[3]},
{padding_expanded[0], padding_expanded[1]},
{stride_expanded[0], stride_expanded[1]},
{dilation_expanded[0], dilation_expanded[1]}
};
}
c10::intrusive_ptr<xnnpack::XNNPackConv2dOpContext> Conv2dPrePack::operator()(
Tensor weight,
c10::optional<Tensor> bias,
std::vector<int64_t> stride,
std::vector<int64_t> padding,
std::vector<int64_t> dilation,
int64_t groups
) {
return xnnpack::XNNPackConv2dOpContext::create_context(
std::move(weight),
std::move(bias),
std::move(padding),
std::move(stride),
std::move(dilation),
groups,
{},
{});
}
Tensor Conv2dPacked::operator()(
const Tensor& input,
const c10::intrusive_ptr<xnnpack::XNNPackConv2dOpContext>& op_context) {
return
xnnpack::internal::convolution2d::run(
(op_context.get())->get_context(), input);
}
} // namespace convolution2d
} // namespace internal
@ -246,8 +264,8 @@ bool use_convolution2d(
stride,
dilation,
groups,
internal::convolution2d::Context::kMin,
internal::convolution2d::Context::kMax) &&
ContextConv2D::kMin,
ContextConv2D::kMax) &&
internal::convolution2d::usable(input);
}
@ -267,50 +285,13 @@ Tensor convolution2d(
stride,
dilation,
groups,
internal::convolution2d::Context::kMin,
internal::convolution2d::Context::kMax);
ContextConv2D::kMin,
ContextConv2D::kMax);
}
} // namespace xnnpack
at::Tensor _conv2d_prepack(
const Tensor& weight,
const Tensor& bias,
const IntArrayRef stride,
const IntArrayRef padding,
const IntArrayRef dilation,
const int64_t groups,
const c10::optional<double> output_min,
const c10::optional<double> output_max) {
return cpp_custom_type_hack::create(
std::make_unique<xnnpack::internal::convolution2d::Context>(
xnnpack::internal::convolution2d::create(
weight,
bias,
padding.vec(),
stride.vec(),
dilation.vec(),
groups,
output_min ? *output_min : xnnpack::internal::convolution2d::Context::kMin,
output_max ? *output_max : xnnpack::internal::convolution2d::Context::kMax)),
weight.options());
}
at::Tensor _conv2d_packed(
const Tensor& packed_weight,
const Tensor& input) {
return xnnpack::internal::convolution2d::run(
cpp_custom_type_hack::cast<xnnpack::internal::convolution2d::Context>(packed_weight),
input);
}
} // namespace native
} // namespace at
namespace caffe2 {
CAFFE_KNOWN_TYPE(at::native::xnnpack::internal::convolution2d::Context);
} // namespace caffe2
#endif /* USE_XNNPACK */

View File

@ -0,0 +1,49 @@
#pragma once
#ifdef USE_XNNPACK
#include <ATen/Tensor.h>
#include <ATen/native/xnnpack/Common.h>
#include <ATen/native/xnnpack/OpContext.h>
namespace at {
namespace native {
namespace xnnpack {
namespace internal {
namespace convolution2d {
class Conv2dPrePack final : public torch::OperatorKernel {
public:
c10::intrusive_ptr<xnnpack::XNNPackConv2dOpContext> operator()(
Tensor weight,
c10::optional<Tensor> bias,
std::vector<int64_t> stride,
std::vector<int64_t> padding,
std::vector<int64_t> dilation,
int64_t groups);
};
class Conv2dPacked final : public torch::OperatorKernel {
public:
Tensor operator()(
const Tensor& input,
const c10::intrusive_ptr<xnnpack::XNNPackConv2dOpContext>& op_context);
};
ContextConv2D create(
const Tensor& weight,
const c10::optional<Tensor>& bias,
const IntArrayRef padding,
const IntArrayRef stride,
const IntArrayRef dilation,
const int64_t groups,
const float output_min,
const float output_max);
} // namespace convolution2d
} // namespace internal
} // namespace xnnpack
} // namespace native
} // namespace at
#endif /* USE_XNNPACK */

View File

@ -8,11 +8,16 @@ namespace native {
namespace xnnpack {
namespace internal {
GuardingAllocator<0u, XNN_EXTRA_BYTES>* get_guarding_allocator() {
static GuardingAllocator<0u, XNN_EXTRA_BYTES> allocator;
return &allocator;
}
Tensor empty_with_tail_padding(
const IntArrayRef size,
const caffe2::TypeMeta dtype,
const c10::MemoryFormat memory_format) {
static GuardingAllocator<0u, XNN_EXTRA_BYTES> allocator;
auto* allocator_ptr = get_guarding_allocator();
const int64_t nelements = prod_intlist(size);
@ -21,8 +26,8 @@ Tensor empty_with_tail_padding(
c10::Storage{
dtype,
nelements,
allocator.allocate(nelements * dtype.itemsize()),
&allocator,
allocator_ptr->allocate(nelements * dtype.itemsize()),
allocator_ptr,
/*resizable=*/true,
},
DispatchKeySet{DispatchKey::CPUTensorId}));
@ -30,6 +35,19 @@ Tensor empty_with_tail_padding(
return tensor.resize_(size, memory_format);
}
Tensor allocate_padded_if_needed(const Tensor& input_contig) {
const auto* allocator = input_contig.storage().allocator();
const auto* guarding_allocator = get_guarding_allocator();
if (allocator == guarding_allocator) {
return input_contig;
}
Tensor padded_input =
empty_with_tail_padding(input_contig.sizes(), input_contig.options().dtype(),
input_contig.suggest_memory_format());
padded_input.copy_(input_contig);
return padded_input;
}
} // namespace internal
} // namespace xnnpack
} // namespace native

View File

@ -9,6 +9,8 @@ namespace native {
namespace xnnpack {
namespace internal {
Tensor allocate_padded_if_needed(const Tensor& input_contig);
// TODO: Remove this function when at::native::empty() is modified to accept a
// custom memory allocator.

View File

@ -1,8 +1,8 @@
#ifdef USE_XNNPACK
#include <ATen/cpp_custom_type_hack.h>
#include <ATen/native/xnnpack/Common.h>
#include <ATen/native/xnnpack/Factory.h>
#include <ATen/native/xnnpack/Linear.h>
namespace at {
namespace native {
@ -10,17 +10,6 @@ namespace xnnpack {
namespace internal {
namespace linear {
struct Context final {
Operator linear_op;
struct Output final {
int64_t channels;
} output;
static constexpr float kMin = -std::numeric_limits<float>::infinity();
static constexpr float kMax = std::numeric_limits<float>::infinity();
};
namespace {
// Supports NHWC and NCHW FP32 linear operators.
@ -33,62 +22,19 @@ bool available(
const float output_max) {
// XNNPACK
return xnnpack::internal::available() &&
// Weight
(2 == weight.ndimension()) &&
(c10::DeviceType::CPU == weight.device().type()) &&
(kFloat == weight.scalar_type()) &&
// Bias
((bias && bias->defined()) ? ((1 == bias->ndimension()) &&
(c10::DeviceType::CPU == bias->device().type()) &&
(kFloat == bias->scalar_type()) &&
(weight.size(Layout::Filter::output)) == bias->size(0))
: true) &&
// Output Min / Max
(output_max > output_min) &&
true;
}
Context create(
const Tensor& weight,
const c10::optional<Tensor>& bias,
const float output_min,
const float output_max) {
const Tensor weight_contig = weight.contiguous();
TORCH_CHECK(
available(
weight_contig,
bias,
output_min,
output_max),
"XNNPACK Linear not available! "
"Reason: The provided (weight, bias, output_min, output_max) parameters are "
"either invalid individually or their combination is not supported by XNNPACK.");
xnn_operator_t linear_op{};
const xnn_status create_status = xnn_create_fully_connected_nc_f32(
weight_contig.size(Layout::Filter::input), // input_channels
weight_contig.size(Layout::Filter::output), // output_channels
weight_contig.size(Layout::Filter::input), // input_pixel_stride
weight_contig.size(Layout::Filter::output), // output_pixel_stride
weight_contig.data_ptr<float>(), // kernel
(bias && bias->defined()) ? bias->data_ptr<float>() : nullptr, // bias
output_min, // output_min
output_max, // output_max
0u, // flags
&linear_op); // operator
TORCH_CHECK(
xnn_status_success == create_status,
"xnn_create_fully_connected_nc_f32 failed!");
return Context{
Operator(linear_op),
{
weight_contig.size(Layout::Filter::output),
}
};
// Weight
(2 == weight.ndimension()) &&
(c10::DeviceType::CPU == weight.device().type()) &&
(kFloat == weight.scalar_type()) &&
// Bias
((bias && bias->defined()) ? ((1 == bias->ndimension()) &&
(c10::DeviceType::CPU == bias->device().type()) &&
(kFloat == bias->scalar_type()) &&
(weight.size(Layout::Filter::output)) == bias->size(0))
: true) &&
// Output Min / Max
(output_max > output_min) &&
true;
}
// TODO: Decouple and improve error handling and messages.
@ -101,30 +47,30 @@ bool usable(const Tensor& input) {
}
Tensor run(
const Context& context,
const ContextLinear& context,
const Tensor& input) {
using namespace internal;
const Tensor& input_contig = input.contiguous();
const Tensor padded_input = allocate_padded_if_needed(input.contiguous());
TORCH_CHECK(
usable(input_contig),
usable(padded_input),
"XNNPACK Linear not usable! "
"Reason: The provided input tensor is either invalid or unsupported by XNNPACK.");
const IntArrayRef input_size = input_contig.sizes();
const IntArrayRef input_size = padded_input.sizes();
std::vector<int64_t> output_size(input_size.cbegin(), input_size.cend());
output_size.back() = context.output.channels;
output_size.back() = context.output_channels;
Tensor output = empty_with_tail_padding(
output_size,
input_contig.options().dtype(),
input_contig.suggest_memory_format());
padded_input.options().dtype(),
padded_input.suggest_memory_format());
const xnn_status setup_status = xnn_setup_fully_connected_nc_f32(
context.linear_op.get(), // operator
Layout::ActivationND::batch(input_contig.sizes()), // Batch,
input_contig.data_ptr<float>(), // input
context.op.get(), // operator
Layout::ActivationND::batch(padded_input.sizes()), // Batch,
padded_input.data_ptr<float>(), // input
output.data_ptr<float>(), // output
nullptr); // threadpool
@ -133,7 +79,7 @@ Tensor run(
"xnn_setup_fully_connected_nc_f32 failed!");
const xnn_status run_status = xnn_run_operator(
context.linear_op.get(), // operator
context.op.get(), // operator
nullptr); // threadpool
TORCH_INTERNAL_ASSERT(
@ -159,6 +105,63 @@ Tensor create_and_run(
}
} // namespace
ContextLinear create(
const Tensor& weight,
const c10::optional<Tensor>& bias,
const float output_min,
const float output_max) {
const Tensor weight_contig = weight.contiguous();
TORCH_CHECK(
available(
weight_contig,
bias,
output_min,
output_max),
"XNNPACK Linear not available! "
"Reason: The provided (weight, bias, output_min, output_max) parameters are "
"either invalid individually or their combination is not supported by XNNPACK.");
xnn_operator_t linear_op{};
const xnn_status create_status = xnn_create_fully_connected_nc_f32(
weight_contig.size(Layout::Filter::input), // input_channels
weight_contig.size(Layout::Filter::output), // output_channels
weight_contig.size(Layout::Filter::input), // input_pixel_stride
weight_contig.size(Layout::Filter::output), // output_pixel_stride
weight_contig.data_ptr<float>(), // kernel
(bias && bias->defined()) ? bias->data_ptr<float>() : nullptr, // bias
output_min, // output_min
output_max, // output_max
0u, // flags
&linear_op); // operator
TORCH_CHECK(
xnn_status_success == create_status,
"xnn_create_fully_connected_nc_f32 failed!");
return ContextLinear(
Operator(linear_op),
weight_contig.size(Layout::Filter::output)
);
}
c10::intrusive_ptr<xnnpack::XNNPackLinearOpContext>
LinearPrePack::operator()(
Tensor weight,
c10::optional<Tensor> bias) {
return xnnpack::XNNPackLinearOpContext::create_context(
std::move(weight), std::move(bias), {}, {});
}
Tensor LinearPacked::operator()(
const Tensor& input,
const c10::intrusive_ptr<xnnpack::XNNPackLinearOpContext>& op_context) {
return
xnnpack::internal::linear::run((op_context.get())->get_context(), input);
}
} // namespace linear
} // namespace internal
@ -169,8 +172,8 @@ bool use_linear(
return internal::linear::available(
weight,
bias,
internal::linear::Context::kMin,
internal::linear::Context::kMax) &&
ContextLinear::kMin,
ContextLinear::kMax) &&
internal::linear::usable(input);
}
@ -182,42 +185,13 @@ Tensor linear(
input,
weight,
bias,
internal::linear::Context::kMin,
internal::linear::Context::kMax);
ContextLinear::kMin,
ContextLinear::kMax);
}
} // namespace xnnpack
Tensor _linear_prepack(
const Tensor& weight,
const Tensor& bias,
const c10::optional<double> output_min,
const c10::optional<double> output_max) {
return cpp_custom_type_hack::create(
std::make_unique<xnnpack::internal::linear::Context>(
xnnpack::internal::linear::create(
weight,
bias,
output_min ? *output_min : xnnpack::internal::linear::Context::kMin,
output_max ? *output_max : xnnpack::internal::linear::Context::kMax)),
weight.options());
}
Tensor _linear_packed(
const Tensor& packed_weight,
const Tensor& input) {
return xnnpack::internal::linear::run(
cpp_custom_type_hack::cast<xnnpack::internal::linear::Context>(packed_weight),
input);
}
} // namespace native
} // namespace at
namespace caffe2 {
CAFFE_KNOWN_TYPE(at::native::xnnpack::internal::linear::Context);
} // namespace caffe2
#endif /* USE_XNNPACK */

View File

@ -0,0 +1,40 @@
#pragma once
#ifdef USE_XNNPACK
#include <ATen/Tensor.h>
#include <ATen/native/xnnpack/Common.h>
#include <ATen/native/xnnpack/OpContext.h>
namespace at {
namespace native {
namespace xnnpack {
namespace internal {
namespace linear {
class LinearPrePack final : public torch::OperatorKernel {
public:
c10::intrusive_ptr<xnnpack::XNNPackLinearOpContext> operator()(
Tensor weight,
c10::optional<Tensor> bias);
};
class LinearPacked final : public torch::OperatorKernel {
public:
Tensor operator()(
const Tensor& input,
const c10::intrusive_ptr<xnnpack::XNNPackLinearOpContext>& op_context);
};
ContextLinear create(
const Tensor& weight,
const c10::optional<Tensor>& bias,
const float output_min,
const float output_max);
} // namespace linear
} // namespace internal
} // namespace xnnpack
} // namespace native
} // namespace at
#endif /* USE_XNNPACK */

View File

@ -0,0 +1,64 @@
#ifdef USE_XNNPACK
#include <ATen/native/xnnpack/Convolution.h>
#include <ATen/native/xnnpack/Linear.h>
#include <ATen/native/xnnpack/OpContext.h>
namespace at {
namespace native {
namespace xnnpack {
c10::intrusive_ptr<XNNPackLinearOpContext>
XNNPackLinearOpContext::create_context(
at::Tensor&& weight,
c10::optional<at::Tensor>&& bias,
const c10::optional<double> output_min,
const c10::optional<double> output_max) {
auto linear_op_context =
c10::make_intrusive<XNNPackLinearOpContext>(
std::move(weight),
std::move(bias),
xnnpack::internal::linear::create(
weight,
bias,
output_min ? *output_min : xnnpack::ContextLinear::kMin,
output_max ? *output_max : xnnpack::ContextLinear::kMax)
);
return linear_op_context;
}
c10::intrusive_ptr<XNNPackConv2dOpContext>
XNNPackConv2dOpContext::create_context(at::Tensor&& weight,
c10::optional<at::Tensor>&& bias,
std::vector<int64_t>&& padding,
std::vector<int64_t>&& stride,
std::vector<int64_t>&& dilation,
int64_t groups,
const c10::optional<double> output_min,
const c10::optional<double> output_max) {
auto op_context =
xnnpack::internal::convolution2d::create(
weight,
bias,
padding,
stride,
dilation,
groups,
output_min ? *output_min : xnnpack::ContextConv2D::kMin,
output_max ? *output_max : xnnpack::ContextConv2D::kMax);
auto conv2d_op_context =
c10::make_intrusive<XNNPackConv2dOpContext>(
std::move(weight),
std::move(bias),
std::move(padding),
std::move(stride),
std::move(dilation),
groups,
std::move(op_context));
return conv2d_op_context;
}
} // xnnpack
} // native
} // at
#endif /* USE_XNNPACK */

View File

@ -0,0 +1,96 @@
#pragma once
#ifdef USE_XNNPACK
#include <ATen/core/ivalue.h>
#include <ATen/native/xnnpack/Common.h>
#include <ATen/Tensor.h>
namespace at {
namespace native {
namespace xnnpack {
using SerializationTypeLinearPrePack = std::tuple<Tensor, c10::optional<Tensor>>;
using SerializationTypeConv2dPrePack =
std::tuple<Tensor, c10::optional<Tensor>,
std::vector<int64_t>, std::vector<int64_t>, std::vector<int64_t>, int64_t>;
class XNNPackLinearOpContext : public torch::jit::CustomClassHolder {
private:
Tensor orig_weight_;
c10::optional<Tensor> orig_bias_;
ContextLinear op_context_;
public:
XNNPackLinearOpContext(Tensor&& weight,
c10::optional<Tensor>&& bias,
ContextLinear&& op_context) :
orig_weight_(std::move(weight)),
orig_bias_(std::move(bias)),
op_context_(std::move(op_context)) {}
const ContextLinear& get_context() const {
return op_context_;
}
SerializationTypeLinearPrePack unpack() {
return std::make_tuple(orig_weight_, orig_bias_);
}
static c10::intrusive_ptr<XNNPackLinearOpContext> create_context(Tensor&& weight,
c10::optional<Tensor>&& bias,
const c10::optional<double> output_min,
const c10::optional<double> output_max);
};
class XNNPackConv2dOpContext : public torch::jit::CustomClassHolder {
private:
Tensor orig_weight_;
c10::optional<Tensor> orig_bias_;
std::vector<int64_t> padding_;
std::vector<int64_t> stride_;
std::vector<int64_t> dilation_;
int64_t groups_;
ContextConv2D op_context_;
public:
XNNPackConv2dOpContext(Tensor&& weight,
c10::optional<Tensor>&& bias,
std::vector<int64_t>&& padding,
std::vector<int64_t>&& stride,
std::vector<int64_t>&& dilation,
int64_t groups,
ContextConv2D&& op_context
) :
orig_weight_(std::move(weight)),
orig_bias_(std::move(bias)),
padding_(std::move(padding)),
stride_(std::move(stride)),
dilation_(std::move(dilation)),
groups_(groups),
op_context_(std::move(op_context)) {}
const ContextConv2D& get_context() const {
return op_context_;
}
SerializationTypeConv2dPrePack unpack() {
return std::make_tuple(orig_weight_, orig_bias_, padding_,
stride_, dilation_, groups_);
}
static c10::intrusive_ptr<XNNPackConv2dOpContext> create_context(Tensor&& weight,
c10::optional<Tensor>&& bias,
std::vector<int64_t>&& padding,
std::vector<int64_t>&& stride,
std::vector<int64_t>&& dilation,
int64_t groups,
const c10::optional<double> output_min,
const c10::optional<double> output_max);
};
} // xnnpack
} // native
} // at
#endif /* USE_XNNPACK */

View File

@ -0,0 +1,101 @@
#ifdef USE_XNNPACK
#include <ATen/core/op_registration/op_registration.h>
#include <ATen/native/xnnpack/Convolution.h>
#include <ATen/native/xnnpack/Linear.h>
#include <ATen/native/xnnpack/OpContext.h>
#include <ATen/Tensor.h>
#include <torch/custom_class.h>
namespace at {
namespace native {
namespace xnnpack {
namespace {
torch::jit::class_<XNNPackLinearOpContext> register_xnnpack_linear_op_context_class() {
static auto register_linear_op_context_class =
torch::jit::class_<XNNPackLinearOpContext>("XNNPackLinearOpContext")
.def_pickle(
[](const c10::intrusive_ptr<XNNPackLinearOpContext>& op_context)
-> SerializationTypeLinearPrePack { // __getstate__
return op_context->unpack();
},
[](SerializationTypeLinearPrePack state)
-> c10::intrusive_ptr<
XNNPackLinearOpContext> { // __setstate__
return XNNPackLinearOpContext::create_context(
std::move(std::get<0>(state)),
std::move(std::get<1>(state)),
{},
{}
);
}
);
return register_linear_op_context_class;
}
torch::jit::class_<XNNPackConv2dOpContext> register_xnnpack_conv2d_op_context_class() {
static auto register_conv2d_op_context_class =
torch::jit::class_<XNNPackConv2dOpContext>("XNNPackConv2dOpContext")
.def_pickle(
[](const c10::intrusive_ptr<XNNPackConv2dOpContext>& op_context)
-> SerializationTypeConv2dPrePack { // __getstate__
return op_context->unpack();
},
[](SerializationTypeConv2dPrePack state)
-> c10::intrusive_ptr<
XNNPackConv2dOpContext> { // __setstate__
return XNNPackConv2dOpContext::create_context(
std::move(std::get<0>(state)),
std::move(std::get<1>(state)),
std::move(std::get<2>(state)),
std::move(std::get<3>(state)),
std::move(std::get<4>(state)),
std::move(std::get<5>(state)),
{},
{}
);
}
);
return register_conv2d_op_context_class;
}
static auto xnnpack_linear_op_context_class = register_xnnpack_linear_op_context_class();
static auto xnnpack_conv2d_op_context_class = register_xnnpack_conv2d_op_context_class();
// Op registration
static auto registry =
// Registering under the _xnnpack namespace for now. As we add more backends
// requiring similar functionality, we can refactor and use a better namespace.
torch::RegisterOperators()
.op("_xnnpack::linear_prepack(Tensor W, Tensor? B=None) -> __torch__.torch.classes.XNNPackLinearOpContext",
torch::RegisterOperators::options().kernel<internal::linear::LinearPrePack>(
DispatchKey::CPUTensorId))
.op("_xnnpack::linear_packed(Tensor X, __torch__.torch.classes.XNNPackLinearOpContext W_prepack) -> Tensor Y",
torch::RegisterOperators::options().kernel<internal::linear::LinearPacked>(
DispatchKey::CPUTensorId))
.op("_xnnpack::conv2d_prepack(Tensor W, Tensor? B, int[2] stride, "
"int[2] padding, int[2] dilation, int groups) "
"-> __torch__.torch.classes.XNNPackConv2dOpContext",
torch::RegisterOperators::options().kernel<internal::convolution2d::Conv2dPrePack>(
DispatchKey::CPUTensorId))
.op("_xnnpack::conv2d_packed(Tensor X, "
"__torch__.torch.classes.XNNPackConv2dOpContext W_prepack) -> Tensor Y",
torch::RegisterOperators::options().kernel<internal::convolution2d::Conv2dPacked>(
DispatchKey::CPUTensorId));
} // namespace
} // xnnpack
} // native
} // at
#endif /* USE_XNNPACK */
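
The def_pickle bindings above are what make the packed contexts serializable
inside scripted modules: __getstate__ stores the unpacked (original) weights
and parameters, and __setstate__ re-runs prepacking on load. A minimal
round-trip sketch, mirroring the test code below and assuming a USE_XNNPACK
build:

import io
import torch

class LinearPrePacked(torch.nn.Module):
    def __init__(self, weight, bias=None):
        super(LinearPrePacked, self).__init__()
        # Custom-class attribute; __getstate__ stores (weight, bias) via
        # unpack(), and __setstate__ calls create_context again on load.
        self.packed = torch.ops._xnnpack.linear_prepack(weight, bias)

    def forward(self, x):
        return torch.ops._xnnpack.linear_packed(x, self.packed)

m = torch.jit.script(LinearPrePacked(torch.rand(8, 4), torch.rand(8)))
buffer = io.BytesIO()
torch.jit.save(m, buffer)
buffer.seek(0)
restored = torch.jit.load(buffer)  # weights are re-packed on load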

View File

@ -58,38 +58,6 @@ Tensor linear(
} // namespace xnnpack
at::Tensor _conv2d_prepack(
const Tensor&,
const Tensor&,
const IntArrayRef,
const IntArrayRef,
const IntArrayRef,
const int64_t,
const c10::optional<double>,
const c10::optional<double>) {
TORCH_CHECK(false, xnnpack::internal::kError);
}
at::Tensor _conv2d_packed(
const Tensor&,
const Tensor&) {
TORCH_CHECK(false, xnnpack::internal::kError);
}
Tensor _linear_prepack(
const Tensor&,
const Tensor&,
const c10::optional<double>,
const c10::optional<double>) {
TORCH_CHECK(false, xnnpack::internal::kError);
}
Tensor _linear_packed(
const Tensor&,
const Tensor&) {
TORCH_CHECK(false, xnnpack::internal::kError);
}
} // namespace native
} // namespace at

View File

@ -0,0 +1,364 @@
from __future__ import division
import unittest
import torch
import torch.backends.xnnpack
from torch.nn import functional as F
import torch.testing._internal.hypothesis_utils as hu
from torch.testing._internal.common_utils import TestCase, run_tests
from hypothesis import given, assume
from hypothesis import strategies as st
import io
@unittest.skipUnless(torch.backends.xnnpack.enabled,
" XNNPACK must be enabled for these tests."
" Please build with USE_XNNPACK=1.")
class TestXNNPACKOps(TestCase):
@given(batch_size=st.integers(0, 3),
data_shape=hu.array_shapes(1, 3, 2, 64),
weight_output_dim=st.integers(2, 64),
use_bias=st.booleans())
def test_linear(self, batch_size, data_shape, weight_output_dim, use_bias):
data_shape = [batch_size] + list(data_shape)
input_data = torch.rand(data_shape)
weight = torch.rand((weight_output_dim, data_shape[-1]))
if use_bias:
bias = torch.rand((weight_output_dim))
else:
bias = None
ref_result = F.linear(input_data, weight, bias)
packed_weight_bias = torch.ops._xnnpack.linear_prepack(weight, bias)
output_linear_xnnpack = torch.ops._xnnpack.linear_packed(input_data, packed_weight_bias)
torch.testing.assert_allclose(ref_result, output_linear_xnnpack, rtol=1e-2, atol=1e-3)
@given(batch_size=st.integers(0, 3),
input_channels_per_group=st.integers(1, 32),
height=st.integers(5, 64),
width=st.integers(5, 64),
output_channels_per_group=st.integers(1, 32),
groups=st.integers(1, 16),
kernel_h=st.integers(1, 7),
kernel_w=st.integers(1, 7),
stride_h=st.integers(1, 2),
stride_w=st.integers(1, 2),
pad_h=st.integers(0, 2),
pad_w=st.integers(0, 2),
dilation=st.integers(1, 2),
use_bias=st.booleans())
def test_conv2d(self,
batch_size,
input_channels_per_group,
height,
width,
output_channels_per_group,
groups,
kernel_h,
kernel_w,
stride_h,
stride_w,
pad_h,
pad_w,
dilation,
use_bias):
input_channels = input_channels_per_group * groups
output_channels = output_channels_per_group * groups
kernels = (kernel_h, kernel_w)
strides = (stride_h, stride_w)
paddings = (pad_h, pad_w)
dilations = (dilation, dilation)
assume(height + 2 * paddings[0] >=
dilations[0] * (kernels[0] - 1) + 1)
assume(width + 2 * paddings[1] >=
dilations[1] * (kernels[1] - 1) + 1)
input_data = torch.rand((batch_size, input_channels, height, width))
weight = torch.rand((output_channels, input_channels_per_group, kernel_h, kernel_w))
bias = None
if use_bias:
bias = torch.rand((output_channels))
ref_result = F.conv2d(input_data, weight, bias,
strides, paddings, dilations, groups)
packed_weight_bias = torch.ops._xnnpack.conv2d_prepack(weight, bias,
strides, paddings, dilations, groups)
xnnpack_result = torch.ops._xnnpack.conv2d_packed(input_data, packed_weight_bias)
torch.testing.assert_allclose(ref_result, xnnpack_result, rtol=1e-2, atol=1e-3)
@unittest.skipUnless(torch.backends.xnnpack.enabled,
" XNNPACK must be enabled for these tests."
" Please build with USE_XNNPACK=1.")
class TestXNNPACKSerDes(TestCase):
@given(batch_size=st.integers(0, 3),
data_shape=hu.array_shapes(1, 3, 2, 64),
weight_output_dim=st.integers(2, 64),
use_bias=st.booleans())
def test_linear(self, batch_size, data_shape, weight_output_dim, use_bias):
class Linear(torch.nn.Module):
def __init__(self, weight, bias=None):
super(Linear, self).__init__()
self.weight = weight
self.bias = bias
def forward(self, x):
return F.linear(x, self.weight, self.bias)
class LinearPrePacked(torch.nn.Module):
def __init__(self, weight, bias=None):
super(LinearPrePacked, self).__init__()
self.packed_weight_bias = torch.ops._xnnpack.linear_prepack(weight, bias)
def forward(self, x):
return torch.ops._xnnpack.linear_packed(x, self.packed_weight_bias)
data_shape = [batch_size] + list(data_shape)
weight = torch.rand((weight_output_dim, data_shape[-1]))
if use_bias:
bias = torch.rand((weight_output_dim))
else:
bias = None
scripted_linear = torch.jit.script(Linear(weight, bias))
scripted_linear_prepacked = torch.jit.script(LinearPrePacked(weight, bias))
input_data = torch.rand(data_shape)
ref_result = scripted_linear(input_data)
output_linear_xnnpack = scripted_linear_prepacked(input_data)
torch.testing.assert_allclose(ref_result, output_linear_xnnpack, rtol=1e-2, atol=1e-3)
# Serialize the modules and then deserialize
input_data = torch.rand(data_shape)
buffer = io.BytesIO()
torch.jit.save(scripted_linear, buffer)
buffer.seek(0)
deserialized_linear = torch.jit.load(buffer)
buffer = io.BytesIO()
torch.jit.save(scripted_linear_prepacked, buffer)
buffer.seek(0)
deserialized_linear_prepacked = torch.jit.load(buffer)
ref_result = deserialized_linear(input_data)
output_linear_xnnpack = deserialized_linear_prepacked(input_data)
torch.testing.assert_allclose(ref_result, output_linear_xnnpack, rtol=1e-2, atol=1e-3)
@given(batch_size=st.integers(0, 3),
input_channels_per_group=st.integers(1, 32),
height=st.integers(5, 64),
width=st.integers(5, 64),
output_channels_per_group=st.integers(1, 32),
groups=st.integers(1, 16),
kernel_h=st.integers(1, 7),
kernel_w=st.integers(1, 7),
stride_h=st.integers(1, 2),
stride_w=st.integers(1, 2),
pad_h=st.integers(0, 2),
pad_w=st.integers(0, 2),
dilation=st.integers(1, 2),
use_bias=st.booleans())
def test_conv2d(self,
batch_size,
input_channels_per_group,
height,
width,
output_channels_per_group,
groups,
kernel_h,
kernel_w,
stride_h,
stride_w,
pad_h,
pad_w,
dilation,
use_bias):
class Conv2D(torch.nn.Module):
def __init__(self, weight, bias, strides, paddings, dilations, groups):
super(Conv2D, self).__init__()
self.weight = weight
self.bias = bias
self.strides = strides
self.paddings = paddings
self.dilations = dilations
self.groups = groups
def forward(self, x):
return F.conv2d(x, self.weight, self.bias,
self.strides, self.paddings, self.dilations, self.groups)
class Conv2DPrePacked(torch.nn.Module):
def __init__(self, weight, bias, strides, paddings, dilations, groups):
super(Conv2DPrePacked, self).__init__()
self.packed_weight_bias = torch.ops._xnnpack.conv2d_prepack(weight, bias,
strides, paddings, dilations, groups)
def forward(self, x):
return torch.ops._xnnpack.conv2d_packed(x, self.packed_weight_bias)
input_channels = input_channels_per_group * groups
output_channels = output_channels_per_group * groups
kernels = (kernel_h, kernel_w)
strides = (stride_h, stride_w)
paddings = (pad_h, pad_w)
dilations = (dilation, dilation)
assume(height + 2 * paddings[0] >=
dilations[0] * (kernels[0] - 1) + 1)
assume(width + 2 * paddings[1] >=
dilations[1] * (kernels[1] - 1) + 1)
input_data = torch.rand((batch_size, input_channels, height, width))
weight = torch.rand((output_channels, input_channels_per_group, kernel_h, kernel_w))
bias = None
if use_bias:
bias = torch.rand((output_channels))
scripted_conv2d = torch.jit.script(Conv2D(weight, bias,
strides, paddings, dilations, groups))
scripted_conv2d_prepacked = torch.jit.script(Conv2DPrePacked(
weight, bias, strides, paddings, dilations, groups))
ref_result = scripted_conv2d(input_data)
xnnpack_result = scripted_conv2d_prepacked(input_data)
torch.testing.assert_allclose(ref_result, xnnpack_result, rtol=1e-2, atol=1e-3)
# Serialize the modules and then deserialize
input_data = torch.rand((batch_size, input_channels, height, width))
buffer = io.BytesIO()
torch.jit.save(scripted_conv2d, buffer)
buffer.seek(0)
deserialized_conv2d = torch.jit.load(buffer)
buffer = io.BytesIO()
torch.jit.save(scripted_conv2d_prepacked, buffer)
buffer.seek(0)
deserialized_conv2d_prepacked = torch.jit.load(buffer)
ref_result = deserialized_conv2d(input_data)
xnnpack_result = deserialized_conv2d_prepacked(input_data)
torch.testing.assert_allclose(ref_result, xnnpack_result, rtol=1e-2, atol=1e-3)
@given(batch_size=st.integers(0, 3),
input_channels_per_group=st.integers(1, 32),
height=st.integers(5, 64),
width=st.integers(5, 64),
output_channels_per_group=st.integers(1, 32),
groups=st.integers(1, 16),
kernel_h=st.integers(1, 7),
kernel_w=st.integers(1, 7),
stride_h=st.integers(1, 2),
stride_w=st.integers(1, 2),
pad_h=st.integers(0, 2),
pad_w=st.integers(0, 2),
dilation=st.integers(1, 2),
linear_weight_output_dim=st.integers(2, 64),
use_bias=st.booleans())
def test_combined_model(self,
batch_size,
input_channels_per_group,
height,
width,
output_channels_per_group,
groups,
kernel_h,
kernel_w,
stride_h,
stride_w,
pad_h,
pad_w,
dilation,
linear_weight_output_dim,
use_bias):
class M(torch.nn.Module):
def __init__(self, conv_weight, conv_bias, linear_weight, linear_bias,
strides, paddings, dilations, groups):
super(M, self).__init__()
self.conv_weight = conv_weight
self.conv_bias = conv_bias
self.linear_weight = linear_weight
self.linear_bias = linear_bias
self.strides = strides
self.paddings = paddings
self.dilations = dilations
self.groups = groups
def forward(self, x):
o = F.conv2d(x, self.conv_weight, self.conv_bias,
self.strides, self.paddings, self.dilations, self.groups)
o = o.permute([0, 2, 3, 1])
o = F.linear(o, self.linear_weight, self.linear_bias)
return F.relu(o)
class MPrePacked(torch.nn.Module):
def __init__(self, conv_weight, conv_bias, linear_weight, linear_bias,
strides, paddings, dilations, groups):
super(MPrePacked, self).__init__()
self.conv2d_packed_weight_bias = \
torch.ops._xnnpack.conv2d_prepack(conv_weight, conv_bias,
strides, paddings, dilations, groups)
self.linear_packed_weight_bias = \
torch.ops._xnnpack.linear_prepack(linear_weight, linear_bias)
def forward(self, x):
o = torch.ops._xnnpack.conv2d_packed(x, self.conv2d_packed_weight_bias)
o = o.permute([0, 2, 3, 1])
o = torch.ops._xnnpack.linear_packed(o, self.linear_packed_weight_bias)
return F.relu(o)
input_channels = input_channels_per_group * groups
output_channels = output_channels_per_group * groups
kernels = (kernel_h, kernel_w)
strides = (stride_h, stride_w)
paddings = (pad_h, pad_w)
dilations = (dilation, dilation)
assume(height + 2 * paddings[0] >=
dilations[0] * (kernels[0] - 1) + 1)
assume(width + 2 * paddings[1] >=
dilations[1] * (kernels[1] - 1) + 1)
input_data = torch.rand((batch_size, input_channels, height, width))
conv_weight = torch.rand((output_channels, input_channels_per_group, kernel_h, kernel_w))
conv_bias = None
if use_bias:
conv_bias = torch.rand((output_channels))
# This is done just to find the output shape of the result
# so that the shape of weight for the following linear layer
# can be determined.
result = F.conv2d(input_data, conv_weight, conv_bias,
strides, paddings, dilations, groups)
linear_input_shape = result.shape[1]
input_data = input_data.contiguous(memory_format=torch.channels_last)
linear_weight = torch.rand((linear_weight_output_dim, linear_input_shape))
linear_bias = None
if use_bias:
linear_bias = torch.rand((linear_weight_output_dim))
scripted_m = torch.jit.script(M(conv_weight, conv_bias, linear_weight,
linear_bias, strides, paddings, dilations, groups))
scripted_m_prepacked = torch.jit.script(
MPrePacked(
conv_weight,
conv_bias,
linear_weight,
linear_bias,
strides,
paddings,
dilations,
groups))
ref_result = scripted_m(input_data)
xnnpack_result = scripted_m_prepacked(input_data)
torch.testing.assert_allclose(ref_result, xnnpack_result, rtol=1e-2, atol=1e-3)
# Serialize the modules and then deserialize
input_data = torch.rand((batch_size, input_channels, height, width))
input_data = input_data.contiguous(memory_format=torch.channels_last)
buffer = io.BytesIO()
torch.jit.save(scripted_m, buffer)
buffer.seek(0)
deserialized_m = torch.jit.load(buffer)
buffer = io.BytesIO()
torch.jit.save(scripted_m_prepacked, buffer)
buffer.seek(0)
deserialized_m_prepacked = torch.jit.load(buffer)
ref_result = deserialized_m(input_data)
xnnpack_result = deserialized_m_prepacked(input_data)
torch.testing.assert_allclose(ref_result, xnnpack_result, rtol=1e-2, atol=1e-3)
if __name__ == "__main__":
run_tests()

View File

@ -0,0 +1,25 @@
from __future__ import absolute_import, division, print_function, unicode_literals
import sys
import torch
import types
class _XNNPACKEnabled(object):
def __get__(self, obj, objtype):
return torch._C._is_xnnpack_enabled()
def __set__(self, obj, val):
raise RuntimeError("Assignment not supported")
class XNNPACKEngine(types.ModuleType):
def __init__(self, m, name):
super(XNNPACKEngine, self).__init__(name)
self.m = m
def __getattr__(self, attr):
return self.m.__getattribute__(attr)
enabled = _XNNPACKEnabled()
# This is the sys.modules replacement trick, see
# https://stackoverflow.com/questions/2447353/getattr-on-a-module/7668273#7668273
sys.modules[__name__] = XNNPACKEngine(sys.modules[__name__], __name__)
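
A small usage sketch (assuming a USE_XNNPACK build): the read-only enabled
attribute can gate calls to the _xnnpack ops from Python.

import torch
import torch.backends.xnnpack

# enabled is a read-only property backed by torch._C._is_xnnpack_enabled(),
# which reports whether this build of PyTorch was compiled with XNNPACK.
if torch.backends.xnnpack.enabled:
    packed = torch.ops._xnnpack.linear_prepack(torch.rand(4, 4), None)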

View File

@ -516,6 +516,12 @@ PyObject *THPModule_supportedQEngines(PyObject */* unused */)
return list.release();
}
PyObject *THPModule_isEnabledXNNPACK(PyObject * /* unused */)
{
if (at::globalContext().isXNNPACKAvailable()) Py_RETURN_TRUE;
else Py_RETURN_FALSE;
}
//NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays, modernize-avoid-c-arrays)
static PyMethodDef TorchMethods[] = {
{"_initExtension", (PyCFunction)THPModule_initExtension, METH_O, nullptr},
@ -556,6 +562,7 @@ static PyMethodDef TorchMethods[] = {
{"_get_qengine", (PyCFunction)THPModule_qEngine, METH_NOARGS, nullptr},
{"_set_qengine", (PyCFunction)THPModule_setQEngine, METH_O, nullptr},
{"_supported_qengines", (PyCFunction)THPModule_supportedQEngines, METH_NOARGS, nullptr},
{"_is_xnnpack_enabled", (PyCFunction)THPModule_isEnabledXNNPACK, METH_NOARGS, nullptr},
{nullptr, nullptr, 0, nullptr}
};