mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[8/n] Update XNNPACK Version Part 8 Everything Remaining to get it to work (#115587)
> **__Note:__** The XNNPACK upgrade is too large to land as one change — on the order of **40k** files and **10M** lines of code — so we break the library update into multiple parts. All parts [1 - 6/n] must be landed together for the upgrade to work. ***This also means that if there is a revert, please revert the entire stack.*** This change contains everything remaining that the new XNNPACK version needs in order to work. Differential Revision: [D52044420](https://our.internmc.facebook.com/intern/diff/D52044420/) Pull Request resolved: https://github.com/pytorch/pytorch/pull/115587 Approved by: https://github.com/digantdesai
Committed by: PyTorch MergeBot · parent e918461377 · commit a8dc9d8e35
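The functional change repeated across every hunk below is XNNPACK's move from a one-shot `xnn_setup_*` call (shapes, data pointers, and threadpool all at once) to a two-step `xnn_reshape_*` + `xnn_setup_*` sequence: reshape binds shapes and the threadpool (and, for some operators, reports workspace requirements), while setup binds only data pointers. As a reading aid, here is a minimal sketch of the new call order for the quantized add operator, built from the signatures visible in the hunks below; the helper name, buffers, and error handling are illustrative, not part of the commit.

```cpp
#include <xnnpack.h>
#include <pthreadpool.h>
#include <vector>

// Sketch: the new reshape -> setup -> run lifecycle used throughout this
// commit. Assumes `add_op` was created earlier (e.g. via
// xnn_create_add_nd_qs8); `a`, `b`, `out` are caller-owned int8 buffers.
xnn_status run_qs8_add(
    xnn_operator_t add_op,
    const std::vector<size_t>& a_shape,
    const std::vector<size_t>& b_shape,
    const int8_t* a, const int8_t* b, int8_t* out,
    pthreadpool_t pool) {
  // Phase 1 (new in this version): reshape binds shapes and the threadpool.
  xnn_status status = xnn_reshape_add_nd_qs8(
      add_op,
      a_shape.size(), a_shape.data(),
      b_shape.size(), b_shape.data(),
      pool);
  if (status != xnn_status_success) return status;

  // Phase 2: setup now binds only the data pointers.
  status = xnn_setup_add_nd_qs8(add_op, a, b, out);
  if (status != xnn_status_success) return status;

  // Phase 3: run executes with the previously bound shapes and pointers.
  return xnn_run_operator(add_op, pool);
}
```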
@@ -259,24 +259,32 @@ enum xnn_status xnnp_create_add_nd(
 }
 
 C10_ALWAYS_INLINE
+enum xnn_status xnnp_reshape_add_nd(
+    xnn_operator_t op,
+    const std::vector<size_t>& a_shape,
+    const std::vector<size_t>& b_shape,
+    pthreadpool_t pt_pool) {
+  return xnn_reshape_add_nd_qs8(
+      op,             /* xnn_operator_t add_op */
+      a_shape.size(), /* size_t num_input1_dims */
+      a_shape.data(), /* const size_t* input1_shape */
+      b_shape.size(), /* size_t num_input2_dims */
+      b_shape.data(), /* const size_t* input2_shape */
+      pt_pool);       /* pthreadpool_t threadpool */
+}
+
+C10_ALWAYS_INLINE
 enum xnn_status xnnp_setup_add_nd(
     xnn_operator_t op,
-    const std::vector<size_t>& a_shape,
-    const std::vector<size_t>& b_shape,
     const int8_t* da,
     const int8_t* db,
     int8_t* dc,
     pthreadpool_t pt_pool) {
   return xnn_setup_add_nd_qs8(
       op,             /* xnn_operator_t add_op */
-      a_shape.size(), /* size_t num_input1_dims */
-      a_shape.data(), /* const size_t* input1_shape */
-      b_shape.size(), /* size_t num_input2_dims */
-      b_shape.data(), /* const size_t* input2_shape */
       da,             /* const int8_t* input1 */
       db,             /* const int8_t* input2 */
-      dc,             /* int8_t* output */
-      pt_pool);       /* pthreadpool_t threadpool */
+      dc);            /* int8_t* output */
 }
 
 template <typename scalar_t, bool ReLUFused = false>
@@ -348,11 +356,20 @@ Tensor xnnp_add(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
   const auto qa_shape = xnnp_utils::get_mem_format_aware_shape(qa_contig);
   const auto qb_shape = xnnp_utils::get_mem_format_aware_shape(qb_contig);
 
-  // Setup the operator
-  status = xnnp_setup_add_nd(
+  // Reshape the operator
+  status = xnnp_reshape_add_nd(
       xnnp_add_operator.get(),
       qa_shape,
       qb_shape,
       caffe2::pthreadpool_());
+
+  TORCH_CHECK(
+      status == xnn_status_success,
+      func_name, ": xnn reshape operator failed(", status,")!");
+
+  // Setup the operator
+  status = xnnp_setup_add_nd(
+      xnnp_add_operator.get(),
+      reinterpret_cast<const underlying_t*>(qa_contig.data_ptr<scalar_t>()),
+      reinterpret_cast<const underlying_t*>(qb_contig.data_ptr<scalar_t>()),
+      reinterpret_cast<underlying_t*>(qy.data_ptr<scalar_t>()),
@@ -100,6 +100,7 @@ enum xnn_status xnnp_create_convolution2d_nhwc(
         op_max,  /* int8_t output_max */
         flags,   /* uint32_t flags */
         nullptr, /* xnn_caches_t caches */
+        nullptr, /* xnn_weights_cache_t weights_cache */
         op);     /* xnn_operator_t* deconvolution_op_out */
 
 }
@@ -132,9 +133,10 @@ enum xnn_status xnnp_create_convolution2d_nhwc(
         op_max,  /* int8_t output_max */
         flags,   /* uint32_t flags */
         nullptr, /* xnn_caches_t caches */
+        nullptr, /* xnn_weights_cache_t weights_cache */
         op);     /* xnn_operator_t* convolution_op_out */
   } else { /* per_channel */
-    return xnn_create_convolution2d_nhwc_qc8(
+    return xnn_create_convolution2d_nhwc_qs8_qc8w(
         pad_top,    /* uint32_t input_padding_top */
         pad_right,  /* uint32_t input_padding_right */
         pad_bottom, /* uint32_t input_padding_bottom */
@@ -161,21 +163,20 @@ enum xnn_status xnnp_create_convolution2d_nhwc(
         op_max,  /* int8_t output_max */
         flags,   /* uint32_t flags */
         nullptr, /* xnn_caches_t caches */
+        nullptr, /* xnn_weights_cache_t weights_cache */
         op);     /* xnn_operator_t* convolution_op_out */
   }
 }
 
 /*
- * Series of setup wrapper functions to call xnn_setup_[de]conv* functions.
+ * Series of reshape wrapper functions to call xnn_reshape_[de]conv* functions.
  */
 C10_ALWAYS_INLINE
-enum xnn_status xnnp_setup_convolution2d_nhwc(
+enum xnn_status xnnp_reshape_convolution2d_nhwc(
     xnn_operator_t op,
     size_t batch,
     size_t in_h,
     size_t in_w,
-    const int8_t* inp,
-    int8_t* outp,
     pthreadpool_t pt_pool,
     bool per_channel = false,
     bool transpose = false,
@@ -183,36 +184,78 @@ enum xnn_status xnnp_setup_convolution2d_nhwc(
     uint32_t adj_w = 0) {
   if(transpose) {
     TORCH_CHECK(!per_channel, "XNNPACK Q[SC]8 does not have a per channel deconvolution!");
-    return xnn_setup_deconvolution2d_nhwc_qs8(
+    return xnn_reshape_deconvolution2d_nhwc_qs8(
         op,       /* xnn_operator_t deconvolution_op */
         batch,    /* size_t batch_size */
         in_h,     /* size_t input_height */
         in_w,     /* size_t input_width */
         adj_h,    /* uint32_t adjustment_height */
         adj_w,    /* uint32_t adjustment_width */
-        inp,      /* const int8_t* input */
-        outp,     /* int8_t* output */
+        nullptr,  /* size_t* output_height_out */
+        nullptr,  /* size_t* output_width_out */
        pt_pool); /* pthreadpool_t threadpool */
   }
 
+  size_t workspace_size = SIZE_MAX;
+  size_t workspace_alignment = SIZE_MAX;
+
   if (!per_channel) {
-    return xnn_setup_convolution2d_nhwc_qs8(
+    return xnn_reshape_convolution2d_nhwc_qs8(
         op,       /* xnn_operator_t convolution_op */
         batch,    /* size_t batch_size */
         in_h,     /* size_t input_height */
         in_w,     /* size_t input_width */
-        inp,      /* const int8_t* input */
-        outp,     /* int8_t* output */
+        &workspace_size,      /* size_t* workspace_size */
+        &workspace_alignment, /* size_t* workspace_alignment */
+        nullptr,  /* size_t* output_height_out */
+        nullptr,  /* size_t* output_width_out */
         pt_pool); /* pthreadpool_t threadpool */
   } else { /* per_channel */
-    return xnn_setup_convolution2d_nhwc_qc8(
+    return xnn_reshape_convolution2d_nhwc_qs8_qc8w(
         op,       /* xnn_operator_t convolution_op */
         batch,    /* size_t batch_size */
         in_h,     /* size_t input_height */
         in_w,     /* size_t input_width */
-        inp,      /* const int8_t* input */
-        outp,     /* int8_t* output */
+        &workspace_size,      /* size_t* workspace_size */
+        &workspace_alignment, /* size_t* workspace_alignment */
+        nullptr,  /* size_t* output_height_out */
+        nullptr,  /* size_t* output_width_out */
         pt_pool); /* pthreadpool_t threadpool */
   }
 }
 
+/*
+ * Series of setup wrapper functions to call xnn_setup_[de]conv* functions.
+ */
+C10_ALWAYS_INLINE
+enum xnn_status xnnp_setup_convolution2d_nhwc(
+    xnn_operator_t op,
+    const int8_t* inp,
+    int8_t* outp,
+    bool per_channel = false,
+    bool transpose = false) {
+  if(transpose) {
+    TORCH_CHECK(!per_channel, "XNNPACK Q[SC]8 does not have a per channel deconvolution!");
+
+    return xnn_setup_deconvolution2d_nhwc_qs8(
+        op,    /* xnn_operator_t deconvolution_op */
+        inp,   /* const int8_t* input */
+        outp); /* int8_t* output */
+  }
+
+  if (!per_channel) {
+    return xnn_setup_convolution2d_nhwc_qs8(
+        op,      /* xnn_operator_t deconvolution_op */
+        nullptr, /* void workspace */
+        inp,     /* const int8_t* input */
+        outp);   /* int8_t* output */
+  } else { /* per_channel */
+    return xnn_setup_convolution2d_nhwc_qs8_qc8w(
+        op,      /* xnn_operator_t deconvolution_op */
+        nullptr, /* void workspace */
+        inp,     /* const int8_t* input */
+        outp);   /* int8_t* output */
+  }
+}
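Note how the reshape wrappers above query `workspace_size`/`workspace_alignment` while the setup wrappers pass `nullptr` for the workspace. Below is a hedged sketch, not part of the commit, of how a caller could honor that handshake instead of passing `nullptr`; the aligned-buffer trick mirrors the `global_average_pool` change later in this commit, and all names are illustrative.

```cpp
#include <xnnpack.h>
#include <cstdint>
#include <vector>

// Sketch: allocate and align the scratch space that reshape reports,
// then hand it to setup (PyTorch passes nullptr at this point instead).
xnn_status reshape_and_setup_qs8_conv(
    xnn_operator_t conv_op,
    size_t batch, size_t in_h, size_t in_w,
    const int8_t* input, int8_t* output,
    pthreadpool_t pool) {
  size_t workspace_size = SIZE_MAX;
  size_t workspace_alignment = SIZE_MAX;

  // Reshape reports how much scratch memory the chosen kernel wants.
  xnn_status status = xnn_reshape_convolution2d_nhwc_qs8(
      conv_op, batch, in_h, in_w,
      &workspace_size, &workspace_alignment,
      /*output_height_out=*/nullptr, /*output_width_out=*/nullptr,
      pool);
  if (status != xnn_status_success) return status;

  // Over-allocate, then align the pointer manually
  // (assumes reshape reported a nonzero alignment).
  std::vector<char> workspace(workspace_size + workspace_alignment);
  uintptr_t raw = reinterpret_cast<uintptr_t>(workspace.data());
  void* aligned = reinterpret_cast<void*>(
      raw + (workspace_alignment - raw % workspace_alignment) % workspace_alignment);

  return xnn_setup_convolution2d_nhwc_qs8(conv_op, aligned, input, output);
}
```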
@@ -258,22 +301,31 @@ enum xnn_status xnnp_create_fully_connected_nc(
       output_max, /* int8_t output_max */
       flags,      /* uint32_t flags */
       nullptr,    /* xnn_caches_t caches */
+      nullptr,    /* xnn_weights_cache_t */
       fully_connected_op_out); /* xnn_operator_t* fully_connected_op_out */
 }
 
+C10_ALWAYS_INLINE
+enum xnn_status xnnp_reshape_fully_connected_nc(
+    xnn_operator_t fully_connected_op,
+    size_t batch_size,
+    pthreadpool_t threadpool) {
+  return xnn_reshape_fully_connected_nc_qs8(
+      fully_connected_op, /* xnn_operator_t fully_connected_op */
+      batch_size,         /* size_t batch_size */
+      threadpool);        /* pthreadpool_t threadpool */
+}
+
 C10_ALWAYS_INLINE
 enum xnn_status xnnp_setup_fully_connected_nc(
     xnn_operator_t fully_connected_op,
-    size_t batch_size,
     const int8_t* input,
-    int8_t* output,
-    pthreadpool_t threadpool) {
+    int8_t* output) {
   return xnn_setup_fully_connected_nc_qs8(
       fully_connected_op, /* xnn_operator_t fully_connected_op */
-      batch_size,         /* size_t batch_size */
       input,              /* const int8_t* input */
-      output,             /* int8_t* output */
-      threadpool);        /* pthreadpool_t threadpool */
+      output              /* int8_t* output */
+  );
 }
 
 } // namespace xnnp_utils
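The fully connected path follows the same split: reshape binds the flattened batch size and the threadpool, setup binds only the pointers. A minimal usage sketch of the two qs8 calls wrapped above (helper name, buffers, and error handling illustrative):

```cpp
#include <xnnpack.h>

// Sketch: driving the qs8 fully-connected operator with the new API.
xnn_status run_qs8_linear(
    xnn_operator_t linear_op,
    size_t rows,  // flattened batch size
    const int8_t* input,
    int8_t* output,
    pthreadpool_t pool) {
  // Bind the batch size (and threadpool) first...
  xnn_status status = xnn_reshape_fully_connected_nc_qs8(linear_op, rows, pool);
  if (status != xnn_status_success) return status;
  // ...then bind the data pointers...
  status = xnn_setup_fully_connected_nc_qs8(linear_op, input, output);
  if (status != xnn_status_success) return status;
  // ...and execute.
  return xnn_run_operator(linear_op, pool);
}
```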
@@ -770,14 +770,12 @@ at::Tensor PackedConvWeightsQnnp<kSpatialDim>::apply_impl_xnnp(
       output_zero_point,
       c10::MemoryFormat::ChannelsLast);
 
-  // Setup the operator
-  status = at::native::xnnp_utils::xnnp_setup_convolution2d_nhwc(
+  // Reshape the operator
+  status = at::native::xnnp_utils::xnnp_reshape_convolution2d_nhwc(
       xnnp_convolution_op.get(),
       N,
       H,
       W,
-      reinterpret_cast<const underlying_t*>(act_nhwc.template data_ptr<scalar_t>()),
-      reinterpret_cast<underlying_t*>(output.template data_ptr<scalar_t>()),
       caffe2::pthreadpool_(),
       per_channel(),
       transpose(),
@@ -791,6 +789,21 @@ at::Tensor PackedConvWeightsQnnp<kSpatialDim>::apply_impl_xnnp(
       status,
       ")");
 
+  // Setup the operator
+  status = at::native::xnnp_utils::xnnp_setup_convolution2d_nhwc(
+      xnnp_convolution_op.get(),
+      reinterpret_cast<const underlying_t*>(act_nhwc.template data_ptr<scalar_t>()),
+      reinterpret_cast<underlying_t*>(output.template data_ptr<scalar_t>()),
+      per_channel(),
+      transpose());
+
+  TORCH_CHECK(
+      status == xnn_status_success,
+      func_name,
+      ": xnn setup operator failed(",
+      status,
+      ")");
+
   // Run the operator
   status = xnn_run_operator(
       xnnp_convolution_op.get(), /* xnn_operator_t op */
@@ -565,14 +565,19 @@ at::Tensor PackedLinearWeightsQnnp::apply_impl_xnnp(
     rows_input *= input_contig.size(i);
   }
 
+  // Reshape the operator
+  status = at::native::xnnp_utils::xnnp_reshape_fully_connected_nc(
+      xnnp_linear_op.get(),
+      rows_input, /* batch_size */
+      caffe2::pthreadpool_());
+
   // Setup the operator
   status = at::native::xnnp_utils::xnnp_setup_fully_connected_nc(
       xnnp_linear_op.get(),
-      rows_input, /* batch_size */
       reinterpret_cast<const underlying_t*>(
           input_contig.template data_ptr<scalar_t>()),
-      reinterpret_cast<underlying_t*>(output.template data_ptr<scalar_t>()),
-      caffe2::pthreadpool_());
+      reinterpret_cast<underlying_t*>(output.template data_ptr<scalar_t>())
+  );
 
   TORCH_CHECK(
       status == xnn_status_success,
@@ -139,17 +139,29 @@ Tensor _mul_out_xnnpack(
   const auto self_shape = xnnp_utils::get_mem_format_aware_shape(self_contig);
   const auto other_shape = xnnp_utils::get_mem_format_aware_shape(other_contig);
 
-  // set up operator
-  status = xnn_setup_multiply_nd_qs8(
+  // reshape operator
+  status = xnn_reshape_multiply_nd_qs8(
       xnnp_qmul_operator.get(),
       self_shape.size(),
       self_shape.data(),
       other_shape.size(),
       other_shape.data(),
       caffe2::pthreadpool_());
+
+  TORCH_CHECK(
+      status == xnn_status_success,
+      func_name,
+      ": xnn reshape operator failed(",
+      status,
+      ")!");
+
+  // set up operator
+  status = xnn_setup_multiply_nd_qs8(
+      xnnp_qmul_operator.get(),
       reinterpret_cast<const underlying_t*>(self_contig.data_ptr<scalar_t>()),
       reinterpret_cast<const underlying_t*>(other_contig.data_ptr<scalar_t>()),
-      reinterpret_cast<underlying_t*>(out.data_ptr<scalar_t>()),
-      caffe2::pthreadpool_());
+      reinterpret_cast<underlying_t*>(out.data_ptr<scalar_t>())
+  );
 
   TORCH_CHECK(
       status == xnn_status_success,
@@ -34,13 +34,20 @@ static Tensor& hardswish_impl(Tensor& input, Tensor& output) {
 
   Operator hardswish_scoped_op(hardswish_op);
 
-  const xnn_status setup_status = xnn_setup_hardswish_nc_f32(
+  const xnn_status reshape_status = xnn_reshape_hardswish_nc_f32(
       hardswish_op,
       input.numel(), // Batch
-      input.data_ptr<float>(),
-      output.data_ptr<float>(),
       caffe2::pthreadpool_()); // threadpool
 
   TORCH_CHECK(
+      xnn_status_success == reshape_status,
+      "xnn_reshape_hardswish_nc_f32 failed!");
+
+  const xnn_status setup_status = xnn_setup_hardswish_nc_f32(
+      hardswish_op,
+      input.data_ptr<float>(),
+      output.data_ptr<float>());
+
+  TORCH_CHECK(
       xnn_status_success == setup_status,
       "xnn_setup_hardswish_nc_f32 failed!");
@@ -7,18 +7,13 @@
 
 namespace at::native::xnnpack {
 
-bool use_global_average_pool(
-    const Tensor& input) {
-  return xnnpack::available() &&
-      (1 <= input.ndimension()) &&
-      (input.device().is_cpu()) &&
-      (kFloat == input.scalar_type()) &&
-      !input.requires_grad() &&
-       true;
+bool use_global_average_pool(const Tensor& input) {
+  return xnnpack::available() && (1 <= input.ndimension()) &&
+      (input.device().is_cpu()) && (kFloat == input.scalar_type()) &&
+      !input.requires_grad() && true;
 }
 
-Tensor global_average_pool(
-    const Tensor& input) {
+Tensor global_average_pool(const Tensor& input) {
   using namespace internal;
 
   const Tensor input_padded_contig_nhwc =
@@ -27,10 +22,10 @@ Tensor global_average_pool(
 
   Tensor output = mobile::empty_with_tail_padding(
       {
-        input_padded_contig_nhwc.size(Layout::Activation4D::batch),
-        input_padded_contig_nhwc.size(Layout::Activation4D::channels),
-        1,
-        1,
+          input_padded_contig_nhwc.size(Layout::Activation4D::batch),
+          input_padded_contig_nhwc.size(Layout::Activation4D::channels),
+          1,
+          1,
       },
       input_padded_contig_nhwc.options().dtype(),
       MemoryFormat::ChannelsLast,
@@ -38,42 +33,61 @@ Tensor global_average_pool(
 
   xnn_operator_t global_average_pooling_op{};
   const xnn_status create_status = xnn_create_global_average_pooling_nwc_f32(
-    input_padded_contig_nhwc.size(Layout::Activation4D::channels), // channels
-    input_padded_contig_nhwc.size(
-        Layout::Activation4D::channels), // input stride
-    input_padded_contig_nhwc.size(
-        Layout::Activation4D::channels), // output stride
-    -std::numeric_limits<float>::infinity(),
-    std::numeric_limits<float>::infinity(),
-    0 /* flags */,
-    &global_average_pooling_op);
+      input_padded_contig_nhwc.size(Layout::Activation4D::channels), // channels
+      input_padded_contig_nhwc.size(
+          Layout::Activation4D::channels), // input stride
+      input_padded_contig_nhwc.size(
+          Layout::Activation4D::channels), // output stride
+      -std::numeric_limits<float>::infinity(),
+      std::numeric_limits<float>::infinity(),
+      0 /* flags */,
+      &global_average_pooling_op);
 
   TORCH_CHECK(
-    xnn_status_success == create_status,
-    "xnn_create_global_average_pooling_nwc_f32 failed!");
+      xnn_status_success == create_status,
+      "xnn_create_global_average_pooling_nwc_f32 failed!");
 
   Operator global_avg_pool_scoped_op(global_average_pooling_op);
 
-  const xnn_status setup_status = xnn_setup_global_average_pooling_nwc_f32(
+  size_t workspace_size = 0;
+  size_t workspace_alignment = 0;
+
+  const xnn_status reshape_status = xnn_reshape_global_average_pooling_nwc_f32(
       global_average_pooling_op,
       input_padded_contig_nhwc.size(Layout::Activation4D::batch), // batch_size
       input_padded_contig_nhwc.size(Layout::Activation4D::width) *
           input_padded_contig_nhwc.size(Layout::Activation4D::height), // width
-      input_padded_contig_nhwc.data_ptr<float>(),
-      output.data_ptr<float>(),
+      &workspace_size, // workspace_size
+      &workspace_alignment, // workspace_alignment
       caffe2::pthreadpool_());
 
   TORCH_CHECK(
-      xnn_status_success == setup_status,
-      "xnn_setup_global_average_pooling_nwc_f32 failed!");
+      xnn_status_success == reshape_status,
+      "xnn_reshape_global_average_pooling_nwc_f32 failed!");
 
-  const xnn_status run_status = xnn_run_operator(
-      global_average_pooling_op,
-      caffe2::pthreadpool_());
+  // Create Workspace pointer, which we will align and pad with 16 bytes
+  size_t xnnpack_buffer_padding = 16;
+  std::vector<char> workspace_vector(workspace_size + workspace_alignment + xnnpack_buffer_padding);
+  void* maybe_aligned_workspace = workspace_vector.data();
+  void* aligned_workspace =
+      (void*)((intptr_t)maybe_aligned_workspace + workspace_alignment - (intptr_t)maybe_aligned_workspace % workspace_alignment);
+
+  const xnn_status setup_status = xnn_setup_global_average_pooling_nwc_f32(
+      global_average_pooling_op,
+      aligned_workspace,
+      input_padded_contig_nhwc.data_ptr<float>(),
+      output.data_ptr<float>());
 
   TORCH_CHECK(
-      xnn_status_success == run_status,
-      "xnn_setup_global_average_pooling_nwc_f32 failed!");
+      xnn_status_success == setup_status,
+      "xnn_setup_global_average_pooling_nwc_f32 failed!");
+
+  const xnn_status run_status =
+      xnn_run_operator(global_average_pooling_op, caffe2::pthreadpool_());
+
+  TORCH_CHECK(
+      xnn_status_success == run_status,
+      "xnn_setup_global_average_pooling_nwc_f32 failed!");
 
   return output.to(input.suggest_memory_format());
 }
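The aligned-workspace computation above advances the raw pointer by `workspace_alignment - ptr % workspace_alignment`, i.e. by a full `workspace_alignment` when the pointer is already aligned, which the extra `workspace_alignment + 16` bytes of over-allocation absorb. A tiny self-check of that arithmetic with assumed values (not part of the commit):

```cpp
#include <cassert>
#include <cstdint>

int main() {
  // Assumed values for illustration.
  const uintptr_t base = 0x1010;       // start of workspace_vector
  const uintptr_t alignment = 64;      // reported workspace_alignment
  const uintptr_t aligned = base + alignment - base % alignment;
  assert(aligned % alignment == 0);    // 0x1040: 64-byte aligned
  assert(aligned - base <= alignment); // consumes at most `alignment` bytes,
                                       // covered by the over-allocation
  return 0;
}
```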
@@ -79,13 +79,20 @@ Tensor channel_shuffle(
       input_padded_contig_nhwc.size(Layout::Activation4D::height) *
       input_padded_contig_nhwc.size(Layout::Activation4D::width);
 
-  const xnn_status setup_status = xnn_setup_channel_shuffle_nc_x32(
+  const xnn_status reshape_status = xnn_reshape_channel_shuffle_nc_x32(
       channel_shuffle_op, // operator
       batch_size, // batch_size
-      input_padded_contig_nhwc.data_ptr<float>(), // input
-      output_padded_contig_nhwc.data_ptr<float>(), // output
       caffe2::pthreadpool_()); // threadpool
 
   TORCH_CHECK(
+      xnn_status_success == reshape_status,
+      "xnn_reshape_channel_shuffle_nc_x32 failed!");
+
+  const xnn_status setup_status = xnn_setup_channel_shuffle_nc_x32(
+      channel_shuffle_op, // operator
+      input_padded_contig_nhwc.data_ptr<float>(), // input
+      output_padded_contig_nhwc.data_ptr<float>()); // output
+
+  TORCH_CHECK(
       xnn_status_success == setup_status,
       "xnn_setup_channel_shuffle_nc_x32 failed!");
@@ -236,6 +236,7 @@ ContextConv2D create(
         output_max,       // output_max
         0u,               // flags
         nullptr,          // xnn_caches_t
+        nullptr,          // xnn_weights_cache_t
         &convolution_op); // operator
   } else {
     for (const auto i : c10::irange(4)) {
@@ -265,6 +266,7 @@ ContextConv2D create(
         output_max,       // output_max
         0u,               // flags
         nullptr,          // xnn_caches_t
+        nullptr,          // xnn_weights_cache_t
        &convolution_op); // operator
   }
 
@@ -338,26 +340,41 @@ Tensor run(
   */
 
   if (context.transposed_) {
-    setup_status = xnn_setup_deconvolution2d_nhwc_f32(
-        context.op.get(), // operator
+    setup_status = xnn_reshape_deconvolution2d_nhwc_f32(
+        context.op.get(),
         padded_input_nhwc.size(Layout::Activation4D::batch), // batch_size
         padded_input_nhwc.size(Layout::Activation4D::height), // input_height
         padded_input_nhwc.size(Layout::Activation4D::width), // input_width
         context.output_padding_[0], // adjustment_height
         context.output_padding_[1], // adjustment_width
-        padded_input_nhwc.data_ptr<float>(), // input
-        output.data_ptr<float>(), // output
+        nullptr, // output_height_out
+        nullptr, // output_width_out
         caffe2::pthreadpool_()); // threadpool
 
+    setup_status = xnn_setup_deconvolution2d_nhwc_f32(
+        context.op.get(), // operator
+        padded_input_nhwc.data_ptr<float>(), // input
+        output.data_ptr<float>()); // output
   } else {
-    setup_status = xnn_setup_convolution2d_nhwc_f32(
-        context.op.get(), // operator
-        padded_input_nhwc.size(Layout::Activation4D::batch), // batch_size
-        padded_input_nhwc.size(Layout::Activation4D::height), // input_height
-        padded_input_nhwc.size(Layout::Activation4D::width), // input_width
-        padded_input_nhwc.data_ptr<float>(), // input
-        output.data_ptr<float>(), // output
-        caffe2::pthreadpool_()); // threadpool
+    size_t workspace_size = SIZE_MAX;
+    size_t workspace_alignment = SIZE_MAX;
+
+    setup_status = xnn_reshape_convolution2d_nhwc_f32(
+        context.op.get(),
+        padded_input_nhwc.size(Layout::Activation4D::batch), // batch_size
+        padded_input_nhwc.size(Layout::Activation4D::height), // input_height
+        padded_input_nhwc.size(Layout::Activation4D::width), // input_width
+        &workspace_size, // workspace_size
+        &workspace_alignment, // workspace_alignment
+        nullptr, // output_height_out
+        nullptr, // output_width_out
+        caffe2::pthreadpool_());
+
+    setup_status = xnn_setup_convolution2d_nhwc_f32(
+        context.op.get(), // operator
+        nullptr, // workspace
+        padded_input_nhwc.data_ptr<float>(), // input
+        output.data_ptr<float>()); // output
   }
 
   TORCH_CHECK(
@@ -95,6 +95,7 @@ ContextLinear create(
       output_max,  // output_max
       0u,          // flags
       nullptr,     // xnn_caches_t
+      nullptr,     // xnn_weights_cache_t
       &linear_op); // operator
 
   TORCH_CHECK(
@@ -136,13 +137,20 @@ Tensor run(
       padded_input.suggest_memory_format(),
       padded_input.opt_names());
 
-  const xnn_status setup_status = xnn_setup_fully_connected_nc_f32(
+  const xnn_status reshape_status = xnn_reshape_fully_connected_nc_f32(
       context.op.get(),                                  // operator
       Layout::ActivationND::batch(padded_input.sizes()), // Batch,
-      padded_input.data_ptr<float>(),                    // input
-      output.data_ptr<float>(),                          // output
       caffe2::pthreadpool_());                           // threadpool
 
   TORCH_CHECK(
+      xnn_status_success == reshape_status,
+      "xnn_reshape_fully_connected_nc_f32 failed!");
+
+  const xnn_status setup_status = xnn_setup_fully_connected_nc_f32(
+      context.op.get(),               // operator
+      padded_input.data_ptr<float>(), // input
+      output.data_ptr<float>());      // output
+
+  TORCH_CHECK(
       xnn_status_success == setup_status,
       "xnn_setup_fully_connected_nc_f32 failed!");
@@ -214,15 +214,24 @@ Tensor max_pool2d(
       xnn_status_success == create_status,
       "xnn_create_max_pooling2d_nhwc_f32 failed!");
 
-  const xnn_status setup_status = xnn_setup_max_pooling2d_nhwc_f32(
+  const xnn_status reshape_status = xnn_reshape_max_pooling2d_nhwc_f32(
       max_pool_op,                                                  // operator
       input_padded_contig_nhwc.size(Layout::Activation4D::batch),  // batch_size
       input_padded_contig_nhwc.size(Layout::Activation4D::height), // input_height
       input_padded_contig_nhwc.size(Layout::Activation4D::width),  // input_width
-      input_padded_contig_nhwc.data_ptr<float>(),                  // input
-      output_padded_contig_nhwc.data_ptr<float>(),                 // output
+      nullptr,                 // output_height_out
+      nullptr,                 // output_width_out
      caffe2::pthreadpool_()); // threadpool
 
   TORCH_CHECK(
+      xnn_status_success == reshape_status,
+      "xnn_reshape_max_pooling2d_nhwc_f32 failed!");
+
+  const xnn_status setup_status = xnn_setup_max_pooling2d_nhwc_f32(
+      max_pool_op,                                  // operator
+      input_padded_contig_nhwc.data_ptr<float>(),   // input
+      output_padded_contig_nhwc.data_ptr<float>()); // output
+
+  TORCH_CHECK(
       xnn_status_success == setup_status,
       "xnn_setup_max_pooling2d_nhwc_f32 failed!");
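Unlike the quantized paths, a reshape call like the one above can also report the computed output spatial size through its out-parameters; this code passes `nullptr` because PyTorch sizes the output tensor beforehand. A small sketch of querying those dimensions instead (assumed values, not part of the commit):

```cpp
#include <xnnpack.h>

// Sketch: let XNNPACK report the pooled output height/width instead of
// discarding them with nullptr out-parameters.
xnn_status reshape_max_pool_query_dims(
    xnn_operator_t max_pool_op,
    size_t batch, size_t in_h, size_t in_w,
    pthreadpool_t pool) {
  size_t out_h = 0;
  size_t out_w = 0;
  return xnn_reshape_max_pooling2d_nhwc_f32(
      max_pool_op, batch, in_h, in_w,
      &out_h,  // output_height_out
      &out_w,  // output_width_out
      pool);
}
```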
@@ -619,7 +619,13 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK)
   # Disable ARM BF16 and FP16 vector for now; unused and causes build failures because
   # these new ISA features may not be supported on older compilers
   set(XNNPACK_ENABLE_ARM_BF16 OFF CACHE BOOL "")
   set(XNNPACK_ENABLE_ARM_FP16_VECTOR OFF CACHE BOOL "")
 
+  # Disable AVXVNNI for now, older clang versions seem not to support it
+  # (clang 12 is where avx-vnni support is added)
+  set(XNNPACK_ENABLE_AVXVNNI OFF CACHE BOOL "")
+
+  # Disable I8MM For CI since clang 9 does not support neon i8mm.
+  set(XNNPACK_ENABLE_ARM_I8MM OFF CACHE BOOL "")
 
   # Setting this global PIC flag for all XNNPACK targets.
   # This is needed for Object libraries within XNNPACK which must
third_party/BUCK.oss (vendored, 1 line changed)

@@ -127,6 +127,7 @@ cxx_library(
         "cpuinfo/wrappers/linux/multiline.c",
         "cpuinfo/wrappers/linux/processors.c",
         "cpuinfo/wrappers/linux/smallfile.c",
+        "cpuinfo/wrappers/log.c",
         "cpuinfo/wrappers/mach/topology.c",
         "cpuinfo/wrappers/x86/cache/descriptor.c",
         "cpuinfo/wrappers/x86/cache/deterministic.c",
third_party/XNNPACK (vendored): submodule updated 51a987591a...d9cce341f8
third_party/cpuinfo (vendored): submodule updated 6481e8bef0...d6860c477c

third_party/generate-cpuinfo-wrappers.py (vendored, 1 line changed)

@@ -9,6 +9,7 @@ CPUINFO_SOURCES = {
         "init.c",
         "api.c",
         "cache.c",
+        "log.c",
     ],
     "defined(__linux__)": [
         "linux/multiline.c",
third_party/generate-xnnpack-wrappers.py (vendored, 135 lines changed)

@@ -8,16 +8,22 @@ import logging
 
 BANNER = "Auto-generated by generate-wrappers.py script. Do not modify"
 WRAPPER_SRC_NAMES = {
-    "PROD_SCALAR_PORTABLE_MICROKERNEL_SRCS": None,
-    "PROD_SCALAR_AARCH32_MICROKERNEL_SRCS" : "defined(__arm__)",
+    "PROD_SCALAR_MICROKERNEL_SRCS": None,
+    "PROD_FMA_MICROKERNEL_SRCS": "defined(__riscv) || defined(__riscv__)",
     "PROD_ARMSIMD32_MICROKERNEL_SRCS": "defined(__arm__)",
     "PROD_FP16ARITH_MICROKERNEL_SRCS": "defined(__arm__)",
     "PROD_NEON_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
     "PROD_NEONFP16_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
-    "PROD_NEON_AARCH64_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
     "PROD_NEONFMA_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
-    "PROD_AARCH64_NEON_MICROKERNEL_SRCS": "defined(__aarch64__)",
+    "PROD_NEON_AARCH64_MICROKERNEL_SRCS": "defined(__aarch64__)",
     "PROD_NEONV8_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
-    "PROD_AARCH64_NEONFP16ARITH_MICROKERNEL_SRCS": "defined(__aarch64__)",
     "PROD_NEONFP16ARITH_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
+    "PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS": "defined(__aarch64__)",
     "PROD_NEONDOT_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
+    "PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS": "defined(__aarch64__)",
+    "PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
+    "PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS": "defined(__aarch64__)",
+    "PROD_NEONI8MM_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
     "PROD_SSE_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
     "PROD_SSE2_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
     "PROD_SSSE3_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
@@ -30,42 +36,13 @@ WRAPPER_SRC_NAMES = {
     "PROD_AVX512F_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
     "PROD_AVX512SKX_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
     "PROD_AVX512VBMI_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    "PROD_AVX512VNNI_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    "PROD_RVV_MICROKERNEL_SRCS": "defined(__riscv) || defined(__riscv__)",
+    "PROD_AVXVNNI_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
     "AARCH32_ASM_MICROKERNEL_SRCS": "defined(__arm__)",
     "AARCH64_ASM_MICROKERNEL_SRCS": "defined(__aarch64__)",
 
-    # add additoonal:
-    "PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
-    "ALL_ARMSIMD32_MICROKERNEL_SRCS": "defined(__arm__)",
-    "ALL_AVX_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    "ALL_AVX2_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    "ALL_AVX512F_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-
-    'ALL_AVX512SKX_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    'ALL_AVX512VBMI_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    'ALL_F16C_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    'ALL_FMA3_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    'ALL_FP16ARITH_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
-    'ALL_NEON_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
-    'ALL_NEON_AARCH64_MICROKERNEL_SRCS': "defined(__aarch64__)",
-    'ALL_NEONBF16_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
-    'ALL_NEONDOT_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
-    'ALL_NEONFMA_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
-    'ALL_NEONFMA_AARCH64_MICROKERNEL_SRCS': "defined(__aarch64__)",
-    'ALL_NEONFP16_MICROKERNEL_SRCS':"defined(__arm__) || defined(__aarch64__)",
-    'ALL_NEONFP16ARITH_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
-    'ALL_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS': "defined(__aarch64__)",
-    'ALL_NEONV8_MICROKERNEL_SRCS': "defined(__aarch64__)",
-    'ALL_SCALAR_MICROKERNEL_SRCS': "defined(__arm__)",
-    'ALL_SSE_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    'ALL_SSE2_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    'ALL_SSE41_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    'ALL_SSSE3_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    'ALL_XOP_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    'AARCH32_ASM_MICROKERNEL_SRCS': "defined(__arm__)",
-    "PROD_FP16ARITH_MICROKERNEL_SRCS": "defined(__aarch64__)",
-    "PROD_NEONFP16ARITH_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
-    "PROD_SCALAR_MICROKERNEL_SRCS": "defined(__arm__)",
 
     # add non-prod microkernel sources here:
 }
 
 SRC_NAMES = set([
@@ -73,12 +50,24 @@ SRC_NAMES = set([
     "SUBGRAPH_SRCS",
     "LOGGING_SRCS",
     "XNNPACK_SRCS",
-    "HOT_SRCS",
     "TABLE_SRCS",
     "JIT_SRCS",
-    "JIT_AARCH32_SRCS",
-    "JIT_AARCH64_SRCS",
-    "PROD_SCALAR_PORTABLE_MICROKERNEL_SRCS",
+    "PROD_SCALAR_MICROKERNEL_SRCS",
+    "PROD_FMA_MICROKERNEL_SRCS",
+    "PROD_ARMSIMD32_MICROKERNEL_SRCS",
+    "PROD_FP16ARITH_MICROKERNEL_SRCS",
+    "PROD_NEON_MICROKERNEL_SRCS",
+    "PROD_NEONFP16_MICROKERNEL_SRCS",
+    "PROD_NEONFMA_MICROKERNEL_SRCS",
+    "PROD_NEON_AARCH64_MICROKERNEL_SRCS",
+    "PROD_NEONV8_MICROKERNEL_SRCS",
+    "PROD_NEONFP16ARITH_MICROKERNEL_SRCS",
+    "PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS",
+    "PROD_NEONDOT_MICROKERNEL_SRCS",
+    "PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS",
+    "PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS",
+    "PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS",
+    "PROD_NEONI8MM_MICROKERNEL_SRCS",
     "PROD_SSE_MICROKERNEL_SRCS",
     "PROD_SSE2_MICROKERNEL_SRCS",
     "PROD_SSSE3_MICROKERNEL_SRCS",
@@ -90,59 +79,14 @@ SRC_NAMES = set([
     "PROD_AVX2_MICROKERNEL_SRCS",
     "PROD_AVX512F_MICROKERNEL_SRCS",
     "PROD_AVX512SKX_MICROKERNEL_SRCS",
-    "PROD_SCALAR_MICROKERNEL_SRCS",
-    "PROD_SCALAR_AARCH32_MICROKERNEL_SRCS",
-    "PROD_SCALAR_RISCV_MICROKERNEL_SRCS",
-    "PROD_ARMSIMD32_MICROKERNEL_SRCS",
-    "PROD_FP16ARITH_MICROKERNEL_SRCS",
-    "PROD_NEON_MICROKERNEL_SRCS",
-    "PROD_NEONFP16_MICROKERNEL_SRCS",
-    "PROD_NEONFMA_MICROKERNEL_SRCS",
-    "PROD_NEON_AARCH64_MICROKERNEL_SRCS",
-    "PROD_NEONV8_MICROKERNEL_SRCS",
-    "PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS",
-    "PROD_NEONDOT_MICROKERNEL_SRCS",
-    "PROD_SSE2_MICROKERNEL_SRCS",
-    "PROD_SSSE3_MICROKERNEL_SRCS",
-    "PROD_SSE41_MICROKERNEL_SRCS",
-    "PROD_AVX_MICROKERNEL_SRCS",
-    "PROD_F16C_MICROKERNEL_SRCS",
     "PROD_AVX512VBMI_MICROKERNEL_SRCS",
-    "PROD_NEONFP16ARITH_MICROKERNEL_SRCS",
+    "PROD_AVX512VNNI_MICROKERNEL_SRCS",
+    "PROD_RVV_MICROKERNEL_SRCS",
+    "PROD_AVXVNNI_MICROKERNEL_SRCS",
     "AARCH32_ASM_MICROKERNEL_SRCS",
     "AARCH64_ASM_MICROKERNEL_SRCS",
 
-    # new adding libs:
-    'ALL_ARMSIMD32_MICROKERNEL_SRCS',
-    'ALL_AVX_MICROKERNEL_SRCS',
-    'ALL_AVX2_MICROKERNEL_SRCS',
-    'ALL_AVX512F_MICROKERNEL_SRCS',
-    'ALL_AVX512SKX_MICROKERNEL_SRCS',
-    'ALL_AVX512VBMI_MICROKERNEL_SRCS',
-    'ALL_F16C_MICROKERNEL_SRCS',
-    'ALL_FMA3_MICROKERNEL_SRCS',
-    'ALL_FP16ARITH_MICROKERNEL_SRCS',
-    'ALL_HEXAGON_MICROKERNEL_SRCS',
-    'ALL_NEON_MICROKERNEL_SRCS',
-    'ALL_NEON_AARCH64_MICROKERNEL_SRCS',
-    'ALL_NEONBF16_MICROKERNEL_SRCS',
-    'ALL_NEONBF16_AARCH64_MICROKERNEL_SRCS',
-    'ALL_NEONDOT_MICROKERNEL_SRCS',
-    'ALL_NEONFMA_MICROKERNEL_SRCS',
-    'ALL_NEONFMA_AARCH64_MICROKERNEL_SRCS',
-    'ALL_NEONFP16_MICROKERNEL_SRCS',
-    'ALL_NEONFP16ARITH_MICROKERNEL_SRCS',
-    'ALL_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS',
-    'ALL_NEONV8_MICROKERNEL_SRCS',
-    'ALL_SCALAR_MICROKERNEL_SRCS',
-    'ALL_SSE_MICROKERNEL_SRCS',
-    'ALL_SSE2_MICROKERNEL_SRCS',
-    'ALL_SSE41_MICROKERNEL_SRCS',
-    'ALL_SSSE3_MICROKERNEL_SRCS',
-    'ALL_WASM_MICROKERNEL_SRCS',
-    'ALL_WASMRELAXEDSIMD_MICROKERNEL_SRCS',
-    'ALL_WASMSIMD_MICROKERNEL_SRCS',
-    'ALL_XOP_MICROKERNEL_SRCS',
-    'AARCH32_ASM_MICROKERNEL_SRCS',
-    'AARCH64_ASM_MICROKERNEL_SRCS',
     # add non-prod microkernel sources here:
 ])
 
 def handle_singleline_parse(line):
@@ -150,11 +94,10 @@ def handle_singleline_parse(line):
     end_index = line.find(")")
     line = line[start_index+1:end_index]
     key_val = line.split(" ")
-    return key_val[0], key_val[1][4:]
+    return key_val[0], list(map(lambda x: x[4:], key_val[1:]))
 
 def update_sources(xnnpack_path, cmakefile = "XNNPACK/CMakeLists.txt"):
     sources = collections.defaultdict(list)
-    count = 0
     with open(os.path.join(xnnpack_path, cmakefile)) as cmake:
         lines = cmake.readlines()
         i = 0
@@ -163,7 +106,7 @@ def update_sources(xnnpack_path, cmakefile = "XNNPACK/CMakeLists.txt"):
 
         if lines[i].startswith("SET") and "src/" in lines[i]:
             name, val = handle_singleline_parse(line)
-            sources[name].append(val)
+            sources[name].extend(val)
             i+=1
             continue
 
third_party/xnnpack.buck.bzl (vendored, 892 lines changed): file diff suppressed because it is too large
third_party/xnnpack_src_defs.bzl (vendored, 8003 lines changed): file diff suppressed because it is too large
third_party/xnnpack_wrapper_defs.bzl (vendored, 6104 lines changed): file diff suppressed because it is too large