Revert "[8/n] Update XNNPACK Version Part 8 Everything Remaining to get it to work (#115587)"

This reverts commit a8dc9d8e353ddcf7db0247349a3acd0dd37fcc6f.

Reverted https://github.com/pytorch/pytorch/pull/115587 on behalf of https://github.com/facebook-github-bot due to Diff reverted internally ([comment](https://github.com/pytorch/pytorch/pull/115587#issuecomment-1852835898))
commit c3ed9f65a0
parent ac4f6beb00
Author: PyTorch MergeBot
Date:   2023-12-12 21:28:09 +00:00
20 changed files with 13589 additions and 1914 deletions
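The substance of the revert: PR #115587 had moved PyTorch's XNNPACK call sites to the newer reshape-then-setup operator API (and the weights-cache / qs8_qc8w entry points); this revert restores the older single-step setup API. A minimal sketch of the two calling conventions for the quantized add operator, derived from the hunks below and assuming an already-created xnn_operator_t plus valid shape vectors and int8 buffers (illustrative only, error handling omitted):

  // Older flow restored by this revert: one setup call takes shapes,
  // data pointers, and the threadpool, then the operator is run.
  xnn_setup_add_nd_qs8(op,
                       a_shape.size(), a_shape.data(),
                       b_shape.size(), b_shape.data(),
                       da, db, dc,
                       caffe2::pthreadpool_());
  xnn_run_operator(op, caffe2::pthreadpool_());

  // Newer flow from the reverted PR: shapes and threadpool go to a separate
  // reshape call; setup only binds the data pointers.
  xnn_reshape_add_nd_qs8(op,
                         a_shape.size(), a_shape.data(),
                         b_shape.size(), b_shape.data(),
                         caffe2::pthreadpool_());
  xnn_setup_add_nd_qs8(op, da, db, dc);
  xnn_run_operator(op, caffe2::pthreadpool_());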

View File

@@ -259,32 +259,24 @@ enum xnn_status xnnp_create_add_nd(
 }
 C10_ALWAYS_INLINE
-enum xnn_status xnnp_reshape_add_nd(
+enum xnn_status xnnp_setup_add_nd(
     xnn_operator_t op,
     const std::vector<size_t>& a_shape,
     const std::vector<size_t>& b_shape,
-    pthreadpool_t pt_pool) {
-  return xnn_reshape_add_nd_qs8(
-      op, /* xnn_operator_t add_op */
-      a_shape.size(), /* size_t num_input1_dims */
-      a_shape.data(), /* const size_t* input1_shape */
-      b_shape.size(), /* size_t num_input2_dims */
-      b_shape.data(), /* const size_t* input2_shape */
-      pt_pool); /* pthreadpool_t threadpool */
-}
-C10_ALWAYS_INLINE
-enum xnn_status xnnp_setup_add_nd(
-    xnn_operator_t op,
     const int8_t* da,
     const int8_t* db,
     int8_t* dc,
     pthreadpool_t pt_pool) {
   return xnn_setup_add_nd_qs8(
       op, /* xnn_operator_t add_op */
+      a_shape.size(), /* size_t num_input1_dims */
+      a_shape.data(), /* const size_t* input1_shape */
+      b_shape.size(), /* size_t num_input2_dims */
+      b_shape.data(), /* const size_t* input2_shape */
       da, /* const int8_t* input1 */
       db, /* const int8_t* input2 */
-      dc); /* int8_t* output */
+      dc, /* int8_t* output */
+      pt_pool); /* pthreadpool_t threadpool */
 }
 template <typename scalar_t, bool ReLUFused = false>
@@ -356,20 +348,11 @@ Tensor xnnp_add(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
   const auto qa_shape = xnnp_utils::get_mem_format_aware_shape(qa_contig);
   const auto qb_shape = xnnp_utils::get_mem_format_aware_shape(qb_contig);
-  // Reshape the operator
-  status = xnnp_reshape_add_nd(
-      xnnp_add_operator.get(),
-      qa_shape,
-      qb_shape,
-      caffe2::pthreadpool_());
-  TORCH_CHECK(
-      status == xnn_status_success,
-      func_name, ": xnn reshape operator failed(", status,")!");
   // Setup the operator
   status = xnnp_setup_add_nd(
       xnnp_add_operator.get(),
+      qa_shape,
+      qb_shape,
       reinterpret_cast<const underlying_t*>(qa_contig.data_ptr<scalar_t>()),
       reinterpret_cast<const underlying_t*>(qb_contig.data_ptr<scalar_t>()),
       reinterpret_cast<underlying_t*>(qy.data_ptr<scalar_t>()),

View File

@@ -100,7 +100,6 @@ enum xnn_status xnnp_create_convolution2d_nhwc(
         op_max, /* int8_t output_max */
         flags, /* uint32_t flags */
         nullptr, /* xnn_caches_t caches */
-        nullptr, /* xnn_weights_cache_t weights_cache */
         op); /* xnn_operator_t* deconvolution_op_out */
   }
@@ -133,10 +132,9 @@ enum xnn_status xnnp_create_convolution2d_nhwc(
         op_max, /* int8_t output_max */
         flags, /* uint32_t flags */
         nullptr, /* xnn_caches_t caches */
-        nullptr, /* xnn_weights_cache_t weights_cache */
         op); /* xnn_operator_t* convolution_op_out */
   } else { /* per_channel */
-    return xnn_create_convolution2d_nhwc_qs8_qc8w(
+    return xnn_create_convolution2d_nhwc_qc8(
         pad_top, /* uint32_t input_padding_top */
         pad_right, /* uint32_t input_padding_right */
         pad_bottom, /* uint32_t input_padding_bottom */
@@ -163,99 +161,58 @@ enum xnn_status xnnp_create_convolution2d_nhwc(
         op_max, /* int8_t output_max */
         flags, /* uint32_t flags */
         nullptr, /* xnn_caches_t caches */
-        nullptr, /* xnn_weights_cache_t weights_cache */
         op); /* xnn_operator_t* convolution_op_out */
   }
 }
-/*
- * Series of reshape wrapper functions to call xnn_reshape_[de]conv* functions.
- */
-C10_ALWAYS_INLINE
-enum xnn_status xnnp_reshape_convolution2d_nhwc(
-    xnn_operator_t op,
-    size_t batch,
-    size_t in_h,
-    size_t in_w,
-    pthreadpool_t pt_pool,
-    bool per_channel = false,
-    bool transpose = false,
-    uint32_t adj_h = 0,
-    uint32_t adj_w = 0) {
-  if(transpose) {
-    TORCH_CHECK(!per_channel, "XNNPACK Q[SC]8 does not have a per channel deconvolution!");
-    return xnn_reshape_deconvolution2d_nhwc_qs8(
-        op, /* xnn_operator_t deconvolution_op */
-        batch, /* size_t batch_size */
-        in_h, /* size_t input_height */
-        in_w, /* size_t input_width */
-        adj_h, /* uint32_t adjustment_height */
-        adj_w, /* uint32_t adjustment_width */
-        nullptr, /* size_t* output_height_out */
-        nullptr, /* size_t* output_width_out */
-        pt_pool); /* pthreadpool_t threadpool */
-  }
-  size_t workspace_size = SIZE_MAX;
-  size_t workspace_alignment = SIZE_MAX;
-  if (!per_channel) {
-    return xnn_reshape_convolution2d_nhwc_qs8(
-        op, /* xnn_operator_t convolution_op */
-        batch, /* size_t batch_size */
-        in_h, /* size_t input_height */
-        in_w, /* size_t input_width */
-        &workspace_size, /* size_t* workspace_size */
-        &workspace_alignment, /* size_t* workspace_alignment */
-        nullptr, /* size_t* output_height_out */
-        nullptr, /* size_t* output_width_out */
-        pt_pool); /* pthreadpool_t threadpool */
-  } else { /* per_channel */
-    return xnn_reshape_convolution2d_nhwc_qs8_qc8w(
-        op, /* xnn_operator_t convolution_op */
-        batch, /* size_t batch_size */
-        in_h, /* size_t input_height */
-        in_w, /* size_t input_width */
-        &workspace_size, /* size_t* workspace_size */
-        &workspace_alignment, /* size_t* workspace_alignment */
-        nullptr, /* size_t* output_height_out */
-        nullptr, /* size_t* output_width_out */
-        pt_pool); /* pthreadpool_t threadpool */
-  }
-}
 /*
  * Series of setup wrapper functions to call xnn_setup_[de]conv* functions.
  */
 C10_ALWAYS_INLINE
 enum xnn_status xnnp_setup_convolution2d_nhwc(
     xnn_operator_t op,
+    size_t batch,
+    size_t in_h,
+    size_t in_w,
     const int8_t* inp,
     int8_t* outp,
+    pthreadpool_t pt_pool,
     bool per_channel = false,
-    bool transpose = false) {
+    bool transpose = false,
+    uint32_t adj_h = 0,
+    uint32_t adj_w = 0) {
   if(transpose) {
     TORCH_CHECK(!per_channel, "XNNPACK Q[SC]8 does not have a per channel deconvolution!");
     return xnn_setup_deconvolution2d_nhwc_qs8(
         op, /* xnn_operator_t deconvolution_op */
+        batch, /* size_t batch_size */
+        in_h, /* size_t input_height */
+        in_w, /* size_t input_width */
+        adj_h, /* uint32_t adjustment_height */
+        adj_w, /* uint32_t adjustment_width */
         inp, /* const int8_t* input */
-        outp); /* int8_t* output */
+        outp, /* int8_t* output */
+        pt_pool); /* pthreadpool_t threadpool */
   }
   if (!per_channel) {
     return xnn_setup_convolution2d_nhwc_qs8(
-        op, /* xnn_operator_t deconvolution_op */
-        nullptr, /* void workspace */
-        inp, /* const int8_t* input */
-        outp); /* int8_t* output */
+        op, /* xnn_operator_t convolution_op */
+        batch, /* size_t batch_size */
+        in_h, /* size_t input_height */
+        in_w, /* size_t input_width */
+        inp, /* const int8_t* input */
+        outp, /* int8_t* output */
+        pt_pool); /* pthreadpool_t threadpool */
   } else { /* per_channel */
-    return xnn_setup_convolution2d_nhwc_qs8_qc8w(
-        op, /* xnn_operator_t deconvolution_op */
-        nullptr, /* void workspace */
-        inp, /* const int8_t* input */
-        outp); /* int8_t* output */
+    return xnn_setup_convolution2d_nhwc_qc8(
+        op, /* xnn_operator_t convolution_op */
+        batch, /* size_t batch_size */
+        in_h, /* size_t input_height */
+        in_w, /* size_t input_width */
+        inp, /* const int8_t* input */
+        outp, /* int8_t* output */
+        pt_pool); /* pthreadpool_t threadpool */
   }
 }
@@ -301,31 +258,22 @@ enum xnn_status xnnp_create_fully_connected_nc(
       output_max, /* int8_t output_max */
       flags, /* uint32_t flags */
       nullptr, /* xnn_caches_t caches */
-      nullptr, /* xnn_weights_cache_t */
       fully_connected_op_out); /* xnn_operator_t* fully_connected_op_out */
 }
-C10_ALWAYS_INLINE
-enum xnn_status xnnp_reshape_fully_connected_nc(
-    xnn_operator_t fully_connected_op,
-    size_t batch_size,
-    pthreadpool_t threadpool) {
-  return xnn_reshape_fully_connected_nc_qs8(
-      fully_connected_op, /* xnn_operator_t fully_connected_op */
-      batch_size, /* size_t batch_size */
-      threadpool); /* pthreadpool_t threadpool */
-}
 C10_ALWAYS_INLINE
 enum xnn_status xnnp_setup_fully_connected_nc(
     xnn_operator_t fully_connected_op,
+    size_t batch_size,
     const int8_t* input,
-    int8_t* output) {
+    int8_t* output,
+    pthreadpool_t threadpool) {
   return xnn_setup_fully_connected_nc_qs8(
       fully_connected_op, /* xnn_operator_t fully_connected_op */
+      batch_size, /* size_t batch_size */
       input, /* const int8_t* input */
-      output /* int8_t* output */
-      );
+      output, /* int8_t* output */
+      threadpool); /* pthreadpool_t threadpool */
 }
 } // namespace xnnp_utils

View File

@@ -770,12 +770,14 @@ at::Tensor PackedConvWeightsQnnp<kSpatialDim>::apply_impl_xnnp(
       output_zero_point,
       c10::MemoryFormat::ChannelsLast);
-  // Reshape the operator
-  status = at::native::xnnp_utils::xnnp_reshape_convolution2d_nhwc(
+  // Setup the operator
+  status = at::native::xnnp_utils::xnnp_setup_convolution2d_nhwc(
       xnnp_convolution_op.get(),
       N,
       H,
       W,
+      reinterpret_cast<const underlying_t*>(act_nhwc.template data_ptr<scalar_t>()),
+      reinterpret_cast<underlying_t*>(output.template data_ptr<scalar_t>()),
       caffe2::pthreadpool_(),
       per_channel(),
       transpose(),
@@ -789,21 +791,6 @@ at::Tensor PackedConvWeightsQnnp<kSpatialDim>::apply_impl_xnnp(
       status,
       ")");
-  // Setup the operator
-  status = at::native::xnnp_utils::xnnp_setup_convolution2d_nhwc(
-      xnnp_convolution_op.get(),
-      reinterpret_cast<const underlying_t*>(act_nhwc.template data_ptr<scalar_t>()),
-      reinterpret_cast<underlying_t*>(output.template data_ptr<scalar_t>()),
-      per_channel(),
-      transpose());
-  TORCH_CHECK(
-      status == xnn_status_success,
-      func_name,
-      ": xnn setup operator failed(",
-      status,
-      ")");
   // Run the operator
   status = xnn_run_operator(
       xnnp_convolution_op.get(), /* xnn_operator_t op */

View File

@@ -565,19 +565,14 @@ at::Tensor PackedLinearWeightsQnnp::apply_impl_xnnp(
     rows_input *= input_contig.size(i);
   }
-  // Reshape the operator
-  status = at::native::xnnp_utils::xnnp_reshape_fully_connected_nc(
-      xnnp_linear_op.get(),
-      rows_input, /* batch_size */
-      caffe2::pthreadpool_());
   // Setup the operator
   status = at::native::xnnp_utils::xnnp_setup_fully_connected_nc(
       xnnp_linear_op.get(),
+      rows_input, /* batch_size */
       reinterpret_cast<const underlying_t*>(
           input_contig.template data_ptr<scalar_t>()),
-      reinterpret_cast<underlying_t*>(output.template data_ptr<scalar_t>())
-      );
+      reinterpret_cast<underlying_t*>(output.template data_ptr<scalar_t>()),
+      caffe2::pthreadpool_());
   TORCH_CHECK(
       status == xnn_status_success,

View File

@@ -139,29 +139,17 @@ Tensor _mul_out_xnnpack(
   const auto self_shape = xnnp_utils::get_mem_format_aware_shape(self_contig);
   const auto other_shape = xnnp_utils::get_mem_format_aware_shape(other_contig);
-  // reshape operator
-  status = xnn_reshape_multiply_nd_qs8(
+  // set up operator
+  status = xnn_setup_multiply_nd_qs8(
       xnnp_qmul_operator.get(),
       self_shape.size(),
       self_shape.data(),
       other_shape.size(),
       other_shape.data(),
-      caffe2::pthreadpool_());
-  TORCH_CHECK(
-      status == xnn_status_success,
-      func_name,
-      ": xnn reshape operator failed(",
-      status,
-      ")!");
-  // set up operator
-  status = xnn_setup_multiply_nd_qs8(
-      xnnp_qmul_operator.get(),
       reinterpret_cast<const underlying_t*>(self_contig.data_ptr<scalar_t>()),
       reinterpret_cast<const underlying_t*>(other_contig.data_ptr<scalar_t>()),
-      reinterpret_cast<underlying_t*>(out.data_ptr<scalar_t>())
-      );
+      reinterpret_cast<underlying_t*>(out.data_ptr<scalar_t>()),
+      caffe2::pthreadpool_());
   TORCH_CHECK(
       status == xnn_status_success,

View File

@@ -34,19 +34,12 @@ static Tensor& hardswish_impl(Tensor& input, Tensor& output) {
   Operator hardswish_scoped_op(hardswish_op);
-  const xnn_status reshape_status = xnn_reshape_hardswish_nc_f32(
-      hardswish_op,
-      input.numel(), // Batch
-      caffe2::pthreadpool_()); // threadpool
-  TORCH_CHECK(
-      xnn_status_success == reshape_status,
-      "xnn_reshape_hardswish_nc_f32 failed!");
   const xnn_status setup_status = xnn_setup_hardswish_nc_f32(
       hardswish_op,
+      input.numel(), // Batch
       input.data_ptr<float>(),
-      output.data_ptr<float>());
+      output.data_ptr<float>(),
+      caffe2::pthreadpool_()); // threadpool
   TORCH_CHECK(
       xnn_status_success == setup_status,

View File

@@ -7,13 +7,18 @@
 namespace at::native::xnnpack {
-bool use_global_average_pool(const Tensor& input) {
-  return xnnpack::available() && (1 <= input.ndimension()) &&
-      (input.device().is_cpu()) && (kFloat == input.scalar_type()) &&
-      !input.requires_grad() && true;
+bool use_global_average_pool(
+    const Tensor& input) {
+  return xnnpack::available() &&
+      (1 <= input.ndimension()) &&
+      (input.device().is_cpu()) &&
+      (kFloat == input.scalar_type()) &&
+      !input.requires_grad() &&
+      true;
 }
-Tensor global_average_pool(const Tensor& input) {
+Tensor global_average_pool(
+    const Tensor& input) {
   using namespace internal;
   const Tensor input_padded_contig_nhwc =
@@ -22,10 +27,10 @@ Tensor global_average_pool(const Tensor& input) {
   Tensor output = mobile::empty_with_tail_padding(
       {
         input_padded_contig_nhwc.size(Layout::Activation4D::batch),
        input_padded_contig_nhwc.size(Layout::Activation4D::channels),
        1,
        1,
      },
      input_padded_contig_nhwc.options().dtype(),
      MemoryFormat::ChannelsLast,
@@ -33,61 +38,42 @@ Tensor global_average_pool(const Tensor& input) {
   xnn_operator_t global_average_pooling_op{};
   const xnn_status create_status = xnn_create_global_average_pooling_nwc_f32(
       input_padded_contig_nhwc.size(Layout::Activation4D::channels), // channels
       input_padded_contig_nhwc.size(
           Layout::Activation4D::channels), // input stride
       input_padded_contig_nhwc.size(
           Layout::Activation4D::channels), // output stride
       -std::numeric_limits<float>::infinity(),
       std::numeric_limits<float>::infinity(),
       0 /* flags */,
       &global_average_pooling_op);
   TORCH_CHECK(
       xnn_status_success == create_status,
       "xnn_create_global_average_pooling_nwc_f32 failed!");
   Operator global_avg_pool_scoped_op(global_average_pooling_op);
-  size_t workspace_size = 0;
-  size_t workspace_alignment = 0;
-  const xnn_status reshape_status = xnn_reshape_global_average_pooling_nwc_f32(
+  const xnn_status setup_status = xnn_setup_global_average_pooling_nwc_f32(
       global_average_pooling_op,
       input_padded_contig_nhwc.size(Layout::Activation4D::batch), // batch_size
       input_padded_contig_nhwc.size(Layout::Activation4D::width) *
           input_padded_contig_nhwc.size(Layout::Activation4D::height), // width
-      &workspace_size, // workspace_size
-      &workspace_alignment, // workspace_alignment
+      input_padded_contig_nhwc.data_ptr<float>(),
+      output.data_ptr<float>(),
       caffe2::pthreadpool_());
   TORCH_CHECK(
-      xnn_status_success == reshape_status,
-      "xnn_reshape_global_average_pooling_nwc_f32 failed!");
-  // Create Workspace pointer, which we will align and pad with 16 bytes
-  size_t xnnpack_buffer_padding = 16;
-  std::vector<char> workspace_vector(workspace_size + workspace_alignment + xnnpack_buffer_padding);
-  void* maybe_aligned_workspace = workspace_vector.data();
-  void* aligned_workspace =
-      (void*)((intptr_t)maybe_aligned_workspace + workspace_alignment - (intptr_t)maybe_aligned_workspace % workspace_alignment);
-  const xnn_status setup_status = xnn_setup_global_average_pooling_nwc_f32(
-      global_average_pooling_op,
-      aligned_workspace,
-      input_padded_contig_nhwc.data_ptr<float>(),
-      output.data_ptr<float>());
+      xnn_status_success == setup_status,
+      "xnn_setup_global_average_pooling_nwc_f32 failed!");
+  const xnn_status run_status = xnn_run_operator(
+      global_average_pooling_op,
+      caffe2::pthreadpool_());
   TORCH_CHECK(
-      xnn_status_success == setup_status,
-      "xnn_setup_global_average_pooling_nwc_f32 failed!");
-  const xnn_status run_status =
-      xnn_run_operator(global_average_pooling_op, caffe2::pthreadpool_());
-  TORCH_CHECK(
       xnn_status_success == run_status,
       "xnn_setup_global_average_pooling_nwc_f32 failed!");
   return output.to(input.suggest_memory_format());
 }

View File

@@ -79,19 +79,12 @@ Tensor channel_shuffle(
       input_padded_contig_nhwc.size(Layout::Activation4D::height) *
       input_padded_contig_nhwc.size(Layout::Activation4D::width);
-  const xnn_status reshape_status = xnn_reshape_channel_shuffle_nc_x32(
-      channel_shuffle_op, // operator
-      batch_size, // batch_size
-      caffe2::pthreadpool_()); // threadpool
-  TORCH_CHECK(
-      xnn_status_success == reshape_status,
-      "xnn_reshape_channel_shuffle_nc_x32 failed!");
   const xnn_status setup_status = xnn_setup_channel_shuffle_nc_x32(
       channel_shuffle_op, // operator
+      batch_size, // batch_size
       input_padded_contig_nhwc.data_ptr<float>(), // input
-      output_padded_contig_nhwc.data_ptr<float>()); // output
+      output_padded_contig_nhwc.data_ptr<float>(), // output
+      caffe2::pthreadpool_()); // threadpool
   TORCH_CHECK(
       xnn_status_success == setup_status,

View File

@@ -236,7 +236,6 @@ ContextConv2D create(
         output_max, // output_max
         0u, // flags
         nullptr, // xnn_caches_t
-        nullptr, // xnn_weights_cache_t
         &convolution_op); // operator
   } else {
     for (const auto i : c10::irange(4)) {
@@ -266,7 +265,6 @@ ContextConv2D create(
         output_max, // output_max
         0u, // flags
         nullptr, // xnn_caches_t
-        nullptr, // xnn_weights_cache_t
         &convolution_op); // operator
   }
@@ -340,41 +338,26 @@ Tensor run(
   */
   if (context.transposed_) {
-    setup_status = xnn_reshape_deconvolution2d_nhwc_f32(
-      context.op.get(),
+    setup_status = xnn_setup_deconvolution2d_nhwc_f32(
+      context.op.get(), // operator
       padded_input_nhwc.size(Layout::Activation4D::batch), // batch_size
       padded_input_nhwc.size(Layout::Activation4D::height), // input_height
       padded_input_nhwc.size(Layout::Activation4D::width), // input_width
       context.output_padding_[0], // adjustment_height
       context.output_padding_[1], // adjustment_width
-      nullptr, // output_height_out
-      nullptr, // output_width_out
+      padded_input_nhwc.data_ptr<float>(), // input
+      output.data_ptr<float>(), // output
       caffe2::pthreadpool_()); // threadpool
-    setup_status = xnn_setup_deconvolution2d_nhwc_f32(
-      context.op.get(), // operator
-      padded_input_nhwc.data_ptr<float>(), // input
-      output.data_ptr<float>()); // output
   } else {
-    size_t workspace_size = SIZE_MAX;
-    size_t workspace_alignment = SIZE_MAX;
-    setup_status = xnn_reshape_convolution2d_nhwc_f32(
-      context.op.get(),
+    setup_status = xnn_setup_convolution2d_nhwc_f32(
+      context.op.get(), // operator
       padded_input_nhwc.size(Layout::Activation4D::batch), // batch_size
       padded_input_nhwc.size(Layout::Activation4D::height), // input_height
       padded_input_nhwc.size(Layout::Activation4D::width), // input_width
-      &workspace_size, // workspace_size
-      &workspace_alignment, // workspace_alignment
-      nullptr, // output_height_out
-      nullptr, // output_width_out
-      caffe2::pthreadpool_());
-    setup_status = xnn_setup_convolution2d_nhwc_f32(
-      context.op.get(), // operator
-      nullptr, // workspace
       padded_input_nhwc.data_ptr<float>(), // input
-      output.data_ptr<float>()); // output
+      output.data_ptr<float>(), // output
+      caffe2::pthreadpool_());
   }
   TORCH_CHECK(

View File

@@ -95,7 +95,6 @@ ContextLinear create(
       output_max, // output_max
       0u, // flags
       nullptr, // xnn_caches_t
-      nullptr, // xnn_weights_cache_t
       &linear_op); // operator
   TORCH_CHECK(
@@ -137,19 +136,12 @@ Tensor run(
       padded_input.suggest_memory_format(),
       padded_input.opt_names());
-  const xnn_status reshape_status = xnn_reshape_fully_connected_nc_f32(
-      context.op.get(), // operator
-      Layout::ActivationND::batch(padded_input.sizes()), // Batch,
-      caffe2::pthreadpool_()); // threadpool
-  TORCH_CHECK(
-      xnn_status_success == reshape_status,
-      "xnn_reshape_fully_connected_nc_f32 failed!");
   const xnn_status setup_status = xnn_setup_fully_connected_nc_f32(
       context.op.get(), // operator
+      Layout::ActivationND::batch(padded_input.sizes()), // Batch,
       padded_input.data_ptr<float>(), // input
-      output.data_ptr<float>()); // output
+      output.data_ptr<float>(), // output
+      caffe2::pthreadpool_()); // threadpool
   TORCH_CHECK(
       xnn_status_success == setup_status,

View File

@@ -214,23 +214,14 @@ Tensor max_pool2d(
       xnn_status_success == create_status,
       "xnn_create_max_pooling2d_nhwc_f32 failed!");
-  const xnn_status reshape_status = xnn_reshape_max_pooling2d_nhwc_f32(
+  const xnn_status setup_status = xnn_setup_max_pooling2d_nhwc_f32(
       max_pool_op, // operator
       input_padded_contig_nhwc.size(Layout::Activation4D::batch), // batch_size
       input_padded_contig_nhwc.size(Layout::Activation4D::height), // input_height
       input_padded_contig_nhwc.size(Layout::Activation4D::width), // input_width
-      nullptr, // output_height_out
-      nullptr, // output_width_out
-      caffe2::pthreadpool_()); // threadpool
-  TORCH_CHECK(
-      xnn_status_success == reshape_status,
-      "xnn_reshape_max_pooling2d_nhwc_f32 failed!");
-  const xnn_status setup_status = xnn_setup_max_pooling2d_nhwc_f32(
-      max_pool_op, // operator
       input_padded_contig_nhwc.data_ptr<float>(), // input
-      output_padded_contig_nhwc.data_ptr<float>()); // output
+      output_padded_contig_nhwc.data_ptr<float>(), // output
+      caffe2::pthreadpool_()); // threadpool
   TORCH_CHECK(
       xnn_status_success == setup_status,

View File

@@ -619,13 +619,7 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK)
   # Disable ARM BF16 and FP16 vector for now; unused and causes build failures because
   # these new ISA features may not be supported on older compilers
   set(XNNPACK_ENABLE_ARM_BF16 OFF CACHE BOOL "")
-  set(XNNPACK_ENABLE_ARM_FP16_VECTOR OFF CACHE BOOL "")
-  # Disable AVXVNNI for now, older clang versions seem not to support it
-  # (clang 12 is where avx-vnni support is added)
-  set(XNNPACK_ENABLE_AVXVNNI OFF CACHE BOOL "")
-  # Disable I8MM For CI since clang 9 does not support neon i8mm.
-  set(XNNPACK_ENABLE_ARM_I8MM OFF CACHE BOOL "")
   # Setting this global PIC flag for all XNNPACK targets.
   # This is needed for Object libraries within XNNPACK which must
View File

@@ -127,7 +127,6 @@ cxx_library(
         "cpuinfo/wrappers/linux/multiline.c",
         "cpuinfo/wrappers/linux/processors.c",
         "cpuinfo/wrappers/linux/smallfile.c",
-        "cpuinfo/wrappers/log.c",
         "cpuinfo/wrappers/mach/topology.c",
         "cpuinfo/wrappers/x86/cache/descriptor.c",
         "cpuinfo/wrappers/x86/cache/deterministic.c",

View File

@@ -9,7 +9,6 @@ CPUINFO_SOURCES = {
         "init.c",
         "api.c",
         "cache.c",
-        "log.c",
     ],
     "defined(__linux__)": [
         "linux/multiline.c",

View File

@@ -8,22 +8,16 @@ import logging
 BANNER = "Auto-generated by generate-wrappers.py script. Do not modify"
 WRAPPER_SRC_NAMES = {
-    "PROD_SCALAR_MICROKERNEL_SRCS": None,
-    "PROD_FMA_MICROKERNEL_SRCS": "defined(__riscv) || defined(__riscv__)",
-    "PROD_ARMSIMD32_MICROKERNEL_SRCS": "defined(__arm__)",
-    "PROD_FP16ARITH_MICROKERNEL_SRCS": "defined(__arm__)",
+    "PROD_SCALAR_PORTABLE_MICROKERNEL_SRCS": None,
+    "PROD_SCALAR_AARCH32_MICROKERNEL_SRCS" : "defined(__arm__)",
     "PROD_NEON_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
     "PROD_NEONFP16_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
+    "PROD_NEON_AARCH64_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
     "PROD_NEONFMA_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
-    "PROD_NEON_AARCH64_MICROKERNEL_SRCS": "defined(__aarch64__)",
+    "PROD_AARCH64_NEON_MICROKERNEL_SRCS": "defined(__aarch64__)",
     "PROD_NEONV8_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
-    "PROD_NEONFP16ARITH_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
-    "PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS": "defined(__aarch64__)",
+    "PROD_AARCH64_NEONFP16ARITH_MICROKERNEL_SRCS": "defined(__aarch64__)",
     "PROD_NEONDOT_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
-    "PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS": "defined(__aarch64__)",
-    "PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
-    "PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS": "defined(__aarch64__)",
-    "PROD_NEONI8MM_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
     "PROD_SSE_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
     "PROD_SSE2_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
     "PROD_SSSE3_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
@@ -36,13 +30,42 @@ WRAPPER_SRC_NAMES = {
     "PROD_AVX512F_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
     "PROD_AVX512SKX_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
     "PROD_AVX512VBMI_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    "PROD_AVX512VNNI_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    "PROD_RVV_MICROKERNEL_SRCS": "defined(__riscv) || defined(__riscv__)",
-    "PROD_AVXVNNI_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
     "AARCH32_ASM_MICROKERNEL_SRCS": "defined(__arm__)",
     "AARCH64_ASM_MICROKERNEL_SRCS": "defined(__aarch64__)",
-    # add non-prod microkernel sources here:
+    # add additoonal:
+    "PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
+    "ALL_ARMSIMD32_MICROKERNEL_SRCS": "defined(__arm__)",
+    "ALL_AVX_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    "ALL_AVX2_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    "ALL_AVX512F_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_AVX512SKX_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_AVX512VBMI_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_F16C_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_FMA3_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_FP16ARITH_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
+    'ALL_NEON_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
+    'ALL_NEON_AARCH64_MICROKERNEL_SRCS': "defined(__aarch64__)",
+    'ALL_NEONBF16_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
+    'ALL_NEONDOT_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
+    'ALL_NEONFMA_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
+    'ALL_NEONFMA_AARCH64_MICROKERNEL_SRCS': "defined(__aarch64__)",
+    'ALL_NEONFP16_MICROKERNEL_SRCS':"defined(__arm__) || defined(__aarch64__)",
+    'ALL_NEONFP16ARITH_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
+    'ALL_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS': "defined(__aarch64__)",
+    'ALL_NEONV8_MICROKERNEL_SRCS': "defined(__aarch64__)",
+    'ALL_SCALAR_MICROKERNEL_SRCS': "defined(__arm__)",
+    'ALL_SSE_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_SSE2_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_SSE41_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_SSSE3_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_XOP_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'AARCH32_ASM_MICROKERNEL_SRCS': "defined(__arm__)",
+    "PROD_FP16ARITH_MICROKERNEL_SRCS": "defined(__aarch64__)",
+    "PROD_NEONFP16ARITH_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
+    "PROD_SCALAR_MICROKERNEL_SRCS": "defined(__arm__)",
 }
 SRC_NAMES = set([
@@ -50,24 +73,12 @@ SRC_NAMES = set([
     "SUBGRAPH_SRCS",
     "LOGGING_SRCS",
     "XNNPACK_SRCS",
+    "HOT_SRCS",
     "TABLE_SRCS",
     "JIT_SRCS",
-    "PROD_SCALAR_MICROKERNEL_SRCS",
-    "PROD_FMA_MICROKERNEL_SRCS",
-    "PROD_ARMSIMD32_MICROKERNEL_SRCS",
-    "PROD_FP16ARITH_MICROKERNEL_SRCS",
-    "PROD_NEON_MICROKERNEL_SRCS",
-    "PROD_NEONFP16_MICROKERNEL_SRCS",
-    "PROD_NEONFMA_MICROKERNEL_SRCS",
-    "PROD_NEON_AARCH64_MICROKERNEL_SRCS",
-    "PROD_NEONV8_MICROKERNEL_SRCS",
-    "PROD_NEONFP16ARITH_MICROKERNEL_SRCS",
-    "PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS",
-    "PROD_NEONDOT_MICROKERNEL_SRCS",
-    "PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS",
-    "PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS",
-    "PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS",
-    "PROD_NEONI8MM_MICROKERNEL_SRCS",
+    "JIT_AARCH32_SRCS",
+    "JIT_AARCH64_SRCS",
+    "PROD_SCALAR_PORTABLE_MICROKERNEL_SRCS",
     "PROD_SSE_MICROKERNEL_SRCS",
     "PROD_SSE2_MICROKERNEL_SRCS",
     "PROD_SSSE3_MICROKERNEL_SRCS",
@@ -79,14 +90,59 @@ SRC_NAMES = set([
     "PROD_AVX2_MICROKERNEL_SRCS",
     "PROD_AVX512F_MICROKERNEL_SRCS",
     "PROD_AVX512SKX_MICROKERNEL_SRCS",
+    "PROD_SCALAR_MICROKERNEL_SRCS",
+    "PROD_SCALAR_AARCH32_MICROKERNEL_SRCS",
+    "PROD_SCALAR_RISCV_MICROKERNEL_SRCS",
+    "PROD_ARMSIMD32_MICROKERNEL_SRCS",
+    "PROD_FP16ARITH_MICROKERNEL_SRCS",
+    "PROD_NEON_MICROKERNEL_SRCS",
+    "PROD_NEONFP16_MICROKERNEL_SRCS",
+    "PROD_NEONFMA_MICROKERNEL_SRCS",
+    "PROD_NEON_AARCH64_MICROKERNEL_SRCS",
+    "PROD_NEONV8_MICROKERNEL_SRCS",
+    "PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS",
+    "PROD_NEONDOT_MICROKERNEL_SRCS",
+    "PROD_SSE2_MICROKERNEL_SRCS",
+    "PROD_SSSE3_MICROKERNEL_SRCS",
+    "PROD_SSE41_MICROKERNEL_SRCS",
+    "PROD_AVX_MICROKERNEL_SRCS",
+    "PROD_F16C_MICROKERNEL_SRCS",
     "PROD_AVX512VBMI_MICROKERNEL_SRCS",
-    "PROD_AVX512VNNI_MICROKERNEL_SRCS",
-    "PROD_RVV_MICROKERNEL_SRCS",
-    "PROD_AVXVNNI_MICROKERNEL_SRCS",
-    "AARCH32_ASM_MICROKERNEL_SRCS",
-    "AARCH64_ASM_MICROKERNEL_SRCS",
-    # add non-prod microkernel sources here:
+    "PROD_NEONFP16ARITH_MICROKERNEL_SRCS",
+    # new adding libs:
+    'ALL_ARMSIMD32_MICROKERNEL_SRCS',
+    'ALL_AVX_MICROKERNEL_SRCS',
+    'ALL_AVX2_MICROKERNEL_SRCS',
+    'ALL_AVX512F_MICROKERNEL_SRCS',
+    'ALL_AVX512SKX_MICROKERNEL_SRCS',
+    'ALL_AVX512VBMI_MICROKERNEL_SRCS',
+    'ALL_F16C_MICROKERNEL_SRCS',
+    'ALL_FMA3_MICROKERNEL_SRCS',
+    'ALL_FP16ARITH_MICROKERNEL_SRCS',
+    'ALL_HEXAGON_MICROKERNEL_SRCS',
+    'ALL_NEON_MICROKERNEL_SRCS',
+    'ALL_NEON_AARCH64_MICROKERNEL_SRCS',
+    'ALL_NEONBF16_MICROKERNEL_SRCS',
+    'ALL_NEONBF16_AARCH64_MICROKERNEL_SRCS',
+    'ALL_NEONDOT_MICROKERNEL_SRCS',
+    'ALL_NEONFMA_MICROKERNEL_SRCS',
+    'ALL_NEONFMA_AARCH64_MICROKERNEL_SRCS',
+    'ALL_NEONFP16_MICROKERNEL_SRCS',
+    'ALL_NEONFP16ARITH_MICROKERNEL_SRCS',
+    'ALL_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS',
+    'ALL_NEONV8_MICROKERNEL_SRCS',
+    'ALL_SCALAR_MICROKERNEL_SRCS',
+    'ALL_SSE_MICROKERNEL_SRCS',
+    'ALL_SSE2_MICROKERNEL_SRCS',
+    'ALL_SSE41_MICROKERNEL_SRCS',
+    'ALL_SSSE3_MICROKERNEL_SRCS',
+    'ALL_WASM_MICROKERNEL_SRCS',
+    'ALL_WASMRELAXEDSIMD_MICROKERNEL_SRCS',
+    'ALL_WASMSIMD_MICROKERNEL_SRCS',
+    'ALL_XOP_MICROKERNEL_SRCS',
+    'AARCH32_ASM_MICROKERNEL_SRCS',
+    'AARCH64_ASM_MICROKERNEL_SRCS',
 ])
 def handle_singleline_parse(line):
@@ -94,10 +150,11 @@ def handle_singleline_parse(line):
     end_index = line.find(")")
     line = line[start_index+1:end_index]
     key_val = line.split(" ")
-    return key_val[0], list(map(lambda x: x[4:], key_val[1:]))
+    return key_val[0], key_val[1][4:]
 def update_sources(xnnpack_path, cmakefile = "XNNPACK/CMakeLists.txt"):
     sources = collections.defaultdict(list)
+    count = 0
     with open(os.path.join(xnnpack_path, cmakefile)) as cmake:
         lines = cmake.readlines()
         i = 0
@@ -106,7 +163,7 @@ def update_sources(xnnpack_path, cmakefile = "XNNPACK/CMakeLists.txt"):
             if lines[i].startswith("SET") and "src/" in lines[i]:
                 name, val = handle_singleline_parse(line)
-                sources[name].extend(val)
+                sources[name].append(val)
                 i+=1
                 continue

File diff suppressed because it is too large.

File diff suppressed because it is too large.

File diff suppressed because it is too large.