mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
Revert "[8/n] Update XNNPACK Version Part 8 Everything Remaining to get it to work (#115587)"
This reverts commit a8dc9d8e353ddcf7db0247349a3acd0dd37fcc6f. Reverted https://github.com/pytorch/pytorch/pull/115587 on behalf of https://github.com/facebook-github-bot due to Diff reverted internally ([comment](https://github.com/pytorch/pytorch/pull/115587#issuecomment-1852835898))
@@ -259,32 +259,24 @@ enum xnn_status xnnp_create_add_nd(
 }
 
 C10_ALWAYS_INLINE
-enum xnn_status xnnp_reshape_add_nd(
-    xnn_operator_t op,
-    const std::vector<size_t>& a_shape,
-    const std::vector<size_t>& b_shape,
-    pthreadpool_t pt_pool) {
-  return xnn_reshape_add_nd_qs8(
-      op, /* xnn_operator_t add_op */
-      a_shape.size(), /* size_t num_input1_dims */
-      a_shape.data(), /* const size_t* input1_shape */
-      b_shape.size(), /* size_t num_input2_dims */
-      b_shape.data(), /* const size_t* input2_shape */
-      pt_pool); /* pthreadpool_t threadpool */
-}
-
-C10_ALWAYS_INLINE
 enum xnn_status xnnp_setup_add_nd(
     xnn_operator_t op,
+    const std::vector<size_t>& a_shape,
+    const std::vector<size_t>& b_shape,
     const int8_t* da,
     const int8_t* db,
     int8_t* dc,
     pthreadpool_t pt_pool) {
   return xnn_setup_add_nd_qs8(
       op, /* xnn_operator_t add_op */
+      a_shape.size(), /* size_t num_input1_dims */
+      a_shape.data(), /* const size_t* input1_shape */
+      b_shape.size(), /* size_t num_input2_dims */
+      b_shape.data(), /* const size_t* input2_shape */
       da, /* const int8_t* input1 */
       db, /* const int8_t* input2 */
-      dc); /* int8_t* output */
+      dc, /* int8_t* output */
+      pt_pool); /* pthreadpool_t threadpool */
 }
 
 template <typename scalar_t, bool ReLUFused = false>
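The hunk above restores the pre-#115587 calling convention, in which shape metadata, data pointers, and the threadpool all travel in a single setup call. A minimal sketch of the two conventions side by side, using only signatures that appear in this hunk (operator creation, quantization parameters, and error handling are elided; buffer names are illustrative, and each function only compiles against the XNNPACK revision that declares that arity):

```cpp
#include <vector>
#include <xnnpack.h>
#include <pthreadpool.h>

// Restored convention: one setup call carries shapes, pointers, and threadpool.
xnn_status add_single_phase(xnn_operator_t op,
                            const std::vector<size_t>& a_shape,
                            const std::vector<size_t>& b_shape,
                            const int8_t* a, const int8_t* b, int8_t* out,
                            pthreadpool_t pool) {
  xnn_status s = xnn_setup_add_nd_qs8(
      op,
      a_shape.size(), a_shape.data(),
      b_shape.size(), b_shape.data(),
      a, b, out, pool);
  return s != xnn_status_success ? s : xnn_run_operator(op, pool);
}

// Convention this commit backs out: shapes go to a reshape call first,
// and setup only binds the data pointers.
xnn_status add_two_phase(xnn_operator_t op,
                         const std::vector<size_t>& a_shape,
                         const std::vector<size_t>& b_shape,
                         const int8_t* a, const int8_t* b, int8_t* out,
                         pthreadpool_t pool) {
  xnn_status s = xnn_reshape_add_nd_qs8(
      op,
      a_shape.size(), a_shape.data(),
      b_shape.size(), b_shape.data(),
      pool);
  if (s != xnn_status_success) return s;
  s = xnn_setup_add_nd_qs8(op, a, b, out);
  return s != xnn_status_success ? s : xnn_run_operator(op, pool);
}
```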
@@ -356,20 +348,11 @@ Tensor xnnp_add(Tensor qa, Tensor qb, double scale, int64_t zero_point) {
   const auto qa_shape = xnnp_utils::get_mem_format_aware_shape(qa_contig);
   const auto qb_shape = xnnp_utils::get_mem_format_aware_shape(qb_contig);
 
-  // Reshape the operator
-  status = xnnp_reshape_add_nd(
-      xnnp_add_operator.get(),
-      qa_shape,
-      qb_shape,
-      caffe2::pthreadpool_());
-
-  TORCH_CHECK(
-      status == xnn_status_success,
-      func_name, ": xnn reshape operator failed(", status,")!");
-
   // Setup the operator
   status = xnnp_setup_add_nd(
       xnnp_add_operator.get(),
+      qa_shape,
+      qb_shape,
       reinterpret_cast<const underlying_t*>(qa_contig.data_ptr<scalar_t>()),
       reinterpret_cast<const underlying_t*>(qb_contig.data_ptr<scalar_t>()),
       reinterpret_cast<underlying_t*>(qy.data_ptr<scalar_t>()),
@@ -100,7 +100,6 @@ enum xnn_status xnnp_create_convolution2d_nhwc(
         op_max, /* int8_t output_max */
         flags, /* uint32_t flags */
         nullptr, /* xnn_caches_t caches */
-        nullptr, /* xnn_weights_cache_t weights_cache */
         op); /* xnn_operator_t* deconvolution_op_out */
 
 }
@@ -133,10 +132,9 @@ enum xnn_status xnnp_create_convolution2d_nhwc(
         op_max, /* int8_t output_max */
         flags, /* uint32_t flags */
         nullptr, /* xnn_caches_t caches */
-        nullptr, /* xnn_weights_cache_t weights_cache */
         op); /* xnn_operator_t* convolution_op_out */
   } else { /* per_channel */
-    return xnn_create_convolution2d_nhwc_qs8_qc8w(
+    return xnn_create_convolution2d_nhwc_qc8(
         pad_top, /* uint32_t input_padding_top */
         pad_right, /* uint32_t input_padding_right */
         pad_bottom, /* uint32_t input_padding_bottom */
@@ -163,99 +161,58 @@ enum xnn_status xnnp_create_convolution2d_nhwc(
         op_max, /* int8_t output_max */
         flags, /* uint32_t flags */
         nullptr, /* xnn_caches_t caches */
-        nullptr, /* xnn_weights_cache_t weights_cache */
         op); /* xnn_operator_t* convolution_op_out */
   }
 }
 
-/*
- * Series of reshape wrapper functions to call xnn_reshape_[de]conv* functions.
- */
-C10_ALWAYS_INLINE
-enum xnn_status xnnp_reshape_convolution2d_nhwc(
-    xnn_operator_t op,
-    size_t batch,
-    size_t in_h,
-    size_t in_w,
-    pthreadpool_t pt_pool,
-    bool per_channel = false,
-    bool transpose = false,
-    uint32_t adj_h = 0,
-    uint32_t adj_w = 0) {
-  if(transpose) {
-    TORCH_CHECK(!per_channel, "XNNPACK Q[SC]8 does not have a per channel deconvolution!");
-    return xnn_reshape_deconvolution2d_nhwc_qs8(
-        op, /* xnn_operator_t deconvolution_op */
-        batch, /* size_t batch_size */
-        in_h, /* size_t input_height */
-        in_w, /* size_t input_width */
-        adj_h, /* uint32_t adjustment_height */
-        adj_w, /* uint32_t adjustment_width */
-        nullptr, /* size_t* output_height_out */
-        nullptr, /* size_t* output_width_out */
-        pt_pool); /* pthreadpool_t threadpool */
-  }
-
-  size_t workspace_size = SIZE_MAX;
-  size_t workspace_alignment = SIZE_MAX;
-
-  if (!per_channel) {
-    return xnn_reshape_convolution2d_nhwc_qs8(
-        op, /* xnn_operator_t convolution_op */
-        batch, /* size_t batch_size */
-        in_h, /* size_t input_height */
-        in_w, /* size_t input_width */
-        &workspace_size, /* size_t* workspace_size */
-        &workspace_alignment, /* size_t* workspace_alignment */
-        nullptr, /* size_t* output_height_out */
-        nullptr, /* size_t* output_width_out */
-        pt_pool); /* pthreadpool_t threadpool */
-  } else { /* per_channel */
-    return xnn_reshape_convolution2d_nhwc_qs8_qc8w(
-        op, /* xnn_operator_t convolution_op */
-        batch, /* size_t batch_size */
-        in_h, /* size_t input_height */
-        in_w, /* size_t input_width */
-        &workspace_size, /* size_t* workspace_size */
-        &workspace_alignment, /* size_t* workspace_alignment */
-        nullptr, /* size_t* output_height_out */
-        nullptr, /* size_t* output_width_out */
-        pt_pool); /* pthreadpool_t threadpool */
-  }
-}
-
-
 /*
  * Series of setup wrapper functions to call xnn_setup_[de]conv* functions.
  */
 C10_ALWAYS_INLINE
 enum xnn_status xnnp_setup_convolution2d_nhwc(
     xnn_operator_t op,
+    size_t batch,
+    size_t in_h,
+    size_t in_w,
     const int8_t* inp,
     int8_t* outp,
     pthreadpool_t pt_pool,
     bool per_channel = false,
-    bool transpose = false) {
+    bool transpose = false,
+    uint32_t adj_h = 0,
+    uint32_t adj_w = 0) {
   if(transpose) {
     TORCH_CHECK(!per_channel, "XNNPACK Q[SC]8 does not have a per channel deconvolution!");
-
     return xnn_setup_deconvolution2d_nhwc_qs8(
         op, /* xnn_operator_t deconvolution_op */
+        batch, /* size_t batch_size */
+        in_h, /* size_t input_height */
+        in_w, /* size_t input_width */
+        adj_h, /* uint32_t adjustment_height */
+        adj_w, /* uint32_t adjustment_width */
         inp, /* const int8_t* input */
-        outp); /* int8_t* output */
+        outp, /* int8_t* output */
+        pt_pool); /* pthreadpool_t threadpool */
   }
 
   if (!per_channel) {
     return xnn_setup_convolution2d_nhwc_qs8(
-        op, /* xnn_operator_t deconvolution_op */
-        nullptr, /* void workspace */
-        inp, /* const int8_t* input */
-        outp); /* int8_t* output */
+        op, /* xnn_operator_t convolution_op */
+        batch, /* size_t batch_size */
+        in_h, /* size_t input_height */
+        in_w, /* size_t input_width */
+        inp, /* const int8_t* input */
+        outp, /* int8_t* output */
+        pt_pool); /* pthreadpool_t threadpool */
   } else { /* per_channel */
-    return xnn_setup_convolution2d_nhwc_qs8_qc8w(
-        op, /* xnn_operator_t deconvolution_op */
-        nullptr, /* void workspace */
-        inp, /* const int8_t* input */
-        outp); /* int8_t* output */
+    return xnn_setup_convolution2d_nhwc_qc8(
+        op, /* xnn_operator_t convolution_op */
+        batch, /* size_t batch_size */
+        in_h, /* size_t input_height */
+        in_w, /* size_t input_width */
+        inp, /* const int8_t* input */
+        outp, /* int8_t* output */
+        pt_pool); /* pthreadpool_t threadpool */
   }
 }
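A caller-side sketch of the restored flow, mirroring the qconv call site later in this diff: one setup call binds dimensions and pointers, then `xnn_run_operator` executes. The function name `run_qconv` and the argument names are illustrative, not from this patch:

```cpp
// Sketch, assuming the restored wrapper above and an already-created
// quantized convolution operator `op`.
xnn_status run_qconv(xnn_operator_t op,
                     size_t N, size_t H, size_t W,
                     const int8_t* act_nhwc, int8_t* out,
                     bool per_channel, bool transpose) {
  xnn_status status = at::native::xnnp_utils::xnnp_setup_convolution2d_nhwc(
      op, N, H, W, act_nhwc, out,
      caffe2::pthreadpool_(), per_channel, transpose);
  if (status != xnn_status_success) {
    return status;
  }
  return xnn_run_operator(op, caffe2::pthreadpool_());
}
```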
@@ -301,31 +258,22 @@ enum xnn_status xnnp_create_fully_connected_nc(
       output_max, /* int8_t output_max */
       flags, /* uint32_t flags */
       nullptr, /* xnn_caches_t caches */
-      nullptr, /* xnn_weights_cache_t */
       fully_connected_op_out); /* xnn_operator_t* fully_connected_op_out */
 }
 
-C10_ALWAYS_INLINE
-enum xnn_status xnnp_reshape_fully_connected_nc(
-    xnn_operator_t fully_connected_op,
-    size_t batch_size,
-    pthreadpool_t threadpool) {
-  return xnn_reshape_fully_connected_nc_qs8(
-      fully_connected_op, /* xnn_operator_t fully_connected_op */
-      batch_size, /* size_t batch_size */
-      threadpool); /* pthreadpool_t threadpool */
-}
-
 C10_ALWAYS_INLINE
 enum xnn_status xnnp_setup_fully_connected_nc(
     xnn_operator_t fully_connected_op,
+    size_t batch_size,
     const int8_t* input,
-    int8_t* output) {
+    int8_t* output,
+    pthreadpool_t threadpool) {
   return xnn_setup_fully_connected_nc_qs8(
       fully_connected_op, /* xnn_operator_t fully_connected_op */
+      batch_size, /* size_t batch_size */
       input, /* const int8_t* input */
-      output /* int8_t* output */
-      );
+      output, /* int8_t* output */
+      threadpool); /* pthreadpool_t threadpool */
 }
 
 } // namespace xnnp_utils
@@ -770,12 +770,14 @@ at::Tensor PackedConvWeightsQnnp<kSpatialDim>::apply_impl_xnnp(
       output_zero_point,
       c10::MemoryFormat::ChannelsLast);
 
-  // Reshape the operator
-  status = at::native::xnnp_utils::xnnp_reshape_convolution2d_nhwc(
+  // Setup the operator
+  status = at::native::xnnp_utils::xnnp_setup_convolution2d_nhwc(
       xnnp_convolution_op.get(),
       N,
       H,
       W,
+      reinterpret_cast<const underlying_t*>(act_nhwc.template data_ptr<scalar_t>()),
+      reinterpret_cast<underlying_t*>(output.template data_ptr<scalar_t>()),
       caffe2::pthreadpool_(),
       per_channel(),
       transpose(),
@@ -789,21 +791,6 @@ at::Tensor PackedConvWeightsQnnp<kSpatialDim>::apply_impl_xnnp(
       status,
       ")");
 
-  // Setup the operator
-  status = at::native::xnnp_utils::xnnp_setup_convolution2d_nhwc(
-      xnnp_convolution_op.get(),
-      reinterpret_cast<const underlying_t*>(act_nhwc.template data_ptr<scalar_t>()),
-      reinterpret_cast<underlying_t*>(output.template data_ptr<scalar_t>()),
-      per_channel(),
-      transpose());
-
-  TORCH_CHECK(
-      status == xnn_status_success,
-      func_name,
-      ": xnn setup operator failed(",
-      status,
-      ")");
-
   // Run the operator
   status = xnn_run_operator(
       xnnp_convolution_op.get(), /* xnn_operator_t op */
@@ -565,19 +565,14 @@ at::Tensor PackedLinearWeightsQnnp::apply_impl_xnnp(
     rows_input *= input_contig.size(i);
   }
 
-  // Reshape the operator
-  status = at::native::xnnp_utils::xnnp_reshape_fully_connected_nc(
-      xnnp_linear_op.get(),
-      rows_input, /* batch_size */
-      caffe2::pthreadpool_());
-
   // Setup the operator
   status = at::native::xnnp_utils::xnnp_setup_fully_connected_nc(
       xnnp_linear_op.get(),
+      rows_input, /* batch_size */
       reinterpret_cast<const underlying_t*>(
           input_contig.template data_ptr<scalar_t>()),
-      reinterpret_cast<underlying_t*>(output.template data_ptr<scalar_t>())
-      );
+      reinterpret_cast<underlying_t*>(output.template data_ptr<scalar_t>()),
+      caffe2::pthreadpool_());
 
   TORCH_CHECK(
       status == xnn_status_success,
@@ -139,29 +139,17 @@ Tensor _mul_out_xnnpack(
   const auto self_shape = xnnp_utils::get_mem_format_aware_shape(self_contig);
   const auto other_shape = xnnp_utils::get_mem_format_aware_shape(other_contig);
 
-  // reshape operator
-  status = xnn_reshape_multiply_nd_qs8(
+  // set up operator
+  status = xnn_setup_multiply_nd_qs8(
       xnnp_qmul_operator.get(),
       self_shape.size(),
       self_shape.data(),
       other_shape.size(),
       other_shape.data(),
-      caffe2::pthreadpool_());
-
-  TORCH_CHECK(
-      status == xnn_status_success,
-      func_name,
-      ": xnn reshape operator failed(",
-      status,
-      ")!");
-
-  // set up operator
-  status = xnn_setup_multiply_nd_qs8(
-      xnnp_qmul_operator.get(),
       reinterpret_cast<const underlying_t*>(self_contig.data_ptr<scalar_t>()),
       reinterpret_cast<const underlying_t*>(other_contig.data_ptr<scalar_t>()),
-      reinterpret_cast<underlying_t*>(out.data_ptr<scalar_t>())
-      );
+      reinterpret_cast<underlying_t*>(out.data_ptr<scalar_t>()),
+      caffe2::pthreadpool_());
 
   TORCH_CHECK(
       status == xnn_status_success,
@@ -34,19 +34,12 @@ static Tensor& hardswish_impl(Tensor& input, Tensor& output) {
 
   Operator hardswish_scoped_op(hardswish_op);
 
-  const xnn_status reshape_status = xnn_reshape_hardswish_nc_f32(
-    hardswish_op,
-    input.numel(),  // Batch
-    caffe2::pthreadpool_());  // threadpool
-
-  TORCH_CHECK(
-    xnn_status_success == reshape_status,
-    "xnn_reshape_hardswish_nc_f32 failed!");
-
   const xnn_status setup_status = xnn_setup_hardswish_nc_f32(
     hardswish_op,
+    input.numel(),  // Batch
     input.data_ptr<float>(),
-    output.data_ptr<float>());
+    output.data_ptr<float>(),
+    caffe2::pthreadpool_());  // threadpool
 
   TORCH_CHECK(
     xnn_status_success == setup_status,
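The `Operator hardswish_scoped_op(hardswish_op);` line kept by this hunk is an RAII guard that deletes the underlying operator on scope exit. A sketch of that pattern; the class below is our own illustration of the idea, not PyTorch's `at::native::xnnpack::Operator` definition:

```cpp
#include <xnnpack.h>

// Owns an xnn_operator_t and releases it when the scope ends, so every
// early return after a failed setup still frees the operator.
class ScopedXnnOperator {
 public:
  explicit ScopedXnnOperator(xnn_operator_t op) : op_(op) {}
  ~ScopedXnnOperator() {
    if (op_ != nullptr) {
      xnn_delete_operator(op_);
    }
  }
  ScopedXnnOperator(const ScopedXnnOperator&) = delete;
  ScopedXnnOperator& operator=(const ScopedXnnOperator&) = delete;
  xnn_operator_t get() const { return op_; }

 private:
  xnn_operator_t op_;
};
```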
@@ -7,13 +7,18 @@
 
 namespace at::native::xnnpack {
 
-bool use_global_average_pool(const Tensor& input) {
-  return xnnpack::available() && (1 <= input.ndimension()) &&
-      (input.device().is_cpu()) && (kFloat == input.scalar_type()) &&
-      !input.requires_grad() && true;
+bool use_global_average_pool(
+    const Tensor& input) {
+  return xnnpack::available() &&
+         (1 <= input.ndimension()) &&
+         (input.device().is_cpu()) &&
+         (kFloat == input.scalar_type()) &&
+         !input.requires_grad() &&
+         true;
 }
 
-Tensor global_average_pool(const Tensor& input) {
+Tensor global_average_pool(
+    const Tensor& input) {
   using namespace internal;
 
   const Tensor input_padded_contig_nhwc =
@@ -22,10 +27,10 @@ Tensor global_average_pool(const Tensor& input) {
 
   Tensor output = mobile::empty_with_tail_padding(
       {
         input_padded_contig_nhwc.size(Layout::Activation4D::batch),
         input_padded_contig_nhwc.size(Layout::Activation4D::channels),
         1,
         1,
       },
       input_padded_contig_nhwc.options().dtype(),
       MemoryFormat::ChannelsLast,
@@ -33,61 +38,42 @@ Tensor global_average_pool(const Tensor& input) {
 
   xnn_operator_t global_average_pooling_op{};
   const xnn_status create_status = xnn_create_global_average_pooling_nwc_f32(
       input_padded_contig_nhwc.size(Layout::Activation4D::channels), // channels
       input_padded_contig_nhwc.size(
           Layout::Activation4D::channels), // input stride
       input_padded_contig_nhwc.size(
           Layout::Activation4D::channels), // output stride
       -std::numeric_limits<float>::infinity(),
       std::numeric_limits<float>::infinity(),
       0 /* flags */,
       &global_average_pooling_op);
 
   TORCH_CHECK(
       xnn_status_success == create_status,
       "xnn_create_global_average_pooling_nwc_f32 failed!");
 
   Operator global_avg_pool_scoped_op(global_average_pooling_op);
 
-  size_t workspace_size = 0;
-  size_t workspace_alignment = 0;
-
-  const xnn_status reshape_status = xnn_reshape_global_average_pooling_nwc_f32(
+  const xnn_status setup_status = xnn_setup_global_average_pooling_nwc_f32(
       global_average_pooling_op,
       input_padded_contig_nhwc.size(Layout::Activation4D::batch), // batch_size
       input_padded_contig_nhwc.size(Layout::Activation4D::width) *
           input_padded_contig_nhwc.size(Layout::Activation4D::height), // width
-      &workspace_size, // workspace_size
-      &workspace_alignment, // workspace_alignment
+      input_padded_contig_nhwc.data_ptr<float>(),
+      output.data_ptr<float>(),
       caffe2::pthreadpool_());
 
   TORCH_CHECK(
-      xnn_status_success == reshape_status,
-      "xnn_reshape_global_average_pooling_nwc_f32 failed!");
-
-  // Create Workspace pointer, which we will align and pad with 16 bytes
-  size_t xnnpack_buffer_padding = 16;
-  std::vector<char> workspace_vector(workspace_size + workspace_alignment + xnnpack_buffer_padding);
-  void* maybe_aligned_workspace = workspace_vector.data();
-  void* aligned_workspace =
-      (void*)((intptr_t)maybe_aligned_workspace + workspace_alignment - (intptr_t)maybe_aligned_workspace % workspace_alignment);
-
-  const xnn_status setup_status = xnn_setup_global_average_pooling_nwc_f32(
-      global_average_pooling_op,
-      aligned_workspace,
-      input_padded_contig_nhwc.data_ptr<float>(),
-      output.data_ptr<float>());
-
-  TORCH_CHECK(
       xnn_status_success == setup_status,
       "xnn_setup_global_average_pooling_nwc_f32 failed!");
 
-  const xnn_status run_status =
-      xnn_run_operator(global_average_pooling_op, caffe2::pthreadpool_());
+  const xnn_status run_status = xnn_run_operator(
+      global_average_pooling_op,
+      caffe2::pthreadpool_());
 
   TORCH_CHECK(
       xnn_status_success == run_status,
       "xnn_setup_global_average_pooling_nwc_f32 failed!");
 
   return output.to(input.suggest_memory_format());
 }
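The workspace block deleted above over-allocates a plain byte buffer and rounds its base pointer up to the alignment reported by the reshape call. A standalone sketch of that align-up arithmetic; the helper name is ours, and the formula is copied from the removed lines:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Over-allocate by `workspace_alignment` plus the 16-byte padding the old
// code used, then advance past the next alignment boundary using the same
// expression as the deleted lines:
//   base + alignment - (base % alignment)
// Note this assumes workspace_alignment > 0, and always advances at least
// one full alignment step even when `base` is already aligned.
void* aligned_workspace(std::vector<char>& storage,
                        size_t workspace_size,
                        size_t workspace_alignment) {
  constexpr size_t kXnnpackBufferPadding = 16;
  storage.resize(workspace_size + workspace_alignment + kXnnpackBufferPadding);
  const auto base = reinterpret_cast<intptr_t>(storage.data());
  return reinterpret_cast<void*>(
      base + workspace_alignment - base % workspace_alignment);
}
```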
@@ -79,19 +79,12 @@ Tensor channel_shuffle(
       input_padded_contig_nhwc.size(Layout::Activation4D::height) *
       input_padded_contig_nhwc.size(Layout::Activation4D::width);
 
-  const xnn_status reshape_status = xnn_reshape_channel_shuffle_nc_x32(
-    channel_shuffle_op,          // operator
-    batch_size,                  // batch_size
-    caffe2::pthreadpool_());     // threadpool
-
-  TORCH_CHECK(
-    xnn_status_success == reshape_status,
-    "xnn_reshape_channel_shuffle_nc_x32 failed!");
-
   const xnn_status setup_status = xnn_setup_channel_shuffle_nc_x32(
     channel_shuffle_op,          // operator
+    batch_size,                  // batch_size
     input_padded_contig_nhwc.data_ptr<float>(),    // input
-    output_padded_contig_nhwc.data_ptr<float>());  // output
+    output_padded_contig_nhwc.data_ptr<float>(),   // output
+    caffe2::pthreadpool_());     // threadpool
 
   TORCH_CHECK(
     xnn_status_success == setup_status,
@@ -236,7 +236,6 @@ ContextConv2D create(
         output_max,    // output_max
         0u,            // flags
         nullptr,       // xnn_caches_t
-        nullptr,       // xnn_weights_cache_t
         &convolution_op);  // operator
   } else {
     for (const auto i : c10::irange(4)) {
@@ -266,7 +265,6 @@ ContextConv2D create(
         output_max,    // output_max
         0u,            // flags
         nullptr,       // xnn_caches_t
-        nullptr,       // xnn_weights_cache_t
         &convolution_op);  // operator
   }
 
@@ -340,41 +338,26 @@ Tensor run(
   */
 
   if (context.transposed_) {
-    setup_status = xnn_reshape_deconvolution2d_nhwc_f32(
-        context.op.get(),
+    setup_status = xnn_setup_deconvolution2d_nhwc_f32(
+        context.op.get(),                                      // operator
         padded_input_nhwc.size(Layout::Activation4D::batch),   // batch_size
         padded_input_nhwc.size(Layout::Activation4D::height),  // input_height
         padded_input_nhwc.size(Layout::Activation4D::width),   // input_width
         context.output_padding_[0],                            // adjustment_height
         context.output_padding_[1],                            // adjustment_width
-        nullptr,                                               // output_height_out
-        nullptr,                                               // output_width_out
+        padded_input_nhwc.data_ptr<float>(),                   // input
+        output.data_ptr<float>(),                              // output
         caffe2::pthreadpool_());                               // threadpool
-
-    setup_status = xnn_setup_deconvolution2d_nhwc_f32(
-        context.op.get(),                                      // operator
-        padded_input_nhwc.data_ptr<float>(),                   // input
-        output.data_ptr<float>());                             // output
   } else {
-    size_t workspace_size = SIZE_MAX;
-    size_t workspace_alignment = SIZE_MAX;
-
-    setup_status = xnn_reshape_convolution2d_nhwc_f32(
-        context.op.get(),
+    setup_status = xnn_setup_convolution2d_nhwc_f32(
+        context.op.get(),                                      // operator
        padded_input_nhwc.size(Layout::Activation4D::batch),   // batch_size
         padded_input_nhwc.size(Layout::Activation4D::height),  // input_height
         padded_input_nhwc.size(Layout::Activation4D::width),   // input_width
-        &workspace_size,                                       // workspace_size
-        &workspace_alignment,                                  // workspace_alignment
-        nullptr,                                               // output_height_out
-        nullptr,                                               // output_width_out
-        caffe2::pthreadpool_());
-
-    setup_status = xnn_setup_convolution2d_nhwc_f32(
-        context.op.get(),                                      // operator
-        nullptr,                                               // workspace
         padded_input_nhwc.data_ptr<float>(),                   // input
-        output.data_ptr<float>());                             // output
+        output.data_ptr<float>(),                              // output
+        caffe2::pthreadpool_());
   }
 
   TORCH_CHECK(
@@ -95,7 +95,6 @@ ContextLinear create(
       output_max,   // output_max
       0u,           // flags
       nullptr,      // xnn_caches_t
-      nullptr,      // xnn_weights_cache_t
      &linear_op);  // operator
 
   TORCH_CHECK(
@@ -137,19 +136,12 @@ Tensor run(
       padded_input.suggest_memory_format(),
       padded_input.opt_names());
 
-  const xnn_status reshape_status = xnn_reshape_fully_connected_nc_f32(
-      context.op.get(),                                   // operator
-      Layout::ActivationND::batch(padded_input.sizes()),  // Batch,
-      caffe2::pthreadpool_());                            // threadpool
-
-  TORCH_CHECK(
-      xnn_status_success == reshape_status,
-      "xnn_reshape_fully_connected_nc_f32 failed!");
-
   const xnn_status setup_status = xnn_setup_fully_connected_nc_f32(
       context.op.get(),                                   // operator
+      Layout::ActivationND::batch(padded_input.sizes()),  // Batch,
       padded_input.data_ptr<float>(),                     // input
-      output.data_ptr<float>());                          // output
+      output.data_ptr<float>(),                           // output
+      caffe2::pthreadpool_());                            // threadpool
 
   TORCH_CHECK(
       xnn_status_success == setup_status,
@@ -214,23 +214,14 @@ Tensor max_pool2d(
       xnn_status_success == create_status,
       "xnn_create_max_pooling2d_nhwc_f32 failed!");
 
-  const xnn_status reshape_status = xnn_reshape_max_pooling2d_nhwc_f32(
+  const xnn_status setup_status = xnn_setup_max_pooling2d_nhwc_f32(
       max_pool_op,                                                   // operator
       input_padded_contig_nhwc.size(Layout::Activation4D::batch),   // batch_size
       input_padded_contig_nhwc.size(Layout::Activation4D::height),  // input_height
       input_padded_contig_nhwc.size(Layout::Activation4D::width),   // input_width
-      nullptr,                                                       // output_height_out
-      nullptr,                                                       // output_width_out
-      caffe2::pthreadpool_());                                       // threadpool
-
-  TORCH_CHECK(
-      xnn_status_success == reshape_status,
-      "xnn_reshape_max_pooling2d_nhwc_f32 failed!");
-
-  const xnn_status setup_status = xnn_setup_max_pooling2d_nhwc_f32(
-      max_pool_op,                                                   // operator
       input_padded_contig_nhwc.data_ptr<float>(),                    // input
-      output_padded_contig_nhwc.data_ptr<float>());                  // output
+      output_padded_contig_nhwc.data_ptr<float>(),                   // output
+      caffe2::pthreadpool_());                                       // threadpool
 
   TORCH_CHECK(
       xnn_status_success == setup_status,
@@ -619,13 +619,7 @@ if(USE_XNNPACK AND NOT USE_SYSTEM_XNNPACK)
   # Disable ARM BF16 and FP16 vector for now; unused and causes build failures because
   # these new ISA features may not be supported on older compilers
   set(XNNPACK_ENABLE_ARM_BF16 OFF CACHE BOOL "")
-
-  # Disable AVXVNNI for now, older clang versions seem not to support it
-  # (clang 12 is where avx-vnni support is added)
-  set(XNNPACK_ENABLE_AVXVNNI OFF CACHE BOOL "")
-
-  # Disable I8MM For CI since clang 9 does not support neon i8mm.
-  set(XNNPACK_ENABLE_ARM_I8MM OFF CACHE BOOL "")
+  set(XNNPACK_ENABLE_ARM_FP16_VECTOR OFF CACHE BOOL "")
 
   # Setting this global PIC flag for all XNNPACK targets.
   # This is needed for Object libraries within XNNPACK which must
1 third_party/BUCK.oss vendored
@@ -127,7 +127,6 @@ cxx_library(
         "cpuinfo/wrappers/linux/multiline.c",
         "cpuinfo/wrappers/linux/processors.c",
         "cpuinfo/wrappers/linux/smallfile.c",
-        "cpuinfo/wrappers/log.c",
         "cpuinfo/wrappers/mach/topology.c",
         "cpuinfo/wrappers/x86/cache/descriptor.c",
         "cpuinfo/wrappers/x86/cache/deterministic.c",
2 third_party/XNNPACK vendored
Submodule third_party/XNNPACK updated: d9cce341f8...51a987591a
2 third_party/cpuinfo vendored
Submodule third_party/cpuinfo updated: d6860c477c...6481e8bef0
1 third_party/generate-cpuinfo-wrappers.py vendored
@@ -9,7 +9,6 @@ CPUINFO_SOURCES = {
         "init.c",
         "api.c",
         "cache.c",
-        "log.c",
     ],
     "defined(__linux__)": [
         "linux/multiline.c",
135 third_party/generate-xnnpack-wrappers.py vendored
@@ -8,22 +8,16 @@ import logging
 
 BANNER = "Auto-generated by generate-wrappers.py script. Do not modify"
 WRAPPER_SRC_NAMES = {
-    "PROD_SCALAR_MICROKERNEL_SRCS": None,
-    "PROD_FMA_MICROKERNEL_SRCS": "defined(__riscv) || defined(__riscv__)",
     "PROD_ARMSIMD32_MICROKERNEL_SRCS": "defined(__arm__)",
-    "PROD_FP16ARITH_MICROKERNEL_SRCS": "defined(__arm__)",
+    "PROD_SCALAR_PORTABLE_MICROKERNEL_SRCS": None,
+    "PROD_SCALAR_AARCH32_MICROKERNEL_SRCS" : "defined(__arm__)",
     "PROD_NEON_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
     "PROD_NEONFP16_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
+    "PROD_NEON_AARCH64_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
     "PROD_NEONFMA_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
-    "PROD_NEON_AARCH64_MICROKERNEL_SRCS": "defined(__aarch64__)",
+    "PROD_AARCH64_NEON_MICROKERNEL_SRCS": "defined(__aarch64__)",
     "PROD_NEONV8_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
     "PROD_NEONFP16ARITH_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
-    "PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS": "defined(__aarch64__)",
+    "PROD_AARCH64_NEONFP16ARITH_MICROKERNEL_SRCS": "defined(__aarch64__)",
     "PROD_NEONDOT_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
-    "PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS": "defined(__aarch64__)",
-    "PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
-    "PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS": "defined(__aarch64__)",
-    "PROD_NEONI8MM_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
     "PROD_SSE_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
     "PROD_SSE2_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
     "PROD_SSSE3_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
@@ -36,13 +30,42 @@ WRAPPER_SRC_NAMES = {
     "PROD_AVX512F_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
     "PROD_AVX512SKX_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    "PROD_AVX512VBMI_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    "PROD_AVX512VNNI_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    "PROD_RVV_MICROKERNEL_SRCS": "defined(__riscv) || defined(__riscv__)",
-    "PROD_AVXVNNI_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
-    "AARCH32_ASM_MICROKERNEL_SRCS": "defined(__arm__)",
     "AARCH64_ASM_MICROKERNEL_SRCS": "defined(__aarch64__)",
 
-    # add non-prod microkernel sources here:
+    # add additoonal:
+    "PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
+    "ALL_ARMSIMD32_MICROKERNEL_SRCS": "defined(__arm__)",
+    "ALL_AVX_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    "ALL_AVX2_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    "ALL_AVX512F_MICROKERNEL_SRCS": "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+
+    'ALL_AVX512SKX_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_AVX512VBMI_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_F16C_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_FMA3_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_FP16ARITH_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
+    'ALL_NEON_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
+    'ALL_NEON_AARCH64_MICROKERNEL_SRCS': "defined(__aarch64__)",
+    'ALL_NEONBF16_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
+    'ALL_NEONDOT_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
+    'ALL_NEONFMA_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
+    'ALL_NEONFMA_AARCH64_MICROKERNEL_SRCS': "defined(__aarch64__)",
+    'ALL_NEONFP16_MICROKERNEL_SRCS':"defined(__arm__) || defined(__aarch64__)",
+    'ALL_NEONFP16ARITH_MICROKERNEL_SRCS': "defined(__arm__) || defined(__aarch64__)",
+    'ALL_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS': "defined(__aarch64__)",
+    'ALL_NEONV8_MICROKERNEL_SRCS': "defined(__aarch64__)",
+    'ALL_SCALAR_MICROKERNEL_SRCS': "defined(__arm__)",
+    'ALL_SSE_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_SSE2_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_SSE41_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_SSSE3_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'ALL_XOP_MICROKERNEL_SRCS': "defined(__i386__) || defined(__i686__) || defined(__x86_64__)",
+    'AARCH32_ASM_MICROKERNEL_SRCS': "defined(__arm__)",
+    "PROD_FP16ARITH_MICROKERNEL_SRCS": "defined(__aarch64__)",
+    "PROD_NEONFP16ARITH_MICROKERNEL_SRCS": "defined(__arm__) || defined(__aarch64__)",
+    "PROD_SCALAR_MICROKERNEL_SRCS": "defined(__arm__)",
 }
 
 SRC_NAMES = set([
@@ -50,24 +73,12 @@ SRC_NAMES = set([
     "SUBGRAPH_SRCS",
     "LOGGING_SRCS",
     "XNNPACK_SRCS",
+    "HOT_SRCS",
     "TABLE_SRCS",
     "JIT_SRCS",
-    "PROD_SCALAR_MICROKERNEL_SRCS",
-    "PROD_FMA_MICROKERNEL_SRCS",
-    "PROD_ARMSIMD32_MICROKERNEL_SRCS",
-    "PROD_FP16ARITH_MICROKERNEL_SRCS",
-    "PROD_NEON_MICROKERNEL_SRCS",
-    "PROD_NEONFP16_MICROKERNEL_SRCS",
-    "PROD_NEONFMA_MICROKERNEL_SRCS",
-    "PROD_NEON_AARCH64_MICROKERNEL_SRCS",
-    "PROD_NEONV8_MICROKERNEL_SRCS",
-    "PROD_NEONFP16ARITH_MICROKERNEL_SRCS",
-    "PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS",
-    "PROD_NEONDOT_MICROKERNEL_SRCS",
-    "PROD_NEONDOT_AARCH64_MICROKERNEL_SRCS",
-    "PROD_NEONDOTFP16ARITH_MICROKERNEL_SRCS",
-    "PROD_NEONDOTFP16ARITH_AARCH64_MICROKERNEL_SRCS",
-    "PROD_NEONI8MM_MICROKERNEL_SRCS",
+    "JIT_AARCH32_SRCS",
+    "JIT_AARCH64_SRCS",
+    "PROD_SCALAR_PORTABLE_MICROKERNEL_SRCS",
     "PROD_SSE_MICROKERNEL_SRCS",
     "PROD_SSE2_MICROKERNEL_SRCS",
     "PROD_SSSE3_MICROKERNEL_SRCS",
@@ -79,14 +90,59 @@ SRC_NAMES = set([
     "PROD_AVX2_MICROKERNEL_SRCS",
     "PROD_AVX512F_MICROKERNEL_SRCS",
     "PROD_AVX512SKX_MICROKERNEL_SRCS",
+    "PROD_SCALAR_MICROKERNEL_SRCS",
+    "PROD_SCALAR_AARCH32_MICROKERNEL_SRCS",
+    "PROD_SCALAR_RISCV_MICROKERNEL_SRCS",
+    "PROD_ARMSIMD32_MICROKERNEL_SRCS",
+    "PROD_FP16ARITH_MICROKERNEL_SRCS",
+    "PROD_NEON_MICROKERNEL_SRCS",
+    "PROD_NEONFP16_MICROKERNEL_SRCS",
+    "PROD_NEONFMA_MICROKERNEL_SRCS",
+    "PROD_NEON_AARCH64_MICROKERNEL_SRCS",
+    "PROD_NEONV8_MICROKERNEL_SRCS",
+    "PROD_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS",
+    "PROD_NEONDOT_MICROKERNEL_SRCS",
+    "PROD_SSE2_MICROKERNEL_SRCS",
+    "PROD_SSSE3_MICROKERNEL_SRCS",
+    "PROD_SSE41_MICROKERNEL_SRCS",
+    "PROD_AVX_MICROKERNEL_SRCS",
+    "PROD_F16C_MICROKERNEL_SRCS",
     "PROD_AVX512VBMI_MICROKERNEL_SRCS",
     "PROD_AVX512VNNI_MICROKERNEL_SRCS",
     "PROD_RVV_MICROKERNEL_SRCS",
     "PROD_AVXVNNI_MICROKERNEL_SRCS",
     "AARCH32_ASM_MICROKERNEL_SRCS",
     "AARCH64_ASM_MICROKERNEL_SRCS",
+    "PROD_NEONFP16ARITH_MICROKERNEL_SRCS",
 
-    # add non-prod microkernel sources here:
+    # new adding libs:
+    'ALL_ARMSIMD32_MICROKERNEL_SRCS',
+    'ALL_AVX_MICROKERNEL_SRCS',
+    'ALL_AVX2_MICROKERNEL_SRCS',
+    'ALL_AVX512F_MICROKERNEL_SRCS',
+    'ALL_AVX512SKX_MICROKERNEL_SRCS',
+    'ALL_AVX512VBMI_MICROKERNEL_SRCS',
+    'ALL_F16C_MICROKERNEL_SRCS',
+    'ALL_FMA3_MICROKERNEL_SRCS',
+    'ALL_FP16ARITH_MICROKERNEL_SRCS',
+    'ALL_HEXAGON_MICROKERNEL_SRCS',
+    'ALL_NEON_MICROKERNEL_SRCS',
+    'ALL_NEON_AARCH64_MICROKERNEL_SRCS',
+    'ALL_NEONBF16_MICROKERNEL_SRCS',
+    'ALL_NEONBF16_AARCH64_MICROKERNEL_SRCS',
+    'ALL_NEONDOT_MICROKERNEL_SRCS',
+    'ALL_NEONFMA_MICROKERNEL_SRCS',
+    'ALL_NEONFMA_AARCH64_MICROKERNEL_SRCS',
+    'ALL_NEONFP16_MICROKERNEL_SRCS',
+    'ALL_NEONFP16ARITH_MICROKERNEL_SRCS',
+    'ALL_NEONFP16ARITH_AARCH64_MICROKERNEL_SRCS',
+    'ALL_NEONV8_MICROKERNEL_SRCS',
+    'ALL_SCALAR_MICROKERNEL_SRCS',
+    'ALL_SSE_MICROKERNEL_SRCS',
+    'ALL_SSE2_MICROKERNEL_SRCS',
+    'ALL_SSE41_MICROKERNEL_SRCS',
+    'ALL_SSSE3_MICROKERNEL_SRCS',
+    'ALL_WASM_MICROKERNEL_SRCS',
+    'ALL_WASMRELAXEDSIMD_MICROKERNEL_SRCS',
+    'ALL_WASMSIMD_MICROKERNEL_SRCS',
+    'ALL_XOP_MICROKERNEL_SRCS',
+    'AARCH32_ASM_MICROKERNEL_SRCS',
+    'AARCH64_ASM_MICROKERNEL_SRCS',
 ])
 
 def handle_singleline_parse(line):
@@ -94,10 +150,11 @@ def handle_singleline_parse(line):
     end_index = line.find(")")
     line = line[start_index+1:end_index]
     key_val = line.split(" ")
-    return key_val[0], list(map(lambda x: x[4:], key_val[1:]))
+    return key_val[0], key_val[1][4:]
 
 def update_sources(xnnpack_path, cmakefile = "XNNPACK/CMakeLists.txt"):
     sources = collections.defaultdict(list)
+    count = 0
     with open(os.path.join(xnnpack_path, cmakefile)) as cmake:
         lines = cmake.readlines()
         i = 0
@@ -106,7 +163,7 @@ def update_sources(xnnpack_path, cmakefile = "XNNPACK/CMakeLists.txt"):
 
             if lines[i].startswith("SET") and "src/" in lines[i]:
                 name, val = handle_singleline_parse(line)
-                sources[name].extend(val)
+                sources[name].append(val)
                 i+=1
                 continue
 
860 third_party/xnnpack.buck.bzl vendored
File diff suppressed because it is too large
8007 third_party/xnnpack_src_defs.bzl vendored
File diff suppressed because it is too large
6104 third_party/xnnpack_wrapper_defs.bzl vendored
File diff suppressed because it is too large