Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
fix missing-prototypes warnings in torch_cpu (Part 6) (#101845)
This PR fixes more missing-prototypes violations in the torch_cpu source, following PRs #100053, #100147, #100245, #100849 and #101788.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/101845
Approved by: https://github.com/albanD
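For readers unfamiliar with the warning: clang's -Wmissing-prototypes fires when a function with external linkage is defined without a previous declaration. A minimal sketch of the warning and of the two fixes this series applies (the function names here are illustrative, not taken from the diff):

// example.cpp -- hypothetical translation unit
int scale(int x);                // fix 1: supply a prototype (usually via a header)

int scale(int x) {               // without the declaration above, clang warns:
  return 2 * x;                  //   "no previous prototype for function 'scale'"
}                                //   [-Wmissing-prototypes]

static int local_scale(int x) {  // fix 2: internal linkage; the warning only
  return 2 * x;                  // applies to external-linkage functions
}

Most hunks below take fix 2 and mark file-local helpers static; the rest add declarations, include the declaring header, or suppress the warning with pragmas where a definition is intentionally left undeclared.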
@@ -64,6 +64,11 @@ bool operator==(const ivalue::Tuple& lhs, const ivalue::Tuple& rhs) {
       _fastEqualsForContainer);
 }
 
+std::ostream& operator<<(std::ostream& out, const ivalue::EnumHolder& v) {
+  out << v.qualifiedClassName() << "." << v.name();
+  return out;
+}
+
 bool operator==(const ivalue::EnumHolder& lhs, const ivalue::EnumHolder& rhs) {
   return lhs.name() == rhs.name() && *rhs.type() == *lhs.type();
 }
@@ -763,11 +768,6 @@ IValueComparator getGreaterThanComparator(const IValue& v) {
   };
 }
 
-std::ostream& operator<<(std::ostream& out, const ivalue::EnumHolder& v) {
-  out << v.qualifiedClassName() << "." << v.name();
-  return out;
-}
-
 std::ostream& operator<<(std::ostream & out, const IValue & v) {
   auto formatter = [&](std::ostream& out, const IValue& v) {
     out << v;
@@ -171,7 +171,7 @@ Tensor arange(
   return at::arange_out(result, start, end, step);
 }
 
-Tensor& arange_start_out(const Scalar& start, const Scalar& end, Tensor& result) {
+static Tensor& arange_start_out(const Scalar& start, const Scalar& end, Tensor& result) {
   return at::arange_out(result, start, end, /*step=*/1);
 }
 
@@ -179,7 +179,7 @@ Tensor& arange_out(const Scalar& end, Tensor& result) {
   return at::arange_out(result, /*start=*/0, end, /*step=*/1);
 }
 
-Tensor& arange_out(Tensor& result, const Scalar& start, const Scalar& end) {
+static Tensor& arange_out(Tensor& result, const Scalar& start, const Scalar& end) {
   return at::arange_out(result, start, end, /*step=*/1);
 }
 
@@ -189,14 +189,14 @@ Tensor _dim_arange(const Tensor& like, int64_t dim) {
 
 // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ complex / polar ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
-void complex_check_floating(const Tensor& a, const Tensor& b) {
+static void complex_check_floating(const Tensor& a, const Tensor& b) {
   TORCH_CHECK((a.scalar_type() == kFloat || a.scalar_type() == kDouble || a.scalar_type() == kHalf) &&
               (b.scalar_type() == kFloat || b.scalar_type() == kDouble || b.scalar_type() == kHalf),
               "Expected both inputs to be Half, Float or Double tensors but got ",
               a.scalar_type(), " and ", b.scalar_type());
 }
 
-void complex_check_dtype(
+static void complex_check_dtype(
     const Tensor& result,
     const Tensor& a,
     const Tensor& b) {
@@ -352,7 +352,12 @@ Tensor& empty_out(IntArrayRef size,
     return self.to(ScalarType::n, non_blocking); \
   }
 
+// Some scalar types in CAST_OP have no declarations, they may be unused in Pytorch.
+// But we keep them and ignore the warning here until verified in the future.
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
 AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, DEFINE_CAST_OP)
+#pragma clang diagnostic pop
 
 #undef DEFINE_CAST_OP
 
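The hunk above takes the suppression route: the CAST_OP expansions may be dead code, so rather than adding prototypes the warning is silenced around the macro expansion. A sketch of the pattern with a hypothetical macro:

#define DEFINE_GREETER(name) void greet_##name() {}

#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wmissing-prototypes"
DEFINE_GREETER(english)  // expands to a prototype-less definition; no warning inside the push/pop scope
#pragma clang diagnostic pop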
@@ -691,7 +691,7 @@ static void modified_bessel_k1_kernel(TensorIteratorBase& iterator) {
 
 #define IMPLEMENT_FLOAT_KERNEL(op) \
   inline namespace CPU_CAPABILITY { \
-  void op##_kernel(TensorIteratorBase& iter) { \
+  static void op##_kernel(TensorIteratorBase& iter) { \
     TORCH_INTERNAL_ASSERT(iter.ntensors() == 2); \
     AT_DISPATCH_FLOATING_TYPES_AND2(kBFloat16, kHalf, iter.dtype(), #op "_vml_cpu", [&]() { \
       constexpr int64_t grain_size = 2048; \
@@ -715,6 +715,19 @@ static void modified_bessel_k1_kernel(TensorIteratorBase& iterator) {
   } \
   REGISTER_DISPATCH(op##_stub, &CPU_CAPABILITY::op##_kernel)
 
+#define STATIC_IMPLEMENT_COMPLEX_KERNEL(op) \
+  inline namespace CPU_CAPABILITY { \
+  static void op##_kernel(TensorIteratorBase& iter) { \
+    TORCH_INTERNAL_ASSERT(iter.ntensors() == 2); \
+    AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kBFloat16, kHalf, iter.dtype(), #op "_vml_cpu", [&]() { \
+      constexpr int64_t grain_size = 2048; \
+      iter.for_each(IMPLEMENT_ITERATOR_LAMBDA(op), grain_size); \
+    }); \
+    iter.cast_outputs(); \
+  } \
+  } \
+  REGISTER_DISPATCH(op##_stub, &CPU_CAPABILITY::op##_kernel)
+
 } // CPU_CAPABILITY namespace
 
 REGISTER_DISPATCH(rsqrt_stub, &CPU_CAPABILITY::rsqrt_kernel);
@@ -761,51 +774,28 @@ REGISTER_DISPATCH(special_modified_bessel_i1_stub, &CPU_CAPABILITY::modified_bes
 REGISTER_DISPATCH(special_modified_bessel_k0_stub, &CPU_CAPABILITY::modified_bessel_k0_kernel);
 REGISTER_DISPATCH(special_modified_bessel_k1_stub, &CPU_CAPABILITY::modified_bessel_k1_kernel);
 
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
-IMPLEMENT_COMPLEX_KERNEL(acos)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
-IMPLEMENT_COMPLEX_KERNEL(asin)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
-IMPLEMENT_COMPLEX_KERNEL(atan)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
+STATIC_IMPLEMENT_COMPLEX_KERNEL(acos)
+STATIC_IMPLEMENT_COMPLEX_KERNEL(asin)
+STATIC_IMPLEMENT_COMPLEX_KERNEL(atan)
 IMPLEMENT_FLOAT_KERNEL(ceil)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
-IMPLEMENT_COMPLEX_KERNEL(cos)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
+STATIC_IMPLEMENT_COMPLEX_KERNEL(cos)
 IMPLEMENT_FLOAT_KERNEL(erf)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
 IMPLEMENT_FLOAT_KERNEL(erfc)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
 IMPLEMENT_FLOAT_KERNEL(erfinv)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
-IMPLEMENT_COMPLEX_KERNEL(exp)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
-IMPLEMENT_COMPLEX_KERNEL(expm1)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
+STATIC_IMPLEMENT_COMPLEX_KERNEL(exp)
+STATIC_IMPLEMENT_COMPLEX_KERNEL(expm1)
 IMPLEMENT_FLOAT_KERNEL(floor)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
-IMPLEMENT_COMPLEX_KERNEL(log)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
-IMPLEMENT_COMPLEX_KERNEL(log10)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
-IMPLEMENT_COMPLEX_KERNEL(log1p)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
-IMPLEMENT_COMPLEX_KERNEL(log2)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
+STATIC_IMPLEMENT_COMPLEX_KERNEL(log)
+STATIC_IMPLEMENT_COMPLEX_KERNEL(log10)
+STATIC_IMPLEMENT_COMPLEX_KERNEL(log1p)
+STATIC_IMPLEMENT_COMPLEX_KERNEL(log2)
 IMPLEMENT_FLOAT_KERNEL(i0)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
 IMPLEMENT_FLOAT_KERNEL(round)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
-IMPLEMENT_COMPLEX_KERNEL(sin)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
+STATIC_IMPLEMENT_COMPLEX_KERNEL(sin)
 IMPLEMENT_COMPLEX_KERNEL(sqrt)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
-IMPLEMENT_COMPLEX_KERNEL(tan)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
-IMPLEMENT_COMPLEX_KERNEL(tanh)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
+STATIC_IMPLEMENT_COMPLEX_KERNEL(tan)
+STATIC_IMPLEMENT_COMPLEX_KERNEL(tanh)
 IMPLEMENT_FLOAT_KERNEL(trunc)
-// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables,modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
 IMPLEMENT_FLOAT_KERNEL(lgamma)
 
 } // namespace at::native
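The STATIC_IMPLEMENT_COMPLEX_KERNEL variant introduced above works because -Wmissing-prototypes exempts internal-linkage functions, even inside the inline CPU_CAPABILITY namespace. A hedged illustration (names are hypothetical, not from the patch):

inline namespace CPU_CAPABILITY {
void noisy_kernel(int) {}         // external linkage, no prior declaration: warns
static void quiet_kernel(int) {}  // internal linkage: exempt from the warning
} // namespace CPU_CAPABILITY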
@@ -27,19 +27,19 @@ Tensor mkldnn_convolution(
   TORCH_CHECK(false, "mkldnn_convolution_forward: ATen not compiled with MKLDNN support");
 }
 
-Tensor mkldnn_convolution_backward_input(
+static Tensor mkldnn_convolution_backward_input(
     IntArrayRef input_size, const Tensor& grad_output, const Tensor& weight,
     IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool bias_defined) {
   TORCH_CHECK(false, "mkldnn_convolution_backward_input: ATen not compiled with MKLDNN support");
 }
 
-std::tuple<Tensor, Tensor> mkldnn_convolution_backward_weights(
+static std::tuple<Tensor, Tensor> mkldnn_convolution_backward_weights(
     IntArrayRef weight_size, const Tensor& grad_output, const Tensor& input,
     IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, bool bias_defined) {
   TORCH_CHECK(false, "mkldnn_convolution_backward_weights: ATen not compiled with MKLDNN support");
 }
 
-std::tuple<Tensor, Tensor, Tensor> mkldnn_convolution_backward(
+static std::tuple<Tensor, Tensor, Tensor> mkldnn_convolution_backward(
     const Tensor& input, const Tensor& grad_output_t, const Tensor& weight,
     IntArrayRef padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups, std::array<bool,3> output_mask) {
   TORCH_CHECK(false, "mkldnn_convolution_backward: ATen not compiled with MKLDNN support");
@@ -47,27 +47,27 @@ std::tuple<Tensor, Tensor, Tensor> mkldnn_convolution_backward(
 
 REGISTER_NO_CPU_DISPATCH(mkldnn_convolution_backward_stub);
 
-Tensor mkldnn_convolution_transpose(
+static Tensor mkldnn_convolution_transpose(
     const Tensor& input, const Tensor& weight, const c10::optional<Tensor>& bias_opt,
     IntArrayRef padding, IntArrayRef output_padding, IntArrayRef stride, IntArrayRef dilation, int64_t groups) {
   TORCH_CHECK(false, "mkldnn_convolution_transpose: ATen not compiled with MKLDNN support");
 }
 
-Tensor mkldnn_convolution_transpose_backward_input(
+static Tensor mkldnn_convolution_transpose_backward_input(
     IntArrayRef input_size, const Tensor& grad_output, const Tensor& weight,
     IntArrayRef padding, IntArrayRef output_padding, IntArrayRef stride, IntArrayRef dilation,
     int64_t groups, bool bias_defined) {
   TORCH_CHECK(false, "mkldnn_convolution_transpose_backward_input: ATen not compiled with MKLDNN support");
 }
 
-std::tuple<Tensor, Tensor> mkldnn_convolution_transpose_backward_weights(
+static std::tuple<Tensor, Tensor> mkldnn_convolution_transpose_backward_weights(
     IntArrayRef weight_size, const Tensor& grad_output, const Tensor& input,
     IntArrayRef padding, IntArrayRef output_padding, IntArrayRef stride, IntArrayRef dilation,
     int64_t groups, bool bias_defined) {
   TORCH_CHECK(false, "mkldnn_convolution_transpose_backward_weights: ATen not compiled with MKLDNN support");
 }
 
-std::tuple<Tensor, Tensor, Tensor> mkldnn_convolution_transpose_backward(
+static std::tuple<Tensor, Tensor, Tensor> mkldnn_convolution_transpose_backward(
     const Tensor& input, const Tensor& grad_output_t, const Tensor& weight,
     IntArrayRef padding, IntArrayRef output_padding, IntArrayRef stride, IntArrayRef dilation,
     int64_t groups, std::array<bool,3> output_mask) {
@@ -6,6 +6,7 @@
 #ifndef AT_PER_OPERATOR_HEADERS
 #include <ATen/NativeFunctions.h>
 #else
+#include <ATen/ops/_native_batch_norm_legit_native.h>
 #include <ATen/ops/_to_dense_native.h>
 #include <ATen/ops/empty_native.h>
 #include <ATen/ops/native_batch_norm_backward_native.h>
@@ -34,7 +35,7 @@ std::tuple<Tensor, Tensor, Tensor> mkldnn_batch_norm_backward(
   TORCH_CHECK(false, "mkldnn_batch_norm_backward: ATen not compiled with MKLDNN support");
 }
 
-std::tuple<Tensor, Tensor, Tensor> mkldnn_layer_norm_last_index_weight_bias_f32(
+static std::tuple<Tensor, Tensor, Tensor> mkldnn_layer_norm_last_index_weight_bias_f32(
     const Tensor& input,
     IntArrayRef normalized_shape, const Tensor& weight, const Tensor& bias,
     double eps, bool inplace) {
@@ -106,7 +106,7 @@ namespace at {
 namespace native {
 
 
-Tensor mkldnn_view_symint(const Tensor& self, c10::SymIntArrayRef size) {
+static Tensor mkldnn_view_symint(const Tensor& self, c10::SymIntArrayRef size) {
   return mkldnn_view(self, C10_AS_INTARRAYREF_SLOW(size));
 }
 
@@ -5,6 +5,7 @@
 #include <ATen/NativeFunctions.h>
 #else
 #include <ATen/ops/is_nonzero_native.h>
+#include <ATen/ops/_foobar_native.h>
 #include <ATen/ops/_test_functorch_fallback_native.h>
 #endif
 
@@ -21,6 +21,7 @@
 #include <ATen/ops/quantized_max_pool1d_native.h>
 #include <ATen/ops/quantized_max_pool2d.h>
 #include <ATen/ops/quantized_max_pool2d_native.h>
+#include <ATen/ops/quantized_max_pool3d_native.h>
 #endif
 
 #include <algorithm>
@@ -32,6 +32,7 @@
 #include <ATen/ops/_sparse_csr_tensor_unsafe_native.h>
 #include <ATen/ops/_sparse_mm_reduce_impl_backward_native.h>
 #include <ATen/ops/_sparse_mm_reduce_impl_backward_native.h>
+#include <ATen/ops/_sparse_mm_reduce_impl_native.h>
 #include <ATen/ops/_unique.h>
 #include <ATen/ops/abs.h>
 #include <ATen/ops/abs_native.h>
@@ -464,7 +465,7 @@ CREATE_UNARY_UFUNC(tan);
 CREATE_UNARY_UFUNC(tanh);
 CREATE_UNARY_UFUNC(trunc);
 CREATE_UNARY_UFUNC(conj_physical);
-CREATE_UNARY_UFUNC(relu);
+static CREATE_UNARY_UFUNC(relu);
 
 // With addition of `round.decimals` overload, using CREATE_UNARY_UFUNC leads
 // to unresolved overload.
@@ -776,18 +777,6 @@ Tensor _sparse_csr_mm(const Tensor& mat1, const Tensor& mat2) {
       1.0);
 }
 
-Tensor _sparse_csr_addmm(
-    const Tensor& t,
-    const SparseCsrTensor& sparse,
-    const Tensor& dense,
-    const Scalar& beta,
-    const Scalar& alpha) {
-  // _sparse_addmm forward is functionally equivalent to addmm; it's
-  // just the backward that is different. This technically does an
-  // unnecessary redispatch, I was too lazy to make it not do that
-  return at::addmm(t, sparse, dense, beta, alpha);
-}
-
 // Functions for element-wise addition.
 Tensor add_sparse_csr(
     const Tensor& self,
@@ -188,7 +188,12 @@ COALESCED_UNARY_UFUNC(sqrt);
 COALESCED_UNARY_UFUNC(tan);
 COALESCED_UNARY_UFUNC(tanh);
 COALESCED_UNARY_UFUNC(trunc);
+// relu function has no declaration, it may be unused in Pytorch.
+// But we keep it and ignore the warning here until verified in the future.
+#pragma clang diagnostic push
+#pragma clang diagnostic ignored "-Wmissing-prototypes"
 COALESCED_UNARY_UFUNC(relu);
+#pragma clang diagnostic pop
 
 COALESCED_UNARY_UFUNC_NO_INPLACE(signbit);
 COALESCED_UNARY_UFUNC_NO_INPLACE(isneginf);
@@ -1,6 +1,7 @@
 #ifdef USE_XNNPACK
 
 #include <ATen/native/xnnpack/Common.h>
+#include <ATen/native/xnnpack/Engine.h>
 #include <ATen/native/utils/Factory.h>
 
 namespace at {
@@ -18,7 +19,7 @@ bool use_hardswish(
       true;
 }
 
-Tensor& hardswish_impl(Tensor& input, Tensor& output) {
+static Tensor& hardswish_impl(Tensor& input, Tensor& output) {
   using namespace internal;
 
   xnn_operator_t hardswish_op{};
@@ -1,7 +1,8 @@
 #ifdef USE_XNNPACK
 
-#include <ATen/native/xnnpack/Common.h>
 #include <ATen/native/utils/Factory.h>
+#include <ATen/native/xnnpack/Common.h>
+#include <ATen/native/xnnpack/Engine.h>
 #include <ATen/native/xnnpack/Pooling.h>
 
 namespace at {
@@ -1,6 +1,7 @@
 #ifdef USE_XNNPACK
 
 #include <ATen/native/xnnpack/Common.h>
+#include <ATen/native/xnnpack/Engine.h>
 #include <ATen/native/utils/Factory.h>
 
 namespace at {
@@ -2,11 +2,12 @@
 
 #include <vector>
 
-#include <ATen/native/xnnpack/Common.h>
 #include <ATen/native/ConvUtils.h>
 #include <ATen/native/utils/Factory.h>
 #include <ATen/native/utils/ParamUtils.h>
+#include <ATen/native/xnnpack/Common.h>
 #include <ATen/native/xnnpack/Convolution.h>
+#include <ATen/native/xnnpack/Engine.h>
 #include <c10/util/irange.h>
 
 namespace at {
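The xnnpack hunks above and the header hunks just below take another route: each .cpp now includes ATen/native/xnnpack/Engine.h, and the headers gain declarations (convolution2d, use_linear, linear) for the entry points the .cpp files define, so every definition sees a prior prototype. Schematically, with hypothetical files:

// engine.h (hypothetical)
int run_engine();

// engine.cpp (hypothetical)
#include "engine.h"             // prototype now precedes the definition
int run_engine() { return 0; }  // no -Wmissing-prototypes warning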
@@ -62,6 +62,15 @@ Tensor run(ContextConv2D& context, const Tensor& input);
 
 } // namespace convolution2d
 } // namespace internal
+
+Tensor convolution2d(
+    const Tensor& input,
+    const Tensor& weight,
+    const Tensor& bias,
+    const IntArrayRef padding,
+    const IntArrayRef stride,
+    const IntArrayRef dilation,
+    const int64_t groups);
 } // namespace xnnpack
 } // namespace native
 } // namespace at
@@ -32,6 +32,16 @@ ContextLinear create(
 Tensor run(const ContextLinear& context, const Tensor& input);
 } // namespace linear
 } // namespace internal
+
+bool use_linear(
+    const Tensor& input,
+    const Tensor& weight,
+    const Tensor& bias);
+
+Tensor linear(
+    const Tensor& input,
+    const Tensor& weight,
+    const Tensor& bias);
 } // namespace xnnpack
 } // namespace native
 } // namespace at
@@ -1,8 +1,9 @@
 #ifdef USE_XNNPACK
 
 #include <ATen/native/Pool.h>
-#include <ATen/native/xnnpack/Common.h>
 #include <ATen/native/utils/Factory.h>
+#include <ATen/native/xnnpack/Common.h>
+#include <ATen/native/xnnpack/Engine.h>
 #include <ATen/native/xnnpack/Pooling.h>
 
 namespace at {
@@ -18,6 +18,7 @@ namespace native {
 // This file contains a number of kernels for aten functions that are fully code-generated.
 // TODO: rename this file to something more generic.
 
+namespace {
 at::Tensor clone_arg(const at::Tensor& t) {
   return t.clone();
 }
@@ -59,6 +60,7 @@ void resize_out_helper(const at::TensorList& dst, const at::TensorList& src) {
     at::native::resize_output(dst[i], src[i].sizes());
   }
 }
+}
 
 
 ${CompositeViewCopyKernel_Definitions}
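Wrapping the code-generated helpers in an anonymous namespace, as the template hunks above do, is equivalent to marking them static: members of an unnamed namespace have internal linkage, so the warning does not apply. A minimal sketch (hypothetical helper):

namespace {
int double_it(int x) { return 2 * x; }  // internal linkage: no prototype required
} // namespace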
@@ -22,9 +22,10 @@ class VulkanImplRegistrar {
 };
 
 at::Tensor& vulkan_copy_(at::Tensor& self, const at::Tensor& src);
+} // namespace vulkan
 
 namespace native {
 bool is_vulkan_available();
 }// namespace native
 
-} // namespace vulkan
 } // namespace at
@@ -62,6 +62,7 @@ class TORCH_API /*alignas(kCacheLineSize)*/ ThreadPool {
   size_t minWorkSize_;
 };
 
+size_t getDefaultNumThreads();
 } // namespace caffe2
 
 #endif // CAFFE2_UTILS_THREADPOOL_H_
@@ -8,6 +8,7 @@
 #include <torch/csrc/autograd/VariableTypeUtils.h>
 #include <torch/csrc/autograd/autograd.h>
 #include <torch/csrc/autograd/functions/utils.h>
+#include <torch/csrc/autograd/generated/VariableType.h>
 #include <torch/csrc/utils/memory.h>
 #include <torch/library.h>
 
@@ -22,7 +23,7 @@ namespace torch {
 namespace autograd {
 namespace VariableType {
 
-std::vector<at::DeprecatedTypeProperties*> allTypesForBackends(
+static std::vector<at::DeprecatedTypeProperties*> allTypesForBackends(
     at::ArrayRef<at::Backend> backends) {
   std::vector<DeprecatedTypeProperties*> res;
   res.reserve(backends.size());
@@ -37,16 +38,16 @@ std::vector<at::DeprecatedTypeProperties*> allTypesForBackends(
   return res;
 }
 
-C10_EXPORT std::vector<at::DeprecatedTypeProperties*> allCPUTypes() {
+std::vector<at::DeprecatedTypeProperties*> allCPUTypes() {
   return allTypesForBackends({Backend::CPU, Backend::SparseCPU});
 }
 
-C10_EXPORT std::vector<at::DeprecatedTypeProperties*> allCUDATypes() {
+std::vector<at::DeprecatedTypeProperties*> allCUDATypes() {
   at::globalContext().lazyInitCUDA();
   return allTypesForBackends({Backend::CUDA, Backend::SparseCUDA});
 }
 
-C10_EXPORT std::vector<at::DeprecatedTypeProperties*> allXPUTypes() {
+std::vector<at::DeprecatedTypeProperties*> allXPUTypes() {
   return allTypesForBackends({Backend::XPU, Backend::SparseXPU});
 }
 
@@ -375,7 +376,7 @@ namespace ADInplaceOrView {
       : (at::GradMode::is_enabled() ? CreationMeta::DEFAULT \
                                     : CreationMeta::NO_GRAD_MODE)
 
-Tensor& copy_(
+static Tensor& copy_(
     c10::DispatchKeySet ks,
     Tensor& self,
     const Tensor& src,
@@ -389,7 +390,7 @@ Tensor& copy_(
   return self;
 }
 
-const Tensor& resize_(
+static const Tensor& resize_(
     c10::DispatchKeySet ks,
     const Tensor& self,
     SymIntArrayRef size,
@@ -413,7 +414,7 @@ const Tensor& resize_(
   return self;
 }
 
-const Tensor& resize_as_(
+static const Tensor& resize_as_(
     c10::DispatchKeySet ks,
     const Tensor& self,
     const Tensor& the_template,
@@ -438,7 +439,7 @@ const Tensor& resize_as_(
   return self;
 }
 
-Tensor detach(c10::DispatchKeySet ks, const Tensor& self) {
+static Tensor detach(c10::DispatchKeySet ks, const Tensor& self) {
   auto out = ([&]() {
     at::AutoDispatchBelowADInplaceOrView guard;
     return at::_ops::detach::redispatch(
@@ -460,7 +461,10 @@ Tensor detach(c10::DispatchKeySet ks, const Tensor& self) {
   return result;
 }
 
-Tensor _fw_primal(c10::DispatchKeySet ks, const Tensor& self, int64_t level) {
+static Tensor _fw_primal(
+    c10::DispatchKeySet ks,
+    const Tensor& self,
+    int64_t level) {
   auto tmp = ([&]() {
     at::AutoDispatchBelowADInplaceOrView guard;
     return at::alias(self);
@@ -484,7 +488,7 @@ Tensor _fw_primal(c10::DispatchKeySet ks, const Tensor& self, int64_t level) {
 }
 
 // NB: This does not redispatch any further
-Tensor _make_dual(
+static Tensor _make_dual(
     c10::DispatchKeySet ks,
     const Tensor& primal,
     const Tensor& tangent,
@@ -40,6 +40,7 @@ extern "C" {
 // This function is needed to avoid superfluous dependency on GNU OpenMP library
 // when cuPTI is linked statically For more details see
 // https://github.com/pytorch/pytorch/issues/51026
+__attribute__((weak)) int acc_get_device_type();
 __attribute__((weak)) int acc_get_device_type() {
   throw std::runtime_error(
       "Dummy implementation of acc_get_device_type is not supposed to be called!");
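acc_get_device_type() must keep external linkage (it is a weak symbol meant to satisfy the linker), so it cannot be made static; instead the hunk adds an explicit prototype immediately before the definition. The same pattern, sketched with a hypothetical extern "C" entry point:

extern "C" int plugin_entry();   // prototype added solely to satisfy -Wmissing-prototypes
extern "C" int plugin_entry() {
  return 0;
}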
@@ -62,6 +62,9 @@ namespace ops {
 // Below are ProcessGroup's corresponding ops for each backend. Ops are but
 // routed through the dispatcher to be dispatched to the appropriate backend.
 // Currently a no-op as the process group does not have a list of backends.
+
+namespace {
+
 #define IMPL_SEND(DEV) \
   c10::intrusive_ptr<Work> send##DEV( \
       at::TensorList tensors, \
@@ -425,6 +428,7 @@ void monitored_barrier_CPU(
       BarrierOptions{device_ids, std::chrono::milliseconds(timeout)},
       wait_all_ranks);
 }
+} // namespace
 
 // register functions to dispatcher
 namespace {
@@ -30,7 +30,7 @@ namespace jit {
 namespace mobile {
 namespace nnc {
 
-std::vector<int64_t> getConstSizes(const BufPtr b) {
+static std::vector<int64_t> getConstSizes(const BufPtr b) {
   std::vector<int64_t> r;
   for (const auto& dim : b->dims()) {
     LongImmPtr imm_dim = to<LongImm>(dim);
@@ -42,7 +42,7 @@ std::vector<int64_t> getConstSizes(const BufPtr b) {
 }
 
 // Construct input-specs vector from the inputs of the original graph
-std::vector<mobile::nnc::InputSpec> toInputSpecs(
+static std::vector<mobile::nnc::InputSpec> toInputSpecs(
     const std::shared_ptr<tensorexpr::TensorExprKernel>& kernel) {
   const std::shared_ptr<Graph>& g = kernel->graph();
   std::vector<mobile::nnc::InputSpec> specs;
@@ -89,7 +89,7 @@ std::vector<mobile::nnc::InputSpec> toInputSpecs(
 // If a symbolic shape can be found in several different positions, we
 // return the first one we find (TODO: maybe we should return all and
 // verify that they all match at runtime).
-std::vector<SymbolicShapePosition> findSymbolicShapePositions(
+static std::vector<SymbolicShapePosition> findSymbolicShapePositions(
     std::shared_ptr<tensorexpr::TensorExprKernel> kernel) {
   std::vector<SymbolicShapePosition> res;
   for (int64_t sym_idx : kernel->getSymbolicShapeInputs()) {
@@ -122,7 +122,7 @@ std::vector<SymbolicShapePosition> findSymbolicShapePositions(
   return res;
 }
 
-std::unique_ptr<Function> compileMethod(
+static std::unique_ptr<Function> compileMethod(
     std::shared_ptr<tensorexpr::TensorExprKernel> kernel,
     const std::string& method_name,
    const std::vector<std::vector<int64_t>>& sizes,
@@ -181,7 +181,7 @@ std::unique_ptr<Function> compileMethod(
   return func;
 }
 
-std::pair<std::unique_ptr<Function>, const std::string> aotCompile(
+static std::pair<std::unique_ptr<Function>, const std::string> aotCompile(
    const std::string& method_name,
    std::shared_ptr<Graph>& g,
    const std::vector<std::vector<int64_t>>& sizes,
@@ -217,7 +217,7 @@ std::pair<std::unique_ptr<Function>, const std::string> aotCompile(
   return std::make_pair(std::move(func), compiled_assembly);
 }
 
-void writeOutputLlvmAssembly(
+static void writeOutputLlvmAssembly(
    const std::string& asm_code,
    const std::string& output_llvm_file_name) {
  std::ofstream output(output_llvm_file_name);
@@ -226,7 +226,7 @@ void writeOutputLlvmAssembly(
       "The compiled llvm assembly code was saved to ", output_llvm_file_name);
 }
 
-std::vector<std::string> split(
+static std::vector<std::string> split(
     char separator,
     const std::string& string,
     bool ignore_empty = true) {
@@ -241,7 +241,7 @@ std::vector<std::string> split(
   return pieces;
 }
 
-std::vector<std::vector<int64_t>> parseInputShapes(
+static std::vector<std::vector<int64_t>> parseInputShapes(
     const std::string& input_dims_s) {
   std::vector<std::string> input_dims_list = split(';', input_dims_s);
   std::vector<std::vector<int64_t>> inputs;
@@ -257,7 +257,7 @@ std::vector<std::vector<int64_t>> parseInputShapes(
   return inputs;
 }
 
-std::vector<at::ScalarType> parseInputTypes(
+static std::vector<at::ScalarType> parseInputTypes(
     const std::string& input_types_str) {
   std::vector<std::string> inputTypes = split(';', input_types_str);
   std::vector<at::ScalarType> scalarTypes;
@@ -277,7 +277,7 @@ std::vector<at::ScalarType> parseInputTypes(
   return scalarTypes;
 }
 
-std::vector<at::MemoryFormat> parseInputMemoryFormats(
+static std::vector<at::MemoryFormat> parseInputMemoryFormats(
     const std::string& input_memory_format_str) {
   std::vector<std::string> memFormatsStr = split(';', input_memory_format_str);
   std::vector<at::MemoryFormat> memFormats;
@@ -295,7 +295,7 @@ std::vector<at::MemoryFormat> parseInputMemoryFormats(
   return memFormats;
 }
 
-std::vector<int64_t> parseInputDynamicShapes(
+static std::vector<int64_t> parseInputDynamicShapes(
     const std::string& dynamic_dims_s) {
   std::vector<std::string> dynamic_dims_list = split(',', dynamic_dims_s);
   std::vector<int64_t> dynamic_dims;
@@ -306,7 +306,7 @@ std::vector<int64_t> parseInputDynamicShapes(
   return dynamic_dims;
 }
 
-std::string getNncKernelId(
+static std::string getNncKernelId(
     const std::string& model_name,
     const std::string& model_version,
     const std::string& method_name) {
@@ -316,7 +316,7 @@ std::string getNncKernelId(
       version_token;
 }
 
-std::string getNncKernelFuncName(
+static std::string getNncKernelFuncName(
     const std::string& model_name,
     const std::string& model_version,
     const std::string& method_name) {
@@ -325,7 +325,8 @@ std::string getNncKernelFuncName(
 
 // Preprocess the graph and returns the processed graph and
 // symbolic values if dynamic input shapes are specified
-std::pair<std::shared_ptr<Graph>, std::vector<int64_t>> preprocessGraphPasses(
+static std::pair<std::shared_ptr<Graph>, std::vector<int64_t>>
+preprocessGraphPasses(
     std::shared_ptr<Graph>& graph,
     const std::vector<c10::optional<at::Tensor>>& example_inputs,
     const std::vector<int64_t>& dynamic_sizes) {
@@ -367,7 +368,7 @@ std::pair<std::shared_ptr<Graph>, std::vector<int64_t>> preprocessGraphPasses(
   return std::make_pair(graph, sym_val);
 }
 
-std::vector<c10::optional<at::Tensor>> generateExampleInputs(
+static std::vector<c10::optional<at::Tensor>> generateExampleInputs(
     const std::vector<std::vector<int64_t>>& inputShapes,
     const std::vector<at::ScalarType>& inputTypes,
     const std::vector<at::MemoryFormat>& inputMemoryFormats) {
@@ -382,7 +383,7 @@ std::vector<c10::optional<at::Tensor>> generateExampleInputs(
   return example_inputs;
 }
 
-c10::IValue preprocess(
+static c10::IValue preprocess(
     const torch::jit::Module& mod,
     const c10::Dict<c10::IValue, c10::IValue>& compile_spec,
     const torch::jit::BackendDebugHandleGenerator& generate_debug_handles) {
@@ -95,7 +95,7 @@ struct LibraryInfo {
   EHFrameHdr eh_frame_hdr_;
 };
 
-const char* process_name() {
+static const char* process_name() {
   static char name[PATH_MAX + 1] = "";
   if (*name == '\0') {
     ssize_t len = readlink("/proc/self/exe", name, PATH_MAX);
@@ -267,6 +267,7 @@ struct UnwindCache {
 static UnwindCache unwind_cache;
 static std::shared_timed_mutex cache_mutex_;
 
+extern "C" void unwind_c(std::vector<void*>* result, int64_t rsp, int64_t rbp);
 extern "C" void unwind_c(std::vector<void*>* result, int64_t rsp, int64_t rbp) {
   std::shared_lock lock(cache_mutex_);
   UnwindState state;
@@ -467,6 +467,7 @@ TORCH_LIBRARY_IMPL(aten, $dispatch_key, m) {
         else:
             deferred_template = CodeTemplate(
                 """\
+TORCH_API void Register${backend_name}${dispatch_key}NativeFunctions();
 TORCH_API void Register${backend_name}${dispatch_key}NativeFunctions() {
     static auto m = MAKE_TORCH_LIBRARY_IMPL(aten, $dispatch_key);
     $dispatch_registrations_body
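The codegen change above makes the emitted registration function carry its own prototype one line above its definition, so the generated files compile cleanly under -Wmissing-prototypes. A plain C++ analog of the generated output (names hypothetical):

void RegisterFooNativeFunctions();      // emitted declaration
void RegisterFooNativeFunctions() {}    // definition now has a prior prototype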