Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00
fix missing-prototypes warnings in torch_cpu (Part 4) (#100849)
This PR fixes more missing-prototypes violations in the torch_cpu source, following PRs #100053, #100147 and #100245.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/100849
Approved by: https://github.com/albanD
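For context, -Wmissing-prototypes fires when a function with external linkage is defined without a prior declaration. The sketch below is illustrative only (the function names are hypothetical, not taken from this PR); it shows the two patterns the diff applies: give file-local helpers internal linkage with `static`, or keep external linkage and let a TORCH_API declaration in a header supply the prototype, as done for the ITT profiler hooks further down. Making helpers static can in turn surface unused-function diagnostics, which is presumably why the Bazel hunk below adds "-Wno-error=unused-function" to TORCH_COPTS.

// Illustrative sketch only; `helper` and `exported_helper` are hypothetical names.
// Before: external linkage with no prior declaration, so -Wmissing-prototypes warns:
//   int helper(int x) { return x + 1; }
// Fix 1: internal linkage for a helper used only in this translation unit.
static int helper(int x) { return x + 1; }

// Fix 2: for functions that other translation units (or shared-library consumers)
// call, declare them in a header instead; the declaration is the missing prototype.
//   // some_header.h
//   TORCH_API int exported_helper(int x);
//   // some_file.cpp
//   int exported_helper(int x) { return x + 2; }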
@@ -1597,6 +1597,7 @@ TORCH_COPTS = COMMON_COPTS + [
"-fvisibility-inlines-hidden",
"-fno-math-errno ",
"-fno-trapping-math",
"-Wno-error=unused-function",
]

torch_sources = {

@@ -763,7 +763,7 @@ IValueComparator getGreaterThanComparator(const IValue& v) {
};
}

static std::ostream& operator<<(std::ostream& out, const ivalue::EnumHolder& v) {
std::ostream& operator<<(std::ostream& out, const ivalue::EnumHolder& v) {
out << v.qualifiedClassName() << "." << v.name();
return out;
}

@@ -1627,7 +1627,7 @@ struct ivalue::EnumHolder : c10::intrusive_ptr_target {

TORCH_API friend std::ostream& operator<<(
std::ostream& out,
const EnumHolder& v);
const ivalue::EnumHolder& v);

TORCH_API const std::string qualifiedClassName() const;
@@ -405,7 +405,7 @@ static std::tuple<Tensor,optional<int64_t>> searchsorted_batch_rule(
TORCH_INTERNAL_ASSERT(false);
}

Tensor bucketize_decomp_Tensor(
static Tensor bucketize_decomp_Tensor(
const Tensor& self,
const Tensor& boundaries,
bool out_int32,

@@ -415,7 +415,7 @@ Tensor bucketize_decomp_Tensor(
return at::searchsorted(boundaries, self, out_int32, right, nullopt, nullopt);
}

Tensor bucketize_decomp_Scalar(
static Tensor bucketize_decomp_Scalar(
const Scalar& self,
const Tensor& boundaries,
bool out_int32,

@@ -374,8 +374,8 @@ TORCH_IMPL_FUNC(softshrink_backward_out) (
shrink_backward_stub(device_type(), *this, lambd);
}

static bool use_mkldnn(const Tensor& input) {
#if AT_MKLDNN_ENABLED()
static bool use_mkldnn(const Tensor& input) {
if (!at::globalContext().userEnabledMkldnn()) {
return false;
}

@@ -386,9 +386,8 @@ static bool use_mkldnn(const Tensor& input) {
(input.device().is_cpu() &&
(((input.scalar_type() == kBFloat16) && mkldnn_bf16_device_check()) ||
(input.scalar_type() == kFloat))); // input is dense layout and bfloat16/float32
#endif
return false;
}
#endif

TORCH_IMPL_FUNC(gelu_out_cpu) (
const Tensor& self, c10::string_view approximate, const Tensor& result
@@ -809,7 +809,7 @@ Tensor& arctan2_out(const Tensor& self, const Tensor& other, Tensor& result) {
return at::atan2_out(result, self, other);
}

Tensor& add_relu_impl(
static Tensor& add_relu_impl(
Tensor& result, const Tensor& self, const Tensor& other, const Scalar& alpha) {
auto iter = TensorIterator::binary_op(result, self, other);
Scalar min_val;

@@ -1003,7 +1003,7 @@ Tensor& mul__scalar_sparse_csr(Tensor& self, const Scalar& other) {
return self;
}

Device correct_out_device(const Tensor& self, const Tensor& other) {
static Device correct_out_device(const Tensor& self, const Tensor& other) {
if (self.device() == at::kCPU){
return other.device();
} else {

@@ -1049,7 +1049,7 @@ Tensor div_zerotensor(const Tensor& self, const Tensor& other) {
}
}

Tensor maybe_add_maybe_sub(const Tensor& self, const Tensor& other, const Scalar& alpha) {
static Tensor maybe_add_maybe_sub(const Tensor& self, const Tensor& other, const Scalar& alpha) {
auto out_device = correct_out_device(self, other);
// hack to use the TensorIterator to get the correct broadcasting and type promotion logic
auto device_ = Device(DeviceType::Meta);

@@ -769,6 +769,7 @@ static void check_input_same_type_as_parameters(
check_input_same_type_as_parameters(input, weight, /*bias=*/ Tensor());
}

#if AT_MKLDNN_ENABLED()
static void check_input_same_type_as_parameters(
const Tensor& input,
const Tensor& weight,

@@ -787,6 +788,7 @@ static void check_input_same_type_as_parameters(
check_input_same_type_as_parameters(input, weight, bias);
}
}
#endif

static auto view4d(const at::Tensor& tensor) -> at::Tensor {
TORCH_CHECK(tensor.ndimension() == 3,
@@ -21,6 +21,7 @@
#include <ATen/NativeFunctions.h>
#else
#include <ATen/ops/_copy_from.h>
#include <ATen/ops/_propagate_xla_data.h>
#include <ATen/ops/copy_native.h>
#include <ATen/ops/empty.h>
#include <ATen/ops/expand_copy.h>

@@ -3,6 +3,11 @@
#include <ATen/WrapDimUtils.h>
#include <ATen/LegacyVmapTransforms.h>

#ifdef AT_PER_OPERATOR_HEADERS
#include <ATen/ops/_add_batch_dim_native.h>
#include <ATen/ops/_remove_batch_dim_native.h>
#endif

namespace at { namespace native {

// Adds a batch dimension to the tensor `self` out-of-place

@@ -1893,7 +1893,7 @@ The behavior depends on the dimensionality of the Tensors as follows:
- Otherwise, we return bmm, after broadcasting and folding the batched dimensions if
there's more than one
*/
Tensor _matmul_impl(
static Tensor _matmul_impl(
Tensor& out,
const Tensor& tensor1,
const Tensor& tensor2) {

@@ -20,7 +20,7 @@

namespace at { namespace native {

void checkLongTensor(const Tensor& tensor) {
static void checkLongTensor(const Tensor& tensor) {
TORCH_CHECK(tensor.dim() == 1 && tensor.device().type() == at::kCPU && tensor.scalar_type() == at::kLong,
"'lengths' argument should be a 1D CPU int64 tensor, but got ",
tensor.dim(), "D ", tensor.device().str(), " ", tensor.scalar_type(), " tensor");

@@ -1807,7 +1807,7 @@ std::tuple<Tensor, Tensor, Tensor> quantized_lstm_data(
std::move(std::get<2>(results)));
}

std::tuple<Tensor, Tensor, Tensor> quantized_lstm_data_legacy(
static std::tuple<Tensor, Tensor, Tensor> quantized_lstm_data_legacy(
const Tensor& data,
const Tensor& batch_sizes,
c10::List<at::Tensor> hx_,
@@ -11,6 +11,7 @@
#include <ATen/ops/resize_as_native.h>
#include <ATen/ops/resize_native.h>
#include <ATen/ops/resize.h>
#include <ATen/ops/_resize_output.h>
#endif

namespace at { namespace native {

@@ -399,7 +399,7 @@ static void build_index_op(
iter.build(config);
}

void check_indices_on_cpu_or_selfdevice(
static void check_indices_on_cpu_or_selfdevice(
const Tensor& self,
const at::MaterializedIOptTensorListRef& indices) {
auto dev = self.device();

@@ -960,7 +960,7 @@ TORCH_IMPL_FUNC(index_add_cpu_out)
}
}

void index_reduce_func_impl(
static void index_reduce_func_impl(
const Tensor& self,
int64_t dim,
const Tensor& index,

@@ -1144,7 +1144,7 @@ static void check_indexarray_range(
}
}

Tensor & index_select_out_cpu_dim1_(
static Tensor & index_select_out_cpu_dim1_(
Tensor & result_contig, const Tensor & self, const Tensor & index_contig) {

auto self_contig = self.contiguous();

@@ -1374,10 +1374,6 @@ Tensor index_select_quantized_cpu_(const Tensor & self, int64_t dim, const Tenso
return at::native::index_select_out_cpu_(self, dim, index, result);
}

Tensor index_select_backward(const Tensor& grad, at::IntArrayRef self_sizes, int64_t dim, const Tensor& index) {
return at::native::index_select_backward_symint(grad, c10::fromIntArrayRefSlow(self_sizes), dim, index);
}

Tensor index_select_backward_symint(const Tensor& grad, c10::SymIntArrayRef self_sizes, int64_t dim, const Tensor& index) {
// for composite compliance, use out-of-place variant of
// `index_add` if index tensor is a Tensor Subclass.

@@ -1532,7 +1528,7 @@ static void scatter_reduce_exclude_self_helper(
});
}

void _scatter_via_index_put(
static void _scatter_via_index_put(
const Tensor& self,
int64_t dim,
const Tensor& index,
@@ -1009,7 +1009,7 @@ Tensor dense_to_sparse_bsc(const Tensor& self, IntArrayRef blocksize, c10::optio
return dense_to_sparse_compressed<Layout::SparseBsc>(self, blocksize, dense_dim_opt);
}

void _check_blocksize_matches(
static void _check_blocksize_matches(
const Tensor& self,
c10::optional<IntArrayRef> blocksize_opt,
const std::string& name) {

@@ -1023,7 +1023,7 @@ void _check_blocksize_matches(
}
}

Tensor sparse_compressed_clone(
static Tensor sparse_compressed_clone(
const Tensor& self,
c10::optional<IntArrayRef> blocksize,
const std::string& name) {

@@ -1046,7 +1046,7 @@ Tensor sparse_compressed_clone(
values.device());
}

Tensor sparse_compressed_to_flipped(
static Tensor sparse_compressed_to_flipped(
const Tensor& self,
c10::optional<IntArrayRef> blocksize,
const std::string& name) {

@@ -1,5 +1,6 @@
#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
#include <ATen/core/Tensor.h>
#include <ATen/native/Unfold3d.h>
#include <ATen/Config.h>
#include <ATen/Dispatch.h>
#include <ATen/Parallel.h>

@@ -10,6 +10,8 @@
#else
#include <ATen/ops/_weight_norm_differentiable_backward_native.h>
#include <ATen/ops/_weight_norm_interface.h>
#include <ATen/ops/_weight_norm_interface_backward_native.h>
#include <ATen/ops/_weight_norm_interface_native.h>
#include <ATen/ops/_weight_norm_native.h>
#include <ATen/ops/empty_strided.h>
#include <ATen/ops/norm_except_dim.h>

@@ -13,7 +13,7 @@ namespace at::native {

inline namespace CPU_CAPABILITY {

void pow_tensor_tensor_kernel(TensorIteratorBase& iter) {
static void pow_tensor_tensor_kernel(TensorIteratorBase& iter) {
const auto dtype = iter.common_dtype();
if (isFloatingType(dtype) || isComplexType(dtype)) {
AT_DISPATCH_FLOATING_AND_COMPLEX_TYPES_AND2(kHalf, kBFloat16, dtype, "pow", [&]() {

@@ -90,7 +90,7 @@ void reciprocal_kernel(TensorIteratorBase& iter);
void rsqrt_kernel(TensorIteratorBase& iter);
void sqrt_kernel(TensorIteratorBase& iter);

void pow_tensor_scalar_kernel(
static void pow_tensor_scalar_kernel(
TensorIteratorBase& iter,
const Scalar& exp_scalar) {
// prevent multiple calls to iter.common_dtype()

@@ -32,6 +32,7 @@ namespace mkl {

namespace {

#if AT_USE_MKL_SPARSE()
c10::MaybeOwned<Tensor> prepare_dense_matrix_for_mkl(
const Tensor& tensor) {
if (tensor.is_non_overlapping_and_dense() ||

@@ -110,7 +111,6 @@ void inline col_indices_and_values_resize_(const Tensor& input, int64_t nnz) {
/*
Resizes `input` tensor and fills it with the data from MKL.
*/
#if AT_USE_MKL_SPARSE()
template <typename scalar_t>
void mkl_result_copy_(const Tensor& input, sparse_matrix_t mkl_desc) {
sparse_index_base_t indexing = SPARSE_INDEX_BASE_ZERO;

@@ -6,7 +6,7 @@
namespace at {
namespace native {

TensorOptions verify_empty_parameters(
static TensorOptions verify_empty_parameters(
const at::Tensor& self,
c10::optional<ScalarType> dtype,
c10::optional<Layout> layout,
@@ -78,64 +78,6 @@ Tensor bmm_nested(const Tensor& self, const Tensor& mat2) {
return output;
}

// utilities support `matmul_nested`
namespace {
// Args:
// self_sizes: the sizes of `self` in `matmul_nested`
// mat2_sizes: the sizes of `mat2` in `matmul_nested`
// buffer_op: the options for new buffer
// sizemat_op: the options for new size matrix
// Returns:
// the batch size of each input underlying tensor, i.e. the product of batch-dimension sizes
// the empty output nested tensor
inline std::tuple<std::vector<int64_t>, Tensor>
matmul_nested_helper(
const std::vector<IntArrayRef>& self_sizes,
const std::vector<IntArrayRef>& mat2_sizes,
const c10::TensorOptions& buffer_op,
const c10::TensorOptions& sizemat_op) {
int64_t ntensors = self_sizes.size(),
ndims = self_sizes[0].size();
std::vector<int64_t> batch_sizes(ntensors, 1);
Tensor sizemat = at::empty({ntensors, ndims}, sizemat_op);
int64_t* sizemat_ptr = sizemat.mutable_data_ptr<int64_t>();
int64_t numel = 0;
for (int64_t i = 0; i < ntensors; i++) {
const IntArrayRef& self_size = self_sizes[i],
& mat2_size = mat2_sizes[i];
int64_t& batch_size = batch_sizes[i];
// batch dimensions
for (int64_t j = 0; j < ndims - 2; j++) {
const int64_t& self_sizej = self_size[j],
& mat2_sizej = mat2_size[j];
TORCH_CHECK(
self_sizej == mat2_sizej,
"matmul: For nested tensors, no broadcasting is currently performed: ",
i, "-th nested matrices in batch at dimension ", j + 1,
" have mismatching sizes ", self_sizej, " and ", mat2_sizej);
sizemat_ptr[j] = self_sizej;
batch_size *= sizemat_ptr[j];
}
// matrix multiplication dimensions
const int64_t& self_size0 = self_size[ndims - 2], & self_size1 = self_size[ndims - 1],
& mat2_size0 = mat2_size[ndims - 2], & mat2_size1 = mat2_size[ndims - 1];
TORCH_CHECK(
self_size1 == mat2_size0,
"matmul: ",
i, "-th nested matrices in batch cannot be multiplied (",
self_size0, "x", self_size1, " and ",
mat2_size0, "x", mat2_size1, ")");
sizemat_ptr[ndims - 2] = self_size0;
sizemat_ptr[ndims - 1] = mat2_size1;
sizemat_ptr += ndims;
numel += batch_size * self_size0 * mat2_size1;
}
Tensor buffer = at::empty(numel, buffer_op);
Tensor output = wrap_buffer(buffer, sizemat);
return std::make_tuple(batch_sizes, output);
}
}

Tensor matmul_with_bmm_nested(const Tensor& self, const Tensor& mat2) {
// Tensor self = self_.contiguous();
// Tensor mat2 = mat2_.contiguous();
@@ -128,7 +128,7 @@ Tensor fake_quantize_per_channel_affine_cachemask_backward(
return dY * mask;
}

Tensor _get_rounded_zero_point(
static Tensor _get_rounded_zero_point(
const Tensor& zero_point,
int64_t quant_min,
int64_t quant_max) {

@@ -133,7 +133,7 @@ Tensor fake_quantize_per_tensor_affine_cachemask_backward(
return dY * mask;
}

int64_t _get_zero_point_from_tensor(
static int64_t _get_zero_point_from_tensor(
const Tensor& zero_point,
int64_t quant_min,
int64_t quant_max,

@@ -286,7 +286,7 @@ std::tuple<double, int64_t> _choose_qparams_per_tensor(
return std::make_tuple(q_params.scale, q_params.zero_point);
}

float calculate_quant_loss(
static float calculate_quant_loss(
const float* input,
int numel,
float xmin,

@@ -171,15 +171,6 @@ Tensor mean_quantized_cpu(
return result;
}

Tensor mean_quantized_cpu(
const Tensor& self,
DimnameList dim,
bool keepdim,
optional<ScalarType> dtype) {
return mean_quantized_cpu(
self, dimnames_to_positions(self, dim), keepdim, dtype);
}

Tensor& mean_out_quantized_cpu(
Tensor& result,
const Tensor& self,

@@ -9,7 +9,7 @@ namespace native {

DEFINE_DISPATCH(qdropout_stub);

Tensor quantized_dropout(
static Tensor quantized_dropout(
const Tensor& qx, double output_scale, int64_t output_zero_point, const Scalar& p, bool training) {
return qx;
}

@@ -35,7 +35,7 @@ DEFINE_DISPATCH(qrelu_leaky_stub);
DEFINE_DISPATCH(qprelu_stub);

#ifdef USE_PYTORCH_QNNPACK
Tensor qnnpack_relu(Tensor input) {
static Tensor qnnpack_relu(Tensor input) {
Tensor qy;
TORCH_CHECK(
input.ndimension() > 0, "qnnpack_relu(): Got empty input tensor");
@@ -122,7 +122,7 @@ bool solve_arange(const Tensor& input, int64_t& start, int64_t& end, int64_t& st
formats with support to batched and dense dimensions.
*/

void _validate_sparse_compressed_tensor_args_worker(const Tensor& compressed_indices, const Tensor& plain_indices, const Tensor& values, const IntArrayRef size, const Layout& layout) {
static void _validate_sparse_compressed_tensor_args_worker(const Tensor& compressed_indices, const Tensor& plain_indices, const Tensor& values, const IntArrayRef size, const Layout& layout) {
// Layout must be Sparse Compressed, 2.4
AT_DISPATCH_ALL_SPARSE_COMPRESSED_LAYOUTS(layout, "validate_sparse_compressed_tensor_args", [&]{});

@@ -321,7 +321,7 @@ void _validate_sparse_bsc_tensor_args(const Tensor& ccol_indices, const Tensor&
// of historical reasons (that ought to be removed in future) and does
// not mean that the corresponding functionality would be CSR layout
// only specific.
SparseCsrTensor new_compressed_tensor(const TensorOptions& options) {
static SparseCsrTensor new_compressed_tensor(const TensorOptions& options) {
// TODO: remove this comment after enabling autograd support for CSR tensor
// constructor.
// TORCH_INTERNAL_ASSERT(impl::variable_excluded_from_dispatch());

@@ -401,7 +401,7 @@ SPARSE_COMPRESSED_TENSOR_UNSAFE(csc, kSparseCsc);
SPARSE_COMPRESSED_TENSOR_UNSAFE(bsr, kSparseBsr);
SPARSE_COMPRESSED_TENSOR_UNSAFE(bsc, kSparseBsc);

DimVector _estimate_sparse_compressed_tensor_size(
static DimVector _estimate_sparse_compressed_tensor_size(
const Tensor& compressed_indices,
const Tensor& plain_indices,
const Tensor& values,

@@ -716,12 +716,6 @@ int64_t dense_dim_sparse_csr(const SparseCsrTensor& self) {
return get_sparse_csr_impl(self)->dense_dim();
}

bool _is_same_size_as_sparse_compressed(
const SparseCsrTensor& self,
const SparseCsrTensor& src) {
return self.sizes().equals(src.sizes());
}

const SparseCsrTensor& resize_as_sparse_compressed_(
const SparseCsrTensor& self,
const SparseCsrTensor& src) {

@@ -341,16 +341,6 @@ inline Tensor get_result_tensor_for_unary_op(F op, const Tensor& input) {
}
} // namespace

static constexpr bool is_mkl_supported() {
#ifdef _MSC_VER
return false;
#elif __APPLE__ || __MACH__
return false;
#else
return true;
#endif
}

// Only accept squares sparse matrices or dense input as a vector
// TODO: Check what happens with MKL, the output error reported with non square
// matrices tends to be high See:

@@ -78,20 +78,6 @@
namespace at::native {

using namespace at::sparse;
// --------------------------------------------------------------------
// Utility functions
// --------------------------------------------------------------------

namespace {

inline SparseTensor get_result_tensor_for_unary_op(const SparseTensor& input) {
if (c10::isIntegralType(input.scalar_type(), /*includeBool=*/true)) {
return at::empty_like(input, input.options().dtype(c10::get_default_dtype()));
}
return at::empty_like(input);
}
}

// --------------------------------------------------------------------
// zero_(SparseTensor)
// --------------------------------------------------------------------
@@ -16,7 +16,7 @@ nnapi_wrapper* nnapi;
// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables)
nnapi_wrapper* check_nnapi;

void load_platform_library() {
static void load_platform_library() {
static int run_once = [](){
nnapi_wrapper_load(&nnapi, &check_nnapi);
CAFFE_ENFORCE(nnapi);

@@ -23,7 +23,7 @@
static int loaded = 0;
static struct nnapi_wrapper nnapi_;
static struct nnapi_wrapper check_nnapi_;
int check__getDeviceCount(uint32_t* numDevices) {
static int check__getDeviceCount(uint32_t* numDevices) {
CAFFE_ENFORCE(nnapi_._getDeviceCount);
int ret = nnapi_._getDeviceCount(numDevices);
// TODO: Maybe add better logging here.

@@ -33,7 +33,7 @@ int check__getDeviceCount(uint32_t* numDevices) {
);
return ret;
}
int check__getDevice(uint32_t devIndex, ANeuralNetworksDevice** device) {
static int check__getDevice(uint32_t devIndex, ANeuralNetworksDevice** device) {
CAFFE_ENFORCE(nnapi_._getDevice);
int ret = nnapi_._getDevice(devIndex,device);
// TODO: Maybe add better logging here.

@@ -43,7 +43,7 @@ int check__getDevice(uint32_t devIndex, ANeuralNetworksDevice** device) {
);
return ret;
}
int check_Device_getName(const ANeuralNetworksDevice* device, const char** name) {
static int check_Device_getName(const ANeuralNetworksDevice* device, const char** name) {
CAFFE_ENFORCE(nnapi_.Device_getName);
int ret = nnapi_.Device_getName(device,name);
// TODO: Maybe add better logging here.

@@ -53,7 +53,7 @@ int check_Device_getName(const ANeuralNetworksDevice* device, const char** name)
);
return ret;
}
int check_Device_getVersion(const ANeuralNetworksDevice* device, const char** version) {
static int check_Device_getVersion(const ANeuralNetworksDevice* device, const char** version) {
CAFFE_ENFORCE(nnapi_.Device_getVersion);
int ret = nnapi_.Device_getVersion(device,version);
// TODO: Maybe add better logging here.

@@ -63,7 +63,7 @@ int check_Device_getVersion(const ANeuralNetworksDevice* device, const char** ve
);
return ret;
}
int check_Device_getFeatureLevel(const ANeuralNetworksDevice* device, int64_t* featureLevel) {
static int check_Device_getFeatureLevel(const ANeuralNetworksDevice* device, int64_t* featureLevel) {
CAFFE_ENFORCE(nnapi_.Device_getFeatureLevel);
int ret = nnapi_.Device_getFeatureLevel(device,featureLevel);
// TODO: Maybe add better logging here.

@@ -73,7 +73,7 @@ int check_Device_getFeatureLevel(const ANeuralNetworksDevice* device, int64_t* f
);
return ret;
}
int check_Model_getSupportedOperationsForDevices( const ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices, uint32_t numDevices, bool* supportedOps) {
static int check_Model_getSupportedOperationsForDevices( const ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices, uint32_t numDevices, bool* supportedOps) {
CAFFE_ENFORCE(nnapi_.Model_getSupportedOperationsForDevices);
int ret = nnapi_.Model_getSupportedOperationsForDevices(model,devices,numDevices,supportedOps);
// TODO: Maybe add better logging here.

@@ -83,7 +83,7 @@ int check_Model_getSupportedOperationsForDevices( const ANeuralNetworksModel* mo
);
return ret;
}
int check_Compilation_createForDevices(ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices, uint32_t numDevices, ANeuralNetworksCompilation** compilation) {
static int check_Compilation_createForDevices(ANeuralNetworksModel* model, const ANeuralNetworksDevice* const* devices, uint32_t numDevices, ANeuralNetworksCompilation** compilation) {
CAFFE_ENFORCE(nnapi_.Compilation_createForDevices);
int ret = nnapi_.Compilation_createForDevices(model,devices,numDevices,compilation);
// TODO: Maybe add better logging here.

@@ -93,7 +93,7 @@ int check_Compilation_createForDevices(ANeuralNetworksModel* model, const ANeura
);
return ret;
}
int check_Execution_compute(ANeuralNetworksExecution* execution) {
static int check_Execution_compute(ANeuralNetworksExecution* execution) {
CAFFE_ENFORCE(nnapi_.Execution_compute);
int ret = nnapi_.Execution_compute(execution);
// TODO: Maybe add better logging here.

@@ -103,7 +103,7 @@ int check_Execution_compute(ANeuralNetworksExecution* execution) {
);
return ret;
}
int check_Memory_createFromFd(size_t size, int protect, int fd, size_t offset, ANeuralNetworksMemory** memory) {
static int check_Memory_createFromFd(size_t size, int protect, int fd, size_t offset, ANeuralNetworksMemory** memory) {
CAFFE_ENFORCE(nnapi_.Memory_createFromFd);
int ret = nnapi_.Memory_createFromFd(size,protect,fd,offset,memory);
// TODO: Maybe add better logging here.

@@ -113,11 +113,11 @@ int check_Memory_createFromFd(size_t size, int protect, int fd, size_t offset, A
);
return ret;
}
void check_Memory_free(ANeuralNetworksMemory* memory) {
static void check_Memory_free(ANeuralNetworksMemory* memory) {
CAFFE_ENFORCE(nnapi_.Memory_free);
nnapi_.Memory_free(memory);
}
int check_Model_create(ANeuralNetworksModel** model) {
static int check_Model_create(ANeuralNetworksModel** model) {
CAFFE_ENFORCE(nnapi_.Model_create);
int ret = nnapi_.Model_create(model);
// TODO: Maybe add better logging here.

@@ -127,11 +127,11 @@ int check_Model_create(ANeuralNetworksModel** model) {
);
return ret;
}
void check_Model_free(ANeuralNetworksModel* model) {
static void check_Model_free(ANeuralNetworksModel* model) {
CAFFE_ENFORCE(nnapi_.Model_free);
nnapi_.Model_free(model);
}
int check_Model_finish(ANeuralNetworksModel* model) {
static int check_Model_finish(ANeuralNetworksModel* model) {
CAFFE_ENFORCE(nnapi_.Model_finish);
int ret = nnapi_.Model_finish(model);
// TODO: Maybe add better logging here.
@@ -141,7 +141,7 @@ int check_Model_finish(ANeuralNetworksModel* model) {
);
return ret;
}
int check_Model_addOperand(ANeuralNetworksModel* model, const ANeuralNetworksOperandType* type) {
static int check_Model_addOperand(ANeuralNetworksModel* model, const ANeuralNetworksOperandType* type) {
CAFFE_ENFORCE(nnapi_.Model_addOperand);
int ret = nnapi_.Model_addOperand(model,type);
// TODO: Maybe add better logging here.

@@ -151,7 +151,7 @@ int check_Model_addOperand(ANeuralNetworksModel* model, const ANeuralNetworksOpe
);
return ret;
}
int check_Model_setOperandValue(ANeuralNetworksModel* model, int32_t index, const void* buffer, size_t length) {
static int check_Model_setOperandValue(ANeuralNetworksModel* model, int32_t index, const void* buffer, size_t length) {
CAFFE_ENFORCE(nnapi_.Model_setOperandValue);
int ret = nnapi_.Model_setOperandValue(model,index,buffer,length);
// TODO: Maybe add better logging here.

@@ -161,7 +161,7 @@ int check_Model_setOperandValue(ANeuralNetworksModel* model, int32_t index, cons
);
return ret;
}
int check_Model_setOperandValueFromMemory(ANeuralNetworksModel* model, int32_t index, const ANeuralNetworksMemory* memory, size_t offset, size_t length) {
static int check_Model_setOperandValueFromMemory(ANeuralNetworksModel* model, int32_t index, const ANeuralNetworksMemory* memory, size_t offset, size_t length) {
CAFFE_ENFORCE(nnapi_.Model_setOperandValueFromMemory);
int ret = nnapi_.Model_setOperandValueFromMemory(model,index,memory,offset,length);
// TODO: Maybe add better logging here.

@@ -171,7 +171,7 @@ int check_Model_setOperandValueFromMemory(ANeuralNetworksModel* model, int32_t i
);
return ret;
}
int check_Model_addOperation(ANeuralNetworksModel* model, ANeuralNetworksOperationType type, uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, const uint32_t* outputs) {
static int check_Model_addOperation(ANeuralNetworksModel* model, ANeuralNetworksOperationType type, uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, const uint32_t* outputs) {
CAFFE_ENFORCE(nnapi_.Model_addOperation);
int ret = nnapi_.Model_addOperation(model,type,inputCount,inputs,outputCount,outputs);
// TODO: Maybe add better logging here.

@@ -181,7 +181,7 @@ int check_Model_addOperation(ANeuralNetworksModel* model, ANeuralNetworksOperati
);
return ret;
}
int check_Model_identifyInputsAndOutputs(ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, const uint32_t* outputs) {
static int check_Model_identifyInputsAndOutputs(ANeuralNetworksModel* model, uint32_t inputCount, const uint32_t* inputs, uint32_t outputCount, const uint32_t* outputs) {
CAFFE_ENFORCE(nnapi_.Model_identifyInputsAndOutputs);
int ret = nnapi_.Model_identifyInputsAndOutputs(model,inputCount,inputs,outputCount,outputs);
// TODO: Maybe add better logging here.

@@ -191,7 +191,7 @@ int check_Model_identifyInputsAndOutputs(ANeuralNetworksModel* model, uint32_t i
);
return ret;
}
int check_Model_relaxComputationFloat32toFloat16(ANeuralNetworksModel* model, bool allow) {
static int check_Model_relaxComputationFloat32toFloat16(ANeuralNetworksModel* model, bool allow) {
CAFFE_ENFORCE(nnapi_.Model_relaxComputationFloat32toFloat16);
int ret = nnapi_.Model_relaxComputationFloat32toFloat16(model,allow);
// TODO: Maybe add better logging here.

@@ -201,7 +201,7 @@ int check_Model_relaxComputationFloat32toFloat16(ANeuralNetworksModel* model, bo
);
return ret;
}
int check_Compilation_create(ANeuralNetworksModel* model, ANeuralNetworksCompilation** compilation) {
static int check_Compilation_create(ANeuralNetworksModel* model, ANeuralNetworksCompilation** compilation) {
CAFFE_ENFORCE(nnapi_.Compilation_create);
int ret = nnapi_.Compilation_create(model,compilation);
// TODO: Maybe add better logging here.

@@ -211,11 +211,11 @@ int check_Compilation_create(ANeuralNetworksModel* model, ANeuralNetworksCompila
);
return ret;
}
void check_Compilation_free(ANeuralNetworksCompilation* compilation) {
static void check_Compilation_free(ANeuralNetworksCompilation* compilation) {
CAFFE_ENFORCE(nnapi_.Compilation_free);
nnapi_.Compilation_free(compilation);
}
int check_Compilation_setPreference(ANeuralNetworksCompilation* compilation, int32_t preference) {
static int check_Compilation_setPreference(ANeuralNetworksCompilation* compilation, int32_t preference) {
CAFFE_ENFORCE(nnapi_.Compilation_setPreference);
int ret = nnapi_.Compilation_setPreference(compilation,preference);
// TODO: Maybe add better logging here.

@@ -225,7 +225,7 @@ int check_Compilation_setPreference(ANeuralNetworksCompilation* compilation, int
);
return ret;
}
int check_Compilation_finish(ANeuralNetworksCompilation* compilation) {
static int check_Compilation_finish(ANeuralNetworksCompilation* compilation) {
CAFFE_ENFORCE(nnapi_.Compilation_finish);
int ret = nnapi_.Compilation_finish(compilation);
// TODO: Maybe add better logging here.

@@ -235,7 +235,7 @@ int check_Compilation_finish(ANeuralNetworksCompilation* compilation) {
);
return ret;
}
int check_Execution_create(ANeuralNetworksCompilation* compilation, ANeuralNetworksExecution** execution) {
static int check_Execution_create(ANeuralNetworksCompilation* compilation, ANeuralNetworksExecution** execution) {
CAFFE_ENFORCE(nnapi_.Execution_create);
int ret = nnapi_.Execution_create(compilation,execution);
// TODO: Maybe add better logging here.
@@ -245,11 +245,11 @@ int check_Execution_create(ANeuralNetworksCompilation* compilation, ANeuralNetwo
);
return ret;
}
void check_Execution_free(ANeuralNetworksExecution* execution) {
static void check_Execution_free(ANeuralNetworksExecution* execution) {
CAFFE_ENFORCE(nnapi_.Execution_free);
nnapi_.Execution_free(execution);
}
int check_Execution_setInput(ANeuralNetworksExecution* execution, int32_t index, const ANeuralNetworksOperandType* type, const void* buffer, size_t length) {
static int check_Execution_setInput(ANeuralNetworksExecution* execution, int32_t index, const ANeuralNetworksOperandType* type, const void* buffer, size_t length) {
CAFFE_ENFORCE(nnapi_.Execution_setInput);
int ret = nnapi_.Execution_setInput(execution,index,type,buffer,length);
// TODO: Maybe add better logging here.

@@ -259,7 +259,7 @@ int check_Execution_setInput(ANeuralNetworksExecution* execution, int32_t index,
);
return ret;
}
int check_Execution_setInputFromMemory(ANeuralNetworksExecution* execution, int32_t index, const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory, size_t offset, size_t length) {
static int check_Execution_setInputFromMemory(ANeuralNetworksExecution* execution, int32_t index, const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory, size_t offset, size_t length) {
CAFFE_ENFORCE(nnapi_.Execution_setInputFromMemory);
int ret = nnapi_.Execution_setInputFromMemory(execution,index,type,memory,offset,length);
// TODO: Maybe add better logging here.

@@ -269,7 +269,7 @@ int check_Execution_setInputFromMemory(ANeuralNetworksExecution* execution, int3
);
return ret;
}
int check_Execution_setOutput(ANeuralNetworksExecution* execution, int32_t index, const ANeuralNetworksOperandType* type, void* buffer, size_t length) {
static int check_Execution_setOutput(ANeuralNetworksExecution* execution, int32_t index, const ANeuralNetworksOperandType* type, void* buffer, size_t length) {
CAFFE_ENFORCE(nnapi_.Execution_setOutput);
int ret = nnapi_.Execution_setOutput(execution,index,type,buffer,length);
// TODO: Maybe add better logging here.

@@ -279,7 +279,7 @@ int check_Execution_setOutput(ANeuralNetworksExecution* execution, int32_t index
);
return ret;
}
int check_Execution_setOutputFromMemory(ANeuralNetworksExecution* execution, int32_t index, const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory, size_t offset, size_t length) {
static int check_Execution_setOutputFromMemory(ANeuralNetworksExecution* execution, int32_t index, const ANeuralNetworksOperandType* type, const ANeuralNetworksMemory* memory, size_t offset, size_t length) {
CAFFE_ENFORCE(nnapi_.Execution_setOutputFromMemory);
int ret = nnapi_.Execution_setOutputFromMemory(execution,index,type,memory,offset,length);
// TODO: Maybe add better logging here.

@@ -289,7 +289,7 @@ int check_Execution_setOutputFromMemory(ANeuralNetworksExecution* execution, int
);
return ret;
}
int check_Execution_startCompute(ANeuralNetworksExecution* execution, ANeuralNetworksEvent** event) {
static int check_Execution_startCompute(ANeuralNetworksExecution* execution, ANeuralNetworksEvent** event) {
CAFFE_ENFORCE(nnapi_.Execution_startCompute);
int ret = nnapi_.Execution_startCompute(execution,event);
// TODO: Maybe add better logging here.

@@ -299,7 +299,7 @@ int check_Execution_startCompute(ANeuralNetworksExecution* execution, ANeuralNet
);
return ret;
}
int check_Event_wait(ANeuralNetworksEvent* event) {
static int check_Event_wait(ANeuralNetworksEvent* event) {
CAFFE_ENFORCE(nnapi_.Event_wait);
int ret = nnapi_.Event_wait(event);
// TODO: Maybe add better logging here.

@@ -309,11 +309,11 @@ int check_Event_wait(ANeuralNetworksEvent* event) {
);
return ret;
}
void check_Event_free(ANeuralNetworksEvent* event) {
static void check_Event_free(ANeuralNetworksEvent* event) {
CAFFE_ENFORCE(nnapi_.Event_free);
nnapi_.Event_free(event);
}
int check_Execution_getOutputOperandRank(ANeuralNetworksExecution* execution, int32_t index, uint32_t* rank) {
static int check_Execution_getOutputOperandRank(ANeuralNetworksExecution* execution, int32_t index, uint32_t* rank) {
CAFFE_ENFORCE(nnapi_.Execution_getOutputOperandRank);
int ret = nnapi_.Execution_getOutputOperandRank(execution,index,rank);
// TODO: Maybe add better logging here.

@@ -323,7 +323,7 @@ int check_Execution_getOutputOperandRank(ANeuralNetworksExecution* execution, in
);
return ret;
}
int check_Execution_getOutputOperandDimensions(ANeuralNetworksExecution* execution, int32_t index, uint32_t* dimensions) {
static int check_Execution_getOutputOperandDimensions(ANeuralNetworksExecution* execution, int32_t index, uint32_t* dimensions) {
CAFFE_ENFORCE(nnapi_.Execution_getOutputOperandDimensions);
int ret = nnapi_.Execution_getOutputOperandDimensions(execution,index,dimensions);
// TODO: Maybe add better logging here.
@@ -83,7 +83,7 @@ QTensorImpl* get_qtensorimpl(const TensorBase& self) {
return static_cast<QTensorImpl*>(self.unsafeGetTensorImpl());
}

int64_t get_sub_byte_tensor_size(IntArrayRef sizes, size_t dtype_itemsize, at::ScalarType t) {
static int64_t get_sub_byte_tensor_size(IntArrayRef sizes, size_t dtype_itemsize, at::ScalarType t) {
// NOLINTNEXTLINE(cppcoreguidelines-init-variables)
int64_t element_per_byte;
switch(t) {

@@ -178,7 +178,7 @@ Tensor PerTensorAffineQuantizer::quantize(const Tensor& rtensor) {
return qtensor;
}

void per_tensor_affine_dequantize_impl(
static void per_tensor_affine_dequantize_impl(
Tensor& rtensor,
const Tensor& qtensor,
const double scale,

@@ -228,7 +228,7 @@ Tensor PerChannelAffineQuantizer::quantize(const Tensor& rtensor) {
return qtensor;
}

void per_channel_affine_dequantize_impl(
static void per_channel_affine_dequantize_impl(
Tensor& rtensor,
const Tensor& qtensor,
const Tensor& scale,

@@ -278,7 +278,7 @@ Tensor PerChannelAffineFloatQParamsQuantizer::quantize(const Tensor& rtensor) {
return qtensor;
}

void per_channel_affine_float_q_params_dequantize_impl(
static void per_channel_affine_float_q_params_dequantize_impl(
Tensor& rtensor,
const Tensor& qtensor,
const Tensor& scale,

@@ -22,6 +22,9 @@ class VulkanImplRegistrar {
};

at::Tensor& vulkan_copy_(at::Tensor& self, const at::Tensor& src);
namespace native {
bool is_vulkan_available();
}// namespace native

} // namespace vulkan
} // namespace at
@@ -17,7 +17,7 @@

namespace F = torch::nn::functional;

F::PadFuncOptions::mode_t _get_pad_mode_from_conv_padding_mode(
static F::PadFuncOptions::mode_t _get_pad_mode_from_conv_padding_mode(
torch::nn::detail::conv_padding_mode_t conv_padding_mode) {
F::PadFuncOptions::mode_t pad_mode;
if (c10::get_if<torch::enumtype::kReflect>(&conv_padding_mode)) {

@@ -28,7 +28,7 @@ namespace nn {
/// https://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#cudnnRNNMode_t
enum class CuDNNMode { RNN_RELU = 0, RNN_TANH = 1, LSTM = 2, GRU = 3 };

CuDNNMode get_cudnn_mode_for_rnn(
static CuDNNMode get_cudnn_mode_for_rnn(
detail::RNNOptionsBase::rnn_options_base_mode_t mode) {
if (c10::get_if<enumtype::kRNN_RELU>(&mode)) {
return CuDNNMode::RNN_RELU;

@@ -43,7 +43,7 @@ CuDNNMode get_cudnn_mode_for_rnn(
}
}

Tensor apply_permutation(
static Tensor apply_permutation(
const Tensor& tensor,
const Tensor& permutation,
int64_t dim = 1) {

@@ -397,8 +397,8 @@ template class RNNImplBase<RNNImpl>;

// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RNN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

detail::RNNOptionsBase::rnn_options_base_mode_t compute_rnn_options_base_mode(
RNNOptions::nonlinearity_t nonlinearity) {
static detail::RNNOptionsBase::rnn_options_base_mode_t
compute_rnn_options_base_mode(RNNOptions::nonlinearity_t nonlinearity) {
if (c10::get_if<enumtype::kTanh>(&nonlinearity)) {
return torch::kRNN_TANH;
} else if (c10::get_if<enumtype::kReLU>(&nonlinearity)) {

@@ -187,7 +187,7 @@ std::tuple<double, Tensor> LBFGS::_directional_evaluate(
return std::make_tuple(loss, flat_grad);
}

double _cubic_interpolate(
static double _cubic_interpolate(
double x1,
double f1,
double g1,

@@ -236,7 +236,7 @@ using Function = std::function<std::tuple<double, Tensor>(
const std::vector<Tensor>& x,
double t,
const Tensor& d)>;
std::tuple<double, Tensor, double, int64_t> _strong_wolfe(
static std::tuple<double, Tensor, double, int64_t> _strong_wolfe(
const Function& obj_func,
const std::vector<Tensor>& x,
double t,
@@ -13,7 +13,7 @@

namespace c10d {

ProcessGroup::BackendType strToBackendType(std::string backend) {
static ProcessGroup::BackendType strToBackendType(std::string backend) {
if (backend == "undefined") {
return ProcessGroup::BackendType::UNDEFINED;
} else if (backend == "gloo") {

@@ -29,7 +29,7 @@ ProcessGroup::BackendType strToBackendType(std::string backend) {
}
}

std::string backendTypeToStr(ProcessGroup::BackendType backendType) {
static std::string backendTypeToStr(ProcessGroup::BackendType backendType) {
switch (backendType) {
case ProcessGroup::BackendType::UNDEFINED:
return "undefined";

@@ -2596,7 +2596,7 @@ c10::intrusive_ptr<Work> ProcessGroupGloo::alltoall_base(
return work;
}

at::Tensor& checkSingleTensor(std::vector<at::Tensor>& tensors) {
static at::Tensor& checkSingleTensor(std::vector<at::Tensor>& tensors) {
if (tensors.size() != 1) {
TORCH_CHECK(false, "ProcessGroupGloo::send takes a single tensor");
}

@@ -2610,7 +2610,7 @@ at::Tensor& checkSingleTensor(std::vector<at::Tensor>& tensors) {
return tensor;
}

uint32_t checkTag(int32_t tag) {
static uint32_t checkTag(int32_t tag) {
TORCH_CHECK(tag >= 0, "Tag must be nonnegative");
return (uint32_t)tag;
}

@@ -9,7 +9,7 @@ namespace quantization {

// TODO: The kernels are copied from fbgemm_gpu, we should dedup them later

void FloatToBFloat16Quantized_ref(
static void FloatToBFloat16Quantized_ref(
const float* const input,
const size_t nrows,
const size_t ncols,

@@ -26,7 +26,7 @@ void FloatToBFloat16Quantized_ref(
}
}

void BFloat16QuantizedToFloat_ref(
static void BFloat16QuantizedToFloat_ref(
const at::BFloat16* const input,
const size_t nrows,
const size_t ncols,

@@ -41,7 +41,7 @@ std::unordered_map<std::string, worker_id_t> collectNames(
return nameToId;
}

std::vector<std::string> splitString(
static std::vector<std::string> splitString(
const std::string& s,
const std::string& delim) {
std::vector<std::string> tokens;

@@ -154,7 +154,7 @@ const string storeKeyActiveCallCount = "ACTIVE_CALLS";
const string storeKeyReady = "READY";
static std::atomic<int> barrierId(0);

std::tuple<std::string, std::string, std::string> getNextKeyIds() {
static std::tuple<std::string, std::string, std::string> getNextKeyIds() {
barrierId++;
std::string processCountKey =
fmt::format("{}{}{}", storeKeyProcessCount, storeKeyBarrierId, barrierId);

@@ -7,7 +7,7 @@ namespace torch {
namespace distributed {
namespace rpc {

std::string fromVecToString(const std::vector<char>& vec) {
static std::string fromVecToString(const std::vector<char>& vec) {
return std::string(vec.begin(), vec.end());
}
@@ -1,25 +1,25 @@
#include <c10/macros/Export.h>
#include <ittnotify.h>
#include <torch/csrc/itt_wrapper.h>
#include <torch/csrc/profiler/stubs/base.h>

namespace torch {
namespace profiler {
__itt_domain* _itt_domain = __itt_domain_create("PyTorch");

TORCH_API bool itt_is_available() {
bool itt_is_available() {
return torch::profiler::impl::ittStubs()->enabled();
}

TORCH_API void itt_range_push(const char* msg) {
void itt_range_push(const char* msg) {
__itt_string_handle* hsMsg = __itt_string_handle_create(msg);
__itt_task_begin(_itt_domain, __itt_null, __itt_null, hsMsg);
}

TORCH_API void itt_range_pop() {
void itt_range_pop() {
__itt_task_end(_itt_domain);
}

TORCH_API void itt_mark(const char* msg) {
void itt_mark(const char* msg) {
__itt_string_handle* hsMsg = __itt_string_handle_create(msg);
__itt_task_begin(_itt_domain, __itt_null, __itt_null, hsMsg);
__itt_task_end(_itt_domain);

@@ -1,12 +1,13 @@
#ifndef PROFILER_ITT_H
#define PROFILER_ITT_H
#include <c10/macros/Export.h>

namespace torch {
namespace profiler {
bool itt_is_available();
void itt_range_push(const char* msg);
void itt_range_pop();
void itt_mark(const char* msg);
TORCH_API bool itt_is_available();
TORCH_API void itt_range_push(const char* msg);
TORCH_API void itt_range_pop();
TORCH_API void itt_mark(const char* msg);
} // namespace profiler
} // namespace torch
@@ -55,7 +55,7 @@ T& toGraphFunctionImpl(F& function) {

} // namespace

void placeholderCreator(GraphFunction&) {
static void placeholderCreator(GraphFunction&) {
throw RecursiveMethodCallError();
}

@@ -163,7 +163,7 @@ void Module::to(at::Device device, bool non_blocking) {
to_impl(device, /*dtype=*/c10::nullopt, non_blocking);
}

void module_state_to(
static void module_state_to(
const autograd::Variable& variable,
const c10::optional<at::Device>& device,
const c10::optional<at::ScalarType>& dtype,

@@ -53,7 +53,8 @@ bool hasFusionBackend(at::Device::Type backend_type) {
return getFusionBackends().count(backend_type);
}

const FusedKernelConstructor& getConstructor(at::Device::Type backend_type) {
static const FusedKernelConstructor& getConstructor(
at::Device::Type backend_type) {
std::lock_guard<std::mutex> guard(fusionBackendLock());
return getFusionBackends().at(backend_type);
}

@@ -190,7 +190,7 @@ static void compressContiguous(

// Launches the requested fusion on the given device with the given inputs.
// Output pointers are stored in outputs (to be put on the stack later).
void launchFusion(
static void launchFusion(
const FusedKernel& fusion,
const at::Device device,
const at::ArrayRef<at::Tensor>& inputs,

@@ -12,7 +12,7 @@ namespace torch::jit {
// Transforms a Loop that has both a trip count specified and a loop
// body condition so that the iter count is no longer specified
// and it is recognizable as a python while loop.
void canonicalizeModifiedLoop(Node* n) {
static void canonicalizeModifiedLoop(Node* n) {
LoopView loop(n);
if (loop.loopType() != LoopView::ModifiedLoop) {
return;

@@ -48,7 +48,7 @@ void canonicalizeModifiedLoop(Node* n) {
loop.bodyBlock()->insertOutput(0, new_condition);
}

void canonicalizeModifiedLoops(Block* block) {
static void canonicalizeModifiedLoops(Block* block) {
for (Node* n : block->nodes()) {
for (Block* b : n->blocks()) {
canonicalizeModifiedLoops(b);
@@ -522,7 +522,7 @@ struct ExitTransformer {
std::shared_ptr<Graph> graph_;
};

bool inlineConsecutiveIfs(Node* node) {
static bool inlineConsecutiveIfs(Node* node) {
if (node->kind() != prim::If || node->next()->kind() != prim::If) {
return false;
}

@@ -605,7 +605,7 @@ bool inlineConsecutiveIfs(Node* node) {
// return 1
// else:
// return 2
void inlineConsecutiveIfs(Block* block) {
static void inlineConsecutiveIfs(Block* block) {
for (auto it = block->nodes().begin(), end = block->nodes().end();
it != end;) {
for (Block* b : it->blocks()) {

@@ -30,7 +30,7 @@ void InlineBlockBeforeNode(Node* before_node, Block* block) {
// <body>
// BlockExit(continue_condition, loop_carried_block*)
// }
void inlineLoopCondition(Node* n) {
static void inlineLoopCondition(Node* n) {
Block* body_block = n->blocks().at(0);

auto pre_header = n->blocks().at(1);

@@ -45,7 +45,7 @@ void inlineLoopCondition(Node* n) {
n->eraseBlock(1);
}

void inlineLoopCondition(Block* block) {
static void inlineLoopCondition(Block* block) {
for (Node* n : block->nodes()) {
for (Block* b : n->blocks()) {
inlineLoopCondition(b);

@@ -187,7 +187,7 @@ struct CondValue {
};

enum NoneStatus { ALWAYS, MAYBE, NEVER };
NoneStatus canBeNone(Value* v) {
static NoneStatus canBeNone(Value* v) {
if (v->node()->mustBeNone()) {
return ALWAYS;
}

@@ -5605,7 +5605,7 @@ std::vector<Function*> CompilationUnit::define(
self);
}

void eraseListLiterals(std::shared_ptr<Graph>& graph) {
static void eraseListLiterals(std::shared_ptr<Graph>& graph) {
DepthFirstGraphNodeIterator it(graph);

for (auto next_node = it.next(); next_node != nullptr;) {
@@ -548,17 +548,6 @@ MatchedSchema matchSchema(
throw ErrorReport(loc) << failure_messages.str();
}

MatchedSchema matchSchema(
const ::c10::FunctionSchema& schema,
const SourceRange& loc,
Graph& graph,
at::ArrayRef<Value*> args,
at::ArrayRef<NamedValue> kwargs) {
std::vector<NamedValue> named_args =
fmap(args, [](Value* v) { return NamedValue(v); });
return matchSchema(schema, loc, graph, named_args, kwargs);
}

static std::string prefixLine(
const std::string& str,
const std::string& prefix) {

@@ -110,7 +110,7 @@ void TracingState::delValue(const IValue& var) {
Value* getValueTrace(const IValue& var) {
return getTracingState()->getValue(var);
}
Value* getOptTensorValueTrace(const c10::optional<at::Tensor>& var) {
static Value* getOptTensorValueTrace(const c10::optional<at::Tensor>& var) {
return getValueTrace(IValue(var));
}
Value* TracingState::getValue(const IValue& var) {

@@ -783,19 +783,6 @@ void addInputs(
n->addInput(list_node->output());
}

void addInputs(
Node* n,
const char* name,
c10::optional<caffe2::TypeMeta> opt_dtype) {
if (opt_dtype.has_value()) {
return addInputs(n, name, at::typeMetaToScalarType(*opt_dtype));
} else {
Graph* g = n->owningGraph();
Value* none = g->insertNode(g->createNone())->output();
n->addInput(none);
}
}

void addInputs(Node* n, const char* name, at::IntArrayRef value) {
using ArgumentStash = jit::tracer::ArgumentStash;
std::vector<Value*> info = ArgumentStash::hasIntArrayRef(name)

@@ -1062,7 +1049,7 @@ void ArgumentStash::stashValue(
// Stack trace recording
////////////////////////////////////////////////////////////////////////////////
// no python present so we just do not record source information
void defaultRecordSourceLocation(Node* n) {}
static void defaultRecordSourceLocation(Node* n) {}
std::atomic<decltype(&defaultRecordSourceLocation)> record_source_location(
defaultRecordSourceLocation);
void recordSourceLocation(Node* n) {

@@ -1072,7 +1059,7 @@ void setRecordSourceLocation(void (*v)(Node*)) {
record_source_location.store(v);
}

std::vector<StackEntry> defaultPythonCallstack() {
static std::vector<StackEntry> defaultPythonCallstack() {
return std::vector<StackEntry>();
}
std::atomic<decltype(&defaultPythonCallstack)> python_callstack_fn(

@@ -1084,7 +1071,7 @@ void setPythonCallstack(std::vector<StackEntry> (*v)()) {
python_callstack_fn.store(v);
}

void defaultWarn(const std::string& str) {
static void defaultWarn(const std::string& str) {
TORCH_WARN(str);
}
std::atomic<warn_fn_type> warn_callback{defaultWarn};
@@ -8,13 +8,13 @@

namespace torch::jit {

bool insertableTensor(const at::Tensor& ten) {
static bool insertableTensor(const at::Tensor& ten) {
// bail if tensor has no storage i.e. opaque tensor used in MKLdnn.
// or gradients because we have no way of serializing them & are mutable
return !ten.requires_grad() && ten.has_storage() && !ten.is_nested();
}

bool insertableIValue(const IValue& ivalue) {
static bool insertableIValue(const IValue& ivalue) {
if (ivalue.isInt() || ivalue.isNone() || ivalue.isBool() ||
ivalue.isDouble() || ivalue.isComplexDouble() || ivalue.isString() ||
ivalue.isDevice() || ivalue.isEnum()) {

@@ -122,13 +122,15 @@ static std::ostream& printValueRefs(
// Can't make these two overloads directly a template, it'll be ambiguous with
// the global printer for operator<<.

std::ostream& operator<<(
static std::ostream& operator<<(
std::ostream& out,
const at::ArrayRef<const Value*> nodes) {
return printValueRefs(out, nodes);
}

std::ostream& operator<<(std::ostream& out, const at::ArrayRef<Value*> nodes) {
static std::ostream& operator<<(
std::ostream& out,
const at::ArrayRef<Value*> nodes) {
return printValueRefs(out, nodes);
}

@@ -141,7 +143,7 @@ struct const_value_list_with_types {
: values(values), delim(std::move(delim_)) {}
};

std::ostream& operator<<(
static std::ostream& operator<<(
std::ostream& out,
const const_value_list_with_types& l) {
size_t i = 0;

@@ -967,7 +969,7 @@ void Value::replaceAllUsesDominatedByNodeWith(
uses_.end());
}

size_t findArgument(
static size_t findArgument(
const FunctionSchema& the_schema,
const std::string& unqualName) {
for (const auto i : c10::irange(the_schema.arguments().size())) {

@@ -980,7 +982,7 @@ size_t findArgument(
std::string("Couldn't find an argument called ") + unqualName);
}

size_t findArgument(const FunctionSchema& the_schema, Symbol name) {
static size_t findArgument(const FunctionSchema& the_schema, Symbol name) {
const auto unqualName = name.toUnqualString();
return findArgument(the_schema, unqualName);
}

@@ -2047,7 +2049,7 @@ void inlineCallStackOfNode(
Node* to_replace,
c10::optional<ModuleInstanceInfo> m_info);

void inlineCallStackOfBlock(
static void inlineCallStackOfBlock(
Block* b,
std::unordered_map<InlinedCallStack*, InlinedCallStackPtr>& new_cs_entries,
Function* callee,
@ -14,7 +14,7 @@
|
||||
namespace torch {
|
||||
namespace jit {
|
||||
|
||||
std::unordered_map<std::string, int64_t>& passes_to_current_counter() {
|
||||
static std::unordered_map<std::string, int64_t>& passes_to_current_counter() {
|
||||
static std::unordered_map<std::string, int64_t> passes_to_current_counter;
|
||||
return passes_to_current_counter;
|
||||
}
|
||||
|
@ -95,7 +95,7 @@ uint64_t _get_model_bytecode_version(
return _get_model_bytecode_version_from_bytes(data.get(), size);
}

-uint64_t _get_model_bytecode_version_zip(
+static uint64_t _get_model_bytecode_version_zip(
std::shared_ptr<ReadAdapterInterface> rai) {
if (!check_zip_file(rai)) {
TORCH_CHECK(

@ -238,7 +238,7 @@ std::map<std::string, at::Tensor> mobile_module_to_parameter_map(
"' in deserialized mobile::Module");
}

-std::map<std::string, at::Tensor> _load_parameters_bytes(
+static std::map<std::string, at::Tensor> _load_parameters_bytes(
std::shared_ptr<char> data,
size_t size,
c10::optional<at::Device> device) {

@ -316,7 +316,7 @@ c10::IValue Method::operator()(std::vector<c10::IValue> stack) const {
return stack.front();
}

-c10::optional<std::string> print_type(const c10::Type& t) {
+static c10::optional<std::string> print_type(const c10::Type& t) {
auto namedType = t.cast<c10::NamedType>();
if (namedType && namedType->name()) {
return namedType->name().value().qualifiedName();

@ -4,7 +4,8 @@ namespace torch {
namespace jit {
namespace mobile {

-std::unordered_map<std::string, std::function<void(Stack&)>>& primOpsFnTable() {
+static std::unordered_map<std::string, std::function<void(Stack&)>>&
+primOpsFnTable() {
static std::unordered_map<std::string, std::function<void(Stack&)>>
prim_ops_fn;
return prim_ops_fn;

@ -21,6 +21,7 @@ class SGDParamState {
return std::make_unique<SGDParamState>(
static_cast<const SGDParamState&>(*this));
}
+friend bool operator==(const SGDParamState& lhs, const SGDParamState& rhs);
~SGDParamState() = default;
};
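Aside, not part of the diff: the SGDParamState hunk above takes a different route, declaring the free comparison operator as a friend inside the class so that its out-of-line definition has a visible prior declaration. A stripped-down sketch with hypothetical names:

// sgd_param_state_sketch.h -- illustrative only.
struct ParamState {
  double lr = 0.0;
  // The friend declaration doubles as the prototype for the namespace-scope operator==.
  friend bool operator==(const ParamState& lhs, const ParamState& rhs);
};

// sgd_param_state_sketch.cpp
bool operator==(const ParamState& lhs, const ParamState& rhs) {
  return lhs.lr == rhs.lr;
}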
@ -5,7 +5,7 @@
namespace torch {
namespace jit {

-void AnnotateWarns(Block* b) {
+static void AnnotateWarns(Block* b) {
static std::atomic<int64_t> idx(0);
for (Node* n : b->nodes()) {
for (Block* child_b : n->blocks()) {

@ -83,7 +83,7 @@ c10::AliasAnalysisKind aliasAnalysisIsSpecialCase() {
// Tunable parameter. Set to something larger if it turns out to be better.
static constexpr size_t min_fusion_size = 4;

-bool have_same_shape(at::TensorList inputs) {
+static bool have_same_shape(at::TensorList inputs) {
auto expected_sizes = inputs[0].sizes();
return (std::all_of(
inputs.begin(), inputs.end(), [expected_sizes](const at::Tensor& t) {
@ -91,17 +91,19 @@ bool have_same_shape(at::TensorList inputs) {
}));
}

-bool should_be_transposed(at::TensorList inputs) {
+static bool should_be_transposed(at::TensorList inputs) {
return (std::all_of(inputs.begin(), inputs.end(), [](const at::Tensor& t) {
return t.stride(0) == 1 && t.stride(1) == t.size(0);
}));
}

-std::vector<at::Tensor> transpose_inputs(at::TensorList inputs) {
+static std::vector<at::Tensor> transpose_inputs(at::TensorList inputs) {
return fmap(inputs, [](const at::Tensor& i) { return i.t(); });
}

-bool shape_is_fast_for_reduce(const at::Tensor& lhs, const at::Tensor& rhs) {
+static bool shape_is_fast_for_reduce(
+const at::Tensor& lhs,
+const at::Tensor& rhs) {
size_t l = lhs.size(0);
size_t m = lhs.size(1);
size_t r = rhs.size(1);
@ -251,7 +253,7 @@ struct TreeToken {

enum class Side { LHS, RHS };

-void BatchMMTreeReduce(Block* block, AliasDb& alias_db) {
+static void BatchMMTreeReduce(Block* block, AliasDb& alias_db) {
auto graph = block->owningGraph();

// Look for trees in the block
@ -316,7 +318,7 @@ void BatchMMTreeReduce(Block* block, AliasDb& alias_db) {
}
}

-bool shape_is_fast_for_side(const at::Tensor& other_side_input) {
+static bool shape_is_fast_for_side(const at::Tensor& other_side_input) {
// Cutoff chosed by benchmarking on a TITAN V
return other_side_input.numel() <= 1024 * 2048;
}
@ -368,7 +370,7 @@ RegisterOperators mm_batch_side_reg({Operator(
},
aliasAnalysisIsSpecialCase())});

-std::pair<std::vector<Node*>, std::vector<Node*>> gatherIndependentMMUses(
+static std::pair<std::vector<Node*>, std::vector<Node*>> gatherIndependentMMUses(
Value* value,
AliasDb& alias_db) {
const auto postprocess = [&](std::vector<Node*> mms) {
@ -413,7 +415,7 @@ std::pair<std::vector<Node*>, std::vector<Node*>> gatherIndependentMMUses(
postprocess(std::move(lhses)), postprocess(std::move(rhses)));
}

-void BatchMMSide(Block* block, AliasDb& alias_db) {
+static void BatchMMSide(Block* block, AliasDb& alias_db) {
// NB: 8 is the current loop unrolling factor
static constexpr size_t how_many_is_many = 8;
const auto batch_side = [&](std::vector<Node*>& mms, Side side) {
@ -462,7 +464,7 @@ void BatchMMSide(Block* block, AliasDb& alias_db) {
}
}

-bool hasMutableOperators(Block* block) {
+static bool hasMutableOperators(Block* block) {
for (auto n : block->nodes()) {
if (n->kind().is_aten() && n->schema().is_mutable())
return true;
@ -474,7 +476,7 @@ bool hasMutableOperators(Block* block) {
return false;
}

-bool hasMMOperators(std::shared_ptr<Graph>& graph) {
+static bool hasMMOperators(std::shared_ptr<Graph>& graph) {
DepthFirstGraphNodeIterator it(graph);
Node* n = nullptr;
while ((n = it.next()) != nullptr) {
@ -51,7 +51,7 @@ std::shared_ptr<Graph> Canonicalize(
}

// Which index in b's owning Node is b
-size_t blockIndex(const Block* b) {
+static size_t blockIndex(const Block* b) {
auto n = b->owningNode();
AT_ASSERT(n);
for (size_t i = 0; i < n->blocks().size(); ++i) {
@ -73,7 +73,7 @@ size_t blockIndex(const Block* b) {
* NB: this is not a topological index. Topologically, two nodes in
* different blocks of an if node are not topologically < or > each other.
*/
-bool isBefore(Node* n1, Node* n2) {
+static bool isBefore(Node* n1, Node* n2) {
// Invalid to call with the same node as both args
AT_ASSERT(n1 != n2);

@ -122,7 +122,7 @@ bool isBefore(Node* n1, Node* n2) {
}
}

-bool isBefore(const Use& a, const Use& b) {
+static bool isBefore(const Use& a, const Use& b) {
// If two uses are the same node, we order on offset
if (a.user == b.user) {
return a.offset < b.offset;
@ -131,7 +131,7 @@ bool isBefore(const Use& a, const Use& b) {
return isBefore(a.user, b.user);
}

-bool isAfter(const Use& a, const Use& b) {
+static bool isAfter(const Use& a, const Use& b) {
if (a.user == b.user && a.offset == b.offset) {
return false;
}
@ -157,14 +157,14 @@ c10::optional<const Use> firstOrLastUse(Value* v, bool find_first) {
return extreme_use;
}

-std::vector<c10::optional<const Use>> gatherFirstUses(
+static std::vector<c10::optional<const Use>> gatherFirstUses(
at::ArrayRef<Value*> values) {
return fmap(values, [&](Value* v) -> c10::optional<const Use> {
return firstOrLastUse(v, true);
});
}

-std::vector<size_t> sort_indexes(at::ArrayRef<Value*> values) {
+static std::vector<size_t> sort_indexes(at::ArrayRef<Value*> values) {
// initialize original index locations
std::vector<size_t> idx(values.size());
std::iota(idx.begin(), idx.end(), 0);
@ -194,17 +194,17 @@ std::vector<size_t> sort_indexes(at::ArrayRef<Value*> values) {
return idx;
}

-void CanonicalizeLoopOutputs(Node* n) {
+static void CanonicalizeLoopOutputs(Node* n) {
auto new_indices = sort_indexes(n->outputs());
LoopView(n).permuteLoopCarried(new_indices);
}

-void CanonicalizeIfOutputs(Node* n) {
+static void CanonicalizeIfOutputs(Node* n) {
auto new_indices = sort_indexes(n->outputs());
IfView(n).permuteOutputs(new_indices);
}

-void CanonicalizeOutputs(Block* block) {
+static void CanonicalizeOutputs(Block* block) {
// We iterate in reverse since ordering of a node's outputs is dependent on
// the value use following it in the graph
for (Node* n : block->nodes().reverse()) {

@ -22,12 +22,12 @@ bool isStrictFusion(Value* value) {

} // namespace

-bool fusionGuardCheck(Symbol k) {
+static bool fusionGuardCheck(Symbol k) {
return k == Symbol::prim("TensorExprDynamicGuard") || k == prim::TypeCheck ||
k == prim::CudaFusionGuard || k == prim::RequiresGradCheck;
}

-std::unordered_set<Node*> collectValuesUsedInGuard(
+static std::unordered_set<Node*> collectValuesUsedInGuard(
Node* guarding_if,
Node* enter_node) {
// DFS to collect
@ -58,7 +58,7 @@ std::unordered_set<Node*> collectValuesUsedInGuard(
return visited_nodes;
}

-void checkForUnfusedOps(Node* enter_node) {
+static void checkForUnfusedOps(Node* enter_node) {
std::vector<Node*> unsupported_nodes;
std::vector<Node*> guarding_ifs; // if multiple, we will throw
for (Node* node = enter_node->next(); node->kind() != prim::Exit;
@ -5,7 +5,7 @@
namespace torch {
namespace jit {

-void clearUndefinedness(Value* o) {
+static void clearUndefinedness(Value* o) {
if (o->type()->kind() == TensorType::Kind) {
o->setType(TensorType::get());
} else if (
@ -16,7 +16,7 @@ void clearUndefinedness(Value* o) {
}
}

-void clearUndefinedness(Block* block) {
+static void clearUndefinedness(Block* block) {
for (auto n : block->nodes()) {
for (auto o : n->outputs()) {
clearUndefinedness(o);

@ -22,7 +22,7 @@ c10::AliasAnalysisKind aliasAnalysisFromSchema() {
// helper to determine if an optional tensor argument/value passed in is
// statically defined (neither a None constant nor a Optional[Tensor] type)
// return yes, no, or no value if we can't tell
-c10::optional<bool> isDefined(Value* tensor) {
+static c10::optional<bool> isDefined(Value* tensor) {
if (tensor->type()->isSubtypeOf(*TensorType::get())) {
return true;
}
@ -32,7 +32,7 @@ c10::optional<bool> isDefined(Value* tensor) {
return {};
}

-bool isDecomposableNorm(Node* normalize_op) {
+static bool isDecomposableNorm(Node* normalize_op) {
static const OperatorSet decomposable_normalization_ops = {
"aten::batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps, bool cudnn_enabled) -> Tensor",
"aten::layer_norm(Tensor input, int[] normalized_shape, Tensor? weight, Tensor? bias, float eps, bool cudnn_enable) -> Tensor",
@ -85,7 +85,7 @@ RegisterOperators reg_ops(
},
aliasAnalysisFromSchema())});

-bool DecomposeOps(Block* block, CompilationUnit& decompose_funcs) {
+static bool DecomposeOps(Block* block, CompilationUnit& decompose_funcs) {
bool decomposed = false;
for (auto it = block->nodes().begin(), end = block->nodes().end(); it != end;
++it) {

@ -9,7 +9,7 @@
namespace torch {
namespace jit {

-void SetNumTypeToTensorType(Value* v) {
+static void SetNumTypeToTensorType(Value* v) {
if (v->type()->isSubtypeOf(*NumberType::get())) {
v->setType(TensorType::fromNumberType(*v->type()));
} else if (v->type()->isSubtypeOf(*BoolType::get())) {

@ -34,7 +34,7 @@ c10::optional<IValue> getIValue(
return toIValue(getValue(name, match_vmap, vmap));
}

-std::unordered_map<std::string, c10::IValue> getConvParams(
+static std::unordered_map<std::string, c10::IValue> getConvParams(
const Match& match,
const std::unordered_map<std::string, Value*>& vmap) {
std::unordered_map<std::string, c10::IValue> calc_values;

@ -36,7 +36,7 @@ namespace jit {
// %n =
// prim::GetAttr[name="{prefix}.name1{...}.name(n-1)._packed_params"][%self]
//
-void hoistConvPackedParams(
+static void hoistConvPackedParams(
Module& rootModule,
Node* getConvPackedParamsNode,
const std::string& prefix,
@ -4,7 +4,7 @@
namespace torch {
namespace jit {

-void InlineForkWait(
+static void InlineForkWait(
Block* b,
std::unordered_map<Value*, Value*>& future_remap) {
auto nodes = b->nodes();

@ -16,7 +16,7 @@ namespace jit {
// subgraph, replace the context unpacking value with the new graph input.
// fork(foo) ->
// def foo(a, b):
-void inlineForkedClosure(Node* fork_closure, NodeKind genKind) {
+static void inlineForkedClosure(Node* fork_closure, NodeKind genKind) {
Node* function_context_node = fork_closure->input()->node();

if (function_context_node->inputs().size() != 2 ||
@ -58,7 +58,7 @@ void inlineForkedClosure(Node* fork_closure, NodeKind genKind) {
runCleanupPasses(fork_graph);
}

-void inlineForkedClosures(Block* block) {
+static void inlineForkedClosures(Block* block) {
for (auto it = block->nodes().begin(); it != block->nodes().end();) {
Node* n = *it;
it++;

@ -30,7 +30,7 @@ GraphFunction* tryToGraphFunction(Node* n) {
return nullptr;
}

-void inlineCalls(Block* block) {
+static void inlineCalls(Block* block) {
for (auto it = block->nodes().begin(), end = block->nodes().end();
it != end;) {
Node* cur = *it++;

@ -3,7 +3,7 @@
namespace torch {
namespace jit {

-void CheckInplace(Block* block) {
+static void CheckInplace(Block* block) {
for (auto node : block->nodes()) {
if (node->kind() == prim::PythonOp && node->hasAttribute(attr::inplace)) {
if (node->i(attr::inplace)) {

@ -16,7 +16,7 @@ namespace jit {
// closure block.
// Within the closure subgraph, the context tuple is unpacked and the unpacked
// values are used for closed over values.
-void liftClosure(Node* closure) {
+static void liftClosure(Node* closure) {
auto block = closure->blocks().at(0);
auto subgraph = std::make_shared<Graph>();
// closures/forks can be nested, so use closure owning graph
@ -56,7 +56,7 @@ void liftClosure(Node* closure) {
runCleanupPasses(closure->g(attr::Subgraph));
}

-void liftClosures(Block* block) {
+static void liftClosures(Block* block) {
for (auto it = block->nodes().begin(); it != block->nodes().end();) {
Node* n = *it;
it++;

@ -21,7 +21,7 @@ struct Slot {
// parameters/attributes with extra_ivalue input Slots that hold what value to
// pass into the graph. Used for ONNX export to remove first-class modules
// so it can deal purely with parameters and inputs
-std::pair<std::shared_ptr<Graph>, std::vector<Slot>> lower_graph(
+static std::pair<std::shared_ptr<Graph>, std::vector<Slot>> lower_graph(
const ModulePtr& self,
Graph& g_,
size_t self_offset = 0) {
@ -240,31 +240,12 @@ void metalFusePrePackedConvWithClamp(script::Module& module) {
fuseHardtanhWithPackedOps(graph);
}

-void metalInsertCopyOps(script::Module& module) {
-auto graph = module.get_method("forward").graph();
-auto&& outputs = graph->outputs();
-for (const auto i : c10::irange(outputs.size())) {
-Value* output = outputs[i];
-auto namedValue = NamedValue("", output);
-if (namedValue.type()->kind() == TypeKind::TensorType) {
-// find the insertion point
-WithInsertPoint ip(output->node()->next());
-Value* replaced_output = graph->insert(
-Symbol::fromQualString("metal::copy_to_host"), {namedValue});
-// replaced the output
-graph->block()->replaceOutput(i, replaced_output);
-}
-}
-SubgraphRewriter rewriter;
-rewriter.runOnGraph(graph);
-}
-
-void metalRemoveMutation(script::Module& module) {
+static void metalRemoveMutation(script::Module& module) {
auto graph = module.get_method("forward").graph();
RemoveTensorMutation(graph);
}

-void metalRunCanonicalOptimizations(script::Module& module) {
+static void metalRunCanonicalOptimizations(script::Module& module) {
auto graph = module.get_method("forward").graph();
runOptimization(graph, false /* no loop unrolling */);
}

@ -21,7 +21,7 @@ GraphPassNameType registerPostPass(GraphPass p) {
return graphPassID++;
}

-GraphPassNameType registerPass(GraphPass p) {
+static GraphPassNameType registerPass(GraphPass p) {
return registerPostPass(std::move(p));
}

@ -332,7 +332,7 @@ struct PeepholeOptimizeImpl {
bool shape_peepholes_;
};

-bool FuseAddMM(Block* block) {
+static bool FuseAddMM(Block* block) {
bool changed = false;
for (Node* node : block->nodes()) {
// XXX: remember that if you want to simplify an expression by combining

@ -15,7 +15,7 @@
namespace torch {
namespace jit {

-c10::optional<size_t> normalizeIndex(int64_t index, size_t len) {
+static c10::optional<size_t> normalizeIndex(int64_t index, size_t len) {
if (index < 0) {
index = index + len;
}
@ -1,4 +1,5 @@
#include <torch/csrc/jit/passes/peephole.h>
+#include <torch/csrc/jit/passes/peephole_non_tensor.h>

#include <ATen/core/jit_type.h>
#include <c10/util/irange.h>

@ -168,7 +168,7 @@ void FoldQuantizedPrepackingOps(Module& module) {
PrePackingOpsFolder(module, filter_fn, "quantized");
}

-std::unordered_set<std::string> RegisterPrePackingParams(
+static std::unordered_set<std::string> RegisterPrePackingParams(
Module& module,
const std::string& method_name) {
auto filter_fn = [](const Node* n) -> bool {

@ -253,7 +253,7 @@ bool matchCallFuncToUse(

// Check any use of `v` matches the aten function call
// or CallFunction patterns
-bool matchArgPattern(
+static bool matchArgPattern(
Value* v,
const AtenFuncArgs& aten_func_args,
const CallFuncArgs& call_func_args) {
@ -395,7 +395,8 @@ std::vector<Value*> getPassThroughInputs(Value* v) {
return {};
}

-std::vector<NodeKind> toAtenSymbol(const std::vector<std::string>& func_names) {
+static std::vector<NodeKind> toAtenSymbol(
+const std::vector<std::string>& func_names) {
std::vector<NodeKind> symbols;
std::transform(
func_names.begin(),
@ -405,18 +406,18 @@ std::vector<NodeKind> toAtenSymbol(const std::vector<std::string>& func_names) {
return symbols;
}

-bool isAtenFunc(Node* n, const std::vector<NodeKind>& aten_funcs) {
+static bool isAtenFunc(Node* n, const std::vector<NodeKind>& aten_funcs) {
return std::find(aten_funcs.begin(), aten_funcs.end(), n->kind()) !=
aten_funcs.end();
}

-bool isAtenFunc(Node* n, const std::vector<std::string>& aten_funcs) {
+static bool isAtenFunc(Node* n, const std::vector<std::string>& aten_funcs) {
const auto& symbols = toAtenSymbol(aten_funcs);
return isAtenFunc(n, symbols);
}

// TODO: factor out isCallFunc
-bool isFunctionNode(
+static bool isFunctionNode(
Node* n,
const std::vector<std::string>& call_funcs,
const std::vector<std::string>& aten_funcs) {
@ -669,7 +670,7 @@ bool is_int_constant(
return v && v->isInt() && v->toInt() == value;
}

-bool is_functional(
+static bool is_functional(
const Match& match,
const std::unordered_map<std::string, Value*>& vmap,
const std::string& vname,
@ -693,7 +694,7 @@ c10::optional<std::string> getModuleName(Value* value) {
return c10::nullopt;
}

-bool is_module(
+static bool is_module(
const Match& match,
const std::unordered_map<std::string, Value*>& vmap,
const std::string& vname,
@ -282,7 +282,7 @@ QuantFusionInfo getObservedQParamOpFusionInfo(

} // namespace

-std::vector<QuantFusionInfo> quant_fusion_pattern_and_replacements() {
+static std::vector<QuantFusionInfo> quant_fusion_pattern_and_replacements() {
// aten::conv1d
std::string conv1d = R"(
graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups):
@ -1105,7 +1105,8 @@ graph(%packed_params, %a):
};
}

-std::vector<QuantFusionInfo> dynamic_quant_fusion_pattern_and_replacements() {
+static std::vector<QuantFusionInfo>
+dynamic_quant_fusion_pattern_and_replacements() {
std::string linear_dynamic = R"(
graph(%packed_params, %a, %reduce_range, %a_dtype):
%a_scale : float, %a_zero_point : int = aten::_choose_qparams_per_tensor(%a, %reduce_range)
@ -1142,7 +1143,7 @@ graph(%packed_params, %a):
};
}

-std::vector<QuantFusionInfo> linear_prepack_unpack_patterns() {
+static std::vector<QuantFusionInfo> linear_prepack_unpack_patterns() {
std::string linear_with_quant = R"(
graph(%a_dequant, %w_quant, %b):
%w_dequant = aten::dequantize(%w_quant)
@ -1178,7 +1179,7 @@ graph(%w, %a_dq, %b):
};
}

-std::vector<QuantFusionInfo> conv_prepack_unpack_patterns() {
+static std::vector<QuantFusionInfo> conv_prepack_unpack_patterns() {
std::string conv1d_with_quant = R"(
graph(%a_dequant, %w_quant, %b, %stride, %padding, %dilation, %groups):
%w_dequant = aten::dequantize(%w_quant)

@ -7,7 +7,7 @@
namespace torch {
namespace jit {

-bool certainlyThrows(Block* block) {
+static bool certainlyThrows(Block* block) {
for (Node* n : block->nodes()) {
if (n->kind() == prim::RaiseException) {
return true;
@ -16,7 +16,7 @@ bool certainlyThrows(Block* block) {
return false;
}

-void EliminateExceptions(Block* block) {
+static void EliminateExceptions(Block* block) {
auto graph = block->owningGraph();
Value* false_const = graph->insertConstant(IValue(false));
Value* true_const = graph->insertConstant(IValue(true));

@ -75,7 +75,7 @@ Node* MutationRemover::createSpecialMappedOp(Node* n) {
return new_node;
}

-bool removableSetItem(Node* n) {
+static bool removableSetItem(Node* n) {
if (n->kind() != aten::_set_item ||
n->input(1)->node()->kind() != prim::Constant) {
return false;

@ -1,4 +1,5 @@
#include <torch/csrc/jit/passes/dead_code_elimination.h>
+#include <torch/csrc/jit/passes/remove_redundant_profiles.h>

#include <torch/csrc/jit/ir/alias_analysis.h>
#include <torch/csrc/jit/ir/ir_views.h>
@ -50,7 +50,7 @@ bool mergeTypes(
return changed;
}

-void applyTypes(ArrayRef<Value*> src, ArrayRef<Value*> dst) {
+static void applyTypes(ArrayRef<Value*> src, ArrayRef<Value*> dst) {
AT_ASSERT(src.size() == dst.size());
for (const auto i : c10::irange(src.size())) {
dst[i]->setType(src[i]->type());

@ -103,7 +103,7 @@ struct ShapeArg
}
};

-std::ostream& operator<<(std::ostream& out, const ShapeArg& sa) {
+static std::ostream& operator<<(std::ostream& out, const ShapeArg& sa) {
if (auto val = sa.asConstantInt()) {
out << *val;
} else if (auto ss = sa.asShapeSymbol()) {
@ -149,7 +149,7 @@ struct ShapeArguments {
std::vector<ShapeArg> maybe_shape_symbols_;
};

-std::ostream& operator<<(std::ostream& os, const ShapeArguments& sa) {
+static std::ostream& operator<<(std::ostream& os, const ShapeArguments& sa) {
if (!sa.has_dim()) {
os << "(UNKNOWN DIM)";
return os;
@ -176,7 +176,7 @@ bool symbolicShapeAnalysisTestModeEnabled() {

using SSArgument = c10::variant<ShapeArguments, IValue>;

-std::ostream& operator<<(std::ostream& out, const SSArgument& sa) {
+static std::ostream& operator<<(std::ostream& out, const SSArgument& sa) {
if (const IValue* iv = c10::get_if<IValue>(&sa)) {
out << *iv;
} else {

@ -20,7 +20,7 @@ namespace jit {

// Inserts the Compute for Each Symbolic Shape in the TensorExpr Graph
// and returns back a map from Symbolic Shape Value to its runtime Value *
-std::map<int64_t, Value*> InsertSymbolicShapesCompute(
+static std::map<int64_t, Value*> InsertSymbolicShapesCompute(
const ShapeComputeGraphMapping& shape_mapping,
Node* tensorexpr_graph) {
WithInsertPoint guard(tensorexpr_graph);
@ -140,7 +140,7 @@ inline StrideInput summarizeStrideDim(
}
}

-std::vector<StrideInput> summarizeInputStrides(const TensorType& tt) {
+static std::vector<StrideInput> summarizeInputStrides(const TensorType& tt) {
auto strides = *tt.strides().concrete_sizes();
auto sizes = *tt.sizes().concrete_sizes();
if (c10::is_contiguous_strides(sizes, strides)) {
@ -158,7 +158,7 @@ std::vector<StrideInput> summarizeInputStrides(const TensorType& tt) {
};

// Todo: incorporate in codegen
-StrideInput summarizeOutputStrides(const TensorType& tt) {
+static StrideInput summarizeOutputStrides(const TensorType& tt) {
auto strides = *tt.strides().concrete_sizes();
auto sizes = *tt.sizes().concrete_sizes();
// We only try to maintain output striding for channels last tensors,
@ -178,7 +178,7 @@ StrideInput summarizeOutputStrides(const TensorType& tt) {
// Also summarize input striding behavior. The Size information is stored on the
// type, The striding is returned. See StrideInput for description of stride
// specializations
-c10::optional<std::vector<std::vector<StrideInput>>>
+static c10::optional<std::vector<std::vector<StrideInput>>>
TryGeneralizeInputDimensionsToSymbolicShapes(
std::shared_ptr<Graph> tensorexpr_graph) {
std::map<size_t, int64_t> shape_to_sym_shape;
@ -212,7 +212,7 @@ TryGeneralizeInputDimensionsToSymbolicShapes(
return input_striding;
}

-void moveConstantTensorsOutOfSubgraph(
+static void moveConstantTensorsOutOfSubgraph(
Node* tensorexpr_graph_node,
std::shared_ptr<Graph> tensorexpr_graph) {
auto parent = tensorexpr_graph_node->owningGraph();
@ -304,7 +304,7 @@ bool GenerateGuard(Node* tensorexpr_graph_node, bool add_composed_op) {
return true;
}

-void inlineFallbackGraphAndAddSRCopyOutOp(std::shared_ptr<Graph> graph) {
+static void inlineFallbackGraphAndAddSRCopyOutOp(std::shared_ptr<Graph> graph) {
DepthFirstGraphNodeIterator it(graph);

Node* n = nullptr;
@ -495,7 +495,7 @@ void insertDynamicShapesGuard(
// tensors
// Note: this logic is meant to reflect the invocation of the TE Kernel
// and `runWithAllocatedOutputs` in tensorexpr_fuser.cpp
-Operation StaticRuntimeCopyOuts(const Node* node) {
+static Operation StaticRuntimeCopyOuts(const Node* node) {
auto num_ten_inputs = node->inputs().size();
return [num_ten_inputs](Stack& stack) {
std::vector<IValue> inputs = pop(stack, num_ten_inputs);
@ -721,7 +721,7 @@ void runTensorExprDynamicGroup(const Code& code, Stack& stack) {
interpreter.run(stack);
}

-Operation createTensorExprDynamicGroup(const Node* node) {
+static Operation createTensorExprDynamicGroup(const Node* node) {
const auto& graph = node->g(attr::Subgraph);
Code code(graph, "");
// This implementation creates a Code object and InterpreterState on every
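Aside, not part of the diff: several hunks above mark stream-insertion overloads static because they are only used for debug printing inside one translation unit. A self-contained sketch of that pattern, with a hypothetical type:

// static_stream_operator_sketch.cpp -- illustrative only.
#include <iostream>

struct ShapeLike { int dims; };  // hypothetical stand-in for a TU-local helper type

// Internal linkage: the overload is visible only in this .cpp, so no header
// declaration is required and the missing-prototypes warning stays quiet.
static std::ostream& operator<<(std::ostream& out, const ShapeLike& s) {
  return out << "ShapeLike(dims=" << s.dims << ")";
}

int main() {
  std::cout << ShapeLike{3} << '\n';  // prints: ShapeLike(dims=3)
}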
@ -43,7 +43,7 @@ namespace jit {

static bool texpr_reductions_enabled = false;

-bool isSupportedForBlock(Node* node) {
+static bool isSupportedForBlock(Node* node) {
switch (node->kind()) {
case aten::add:
case aten::mul:
@ -187,7 +187,7 @@ bool texprReductionsEnabled() {
return texpr_reductions_enabled;
}

-void removeProfileNodesAndSpecializeTypes(Block* b) {
+static void removeProfileNodesAndSpecializeTypes(Block* b) {
for (auto it = b->nodes().begin(); it != b->nodes().end(); it++) {
if (it->kind() == prim::profile) {
GRAPH_DEBUG("Removing prim::profile: %", it->output()->debugName());
@ -275,7 +275,7 @@ bool hasTensorTypeSpecialization(Value* v) {
return true;
}

-void removeTensorTypeSpecialization(Value* v) {
+static void removeTensorTypeSpecialization(Value* v) {
if (hasTensorTypeSpecialization(v)) {
v->setType(TensorType::get());
}
@ -1364,7 +1364,7 @@ void FuseTensorExprs(
GRAPH_DUMP("After TExprFuser: ", graph);
}

-Operation createTensorExprOp(const Node* node) {
+static Operation createTensorExprOp(const Node* node) {
bool dynamic_shape_fusion_node =
node->hasAttribute(attr::striding_inputs_desc);
if (!dynamic_shape_fusion_node) {

@ -6,7 +6,7 @@
namespace torch {
namespace jit {

-void UpdateDifferentiableGraphRequiresGrad(
+static void UpdateDifferentiableGraphRequiresGrad(
Block* block,
c10::optional<bool> new_requires_grad) {
for (Node* n : block->nodes()) {

@ -227,7 +227,7 @@ void unmergeSubgraph(Node* subgraphNode) {
subgraphNode->destroy();
}

-void collectNestedUses(
+static void collectNestedUses(
std::unordered_set<Value*>& closed_over_values,
std::unordered_set<Value*>& new_values,
std::unordered_map<Value*, Value*>& externalValuesMap,
@ -271,7 +271,7 @@ void collectNestedUses(
}
}

-std::unordered_set<Value*> closedOverValues(
+static std::unordered_set<Value*> closedOverValues(
Node* toMerge,
std::unordered_map<Value*, Value*>& externalValuesMap) {
std::unordered_set<Value*> closed_over_values;
@ -602,7 +602,7 @@ void unmergeNode(Node* n, Node* subgraphNode) {
n->destroy();
}

-std::string truncateStrWithHash(const std::string& s, size_t maxlen) {
+static std::string truncateStrWithHash(const std::string& s, size_t maxlen) {
if (s.size() <= maxlen) {
return s;
}

@ -399,12 +399,12 @@ void vulkanFoldPrePackingOps(script::Module& m) {
PrePackingOpsFolder(m, filter_fn, "prepack_folding");
}

-void vulkanRemoveMutation(script::Module& module) {
+static void vulkanRemoveMutation(script::Module& module) {
auto graph = module.get_method("forward").graph();
RemoveTensorMutation(graph);
}

-void vulkanRunCanonicalOptimizations(script::Module& module) {
+static void vulkanRunCanonicalOptimizations(script::Module& module) {
auto graph = module.get_method("forward").graph();
for (const auto& method : module.get_methods()) {
auto method_graph = method.graph();
@ -22,19 +22,13 @@ namespace jit {
using value_map = std::unordered_map<Value*, Value*>;
using value_set = std::unordered_set<Value*>;

-void wrapDim(int64_t& dim, const std::vector<int64_t>& sizes) {
-if (dim < 0) {
-dim += sizes.size();
-}
-}
-
// need_trim_grad_ops contains functions that return multiple outputs in
// forward, but only the first one requires grad.
// Example:
// kthvalue returns (kthvalue, index of kthvalue), currently autodiff only
// supports at most one output that requires grad. Thus we need to remove
// the grad for index that doesn't require grad.
-bool needTrimGrad(Node* n) {
+static bool needTrimGrad(Node* n) {
static OperatorSet need_trim_grad_ops = {
"aten::kthvalue(Tensor self, int k, int dim, bool keepdim) -> (Tensor, Tensor)",
"aten::topk(Tensor self, int k, int dim, bool largest, bool sorted) -> (Tensor, Tensor)",
@ -835,7 +829,7 @@ static void lambdaLiftReverse(Gradient& grad_desc, ReverseDetails& rev_info) {
reverse_block->owningNode()->destroy();
}

-void packReturnValuesIntoTuple(const std::shared_ptr<Graph>& graph) {
+static void packReturnValuesIntoTuple(const std::shared_ptr<Graph>& graph) {
auto returnNode = graph->block()->return_node();
WithInsertPoint wip(returnNode);
auto tuple = graph->insertNode(graph->createTuple(returnNode->inputs()));

@ -70,7 +70,7 @@ void loadDecompositionFunctions() {

} // anonymous namespace

-void DecomposeOp(Node* n) {
+static void DecomposeOp(Node* n) {
auto schema = n->maybeSchema();
if (!schema) {
return;
@ -89,7 +89,7 @@ void DecomposeOp(Node* n) {
n->destroy();
}

-void RunDecompositions(Block* block) {
+static void RunDecompositions(Block* block) {
for (auto it = block->nodes().begin(); it != block->nodes().end();) {
Node* n = *it;
it++; // advance iterator bc the current node may be destroyed
@ -5,7 +5,7 @@

namespace torch {
namespace jit {
-std::ostream& operator<<(std::ostream& out, OpCode op) {
+static std::ostream& operator<<(std::ostream& out, OpCode op) {
switch (op) {
#define OP_STRING(x, _) \
case x: \
@ -27,7 +27,7 @@ char const* toString(OpCode op) {
return nullptr;
}

-const char* OpInfo(OpCode op) {
+static const char* OpInfo(OpCode op) {
switch (op) {
#define OP_INFO(x, info) \
case x: \

@ -95,6 +95,7 @@ std::ostream& operator<<(std::ostream& out, Instruction inst);

bool isOpSupportedInMobile(OpCode op);
char const* toString(OpCode op);
OpCode parseOpCode(const char* str);
std::ostream& operator<<(std::ostream& out, Instruction inst);

} // namespace jit
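Aside, not part of the diff: for helpers that must stay callable from other translation units, marking them static is not an option; the usual fix is to make sure a declaration exists in a header that the defining .cpp includes, which appears to be the approach taken in the header hunk above. A simplified two-file sketch with hypothetical names:

// ops_sketch.h -- illustrative only.
const char* op_name(int op);           // the prototype other TUs (and the warning) need

// ops_sketch.cpp
// #include "ops_sketch.h"
const char* op_name(int op) {          // definition now has a prior declaration
  return op == 0 ? "NOP" : "OTHER";
}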
@ -83,7 +83,7 @@ static std::atomic<bool> profiling_mode{true};

static std::mutex fusion_strategy_lock;

-FusionStrategy getInitialStrategy() {
+static FusionStrategy getInitialStrategy() {
if (FLAGS_torch_jit_always_dynamic) {
return {{FusionBehavior::DYNAMIC, 12}};
}
@ -245,7 +245,7 @@ static C10_UNUSED void setRequiresGradOnDiffGraph(Node* dnode) {
}
}

-bool guardDifferentiableGraph(Node* dnode) {
+static bool guardDifferentiableGraph(Node* dnode) {
auto gi = dnode->g(attr::Subgraph)->inputs();
bool all_inputs_seen = true;
for (const auto i : c10::irange(gi.size())) {
@ -323,7 +323,7 @@ void runNooptPassPipeline(std::shared_ptr<Graph>& graph) {
"After EliminateDeadCode (end of runNooptPassPipeline)\n", *graph);
}

-void runPreAutodiffPassPipeline(std::shared_ptr<Graph>& graph) {
+static void runPreAutodiffPassPipeline(std::shared_ptr<Graph>& graph) {
GRAPH_DEBUG(
"Before InsertGuards (beginning of runPreAutodiffPassPipeline)\n",
*graph);
@ -700,7 +700,7 @@ GraphExecutorState ProfilingGraphExecutorImpl::getDebugState() {
return state;
}

-Node* insertFallbackFunctionCall(
+static Node* insertFallbackFunctionCall(
Graph* graph,
GraphFunction* func,
ArrayRef<Value*> inputs) {
@ -721,7 +721,7 @@ Node* insertFallbackFunctionCall(
return fun_unpack_tuple;
}

-GraphFunction* createFallbackPathFunction(
+static GraphFunction* createFallbackPathFunction(
Block* b,
const std::string& function_name) {
auto value_map = [](Value* v) { return v; };
Some files were not shown because too many files have changed in this diff.