Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-20 21:14:14 +08:00)
[4/N] Fix Wextra-semi warning (#139256)
Fixes #ISSUE_NUMBER
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139256
Approved by: https://github.com/ezyang
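For context on the warning this series silences: the registration macros and empty helper bodies touched below (for example REGISTER_DISPATCH(fill_stub, &fill_kernel); or inline void apply_op(){};) already expand to, or already are, complete definitions, so the trailing ';' at the use site leaves an empty declaration behind, which compilers report under -Wextra-semi. A minimal sketch of the pattern, using a hypothetical REGISTER_KERNEL macro rather than the actual PyTorch macros:

// compile with e.g.: clang++ -std=c++17 -Wextra-semi -c example.cpp
// Hypothetical macro: it already ends in a full function definition,
// so a ';' after the invocation becomes a redundant empty declaration.
#define REGISTER_KERNEL(name, fn) \
  void register_##name() { (void)(fn); }

int my_kernel() { return 0; }

REGISTER_KERNEL(my_kernel_a, my_kernel);  // draws an extra-';' warning under -Wextra-semi
REGISTER_KERNEL(my_kernel_b, my_kernel)   // fixed: no trailing semicolon

The diff below applies exactly that second form throughout the touched files.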
@@ -157,7 +157,7 @@ inline int64_t _max_dim_tensors(ArrayRef<Tensor> tensors) {
 return dim;
 }

-inline void iterate(int64_t /*size*/){};
+inline void iterate(int64_t /*size*/) {}

 template <typename Arg, typename... Args>
 inline void iterate(int64_t size, Arg& iter, Args&... iter_tail) {
@@ -168,7 +168,7 @@ inline void iterate(int64_t size, Arg& iter, Args&... iter_tail) {

 inline bool iterate_continue() {
 return true;
-};
+}

 template <typename Arg, typename... Args>
 inline bool iterate_continue(Arg& iter, Args&... iter_tail) {
@@ -178,7 +178,7 @@ inline bool iterate_continue(Arg& iter, Args&... iter_tail) {

 inline int64_t max_iterate_size() {
 return std::numeric_limits<int64_t>::max();
-};
+}

 template <typename Arg, typename... Args>
 inline int64_t max_iterate_size(Arg& iter, Args&... iter_tail) {
@@ -187,7 +187,7 @@ inline int64_t max_iterate_size(Arg& iter, Args&... iter_tail) {
 max_iterate_size(iter_tail...));
 }

-inline void iterate_overflow(){};
+inline void iterate_overflow() {}

 template <typename Arg, typename... Args>
 inline void iterate_overflow(Arg& iter, Args&... iter_tail) {
@@ -204,7 +204,7 @@ inline void iterate_overflow(Arg& iter, Args&... iter_tail) {
 iterate_overflow(iter_tail...);
 }

-inline void forward(int64_t /*offset*/){};
+inline void forward(int64_t /*offset*/) {}

 template <typename Arg, typename... Args>
 inline void forward(int64_t offset, Arg& iter, Args&... iter_tail) {
@@ -227,7 +227,7 @@ inline int64_t max_dim(Arg& iter, Args&... iter_tail) {
 return std::max(iter.dim_, max_dim(iter_tail...));
 }

-inline void apply_op(){};
+inline void apply_op() {}

 template <typename Op, typename... Args>
 inline void apply_op(
@@ -686,43 +686,43 @@ _scaled_dot_product_cudnn_attention_batch_rule(
 #endif

 #define LINALG_CHECK_MATRIX_UNARY_ONE_OUT(fn, op_name) \
-LINALG_STRING_CONST(fn, op_name);\
+LINALG_STRING_CONST(fn, op_name)\
 TORCH_LIBRARY_IMPL(aten, FuncTorchBatched, m) {\
 VMAP_SUPPORT(fn, LINALG_CHECK_MATRIX_UNARY_BATCH_RULE(fn, one));\
 }

 #define LINALG_CHECK_MATRIX_UNARY_ONE_OUT2(fn, overload, op_name) \
-LINALG_STRING_CONST2(fn, overload, op_name);\
+LINALG_STRING_CONST2(fn, overload, op_name)\
 TORCH_LIBRARY_IMPL(aten, FuncTorchBatched, m) {\
 VMAP_SUPPORT2(fn, overload, LINALG_CHECK_MATRIX_UNARY_BATCH_RULE2(fn, overload, one));\
 }

 #define LINALG_CHECK_MATRIX_UNARY_TWO_OUT(fn, op_name) \
-LINALG_STRING_CONST(fn, op_name);\
+LINALG_STRING_CONST(fn, op_name)\
 TORCH_LIBRARY_IMPL(aten, FuncTorchBatched, m) {\
 VMAP_SUPPORT(fn, LINALG_CHECK_MATRIX_UNARY_BATCH_RULE(fn, two));\
 }

 #define LINALG_CHECK_MATRIX_UNARY_THREE_OUT(fn, op_name) \
-LINALG_STRING_CONST(fn, op_name);\
+LINALG_STRING_CONST(fn, op_name)\
 TORCH_LIBRARY_IMPL(aten, FuncTorchBatched, m) {\
 VMAP_SUPPORT(fn, LINALG_CHECK_MATRIX_UNARY_BATCH_RULE(fn, three));\
 }

 #define LINALG_CHECK_MATRIX_UNARY_FOUR_OUT(fn, op_name) \
-LINALG_STRING_CONST(fn, op_name);\
+LINALG_STRING_CONST(fn, op_name)\
 TORCH_LIBRARY_IMPL(aten, FuncTorchBatched, m) {\
 VMAP_SUPPORT(fn, LINALG_CHECK_MATRIX_UNARY_BATCH_RULE(fn, four));\
 }

 #define LINALG_CHECK_MATRIX_BINARY_ONE_OUT(fn, op_name) \
-LINALG_STRING_CONST(fn, op_name);\
+LINALG_STRING_CONST(fn, op_name)\
 TORCH_LIBRARY_IMPL(aten, FuncTorchBatched, m) {\
 VMAP_SUPPORT(fn, LINALG_CHECK_MATRIX_BINARY_BATCH_RULE(fn, one));\
 }

 #define LINALG_CHECK_MATRIX_BINARY_TWO_OUT(fn, op_name) \
-LINALG_STRING_CONST(fn, op_name);\
+LINALG_STRING_CONST(fn, op_name)\
 TORCH_LIBRARY_IMPL(aten, FuncTorchBatched, m) {\
 VMAP_SUPPORT(fn, LINALG_CHECK_MATRIX_BINARY_BATCH_RULE(fn, two));\
 }
@@ -321,14 +321,14 @@ TORCH_META_FUNC(tanh_backward) (const Tensor& grad_output, const Tensor& output)
 build_borrowing_binary_op(maybe_get_output(), self, other); \
 }

-CREATE_BINARY_META_FUNC(logaddexp);
-CREATE_BINARY_META_FUNC(logaddexp2);
-CREATE_BINARY_META_FUNC(gcd);
-CREATE_BINARY_META_FUNC(lcm);
-CREATE_BINARY_META_FUNC(hypot);
-CREATE_BINARY_META_FUNC(igamma);
-CREATE_BINARY_META_FUNC(igammac);
-CREATE_BINARY_META_FUNC(nextafter);
+CREATE_BINARY_META_FUNC(logaddexp)
+CREATE_BINARY_META_FUNC(logaddexp2)
+CREATE_BINARY_META_FUNC(gcd)
+CREATE_BINARY_META_FUNC(lcm)
+CREATE_BINARY_META_FUNC(hypot)
+CREATE_BINARY_META_FUNC(igamma)
+CREATE_BINARY_META_FUNC(igammac)
+CREATE_BINARY_META_FUNC(nextafter)

 TORCH_META_FUNC(maximum) (const Tensor& self, const Tensor& other) {
 TORCH_CHECK(!self.is_complex() && !other.is_complex(), "maximum not implemented for complex tensors.");
@@ -362,12 +362,12 @@ TORCH_META_FUNC(fmin) (const Tensor& self, const Tensor& other) {
 build_borrowing_except_last_argument_comparison_op(maybe_get_output(), self, other_tensor); \
 }

-CREATE_COMPARISON_SCALAR_TENSOR_META_FUNC(eq);
-CREATE_COMPARISON_SCALAR_TENSOR_META_FUNC(ne);
-CREATE_COMPARISON_SCALAR_TENSOR_META_FUNC(lt);
-CREATE_COMPARISON_SCALAR_TENSOR_META_FUNC(le);
-CREATE_COMPARISON_SCALAR_TENSOR_META_FUNC(gt);
-CREATE_COMPARISON_SCALAR_TENSOR_META_FUNC(ge);
+CREATE_COMPARISON_SCALAR_TENSOR_META_FUNC(eq)
+CREATE_COMPARISON_SCALAR_TENSOR_META_FUNC(ne)
+CREATE_COMPARISON_SCALAR_TENSOR_META_FUNC(lt)
+CREATE_COMPARISON_SCALAR_TENSOR_META_FUNC(le)
+CREATE_COMPARISON_SCALAR_TENSOR_META_FUNC(gt)
+CREATE_COMPARISON_SCALAR_TENSOR_META_FUNC(ge)

 } // namespace at::meta

@@ -532,24 +532,24 @@ TORCH_IMPL_FUNC(func_out) (const Tensor& self, const Tensor& other, const Tensor
 func_stub(device_type(), *this); \
 }

-CREATE_BINARY_TORCH_IMPL_FUNC(bitwise_and_out, bitwise_and_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(bitwise_or_out, bitwise_or_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(bitwise_xor_out, bitwise_xor_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(maximum_out, maximum_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(minimum_out, minimum_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(fmax_out, fmax_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(fmin_out, fmin_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(fmod_out, fmod_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(logaddexp_out, logaddexp_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(logaddexp2_out, logaddexp2_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(gcd_out, gcd_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(lcm_out, lcm_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(hypot_out, hypot_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(igamma_out, igamma_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(igammac_out, igammac_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(nextafter_out, nextafter_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(remainder_out, remainder_stub);
-CREATE_BINARY_TORCH_IMPL_FUNC(xlogy_out, xlogy_stub);
+CREATE_BINARY_TORCH_IMPL_FUNC(bitwise_and_out, bitwise_and_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(bitwise_or_out, bitwise_or_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(bitwise_xor_out, bitwise_xor_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(maximum_out, maximum_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(minimum_out, minimum_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(fmax_out, fmax_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(fmin_out, fmin_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(fmod_out, fmod_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(logaddexp_out, logaddexp_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(logaddexp2_out, logaddexp2_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(gcd_out, gcd_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(lcm_out, lcm_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(hypot_out, hypot_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(igamma_out, igamma_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(igammac_out, igammac_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(nextafter_out, nextafter_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(remainder_out, remainder_stub)
+CREATE_BINARY_TORCH_IMPL_FUNC(xlogy_out, xlogy_stub)

 Tensor special_xlog1py(const Scalar& x, const Tensor& y) {
 return at::special_xlog1py(wrapped_scalar_tensor(x), y);
@@ -1462,12 +1462,12 @@ Tensor& greater_equal_(Tensor& self, const Scalar& other) { return self.ge_(othe
 func##_stub(device_type(), *this); \
 }

-CREATE_COMPARISON_SCALAR_TENSOR_IMPL_FUNC(eq);
-CREATE_COMPARISON_SCALAR_TENSOR_IMPL_FUNC(ne);
-CREATE_COMPARISON_SCALAR_TENSOR_IMPL_FUNC(gt);
-CREATE_COMPARISON_SCALAR_TENSOR_IMPL_FUNC(ge);
-CREATE_COMPARISON_SCALAR_TENSOR_IMPL_FUNC(lt);
-CREATE_COMPARISON_SCALAR_TENSOR_IMPL_FUNC(le);
+CREATE_COMPARISON_SCALAR_TENSOR_IMPL_FUNC(eq)
+CREATE_COMPARISON_SCALAR_TENSOR_IMPL_FUNC(ne)
+CREATE_COMPARISON_SCALAR_TENSOR_IMPL_FUNC(gt)
+CREATE_COMPARISON_SCALAR_TENSOR_IMPL_FUNC(ge)
+CREATE_COMPARISON_SCALAR_TENSOR_IMPL_FUNC(lt)
+CREATE_COMPARISON_SCALAR_TENSOR_IMPL_FUNC(le)

 // not_equal, alias for torch.ne
 Tensor& not_equal_out(const Tensor& self, const Tensor& other, Tensor& result) { return at::ne_out(result, self, other); }
@@ -943,6 +943,6 @@ static std::tuple<Tensor, Tensor, Tensor> slow_conv_transpose3d_backward_cpu(
 return std::tuple<Tensor, Tensor, Tensor>(grad_input, grad_weight, grad_bias);
 }

-REGISTER_ALL_CPU_DISPATCH(slow_conv_transpose3d_backward_stub, &slow_conv_transpose3d_backward_cpu);
+REGISTER_ALL_CPU_DISPATCH(slow_conv_transpose3d_backward_stub, &slow_conv_transpose3d_backward_cpu)

 } // namespace at::native
@@ -479,8 +479,8 @@ DEFINE_DISPATCH(index_put_with_sort_stub);
 DEFINE_DISPATCH(put_stub);
 DEFINE_DISPATCH(take_stub);
 DEFINE_DISPATCH(masked_fill_stub);
-REGISTER_NO_CPU_DISPATCH(index_put_with_sort_stub);
-REGISTER_NO_CPU_DISPATCH(index_put_with_sort_quantized_stub);
+REGISTER_NO_CPU_DISPATCH(index_put_with_sort_stub)
+REGISTER_NO_CPU_DISPATCH(index_put_with_sort_quantized_stub)
 DEFINE_DISPATCH(masked_select_serial_stub);
 DEFINE_DISPATCH(masked_select_stub);
 DEFINE_DISPATCH(masked_scatter_stub);
@@ -1400,17 +1400,17 @@ void prelu_backward_kernel(TensorIterator& iter) {
 } // namespace


-REGISTER_DISPATCH(hardsigmoid_stub, &hardsigmoid_kernel);
-REGISTER_DISPATCH(hardsigmoid_backward_stub, &hardsigmoid_backward_kernel);
-REGISTER_DISPATCH(threshold_stub, &threshold_kernel);
-REGISTER_DISPATCH(leaky_relu_stub, &leaky_relu_kernel);
-REGISTER_DISPATCH(leaky_relu_backward_stub, &leaky_relu_backward_kernel);
-REGISTER_DISPATCH(prelu_stub, &prelu_kernel);
-REGISTER_DISPATCH(prelu_backward_stub, &prelu_backward_kernel);
-REGISTER_DISPATCH(hardtanh_backward_stub, &hardtanh_backward_kernel);
-REGISTER_DISPATCH(hardshrink_stub, &hardshrink_kernel);
-REGISTER_DISPATCH(softshrink_stub, &softshrink_kernel);
-REGISTER_DISPATCH(shrink_backward_stub, &shrink_backward_kernel);
+REGISTER_DISPATCH(hardsigmoid_stub, &hardsigmoid_kernel)
+REGISTER_DISPATCH(hardsigmoid_backward_stub, &hardsigmoid_backward_kernel)
+REGISTER_DISPATCH(threshold_stub, &threshold_kernel)
+REGISTER_DISPATCH(leaky_relu_stub, &leaky_relu_kernel)
+REGISTER_DISPATCH(leaky_relu_backward_stub, &leaky_relu_backward_kernel)
+REGISTER_DISPATCH(prelu_stub, &prelu_kernel)
+REGISTER_DISPATCH(prelu_backward_stub, &prelu_backward_kernel)
+REGISTER_DISPATCH(hardtanh_backward_stub, &hardtanh_backward_kernel)
+REGISTER_DISPATCH(hardshrink_stub, &hardshrink_kernel)
+REGISTER_DISPATCH(softshrink_stub, &softshrink_kernel)
+REGISTER_DISPATCH(shrink_backward_stub, &shrink_backward_kernel)

 ALSO_REGISTER_AVX512_DISPATCH(log_sigmoid_cpu_stub, &log_sigmoid_cpu_kernel);
 ALSO_REGISTER_AVX512_DISPATCH(log_sigmoid_backward_stub, &log_sigmoid_backward_cpu_kernel);
@@ -854,9 +854,9 @@ void adapative_avg_pool3d_backward_kernel_impl(

 } // anonymous namespace

-REGISTER_DISPATCH(adaptive_avg_pool2d_kernel, &adaptive_avg_pool2d_kernel_impl);
-REGISTER_DISPATCH(adaptive_avg_pool2d_backward_kernel, &adapative_avg_pool2d_backward_kernel_impl);
-REGISTER_DISPATCH(adaptive_avg_pool3d_kernel, &adaptive_avg_pool3d_kernel_impl);
-REGISTER_DISPATCH(adaptive_avg_pool3d_backward_kernel, &adapative_avg_pool3d_backward_kernel_impl);
+REGISTER_DISPATCH(adaptive_avg_pool2d_kernel, &adaptive_avg_pool2d_kernel_impl)
+REGISTER_DISPATCH(adaptive_avg_pool2d_backward_kernel, &adapative_avg_pool2d_backward_kernel_impl)
+REGISTER_DISPATCH(adaptive_avg_pool3d_kernel, &adaptive_avg_pool3d_kernel_impl)
+REGISTER_DISPATCH(adaptive_avg_pool3d_backward_kernel, &adapative_avg_pool3d_backward_kernel_impl)

 } // at::native
@@ -980,9 +980,9 @@ void adaptive_max_pool3d_backward_kernel_impl(

 } // anonymous namespace

-REGISTER_DISPATCH(adaptive_max_pool2d_kernel, &adaptive_max_pool2d_kernel_impl);
-REGISTER_DISPATCH(adaptive_max_pool2d_backward_kernel, &adaptive_max_pool2d_backward_kernel_impl);
-REGISTER_DISPATCH(adaptive_max_pool3d_kernel, &adaptive_max_pool3d_kernel_impl);
-REGISTER_DISPATCH(adaptive_max_pool3d_backward_kernel, &adaptive_max_pool3d_backward_kernel_impl);
+REGISTER_DISPATCH(adaptive_max_pool2d_kernel, &adaptive_max_pool2d_kernel_impl)
+REGISTER_DISPATCH(adaptive_max_pool2d_backward_kernel, &adaptive_max_pool2d_backward_kernel_impl)
+REGISTER_DISPATCH(adaptive_max_pool3d_kernel, &adaptive_max_pool3d_kernel_impl)
+REGISTER_DISPATCH(adaptive_max_pool3d_backward_kernel, &adaptive_max_pool3d_backward_kernel_impl)

 } // at::native
@@ -192,7 +192,7 @@ at::Tensor& _amp_update_scale_cpu_kernel(

 } // namespace

-REGISTER_DISPATCH(_amp_foreach_non_finite_check_and_unscale_cpu_stub, &_amp_foreach_non_finite_check_and_unscale_cpu_kernel);
-REGISTER_DISPATCH(_amp_update_scale_cpu_stub, &_amp_update_scale_cpu_kernel);
+REGISTER_DISPATCH(_amp_foreach_non_finite_check_and_unscale_cpu_stub, &_amp_foreach_non_finite_check_and_unscale_cpu_kernel)
+REGISTER_DISPATCH(_amp_update_scale_cpu_stub, &_amp_update_scale_cpu_kernel)

 } // namespace at::native
@@ -1130,9 +1130,9 @@ void avg_pool3d_backward_kernel_impl(

 } // anonymous namespace

-REGISTER_DISPATCH(avg_pool2d_kernel, &avg_pool2d_kernel_impl);
-REGISTER_DISPATCH(avg_pool2d_backward_kernel, &avg_pool2d_backward_kernel_impl);
-REGISTER_DISPATCH(avg_pool3d_kernel, &avg_pool3d_kernel_impl);
-REGISTER_DISPATCH(avg_pool3d_backward_kernel, &avg_pool3d_backward_kernel_impl);
+REGISTER_DISPATCH(avg_pool2d_kernel, &avg_pool2d_kernel_impl)
+REGISTER_DISPATCH(avg_pool2d_backward_kernel, &avg_pool2d_backward_kernel_impl)
+REGISTER_DISPATCH(avg_pool3d_kernel, &avg_pool3d_kernel_impl)
+REGISTER_DISPATCH(avg_pool3d_backward_kernel, &avg_pool3d_backward_kernel_impl)

 } // at::native
@@ -272,7 +272,7 @@ inline Vectorized<scalar_t> div_floor_floating_vec(
 floordiv = vec_t::blendv(floordiv, zero.copysign(basic_div), div == zero);
 floordiv = vec_t::blendv(floordiv, basic_div, b == zero);
 return floordiv;
-};
+}

 void div_floor_kernel(TensorIteratorBase& iter) {
 const auto dtype = iter.common_dtype();
@@ -1375,44 +1375,44 @@ void shifted_chebyshev_polynomial_w_kernel(TensorIteratorBase& iterator) {

 } // namespace

-REGISTER_DISPATCH(add_clamp_stub, &add_clamp_kernel);
-REGISTER_DISPATCH(mul_stub, &mul_kernel);
-REGISTER_DISPATCH(div_true_stub, &div_true_kernel);
-REGISTER_DISPATCH(div_trunc_stub, &div_trunc_kernel);
-REGISTER_DISPATCH(div_floor_stub, &div_floor_kernel);
-REGISTER_DISPATCH(bitwise_and_stub, &bitwise_and_kernel);
-REGISTER_DISPATCH(bitwise_or_stub, &bitwise_or_kernel);
-REGISTER_DISPATCH(bitwise_xor_stub, &bitwise_xor_kernel);
-REGISTER_DISPATCH(lshift_stub, &lshift_kernel);
-REGISTER_DISPATCH(rshift_stub, &rshift_kernel);
-REGISTER_DISPATCH(logical_xor_stub, &logical_xor_kernel);
-REGISTER_DISPATCH(logical_and_stub, &logical_and_kernel);
-REGISTER_DISPATCH(logical_or_stub, &logical_or_kernel);
-REGISTER_DISPATCH(lt_stub, &lt_kernel);
-REGISTER_DISPATCH(le_stub, &le_kernel);
-REGISTER_DISPATCH(gt_stub, &gt_kernel);
-REGISTER_DISPATCH(ge_stub, &ge_kernel);
-REGISTER_DISPATCH(eq_stub, &eq_kernel);
-REGISTER_DISPATCH(ne_stub, &ne_kernel);
-REGISTER_DISPATCH(maximum_stub, &maximum_kernel);
-REGISTER_DISPATCH(minimum_stub, &minimum_kernel);
-REGISTER_DISPATCH(fmax_stub, &fmax_kernel);
-REGISTER_DISPATCH(fmin_stub, &fmin_kernel);
-REGISTER_DISPATCH(copysign_stub, &copysign_kernel);
-REGISTER_DISPATCH(remainder_stub, &remainder_kernel);
-REGISTER_DISPATCH(fmod_stub, &fmod_kernel);
-REGISTER_DISPATCH(gcd_stub, &gcd_kernel);
-REGISTER_DISPATCH(lcm_stub, &lcm_kernel);
-REGISTER_DISPATCH(xlogy_stub, &xlogy_kernel);
-REGISTER_DISPATCH(xlog1py_stub, &xlog1py_kernel);
-REGISTER_DISPATCH(zeta_stub, &zeta_kernel);
-REGISTER_DISPATCH(nextafter_stub, &nextafter_kernel);
-REGISTER_DISPATCH(heaviside_stub, &heaviside_kernel);
-REGISTER_DISPATCH(chebyshev_polynomial_t_stub, &chebyshev_polynomial_t_kernel);
-REGISTER_DISPATCH(chebyshev_polynomial_v_stub, &chebyshev_polynomial_v_kernel);
-REGISTER_DISPATCH(chebyshev_polynomial_w_stub, &chebyshev_polynomial_w_kernel);
-REGISTER_DISPATCH(laguerre_polynomial_l_stub, &laguerre_polynomial_l_kernel);
-REGISTER_DISPATCH(legendre_polynomial_p_stub, &legendre_polynomial_p_kernel);
+REGISTER_DISPATCH(add_clamp_stub, &add_clamp_kernel)
+REGISTER_DISPATCH(mul_stub, &mul_kernel)
+REGISTER_DISPATCH(div_true_stub, &div_true_kernel)
+REGISTER_DISPATCH(div_trunc_stub, &div_trunc_kernel)
+REGISTER_DISPATCH(div_floor_stub, &div_floor_kernel)
+REGISTER_DISPATCH(bitwise_and_stub, &bitwise_and_kernel)
+REGISTER_DISPATCH(bitwise_or_stub, &bitwise_or_kernel)
+REGISTER_DISPATCH(bitwise_xor_stub, &bitwise_xor_kernel)
+REGISTER_DISPATCH(lshift_stub, &lshift_kernel)
+REGISTER_DISPATCH(rshift_stub, &rshift_kernel)
+REGISTER_DISPATCH(logical_xor_stub, &logical_xor_kernel)
+REGISTER_DISPATCH(logical_and_stub, &logical_and_kernel)
+REGISTER_DISPATCH(logical_or_stub, &logical_or_kernel)
+REGISTER_DISPATCH(lt_stub, &lt_kernel)
+REGISTER_DISPATCH(le_stub, &le_kernel)
+REGISTER_DISPATCH(gt_stub, &gt_kernel)
+REGISTER_DISPATCH(ge_stub, &ge_kernel)
+REGISTER_DISPATCH(eq_stub, &eq_kernel)
+REGISTER_DISPATCH(ne_stub, &ne_kernel)
+REGISTER_DISPATCH(maximum_stub, &maximum_kernel)
+REGISTER_DISPATCH(minimum_stub, &minimum_kernel)
+REGISTER_DISPATCH(fmax_stub, &fmax_kernel)
+REGISTER_DISPATCH(fmin_stub, &fmin_kernel)
+REGISTER_DISPATCH(copysign_stub, &copysign_kernel)
+REGISTER_DISPATCH(remainder_stub, &remainder_kernel)
+REGISTER_DISPATCH(fmod_stub, &fmod_kernel)
+REGISTER_DISPATCH(gcd_stub, &gcd_kernel)
+REGISTER_DISPATCH(lcm_stub, &lcm_kernel)
+REGISTER_DISPATCH(xlogy_stub, &xlogy_kernel)
+REGISTER_DISPATCH(xlog1py_stub, &xlog1py_kernel)
+REGISTER_DISPATCH(zeta_stub, &zeta_kernel)
+REGISTER_DISPATCH(nextafter_stub, &nextafter_kernel)
+REGISTER_DISPATCH(heaviside_stub, &heaviside_kernel)
+REGISTER_DISPATCH(chebyshev_polynomial_t_stub, &chebyshev_polynomial_t_kernel)
+REGISTER_DISPATCH(chebyshev_polynomial_v_stub, &chebyshev_polynomial_v_kernel)
+REGISTER_DISPATCH(chebyshev_polynomial_w_stub, &chebyshev_polynomial_w_kernel)
+REGISTER_DISPATCH(laguerre_polynomial_l_stub, &laguerre_polynomial_l_kernel)
+REGISTER_DISPATCH(legendre_polynomial_p_stub, &legendre_polynomial_p_kernel)
 REGISTER_DISPATCH(
 shifted_chebyshev_polynomial_t_stub,
 &shifted_chebyshev_polynomial_t_kernel);
@@ -1426,9 +1426,9 @@ REGISTER_DISPATCH(
 shifted_chebyshev_polynomial_w_stub,
 &shifted_chebyshev_polynomial_w_kernel);
 // Might enable AVX512 dispatch after enabling explicit vectorization for them.
-REGISTER_DISPATCH(chebyshev_polynomial_u_stub, &chebyshev_polynomial_u_kernel);
-REGISTER_DISPATCH(hermite_polynomial_h_stub, &hermite_polynomial_h_kernel);
-REGISTER_DISPATCH(hermite_polynomial_he_stub, &hermite_polynomial_he_kernel);
+REGISTER_DISPATCH(chebyshev_polynomial_u_stub, &chebyshev_polynomial_u_kernel)
+REGISTER_DISPATCH(hermite_polynomial_h_stub, &hermite_polynomial_h_kernel)
+REGISTER_DISPATCH(hermite_polynomial_he_stub, &hermite_polynomial_he_kernel)

 ALSO_REGISTER_AVX512_DISPATCH(atan2_stub, &atan2_kernel);
 ALSO_REGISTER_AVX512_DISPATCH(smooth_l1_stub, &smooth_l1_kernel);
@@ -529,8 +529,8 @@ void cpublas_copy_impl(at::ScalarType type, int64_t n, const void *_x, int64_t i
 }} // namespace cpublas::(anonymous)


-REGISTER_DISPATCH(cpublas::gemm_stub, &cpublas::cpublas_gemm_impl);
-REGISTER_DISPATCH(cpublas::axpy_stub, &cpublas::cpublas_axpy_impl);
-REGISTER_DISPATCH(cpublas::copy_stub, &cpublas::cpublas_copy_impl);
+REGISTER_DISPATCH(cpublas::gemm_stub, &cpublas::cpublas_gemm_impl)
+REGISTER_DISPATCH(cpublas::axpy_stub, &cpublas::cpublas_axpy_impl)
+REGISTER_DISPATCH(cpublas::copy_stub, &cpublas::cpublas_copy_impl)

 } // namespace at::native
@@ -78,6 +78,6 @@ void cat_serial_kernel(

 } // anonymous namespace

-REGISTER_DISPATCH(cat_serial_stub, &cat_serial_kernel);
+REGISTER_DISPATCH(cat_serial_stub, &cat_serial_kernel)

 } // namespace at::native
@@ -111,6 +111,6 @@ void channel_shuffle_kernel_impl(

 } // anonymous namespace

-REGISTER_DISPATCH(channel_shuffle_kernel, &channel_shuffle_kernel_impl);
+REGISTER_DISPATCH(channel_shuffle_kernel, &channel_shuffle_kernel_impl)

 } // at::native
@@ -25,7 +25,7 @@ void polar_kernel(TensorIterator& iter) {

 } // anonymous namespace

-REGISTER_DISPATCH(complex_stub, &complex_kernel);
+REGISTER_DISPATCH(complex_stub, &complex_kernel)
 ALSO_REGISTER_AVX512_DISPATCH(polar_stub, &polar_kernel);

 } // namespace at::native
@@ -325,6 +325,6 @@ void copy_kernel(TensorIterator& iter, bool /*non_blocking*/) {

 } // namespace CPU_CAPABILITY

-REGISTER_DISPATCH(copy_stub, &copy_kernel);
+REGISTER_DISPATCH(copy_stub, &copy_kernel)

 } // namespace at::native
@@ -76,6 +76,6 @@ static void cross_kernel_impl(const Tensor& result, const Tensor& a, const Tenso

 } // anonymous namespace

-REGISTER_DISPATCH(cross_stub, &cross_kernel_impl);
+REGISTER_DISPATCH(cross_stub, &cross_kernel_impl)

 } // namespace at::native
@@ -443,9 +443,9 @@ static void cdist_backward_kernel_impl(Tensor& result, const Tensor& grad, const

 } // anonymous namespace

-REGISTER_DISPATCH(pdist_forward_stub, &pdist_forward_kernel_impl);
-REGISTER_DISPATCH(pdist_backward_stub, &pdist_backward_kernel_impl);
-REGISTER_DISPATCH(cdist_stub, &cdist_kernel_impl);
-REGISTER_DISPATCH(cdist_backward_stub, &cdist_backward_kernel_impl);
+REGISTER_DISPATCH(pdist_forward_stub, &pdist_forward_kernel_impl)
+REGISTER_DISPATCH(pdist_backward_stub, &pdist_backward_kernel_impl)
+REGISTER_DISPATCH(cdist_stub, &cdist_kernel_impl)
+REGISTER_DISPATCH(cdist_backward_stub, &cdist_backward_kernel_impl)

 } // namespace at::native
@@ -235,16 +235,16 @@ static void random_full_64_bits_range_kernel(TensorIteratorBase& iter, std::opti

 } // namespace (anonymous)

-REGISTER_DISPATCH(bernoulli_tensor_stub, &bernoulli_tensor_kernel);
-REGISTER_DISPATCH(bernoulli_scalar_stub, &bernoulli_scalar_kernel);
-REGISTER_DISPATCH(cauchy_stub, &cauchy_kernel);
-REGISTER_DISPATCH(exponential_stub, &exponential_kernel);
-REGISTER_DISPATCH(geometric_stub, &geometric_kernel);
-REGISTER_DISPATCH(log_normal_stub, &log_normal_kernel);
-REGISTER_DISPATCH(normal_stub, &normal_kernel);
-REGISTER_DISPATCH(uniform_stub, &uniform_kernel);
-REGISTER_DISPATCH(random_from_to_stub, &random_from_to_kernel);
-REGISTER_DISPATCH(random_full_64_bits_range_stub, &random_full_64_bits_range_kernel);
-REGISTER_DISPATCH(random_stub, &random_kernel);
+REGISTER_DISPATCH(bernoulli_tensor_stub, &bernoulli_tensor_kernel)
+REGISTER_DISPATCH(bernoulli_scalar_stub, &bernoulli_scalar_kernel)
+REGISTER_DISPATCH(cauchy_stub, &cauchy_kernel)
+REGISTER_DISPATCH(exponential_stub, &exponential_kernel)
+REGISTER_DISPATCH(geometric_stub, &geometric_kernel)
+REGISTER_DISPATCH(log_normal_stub, &log_normal_kernel)
+REGISTER_DISPATCH(normal_stub, &normal_kernel)
+REGISTER_DISPATCH(uniform_stub, &uniform_kernel)
+REGISTER_DISPATCH(random_from_to_stub, &random_from_to_kernel)
+REGISTER_DISPATCH(random_full_64_bits_range_stub, &random_full_64_bits_range_kernel)
+REGISTER_DISPATCH(random_stub, &random_kernel)

 } // namespace at::native
@@ -67,6 +67,6 @@ void fill_kernel(TensorIterator& iter, const Scalar& value_scalar) {

 } // namespace

-REGISTER_DISPATCH(fill_stub, &fill_kernel);
+REGISTER_DISPATCH(fill_stub, &fill_kernel)

 } // namespace at::native
@@ -52,6 +52,6 @@ void _compute_linear_combination_cpu_kernel(

 }

-REGISTER_DISPATCH(_compute_linear_combination_stub, &_compute_linear_combination_cpu_kernel);
+REGISTER_DISPATCH(_compute_linear_combination_stub, &_compute_linear_combination_cpu_kernel)

 } // namespace at::native
@@ -214,5 +214,5 @@ void fused_adagrad_kernel(

 }

-REGISTER_DISPATCH(fused_adagrad_stub, &fused_adagrad_kernel);
+REGISTER_DISPATCH(fused_adagrad_stub, &fused_adagrad_kernel)
 } // namespace at::native
@@ -364,5 +364,5 @@ void fused_adam_kernel(

 }

-REGISTER_DISPATCH(fused_adam_stub, &fused_adam_kernel);
+REGISTER_DISPATCH(fused_adam_stub, &fused_adam_kernel)
 } // namespace at::native
@@ -264,5 +264,5 @@ void fused_sgd_kernel(

 }

-REGISTER_DISPATCH(fused_sgd_stub, &fused_sgd_kernel);
+REGISTER_DISPATCH(fused_sgd_stub, &fused_sgd_kernel)
 } // namespace at::native
@@ -1315,8 +1315,8 @@ void grid_sampler_2d_backward_cpu_kernel_impl(

 }

-REGISTER_DISPATCH(grid_sampler_2d_cpu_kernel, &grid_sampler_2d_cpu_kernel_impl);
-REGISTER_DISPATCH(grid_sampler_2d_backward_cpu_kernel, &grid_sampler_2d_backward_cpu_kernel_impl);
+REGISTER_DISPATCH(grid_sampler_2d_cpu_kernel, &grid_sampler_2d_cpu_kernel_impl)
+REGISTER_DISPATCH(grid_sampler_2d_backward_cpu_kernel, &grid_sampler_2d_backward_cpu_kernel_impl)


 } // namespace at::native
@@ -307,8 +307,8 @@ static void histogram_select_outer_bin_edges_impl(const Tensor& input, const int

 } // namespace

-REGISTER_DISPATCH(histogramdd_stub, &histogramdd_kernel_impl);
-REGISTER_DISPATCH(histogramdd_linear_stub, &histogramdd_linear_kernel_impl);
-REGISTER_DISPATCH(histogram_select_outer_bin_edges_stub, &histogram_select_outer_bin_edges_impl);
+REGISTER_DISPATCH(histogramdd_stub, &histogramdd_kernel_impl)
+REGISTER_DISPATCH(histogramdd_linear_stub, &histogramdd_linear_kernel_impl)
+REGISTER_DISPATCH(histogram_select_outer_bin_edges_stub, &histogram_select_outer_bin_edges_impl)

 } // namespace at::native
@@ -781,16 +781,16 @@ void flip_kernel(TensorIterator& iter, const bool quantized) {

 } // anonymous namespace

-REGISTER_DISPATCH(index_stub, &index_kernel);
-REGISTER_DISPATCH(index_fill_stub, &index_fill_kernel);
-REGISTER_DISPATCH(index_copy_stub, &index_copy_kernel);
-REGISTER_DISPATCH(index_put_stub, &index_put_kernel);
-REGISTER_DISPATCH(put_stub, &put_kernel);
-REGISTER_DISPATCH(take_stub, &take_kernel);
-REGISTER_DISPATCH(masked_fill_stub, &masked_fill_kernel);
-REGISTER_DISPATCH(masked_select_serial_stub, &masked_select_serial_kernel);
-REGISTER_DISPATCH(masked_select_stub, &masked_select_kernel);
-REGISTER_DISPATCH(masked_scatter_stub, &masked_scatter_kernel);
-REGISTER_DISPATCH(flip_stub, &flip_kernel);
+REGISTER_DISPATCH(index_stub, &index_kernel)
+REGISTER_DISPATCH(index_fill_stub, &index_fill_kernel)
+REGISTER_DISPATCH(index_copy_stub, &index_copy_kernel)
+REGISTER_DISPATCH(index_put_stub, &index_put_kernel)
+REGISTER_DISPATCH(put_stub, &put_kernel)
+REGISTER_DISPATCH(take_stub, &take_kernel)
+REGISTER_DISPATCH(masked_fill_stub, &masked_fill_kernel)
+REGISTER_DISPATCH(masked_select_serial_stub, &masked_select_serial_kernel)
+REGISTER_DISPATCH(masked_select_stub, &masked_select_kernel)
+REGISTER_DISPATCH(masked_scatter_stub, &masked_scatter_kernel)
+REGISTER_DISPATCH(flip_stub, &flip_kernel)

 } // namespace at::native
@@ -158,8 +158,8 @@ void lerp_tensor_kernel(at::TensorIteratorBase& iter) {

 } // anonymous namespace

-REGISTER_DISPATCH(lerp_kernel_scalar_weight, &lerp_scalar_kernel);
-REGISTER_DISPATCH(lerp_kernel_tensor_weight, &lerp_tensor_kernel);
+REGISTER_DISPATCH(lerp_kernel_scalar_weight, &lerp_scalar_kernel)
+REGISTER_DISPATCH(lerp_kernel_tensor_weight, &lerp_tensor_kernel)

 } // namespace native
 } // namespace at
@@ -85,5 +85,5 @@ void addr_kernel(TensorIterator &iter,

 } // anonymous namespace

-REGISTER_DISPATCH(addr_stub, &addr_kernel);
+REGISTER_DISPATCH(addr_stub, &addr_kernel)
 } // namespace at::native
@@ -740,8 +740,8 @@ void max_pool3d_backward_kernel_impl(

 } // anonymous namespace

-REGISTER_DISPATCH(max_pool2d_kernel, &max_pool2d_kernel_impl);
-REGISTER_DISPATCH(max_pool2d_backward_kernel, &max_pool2d_backward_kernel_impl);
-REGISTER_DISPATCH(max_pool3d_kernel, &max_pool3d_kernel_impl);
-REGISTER_DISPATCH(max_pool3d_backward_kernel, &max_pool3d_backward_kernel_impl);
+REGISTER_DISPATCH(max_pool2d_kernel, &max_pool2d_kernel_impl)
+REGISTER_DISPATCH(max_pool2d_backward_kernel, &max_pool2d_backward_kernel_impl)
+REGISTER_DISPATCH(max_pool3d_kernel, &max_pool3d_kernel_impl)
+REGISTER_DISPATCH(max_pool3d_backward_kernel, &max_pool3d_backward_kernel_impl)
 } // at::native
@@ -59,6 +59,6 @@ void max_pool1d_impl(

 } // namespace

-REGISTER_DISPATCH(max_pool1d_stub, &max_pool1d_impl);
+REGISTER_DISPATCH(max_pool1d_stub, &max_pool1d_impl)

 } // namespace at::native
@@ -272,7 +272,7 @@ void max_unpool3d_kernel_impl(

 } // anonymous namespace

-REGISTER_DISPATCH(max_unpool2d_kernel, &max_unpool2d_kernel_impl);
-REGISTER_DISPATCH(max_unpool3d_kernel, &max_unpool3d_kernel_impl);
+REGISTER_DISPATCH(max_unpool2d_kernel, &max_unpool2d_kernel_impl)
+REGISTER_DISPATCH(max_unpool3d_kernel, &max_unpool3d_kernel_impl)

 } // at::native
@@ -106,6 +106,6 @@ void transform_bias_rescale_qkv_kernel_impl(

 } // anonymous namespace

-REGISTER_DISPATCH(transform_bias_rescale_qkv_stub, &transform_bias_rescale_qkv_kernel_impl);
+REGISTER_DISPATCH(transform_bias_rescale_qkv_stub, &transform_bias_rescale_qkv_kernel_impl)

 } // at::native
@@ -711,19 +711,19 @@ void replication_pad3d_backward_kernel_impl(
 } // anonymous namespace

 // reflection padding
-REGISTER_DISPATCH(reflection_pad1d_kernel, &reflection_pad1d_kernel_impl);
-REGISTER_DISPATCH(reflection_pad1d_backward_kernel, &reflection_pad1d_backward_kernel_impl);
-REGISTER_DISPATCH(reflection_pad2d_kernel, &reflection_pad2d_kernel_impl);
-REGISTER_DISPATCH(reflection_pad2d_backward_kernel, &reflection_pad2d_backward_kernel_impl);
-REGISTER_DISPATCH(reflection_pad3d_kernel, &reflection_pad3d_kernel_impl);
-REGISTER_DISPATCH(reflection_pad3d_backward_kernel, &reflection_pad3d_backward_kernel_impl);
+REGISTER_DISPATCH(reflection_pad1d_kernel, &reflection_pad1d_kernel_impl)
+REGISTER_DISPATCH(reflection_pad1d_backward_kernel, &reflection_pad1d_backward_kernel_impl)
+REGISTER_DISPATCH(reflection_pad2d_kernel, &reflection_pad2d_kernel_impl)
+REGISTER_DISPATCH(reflection_pad2d_backward_kernel, &reflection_pad2d_backward_kernel_impl)
+REGISTER_DISPATCH(reflection_pad3d_kernel, &reflection_pad3d_kernel_impl)
+REGISTER_DISPATCH(reflection_pad3d_backward_kernel, &reflection_pad3d_backward_kernel_impl)

 // replication padding
-REGISTER_DISPATCH(replication_pad1d_kernel, &replication_pad1d_kernel_impl);
-REGISTER_DISPATCH(replication_pad1d_backward_kernel, &replication_pad1d_backward_kernel_impl);
-REGISTER_DISPATCH(replication_pad2d_kernel, &replication_pad2d_kernel_impl);
-REGISTER_DISPATCH(replication_pad2d_backward_kernel, &replication_pad2d_backward_kernel_impl);
-REGISTER_DISPATCH(replication_pad3d_kernel, &replication_pad3d_kernel_impl);
-REGISTER_DISPATCH(replication_pad3d_backward_kernel, &replication_pad3d_backward_kernel_impl);
+REGISTER_DISPATCH(replication_pad1d_kernel, &replication_pad1d_kernel_impl)
+REGISTER_DISPATCH(replication_pad1d_backward_kernel, &replication_pad1d_backward_kernel_impl)
+REGISTER_DISPATCH(replication_pad2d_kernel, &replication_pad2d_kernel_impl)
+REGISTER_DISPATCH(replication_pad2d_backward_kernel, &replication_pad2d_backward_kernel_impl)
+REGISTER_DISPATCH(replication_pad3d_kernel, &replication_pad3d_kernel_impl)
+REGISTER_DISPATCH(replication_pad3d_backward_kernel, &replication_pad3d_backward_kernel_impl)

 } // at::native
@@ -247,7 +247,7 @@ void pixel_unshuffle_kernel_impl(

 } // anonymous namespace

-REGISTER_DISPATCH(pixel_shuffle_kernel, &pixel_shuffle_kernel_impl);
-REGISTER_DISPATCH(pixel_unshuffle_kernel, &pixel_unshuffle_kernel_impl);
+REGISTER_DISPATCH(pixel_shuffle_kernel, &pixel_shuffle_kernel_impl)
+REGISTER_DISPATCH(pixel_unshuffle_kernel, &pixel_unshuffle_kernel_impl)

 } // at::native
@@ -235,10 +235,10 @@ static void mse_backward_cpu_kernel(TensorIterator& iter, const Scalar& value) {

 } // anonymous namespace

-REGISTER_DISPATCH(addcmul_stub, &addcmul_cpu_kernel);
-REGISTER_DISPATCH(addcdiv_stub, &addcdiv_cpu_kernel);
-REGISTER_DISPATCH(smooth_l1_backward_stub, &smooth_l1_backward_cpu_kernel);
-REGISTER_DISPATCH(huber_backward_stub, &huber_backward_cpu_kernel);
-REGISTER_DISPATCH(mse_backward_stub, &mse_backward_cpu_kernel);
+REGISTER_DISPATCH(addcmul_stub, &addcmul_cpu_kernel)
+REGISTER_DISPATCH(addcdiv_stub, &addcdiv_cpu_kernel)
+REGISTER_DISPATCH(smooth_l1_backward_stub, &smooth_l1_backward_cpu_kernel)
+REGISTER_DISPATCH(huber_backward_stub, &huber_backward_cpu_kernel)
+REGISTER_DISPATCH(mse_backward_stub, &mse_backward_cpu_kernel)

 } // namespace at::native
@@ -71,7 +71,7 @@ static void linspace_kernel(TensorIterator& iter, const Scalar& scalar_start, co

 } // anonymous namespace

-REGISTER_DISPATCH(arange_stub, &arange_kernel);
-REGISTER_DISPATCH(linspace_stub, &linspace_kernel);
+REGISTER_DISPATCH(arange_stub, &arange_kernel)
+REGISTER_DISPATCH(linspace_stub, &linspace_kernel)

 } // namespace at::native
@@ -220,8 +220,8 @@ static void aminmax_allreduce_kernel(

 } // namespace

-REGISTER_DISPATCH(min_all_stub, &min_all_kernel_impl);
-REGISTER_DISPATCH(max_all_stub, &max_all_kernel_impl);
-REGISTER_DISPATCH(aminmax_allreduce_stub, &aminmax_allreduce_kernel);
+REGISTER_DISPATCH(min_all_stub, &min_all_kernel_impl)
+REGISTER_DISPATCH(max_all_stub, &max_all_kernel_impl)
+REGISTER_DISPATCH(aminmax_allreduce_stub, &aminmax_allreduce_kernel)

 } // namespace at::native
@@ -436,21 +436,21 @@ static void argmin_kernel_impl(TensorIterator &iter) {

 } // anonymous namespace

-REGISTER_DISPATCH(std_var_stub, &std_var_kernel_impl);
-REGISTER_DISPATCH(prod_stub, &prod_kernel_impl);
+REGISTER_DISPATCH(std_var_stub, &std_var_kernel_impl)
+REGISTER_DISPATCH(prod_stub, &prod_kernel_impl)
 // mean implementation for CPU is in aten/src/ATen/native/ReduceOps.cpp
 // but mean_stub must be defined for CPU as well
-REGISTER_DISPATCH(mean_stub, nullptr);
-REGISTER_DISPATCH(norm_stub, &norm_kernel_tensor_iterator_impl);
-REGISTER_DISPATCH(and_stub, &and_kernel_impl);
-REGISTER_DISPATCH(or_stub, &or_kernel_impl);
-REGISTER_DISPATCH(min_values_stub, &min_values_kernel_impl);
-REGISTER_DISPATCH(max_values_stub, &max_values_kernel_impl);
-REGISTER_DISPATCH(argmax_stub, &argmax_kernel_impl);
-REGISTER_DISPATCH(argmin_stub, &argmin_kernel_impl);
+REGISTER_DISPATCH(mean_stub, nullptr)
+REGISTER_DISPATCH(norm_stub, &norm_kernel_tensor_iterator_impl)
+REGISTER_DISPATCH(and_stub, &and_kernel_impl)
+REGISTER_DISPATCH(or_stub, &or_kernel_impl)
+REGISTER_DISPATCH(min_values_stub, &min_values_kernel_impl)
+REGISTER_DISPATCH(max_values_stub, &max_values_kernel_impl)
+REGISTER_DISPATCH(argmax_stub, &argmax_kernel_impl)
+REGISTER_DISPATCH(argmin_stub, &argmin_kernel_impl)

-REGISTER_DISPATCH(cumprod_stub, &cumprod_cpu_kernel);
-REGISTER_DISPATCH(cumsum_stub, &cumsum_cpu_kernel);
-REGISTER_DISPATCH(logcumsumexp_stub, &logcumsumexp_cpu_kernel);
+REGISTER_DISPATCH(cumprod_stub, &cumprod_cpu_kernel)
+REGISTER_DISPATCH(cumsum_stub, &cumsum_cpu_kernel)
+REGISTER_DISPATCH(logcumsumexp_stub, &logcumsumexp_cpu_kernel)

 } // namespace at::native
@@ -33,6 +33,6 @@ void renorm_scale_factor_impl(TensorIteratorBase& iter, double maxnorm) {

 } // namespace (anonymous)

-REGISTER_DISPATCH(renorm_scale_factor_stub, &renorm_scale_factor_impl);
+REGISTER_DISPATCH(renorm_scale_factor_stub, &renorm_scale_factor_impl)

 } // namespace at::native
@@ -94,6 +94,6 @@ void sampled_addmm_sparse_csr_kernel(

 } // anonymous namespace

-REGISTER_DISPATCH(sampled_addmm_sparse_csr_stub, &sampled_addmm_sparse_csr_kernel);
+REGISTER_DISPATCH(sampled_addmm_sparse_csr_stub, &sampled_addmm_sparse_csr_kernel)

 } // at::native
@@ -955,17 +955,17 @@ void scatter_scalar_reduce_cpu_kernel(const Tensor& self, const int64_t dim, con

 } // anonymous namespace

-REGISTER_DISPATCH(gather_stub, &gather_cpu_kernel);
-REGISTER_DISPATCH(scatter_stub, &scatter_cpu_kernel);
-REGISTER_DISPATCH(scatter_fill_stub, &scatter_fill_cpu_kernel);
-REGISTER_DISPATCH(scatter_add_stub, &scatter_add_cpu_kernel);
-REGISTER_DISPATCH(scatter_reduce_stub, &scatter_reduce_cpu_kernel);
-REGISTER_DISPATCH(scatter_scalar_reduce_stub, &scatter_scalar_reduce_cpu_kernel);
-REGISTER_DISPATCH(scatter_reduce_two_stub, &scatter_reduce_two_cpu_kernel);
+REGISTER_DISPATCH(gather_stub, &gather_cpu_kernel)
+REGISTER_DISPATCH(scatter_stub, &scatter_cpu_kernel)
+REGISTER_DISPATCH(scatter_fill_stub, &scatter_fill_cpu_kernel)
+REGISTER_DISPATCH(scatter_add_stub, &scatter_add_cpu_kernel)
+REGISTER_DISPATCH(scatter_reduce_stub, &scatter_reduce_cpu_kernel)
+REGISTER_DISPATCH(scatter_scalar_reduce_stub, &scatter_scalar_reduce_cpu_kernel)
+REGISTER_DISPATCH(scatter_reduce_two_stub, &scatter_reduce_two_cpu_kernel)

 // fast paths for GNN usage
-REGISTER_DISPATCH(scatter_add_expanded_index_stub, &scatter_add_expanded_index_kernel);
-REGISTER_DISPATCH(scatter_reduce_expanded_index_stub, &scatter_reduce_expanded_index_kernel);
-REGISTER_DISPATCH(gather_expanded_index_stub, &gather_expanded_index_kernel);
+REGISTER_DISPATCH(scatter_add_expanded_index_stub, &scatter_add_expanded_index_kernel)
+REGISTER_DISPATCH(scatter_reduce_expanded_index_stub, &scatter_reduce_expanded_index_kernel)
+REGISTER_DISPATCH(gather_expanded_index_stub, &gather_expanded_index_kernel)

 } // namespace at::native
@@ -264,7 +264,7 @@ static void topk_kernel(

 } // anonymous namespace

-REGISTER_DISPATCH(sort_stub, &sort_kernel);
-REGISTER_DISPATCH(topk_stub, &topk_kernel);
+REGISTER_DISPATCH(sort_stub, &sort_kernel)
+REGISTER_DISPATCH(topk_stub, &topk_kernel)

 } //at::native
@@ -60,6 +60,6 @@ void _spdiags_kernel_cpu(

 } // namespace

-REGISTER_DISPATCH(spdiags_kernel_stub, &_spdiags_kernel_cpu);
+REGISTER_DISPATCH(spdiags_kernel_stub, &_spdiags_kernel_cpu)

 } // namespace at::native
@@ -555,11 +555,11 @@ void spmm_reduce_backward_other_arg_kernel(

 } // anonymous namespace

-REGISTER_DISPATCH(spmm_reduce_stub, &spmm_reduce_kernel);
-REGISTER_DISPATCH(spmm_reduce_arg_stub, &spmm_reduce_arg_kernel);
-REGISTER_DISPATCH(spmm_reduce_backward_input_stub, &spmm_reduce_backward_input_kernel);
-REGISTER_DISPATCH(spmm_reduce_backward_input_arg_stub, &spmm_reduce_backward_input_arg_kernel);
-REGISTER_DISPATCH(spmm_reduce_backward_other_stub, &spmm_reduce_backward_other_kernel);
-REGISTER_DISPATCH(spmm_reduce_backward_other_arg_stub, &spmm_reduce_backward_other_arg_kernel);
+REGISTER_DISPATCH(spmm_reduce_stub, &spmm_reduce_kernel)
+REGISTER_DISPATCH(spmm_reduce_arg_stub, &spmm_reduce_arg_kernel)
+REGISTER_DISPATCH(spmm_reduce_backward_input_stub, &spmm_reduce_backward_input_kernel)
+REGISTER_DISPATCH(spmm_reduce_backward_input_arg_stub, &spmm_reduce_backward_input_arg_kernel)
+REGISTER_DISPATCH(spmm_reduce_backward_other_stub, &spmm_reduce_backward_other_kernel)
+REGISTER_DISPATCH(spmm_reduce_backward_other_arg_stub, &spmm_reduce_backward_other_arg_kernel)

 } // at::native
@@ -19,6 +19,6 @@ void stack_serial_kernel(Tensor& result, TensorList tensors, int64_t dim) {

 } // anonymous namespace

-REGISTER_DISPATCH(stack_serial_stub, &stack_serial_kernel);
+REGISTER_DISPATCH(stack_serial_stub, &stack_serial_kernel)

 } // namespace at::native
@@ -645,7 +645,7 @@ void nansum_kernel_impl(TensorIterator &iter) {
 // nansum on Float16 has poor accuracy with AVX2, and more so with AVX512.
 // So until it's fixed, it won't be dispatched with AVX512. GH issue 59415.
 // Besides, these kernels are slower with AVX512 than with AVX2.
-REGISTER_DISPATCH(nansum_stub, &nansum_kernel_impl);
-REGISTER_DISPATCH(sum_stub, &sum_kernel_impl);
+REGISTER_DISPATCH(nansum_stub, &nansum_kernel_impl)
+REGISTER_DISPATCH(sum_stub, &sum_kernel_impl)

 } // namespace at::native
@@ -400,17 +400,17 @@ static void clamp_min_scalar_kernel_impl(TensorIteratorBase& iter, Scalar min_)

 } // anonymous namespace

-REGISTER_DISPATCH(max_stub, &max_kernel_impl);
-REGISTER_DISPATCH(min_stub, &min_kernel_impl);
-REGISTER_DISPATCH(aminmax_stub, &aminmax_kernel);
-REGISTER_DISPATCH(where_kernel, &where_kernel_impl);
-REGISTER_DISPATCH(isposinf_stub, &isposinf_kernel_impl);
-REGISTER_DISPATCH(isneginf_stub, &isneginf_kernel_impl);
-REGISTER_DISPATCH(mode_stub, &mode_kernel_impl);
-REGISTER_DISPATCH(clamp_stub, &clamp_kernel_impl);
-REGISTER_DISPATCH(clamp_scalar_stub, &clamp_scalar_kernel_impl);
-REGISTER_DISPATCH(clamp_min_scalar_stub, &clamp_min_scalar_kernel_impl);
-REGISTER_DISPATCH(clamp_max_scalar_stub, &clamp_max_scalar_kernel_impl);
-REGISTER_DISPATCH(isin_default_stub, &isin_default_kernel_cpu);
+REGISTER_DISPATCH(max_stub, &max_kernel_impl)
+REGISTER_DISPATCH(min_stub, &min_kernel_impl)
+REGISTER_DISPATCH(aminmax_stub, &aminmax_kernel)
+REGISTER_DISPATCH(where_kernel, &where_kernel_impl)
+REGISTER_DISPATCH(isposinf_stub, &isposinf_kernel_impl)
+REGISTER_DISPATCH(isneginf_stub, &isneginf_kernel_impl)
+REGISTER_DISPATCH(mode_stub, &mode_kernel_impl)
+REGISTER_DISPATCH(clamp_stub, &clamp_kernel_impl)
+REGISTER_DISPATCH(clamp_scalar_stub, &clamp_scalar_kernel_impl)
+REGISTER_DISPATCH(clamp_min_scalar_stub, &clamp_min_scalar_kernel_impl)
+REGISTER_DISPATCH(clamp_max_scalar_stub, &clamp_max_scalar_kernel_impl)
+REGISTER_DISPATCH(isin_default_stub, &isin_default_kernel_cpu)

 } // namespace at::native
@@ -813,23 +813,23 @@ static void modified_bessel_k1_kernel(TensorIteratorBase& iterator) {
 } // CPU_CAPABILITY namespace

 // The following kernels are slower with AVX512
-REGISTER_DISPATCH(round_decimals_stub, &CPU_CAPABILITY::round_decimals_kernel);
-REGISTER_DISPATCH(abs_stub, &CPU_CAPABILITY::abs_kernel);
-REGISTER_DISPATCH(angle_stub, &CPU_CAPABILITY::angle_kernel);
-REGISTER_DISPATCH(neg_stub, &CPU_CAPABILITY::neg_kernel);
-REGISTER_DISPATCH(signbit_stub, &CPU_CAPABILITY::signbit_kernel);
-REGISTER_DISPATCH(sinc_stub, &CPU_CAPABILITY::sinc_kernel);
-REGISTER_DISPATCH(bitwise_not_stub, &CPU_CAPABILITY::bitwise_not_kernel);
-REGISTER_DISPATCH(logical_not_stub, &CPU_CAPABILITY::logical_not_kernel);
-REGISTER_DISPATCH(nan_to_num_stub, &CPU_CAPABILITY::nan_to_num_kernel);
-REGISTER_DISPATCH(conj_physical_stub, &CPU_CAPABILITY::conj_kernel);
-REGISTER_DISPATCH(rsqrt_stub, &CPU_CAPABILITY::rsqrt_kernel);
-REGISTER_DISPATCH(frac_stub, &CPU_CAPABILITY::frac_kernel);
-REGISTER_DISPATCH(special_entr_stub, &CPU_CAPABILITY::entr_kernel);
-REGISTER_DISPATCH(special_i0e_stub, &CPU_CAPABILITY::i0e_kernel);
-REGISTER_DISPATCH(special_ndtri_stub, &CPU_CAPABILITY::ndtri_kernel);
-REGISTER_DISPATCH(special_modified_bessel_k0_stub, &CPU_CAPABILITY::modified_bessel_k0_kernel);
-REGISTER_DISPATCH(special_modified_bessel_k1_stub, &CPU_CAPABILITY::modified_bessel_k1_kernel);
+REGISTER_DISPATCH(round_decimals_stub, &CPU_CAPABILITY::round_decimals_kernel)
+REGISTER_DISPATCH(abs_stub, &CPU_CAPABILITY::abs_kernel)
+REGISTER_DISPATCH(angle_stub, &CPU_CAPABILITY::angle_kernel)
+REGISTER_DISPATCH(neg_stub, &CPU_CAPABILITY::neg_kernel)
+REGISTER_DISPATCH(signbit_stub, &CPU_CAPABILITY::signbit_kernel)
+REGISTER_DISPATCH(sinc_stub, &CPU_CAPABILITY::sinc_kernel)
+REGISTER_DISPATCH(bitwise_not_stub, &CPU_CAPABILITY::bitwise_not_kernel)
+REGISTER_DISPATCH(logical_not_stub, &CPU_CAPABILITY::logical_not_kernel)
+REGISTER_DISPATCH(nan_to_num_stub, &CPU_CAPABILITY::nan_to_num_kernel)
+REGISTER_DISPATCH(conj_physical_stub, &CPU_CAPABILITY::conj_kernel)
+REGISTER_DISPATCH(rsqrt_stub, &CPU_CAPABILITY::rsqrt_kernel)
+REGISTER_DISPATCH(frac_stub, &CPU_CAPABILITY::frac_kernel)
+REGISTER_DISPATCH(special_entr_stub, &CPU_CAPABILITY::entr_kernel)
+REGISTER_DISPATCH(special_i0e_stub, &CPU_CAPABILITY::i0e_kernel)
+REGISTER_DISPATCH(special_ndtri_stub, &CPU_CAPABILITY::ndtri_kernel)
+REGISTER_DISPATCH(special_modified_bessel_k0_stub, &CPU_CAPABILITY::modified_bessel_k0_kernel)
+REGISTER_DISPATCH(special_modified_bessel_k1_stub, &CPU_CAPABILITY::modified_bessel_k1_kernel)
 IMPLEMENT_FLOAT_KERNEL_WITHOUT_AVX512(ceil);
 IMPLEMENT_FLOAT_KERNEL_WITHOUT_AVX512(floor);
 IMPLEMENT_FLOAT_KERNEL_WITHOUT_AVX512(round);
@@ -853,23 +853,23 @@ ALSO_REGISTER_AVX512_DISPATCH(cosh_stub, &CPU_CAPABILITY::cosh_kernel);
 ALSO_REGISTER_AVX512_DISPATCH(atanh_stub, &CPU_CAPABILITY::atanh_kernel);

 // Might enable AVX512 dispatch after enabling explicit vectorization for them
-REGISTER_DISPATCH(acosh_stub, &CPU_CAPABILITY::acosh_kernel);
-REGISTER_DISPATCH(asinh_stub, &CPU_CAPABILITY::asinh_kernel);
-REGISTER_DISPATCH(digamma_stub, &CPU_CAPABILITY::digamma_kernel);
-REGISTER_DISPATCH(trigamma_stub, &CPU_CAPABILITY::trigamma_kernel);
-REGISTER_DISPATCH(polygamma_stub, &CPU_CAPABILITY::polygamma_kernel);
-REGISTER_DISPATCH(kaiser_window_stub, &CPU_CAPABILITY::kaiser_window_kernel);
-REGISTER_DISPATCH(frexp_stub, &CPU_CAPABILITY::frexp_kernel);
-REGISTER_DISPATCH(special_log_ndtr_stub, &CPU_CAPABILITY::log_ndtr_kernel);
-REGISTER_DISPATCH(special_i1_stub, &CPU_CAPABILITY::i1_kernel);
-REGISTER_DISPATCH(special_i1e_stub, &CPU_CAPABILITY::i1e_kernel);
-REGISTER_DISPATCH(special_erfcx_stub, &CPU_CAPABILITY::erfcx_kernel);
-REGISTER_DISPATCH(special_bessel_j0_stub, &CPU_CAPABILITY::bessel_j0_kernel);
-REGISTER_DISPATCH(special_bessel_j1_stub, &CPU_CAPABILITY::bessel_j1_kernel);
-REGISTER_DISPATCH(special_bessel_y0_stub, &CPU_CAPABILITY::bessel_y0_kernel);
-REGISTER_DISPATCH(special_bessel_y1_stub, &CPU_CAPABILITY::bessel_y1_kernel);
-REGISTER_DISPATCH(special_modified_bessel_i0_stub, &CPU_CAPABILITY::modified_bessel_i0_kernel);
-REGISTER_DISPATCH(special_modified_bessel_i1_stub, &CPU_CAPABILITY::modified_bessel_i1_kernel);
+REGISTER_DISPATCH(acosh_stub, &CPU_CAPABILITY::acosh_kernel)
+REGISTER_DISPATCH(asinh_stub, &CPU_CAPABILITY::asinh_kernel)
+REGISTER_DISPATCH(digamma_stub, &CPU_CAPABILITY::digamma_kernel)
+REGISTER_DISPATCH(trigamma_stub, &CPU_CAPABILITY::trigamma_kernel)
+REGISTER_DISPATCH(polygamma_stub, &CPU_CAPABILITY::polygamma_kernel)
+REGISTER_DISPATCH(kaiser_window_stub, &CPU_CAPABILITY::kaiser_window_kernel)
+REGISTER_DISPATCH(frexp_stub, &CPU_CAPABILITY::frexp_kernel)
+REGISTER_DISPATCH(special_log_ndtr_stub, &CPU_CAPABILITY::log_ndtr_kernel)
+REGISTER_DISPATCH(special_i1_stub, &CPU_CAPABILITY::i1_kernel)
+REGISTER_DISPATCH(special_i1e_stub, &CPU_CAPABILITY::i1e_kernel)
+REGISTER_DISPATCH(special_erfcx_stub, &CPU_CAPABILITY::erfcx_kernel)
+REGISTER_DISPATCH(special_bessel_j0_stub, &CPU_CAPABILITY::bessel_j0_kernel)
+REGISTER_DISPATCH(special_bessel_j1_stub, &CPU_CAPABILITY::bessel_j1_kernel)
+REGISTER_DISPATCH(special_bessel_y0_stub, &CPU_CAPABILITY::bessel_y0_kernel)
+REGISTER_DISPATCH(special_bessel_y1_stub, &CPU_CAPABILITY::bessel_y1_kernel)
+REGISTER_DISPATCH(special_modified_bessel_i0_stub, &CPU_CAPABILITY::modified_bessel_i0_kernel)
+REGISTER_DISPATCH(special_modified_bessel_i1_stub, &CPU_CAPABILITY::modified_bessel_i1_kernel)

 STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(acos);
 STATIC_IMPLEMENT_COMPLEX_KERNEL_WITH_AVX512(asin);
@@ -446,7 +446,7 @@ void unfolded2d_copy_kernel(

 } // namespace

-REGISTER_DISPATCH(unfolded2d_copy_stub, &unfolded2d_copy_kernel);
-REGISTER_DISPATCH(unfolded2d_acc_stub, &unfolded2d_acc_kernel);
+REGISTER_DISPATCH(unfolded2d_copy_stub, &unfolded2d_copy_kernel)
+REGISTER_DISPATCH(unfolded2d_acc_stub, &unfolded2d_acc_kernel)

 } // namespace at::native
@@ -147,6 +147,6 @@ void unfold_backward_cpu_kernel(

 }

-REGISTER_DISPATCH(unfold_backward_stub, &unfold_backward_cpu_kernel);
+REGISTER_DISPATCH(unfold_backward_stub, &unfold_backward_cpu_kernel)

 } // namespace at::native
@ -2060,20 +2060,20 @@ void upsample_bicubic2d_aa_backward_kernel_impl(
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
REGISTER_DISPATCH(upsample_nearest1d_kernel, &upsample_nearest1d_kernel_impl);
|
||||
REGISTER_DISPATCH(_upsample_nearest_exact1d_kernel, &_upsample_nearest_exact1d_kernel_impl);
|
||||
REGISTER_DISPATCH(upsample_nearest2d_kernel, &upsample_nearest2d_kernel_impl);
|
||||
REGISTER_DISPATCH(_upsample_nearest_exact2d_kernel, &_upsample_nearest_exact2d_kernel_impl);
|
||||
REGISTER_DISPATCH(upsample_nearest3d_kernel, &upsample_nearest3d_kernel_impl);
|
||||
REGISTER_DISPATCH(_upsample_nearest_exact3d_kernel, &_upsample_nearest_exact3d_kernel_impl);
|
||||
REGISTER_DISPATCH(upsample_nearest1d_kernel, &upsample_nearest1d_kernel_impl)
|
||||
REGISTER_DISPATCH(_upsample_nearest_exact1d_kernel, &_upsample_nearest_exact1d_kernel_impl)
|
||||
REGISTER_DISPATCH(upsample_nearest2d_kernel, &upsample_nearest2d_kernel_impl)
|
||||
REGISTER_DISPATCH(_upsample_nearest_exact2d_kernel, &_upsample_nearest_exact2d_kernel_impl)
|
||||
REGISTER_DISPATCH(upsample_nearest3d_kernel, &upsample_nearest3d_kernel_impl)
|
||||
REGISTER_DISPATCH(_upsample_nearest_exact3d_kernel, &_upsample_nearest_exact3d_kernel_impl)
|
||||
|
||||
REGISTER_DISPATCH(upsample_linear1d_kernel, &upsample_linear1d_kernel_impl);
|
||||
REGISTER_DISPATCH(upsample_bilinear2d_kernel, &upsample_bilinear2d_kernel_impl);
|
||||
REGISTER_DISPATCH(_upsample_bilinear2d_aa_kernel, &upsample_bilinear2d_aa_kernel_impl);
|
||||
REGISTER_DISPATCH(_upsample_bilinear2d_aa_backward_kernel, &upsample_bilinear2d_aa_backward_kernel_impl);
|
||||
REGISTER_DISPATCH(upsample_trilinear3d_kernel, &upsample_trilinear3d_kernel_impl);
|
||||
REGISTER_DISPATCH(upsample_linear1d_kernel, &upsample_linear1d_kernel_impl)
|
||||
REGISTER_DISPATCH(upsample_bilinear2d_kernel, &upsample_bilinear2d_kernel_impl)
|
||||
REGISTER_DISPATCH(_upsample_bilinear2d_aa_kernel, &upsample_bilinear2d_aa_kernel_impl)
|
||||
REGISTER_DISPATCH(_upsample_bilinear2d_aa_backward_kernel, &upsample_bilinear2d_aa_backward_kernel_impl)
|
||||
REGISTER_DISPATCH(upsample_trilinear3d_kernel, &upsample_trilinear3d_kernel_impl)
|
||||
|
||||
REGISTER_DISPATCH(upsample_bicubic2d_kernel, &upsample_bicubic2d_kernel_impl);
|
||||
REGISTER_DISPATCH(_upsample_bicubic2d_aa_kernel, &upsample_bicubic2d_aa_kernel_impl);
|
||||
REGISTER_DISPATCH(_upsample_bicubic2d_aa_backward_kernel, &upsample_bicubic2d_aa_backward_kernel_impl);
|
||||
REGISTER_DISPATCH(upsample_bicubic2d_kernel, &upsample_bicubic2d_kernel_impl)
|
||||
REGISTER_DISPATCH(_upsample_bicubic2d_aa_kernel, &upsample_bicubic2d_aa_kernel_impl)
|
||||
REGISTER_DISPATCH(_upsample_bicubic2d_aa_backward_kernel, &upsample_bicubic2d_aa_backward_kernel_impl)
|
||||
} // namespace at::native

@ -788,15 +788,15 @@ void upsample_trilinear3d_backward_kernel_impl(

} // anonymous namespace

REGISTER_DISPATCH(upsample_nearest1d_backward_kernel, &upsample_nearest1d_backward_kernel_impl);
REGISTER_DISPATCH(_upsample_nearest_exact1d_backward_kernel, &_upsample_nearest_exact1d_backward_kernel_impl);
REGISTER_DISPATCH(upsample_nearest2d_backward_kernel, &upsample_nearest2d_backward_kernel_impl);
REGISTER_DISPATCH(_upsample_nearest_exact2d_backward_kernel, &_upsample_nearest_exact2d_backward_kernel_impl);
REGISTER_DISPATCH(upsample_nearest3d_backward_kernel, &upsample_nearest3d_backward_kernel_impl);
REGISTER_DISPATCH(_upsample_nearest_exact3d_backward_kernel, &_upsample_nearest_exact3d_backward_kernel_impl);
REGISTER_DISPATCH(upsample_nearest1d_backward_kernel, &upsample_nearest1d_backward_kernel_impl)
REGISTER_DISPATCH(_upsample_nearest_exact1d_backward_kernel, &_upsample_nearest_exact1d_backward_kernel_impl)
REGISTER_DISPATCH(upsample_nearest2d_backward_kernel, &upsample_nearest2d_backward_kernel_impl)
REGISTER_DISPATCH(_upsample_nearest_exact2d_backward_kernel, &_upsample_nearest_exact2d_backward_kernel_impl)
REGISTER_DISPATCH(upsample_nearest3d_backward_kernel, &upsample_nearest3d_backward_kernel_impl)
REGISTER_DISPATCH(_upsample_nearest_exact3d_backward_kernel, &_upsample_nearest_exact3d_backward_kernel_impl)

REGISTER_DISPATCH(upsample_linear1d_backward_kernel, &upsample_linear1d_backward_kernel_impl);
REGISTER_DISPATCH(upsample_bilinear2d_backward_kernel, &upsample_bilinear2d_backward_kernel_impl);
REGISTER_DISPATCH(upsample_trilinear3d_backward_kernel, &upsample_trilinear3d_backward_kernel_impl);
REGISTER_DISPATCH(upsample_linear1d_backward_kernel, &upsample_linear1d_backward_kernel_impl)
REGISTER_DISPATCH(upsample_bilinear2d_backward_kernel, &upsample_bilinear2d_backward_kernel_impl)
REGISTER_DISPATCH(upsample_trilinear3d_backward_kernel, &upsample_trilinear3d_backward_kernel_impl)

} // namespace at::native

@ -437,7 +437,7 @@ void weight_norm_backward_kernel(

} // anonymous namespace

REGISTER_DISPATCH(weight_norm_stub, &weight_norm_kernel);
REGISTER_DISPATCH(weight_norm_backward_stub, &weight_norm_backward_kernel);
REGISTER_DISPATCH(weight_norm_stub, &weight_norm_kernel)
REGISTER_DISPATCH(weight_norm_backward_stub, &weight_norm_backward_kernel)

} // at::native

@ -20,5 +20,5 @@ static void airy_ai_kernel(TensorIteratorBase& iterator) {
} // airy_ai_kernel(TensorIteratorBase& iterator)
} // namespace CPU_CAPABILITY

REGISTER_DISPATCH(special_airy_ai_stub, &CPU_CAPABILITY::airy_ai_kernel);
REGISTER_DISPATCH(special_airy_ai_stub, &CPU_CAPABILITY::airy_ai_kernel)
} // namespace at::native

@ -1396,8 +1396,8 @@ void batch_norm_cpu_backward_kernel(Tensor& grad_input, Tensor& grad_weight, Ten

}// anonymous namespace

REGISTER_DISPATCH(batch_norm_cpu_stub, &batch_norm_cpu_kernel);
REGISTER_DISPATCH(batch_norm_cpu_collect_stats_stub, &batch_norm_cpu_collect_stats_kernel);
REGISTER_DISPATCH(batch_norm_cpu_backward_stub, &batch_norm_cpu_backward_kernel);
REGISTER_DISPATCH(batch_norm_cpu_stub, &batch_norm_cpu_kernel)
REGISTER_DISPATCH(batch_norm_cpu_collect_stats_stub, &batch_norm_cpu_collect_stats_kernel)
REGISTER_DISPATCH(batch_norm_cpu_backward_stub, &batch_norm_cpu_backward_kernel)

} // namespace at::native

@ -1584,7 +1584,7 @@ void GroupNormBackwardKernelImpl(

} // namespace

REGISTER_DISPATCH(GroupNormKernel, &GroupNormKernelImpl);
REGISTER_DISPATCH(GroupNormBackwardKernel, &GroupNormBackwardKernelImpl);
REGISTER_DISPATCH(GroupNormKernel, &GroupNormKernelImpl)
REGISTER_DISPATCH(GroupNormBackwardKernel, &GroupNormBackwardKernelImpl)

} // namespace at::native

@ -609,7 +609,7 @@ void LayerNormBackwardKernelImpl(

} // namespace

REGISTER_DISPATCH(LayerNormKernel, &LayerNormKernelImpl);
REGISTER_DISPATCH(LayerNormBackwardKernel, &LayerNormBackwardKernelImpl);
REGISTER_DISPATCH(LayerNormKernel, &LayerNormKernelImpl)
REGISTER_DISPATCH(LayerNormBackwardKernel, &LayerNormBackwardKernelImpl)

} // namespace at::native

@ -20,5 +20,5 @@ inline namespace CPU_CAPABILITY {
} // scaled_modified_bessel_k0_kernel(TensorIteratorBase& iterator)
} // namespace CPU_CAPABILITY

REGISTER_DISPATCH(special_scaled_modified_bessel_k0_stub, &CPU_CAPABILITY::scaled_modified_bessel_k0_kernel);
REGISTER_DISPATCH(special_scaled_modified_bessel_k0_stub, &CPU_CAPABILITY::scaled_modified_bessel_k0_kernel)
} // namespace at::native

@ -20,5 +20,5 @@ inline namespace CPU_CAPABILITY {
} // scaled_modified_bessel_k1_kernel(TensorIteratorBase& iterator)
} // namespace CPU_CAPABILITY

REGISTER_DISPATCH(special_scaled_modified_bessel_k1_stub, &CPU_CAPABILITY::scaled_modified_bessel_k1_kernel);
REGISTER_DISPATCH(special_scaled_modified_bessel_k1_stub, &CPU_CAPABILITY::scaled_modified_bessel_k1_kernel)
} // namespace at::native

@ -20,5 +20,5 @@ inline namespace CPU_CAPABILITY {
} // spherical_bessel_j0_kernel(TensorIteratorBase& iterator)
} // namespace CPU_CAPABILITY

REGISTER_DISPATCH(special_spherical_bessel_j0_stub, &CPU_CAPABILITY::spherical_bessel_j0_kernel);
REGISTER_DISPATCH(special_spherical_bessel_j0_stub, &CPU_CAPABILITY::spherical_bessel_j0_kernel)
} // namespace at::native

@ -559,7 +559,7 @@ void lstm_mkldnn(Tensor& output, Tensor& hy, Tensor& cy,
}
} // anonymous namespace

REGISTER_ALL_CPU_DISPATCH(lstm_mkldnn_stub, &lstm_mkldnn);
REGISTER_ALL_CPU_DISPATCH(lstm_mkldnn_stub, &lstm_mkldnn)

} // namespace at::native

@ -83,7 +83,7 @@ const std::map<c10::string_view, ideep::algorithm>& fusion_unary_alg_map();
const std::map<c10::string_view, ideep::algorithm>& fusion_binary_alg_map();

#endif // AT_MKLDNN_ENABLED()
};
}

#if defined(__aarch64__)
inline bool mkldnn_bf16_device_check_arm() {

@ -87,31 +87,31 @@ using dequantize_tensor_per_tensor_affine_sub_byte_fn =

DECLARE_DISPATCH(
quantize_tensor_per_tensor_affine_fn,
quantize_tensor_per_tensor_affine_stub);
quantize_tensor_per_tensor_affine_stub)
DECLARE_DISPATCH(
quantize_tensor_per_channel_affine_fn,
quantize_tensor_per_channel_affine_stub);
quantize_tensor_per_channel_affine_stub)
DECLARE_DISPATCH(
quantize_tensor_per_channel_float_qparams_fn,
quantize_tensor_per_channel_float_qparams_stub);
quantize_tensor_per_channel_float_qparams_stub)

DECLARE_DISPATCH(
dequantize_tensor_per_tensor_affine_fn,
dequantize_tensor_per_tensor_affine_stub);
dequantize_tensor_per_tensor_affine_stub)
DECLARE_DISPATCH(
dequantize_tensor_per_channel_affine_fn,
dequantize_tensor_per_channel_affine_stub);
dequantize_tensor_per_channel_affine_stub)
DECLARE_DISPATCH(
dequantize_tensor_per_channel_float_qparams_fn,
dequantize_tensor_per_channel_float_qparams_stub);
dequantize_tensor_per_channel_float_qparams_stub)

DECLARE_DISPATCH(
quantize_tensor_per_tensor_affine_sub_byte_fn,
quantize_tensor_per_tensor_affine_sub_byte_stub);
quantize_tensor_per_tensor_affine_sub_byte_stub)

DECLARE_DISPATCH(
dequantize_tensor_per_tensor_affine_sub_byte_fn,
dequantize_tensor_per_tensor_affine_sub_byte_stub);
dequantize_tensor_per_tensor_affine_sub_byte_stub)

template <typename T>
TORCH_API Tensor quantize_tensor(

@ -4242,8 +4242,8 @@ REGISTER_DISPATCH(dequantize_tensor_per_channel_float_qparams_stub,
&dequantize_tensor_per_channel_float_qparams_cpu);
REGISTER_DISPATCH(fake_quant_per_channel_cachemask_stub,
&fake_quant_per_channel_cachemask_cpu);
REGISTER_DISPATCH(qavg_pool2d_nhwc_stub, &qavg_pool2d_nhwc_kernel);
REGISTER_DISPATCH(qavg_pool3d_nhwc_stub, &qavg_pool3d_nhwc_kernel);
REGISTER_DISPATCH(qavg_pool2d_nhwc_stub, &qavg_pool2d_nhwc_kernel)
REGISTER_DISPATCH(qavg_pool3d_nhwc_stub, &qavg_pool3d_nhwc_kernel)
#else
// These kernels are dispatched to AVX512
ALSO_REGISTER_AVX512_DISPATCH(dequantize_tensor_per_channel_affine_stub,

@ -4270,33 +4270,33 @@ REGISTER_DISPATCH(qadaptive_avg_pool2d_nhwc_stub,
&qadaptive_avg_pool2d_nhwc_kernel);
REGISTER_DISPATCH(qadaptive_avg_pool3d_ndhwc_stub,
&qadaptive_avg_pool3d_ndhwc_kernel);
REGISTER_DISPATCH(qadd_relu_stub, &qadd_kernel<true>);
REGISTER_DISPATCH(qadd_scalar_relu_stub, &qadd_scalar_kernel<true>);
REGISTER_DISPATCH(qadd_scalar_stub, &qadd_scalar_kernel<false>);
REGISTER_DISPATCH(qadd_stub, &qadd_kernel<false>);
REGISTER_DISPATCH(qadd_relu_stub, &qadd_kernel<true>)
REGISTER_DISPATCH(qadd_scalar_relu_stub, &qadd_scalar_kernel<true>)
REGISTER_DISPATCH(qadd_scalar_stub, &qadd_scalar_kernel<false>)
REGISTER_DISPATCH(qadd_stub, &qadd_kernel<false>)

REGISTER_DISPATCH(qbatch_norm_relu_stub, &q_batch_norm_kernel<true>);
REGISTER_DISPATCH(qbatch_norm_stub, &q_batch_norm_kernel<false>);
REGISTER_DISPATCH(qcat_nhwc_stub, &qcat_nhwc_kernel<false>);
REGISTER_DISPATCH(qcat_relu_nhwc_stub, &qcat_nhwc_kernel<true>);
REGISTER_DISPATCH(qclamp_stub, &qclamp_kernel);
REGISTER_DISPATCH(qclamp_min_stub, &qclamp_min_kernel);
REGISTER_DISPATCH(qclamp_max_stub, &qclamp_max_kernel);
REGISTER_DISPATCH(qelu_stub, &qelu_kernel);
REGISTER_DISPATCH(qhardsigmoid_stub, &qhardsigmoid_kernel);
REGISTER_DISPATCH(qhardswish_stub, &qhardswish_kernel);
REGISTER_DISPATCH(qmaxpool_2d_nhwc_stub, &qmaxpool_2d_nhwc_kernel);
REGISTER_DISPATCH(qmaxpool_3d_nthwc_stub, &qmaxpool_3d_nthwc_kernel);
REGISTER_DISPATCH(qmul_relu_stub, &qmul_kernel<true>);
REGISTER_DISPATCH(qmul_stub, &qmul_kernel<false>);
REGISTER_DISPATCH(qrelu_leaky_stub, &leaky_qrelu_out_kernel);
REGISTER_DISPATCH(qrelu_stub, &qrelu_kernel);
REGISTER_DISPATCH(qprelu_stub, &qprelu_out_kernel);
REGISTER_DISPATCH(qgelu_stub, &qgelu_kernel);
REGISTER_DISPATCH(qsigmoid_stub, &qsigmoid_kernel);
REGISTER_DISPATCH(qtanh_stub, &qtanh_kernel);
REGISTER_DISPATCH(qthreshold_stub, &qthreshold_kernel);
REGISTER_DISPATCH(qtopk_stub, &qtopk_kernel);
REGISTER_DISPATCH(qbatch_norm_relu_stub, &q_batch_norm_kernel<true>)
REGISTER_DISPATCH(qbatch_norm_stub, &q_batch_norm_kernel<false>)
REGISTER_DISPATCH(qcat_nhwc_stub, &qcat_nhwc_kernel<false>)
REGISTER_DISPATCH(qcat_relu_nhwc_stub, &qcat_nhwc_kernel<true>)
REGISTER_DISPATCH(qclamp_stub, &qclamp_kernel)
REGISTER_DISPATCH(qclamp_min_stub, &qclamp_min_kernel)
REGISTER_DISPATCH(qclamp_max_stub, &qclamp_max_kernel)
REGISTER_DISPATCH(qelu_stub, &qelu_kernel)
REGISTER_DISPATCH(qhardsigmoid_stub, &qhardsigmoid_kernel)
REGISTER_DISPATCH(qhardswish_stub, &qhardswish_kernel)
REGISTER_DISPATCH(qmaxpool_2d_nhwc_stub, &qmaxpool_2d_nhwc_kernel)
REGISTER_DISPATCH(qmaxpool_3d_nthwc_stub, &qmaxpool_3d_nthwc_kernel)
REGISTER_DISPATCH(qmul_relu_stub, &qmul_kernel<true>)
REGISTER_DISPATCH(qmul_stub, &qmul_kernel<false>)
REGISTER_DISPATCH(qrelu_leaky_stub, &leaky_qrelu_out_kernel)
REGISTER_DISPATCH(qrelu_stub, &qrelu_kernel)
REGISTER_DISPATCH(qprelu_stub, &qprelu_out_kernel)
REGISTER_DISPATCH(qgelu_stub, &qgelu_kernel)
REGISTER_DISPATCH(qsigmoid_stub, &qsigmoid_kernel)
REGISTER_DISPATCH(qtanh_stub, &qtanh_kernel)
REGISTER_DISPATCH(qthreshold_stub, &qthreshold_kernel)
REGISTER_DISPATCH(qtopk_stub, &qtopk_kernel)
REGISTER_DISPATCH(fake_quant_grad_learnable_channel_stub,
&fake_quantize_learnable_channel_grad_kernel_cpu);
REGISTER_DISPATCH(

@ -4308,8 +4308,8 @@ REGISTER_DISPATCH(
REGISTER_DISPATCH(
quantize_tensor_per_channel_float_qparams_stub,
&quantize_tensor_per_channel_float_qparams_cpu);
REGISTER_DISPATCH(quantized_normalize_stub, &quantized_normalize_kernel);
REGISTER_DISPATCH(quantized_groupnorm_nhwc_stub, &quantized_groupnorm_nhwc_kernel);
REGISTER_DISPATCH(quantized_normalize_stub, &quantized_normalize_kernel)
REGISTER_DISPATCH(quantized_groupnorm_nhwc_stub, &quantized_groupnorm_nhwc_kernel)
REGISTER_DISPATCH(qupsample_bilinear2d_nhwc_stub,
&qupsample_bilinear2d_nhwc_kernel);
REGISTER_DISPATCH(

@ -4324,7 +4324,7 @@ REGISTER_DISPATCH(
REGISTER_DISPATCH(
index_put_kernel_quantized_stub,
&index_put_kernel_quantized_cpu);
REGISTER_DISPATCH(qmean_inner_dim_stub, &qmean_inner_dim_kernel);
REGISTER_DISPATCH(qstd_inner_dim_stub, &qstd_inner_dim_kernel);
REGISTER_DISPATCH(qmean_inner_dim_stub, &qmean_inner_dim_kernel)
REGISTER_DISPATCH(qstd_inner_dim_stub, &qstd_inner_dim_kernel)
} // namespace at::native
// NOLINTEND(*-c-arrays)

@ -19,17 +19,17 @@
CLOG_DEFINE_LOG_DEBUG(
pytorch_qnnp_log_debug,
"QNNPACK",
PYTORCH_QNNP_LOG_LEVEL);
CLOG_DEFINE_LOG_INFO(pytorch_qnnp_log_info, "QNNPACK", PYTORCH_QNNP_LOG_LEVEL);
PYTORCH_QNNP_LOG_LEVEL)
CLOG_DEFINE_LOG_INFO(pytorch_qnnp_log_info, "QNNPACK", PYTORCH_QNNP_LOG_LEVEL)
CLOG_DEFINE_LOG_WARNING(
pytorch_qnnp_log_warning,
"QNNPACK",
PYTORCH_QNNP_LOG_LEVEL);
PYTORCH_QNNP_LOG_LEVEL)
CLOG_DEFINE_LOG_ERROR(
pytorch_qnnp_log_error,
"QNNPACK",
PYTORCH_QNNP_LOG_LEVEL);
PYTORCH_QNNP_LOG_LEVEL)
CLOG_DEFINE_LOG_FATAL(
pytorch_qnnp_log_fatal,
"QNNPACK",
PYTORCH_QNNP_LOG_LEVEL);
PYTORCH_QNNP_LOG_LEVEL)

@ -115,9 +115,9 @@ void _fake_quantize_grad_learnable_tensor_kernel_cuda(
});
}

REGISTER_DISPATCH(fake_quant_tensor_cachemask_stub, &fake_quantize_tensor_cachemask_kernel_cuda);
REGISTER_DISPATCH(fake_quant_tensor_cachemask_tensor_qparams_stub, &fake_quantize_tensor_cachemask_tensor_qparams_kernel_cuda);
REGISTER_DISPATCH(fake_quant_grad_learnable_tensor_stub, &_fake_quantize_grad_learnable_tensor_kernel_cuda);
REGISTER_DISPATCH(fake_quant_tensor_cachemask_stub, &fake_quantize_tensor_cachemask_kernel_cuda)
REGISTER_DISPATCH(fake_quant_tensor_cachemask_tensor_qparams_stub, &fake_quantize_tensor_cachemask_tensor_qparams_kernel_cuda)
REGISTER_DISPATCH(fake_quant_grad_learnable_tensor_stub, &_fake_quantize_grad_learnable_tensor_kernel_cuda)

// Fake quantize per channel

@ -209,7 +209,7 @@ void _fake_quantize_grad_learnable_channel_kernel_cuda(TensorIterator &iter, int
});
}

REGISTER_DISPATCH(fake_quant_per_channel_cachemask_stub, &fake_quant_per_channel_cachemask_cuda);
REGISTER_DISPATCH(fake_quant_grad_learnable_channel_stub, &_fake_quantize_grad_learnable_channel_kernel_cuda);
REGISTER_DISPATCH(fake_quant_per_channel_cachemask_stub, &fake_quant_per_channel_cachemask_cuda)
REGISTER_DISPATCH(fake_quant_grad_learnable_channel_stub, &_fake_quantize_grad_learnable_channel_kernel_cuda)

} // namespace at::native

@ -167,42 +167,42 @@ Tensor& coalesced_unary_ufunc_out(const Tensor &self, Tensor &result, const Ufun
}); \
}

COALESCED_UNARY_UFUNC(abs);
COALESCED_UNARY_UFUNC(asin);
COALESCED_UNARY_UFUNC(asinh);
COALESCED_UNARY_UFUNC(atan);
COALESCED_UNARY_UFUNC(atanh);
COALESCED_UNARY_UFUNC(ceil);
COALESCED_UNARY_UFUNC(deg2rad);
COALESCED_UNARY_UFUNC(erf);
COALESCED_UNARY_UFUNC(erfinv);
COALESCED_UNARY_UFUNC(expm1);
COALESCED_UNARY_UFUNC(floor);
COALESCED_UNARY_UFUNC(frac);
COALESCED_UNARY_UFUNC(log1p);
COALESCED_UNARY_UFUNC(round);
COALESCED_UNARY_UFUNC(rad2deg);
COALESCED_UNARY_UFUNC(sign);
COALESCED_UNARY_UFUNC(sgn);
COALESCED_UNARY_UFUNC(sin);
COALESCED_UNARY_UFUNC(sinh);
COALESCED_UNARY_UFUNC(sqrt);
COALESCED_UNARY_UFUNC(tan);
COALESCED_UNARY_UFUNC(tanh);
COALESCED_UNARY_UFUNC(trunc);
COALESCED_UNARY_UFUNC(abs)
COALESCED_UNARY_UFUNC(asin)
COALESCED_UNARY_UFUNC(asinh)
COALESCED_UNARY_UFUNC(atan)
COALESCED_UNARY_UFUNC(atanh)
COALESCED_UNARY_UFUNC(ceil)
COALESCED_UNARY_UFUNC(deg2rad)
COALESCED_UNARY_UFUNC(erf)
COALESCED_UNARY_UFUNC(erfinv)
COALESCED_UNARY_UFUNC(expm1)
COALESCED_UNARY_UFUNC(floor)
COALESCED_UNARY_UFUNC(frac)
COALESCED_UNARY_UFUNC(log1p)
COALESCED_UNARY_UFUNC(round)
COALESCED_UNARY_UFUNC(rad2deg)
COALESCED_UNARY_UFUNC(sign)
COALESCED_UNARY_UFUNC(sgn)
COALESCED_UNARY_UFUNC(sin)
COALESCED_UNARY_UFUNC(sinh)
COALESCED_UNARY_UFUNC(sqrt)
COALESCED_UNARY_UFUNC(tan)
COALESCED_UNARY_UFUNC(tanh)
COALESCED_UNARY_UFUNC(trunc)
// relu function has no declaration, it may be unused in Pytorch.
// But we keep it and ignore the warning here until verified in the future.
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wmissing-prototypes"
COALESCED_UNARY_UFUNC(relu);
COALESCED_UNARY_UFUNC(relu)
#pragma clang diagnostic pop

COALESCED_UNARY_UFUNC_NO_INPLACE(signbit);
COALESCED_UNARY_UFUNC_NO_INPLACE(isneginf);
COALESCED_UNARY_UFUNC_NO_INPLACE(isposinf);
COALESCED_UNARY_UFUNC_NO_INPLACE(signbit)
COALESCED_UNARY_UFUNC_NO_INPLACE(isneginf)
COALESCED_UNARY_UFUNC_NO_INPLACE(isposinf)

COALESCED_UNARY_UFUNC_FUNCTIONAL(isnan);
COALESCED_UNARY_UFUNC_FUNCTIONAL(isinf);
COALESCED_UNARY_UFUNC_FUNCTIONAL(isnan)
COALESCED_UNARY_UFUNC_FUNCTIONAL(isinf)

Tensor isinf_sparse_meta(const Tensor& self) {
TORCH_CHECK_NOT_IMPLEMENTED(0, "nyi isinf for SparseMeta");

@ -8,6 +8,6 @@ extern "C" {
mz_ulong mz_crc32(mz_ulong crc, const mz_uint8* ptr, size_t buf_len) {
auto z = crc32_fast(ptr, buf_len, crc);
return z;
};
}
#endif
}

@ -806,18 +806,18 @@ class SwapSavedVariables {
#define NO_OP_VISIT(T) \
void before(const T&) {} \
void after(const T&) {}
NO_OP_VISIT(caffe2::TypeMeta);
NO_OP_VISIT(c10::Device);
NO_OP_VISIT(c10::DeviceType);
NO_OP_VISIT(c10::Layout);
NO_OP_VISIT(c10::MemoryFormat);
NO_OP_VISIT(c10::ScalarType);
NO_OP_VISIT(c10::Scalar);
NO_OP_VISIT(c10::TensorOptions);
NO_OP_VISIT(std::string);
NO_OP_VISIT(int64_t);
NO_OP_VISIT(bool);
NO_OP_VISIT(double);
NO_OP_VISIT(caffe2::TypeMeta)
NO_OP_VISIT(c10::Device)
NO_OP_VISIT(c10::DeviceType)
NO_OP_VISIT(c10::Layout)
NO_OP_VISIT(c10::MemoryFormat)
NO_OP_VISIT(c10::ScalarType)
NO_OP_VISIT(c10::Scalar)
NO_OP_VISIT(c10::TensorOptions)
NO_OP_VISIT(std::string)
NO_OP_VISIT(int64_t)
NO_OP_VISIT(bool)
NO_OP_VISIT(double)
#undef NO_OP_VISIT

SwapSavedVariables(

@ -66,7 +66,7 @@ struct LTCGuardImpl : public c10::impl::DeviceGuardImplInterface {
}
};

C10_REGISTER_GUARD_IMPL(Lazy, LTCGuardImpl);
C10_REGISTER_GUARD_IMPL(Lazy, LTCGuardImpl)

} // namespace
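
For context (this note and the sketch below are not part of the commit): the pattern fixed throughout this diff is a redundant trailing semicolon after a construct that is already complete, typically a registration macro such as REGISTER_DISPATCH or DECLARE_DISPATCH that expands to a full declaration or definition, or an inline function body followed by ";". The extra ";" is an empty declaration that compilers can flag under -Wextra-semi. A minimal, hypothetical C++ illustration (the macro and function names are invented for the example):

// Hypothetical macro that, like the registration macros above, already
// expands to a complete definition ending in '}'.
#define DEFINE_NOOP_KERNEL(name) \
  inline void name() {}

DEFINE_NOOP_KERNEL(noop_cpu_kernel)     // fine: the expansion is already a complete definition
// DEFINE_NOOP_KERNEL(noop_gpu_kernel); // the extra ';' may be flagged by -Wextra-semi

inline void already_complete() {}       // a trailing ';' after the '}' would likewise be redundant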