[5/N] Fix Wextra-semi warning (#139465)
Fixes #ISSUE_NUMBER
Pull Request resolved: https://github.com/pytorch/pytorch/pull/139465
Approved by: https://github.com/ezyang
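Background for readers skimming the diff: the registration macros touched here (REGISTER_DISPATCH, C10_REGISTER_CREATOR, and friends) expand to a complete definition that already ends in a semicolon, so a second semicolon at the call site leaves a stray empty declaration at namespace scope, which Clang and GCC flag when -Wextra-semi is enabled. A minimal sketch of the pattern, using a hypothetical registrar type and macro rather than PyTorch's real definitions:

// Compile with: clang++ -std=c++17 -Wextra-semi -c wextra_semi_demo.cpp
// KernelRegistrar and REGISTER_KERNEL are illustrative stand-ins only.
struct KernelRegistrar {
  KernelRegistrar(const char* name, void (*fn)()) { (void)name; (void)fn; }
};

// The expansion already supplies its trailing ';'.
#define REGISTER_KERNEL(stub, fn) \
  static KernelRegistrar stub##_registrar{#stub, fn};

void my_kernel() {}

REGISTER_KERNEL(my_stub, &my_kernel);   // warns: extra ';' [-Wextra-semi]
REGISTER_KERNEL(other_stub, &my_kernel) // clean: macro provides the semicolon

The commit therefore drops the redundant semicolon at each invocation; the macro definitions themselves are unchanged.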
@@ -45,7 +45,7 @@ std::shared_ptr<TaskThreadPoolBase> create_c10_threadpool(
 } // namespace

-C10_REGISTER_CREATOR(ThreadPoolRegistry, C10, create_c10_threadpool);
+C10_REGISTER_CREATOR(ThreadPoolRegistry, C10, create_c10_threadpool)

 void set_num_interop_threads(int nthreads) {
   TORCH_CHECK(nthreads > 0, "Expected positive number of threads");
@@ -1119,8 +1119,8 @@ inline void load_fp32_from_##name(const type *data, Vectorized<float>& out1, Vec
   out1 = out1_values; \
   out2 = out2_values; \
 }
-LOAD_FP32_VECTORIZED_INIT(BFloat16, bf16);
-LOAD_FP32_VECTORIZED_INIT(Half, fp16);
+LOAD_FP32_VECTORIZED_INIT(BFloat16, bf16)
+LOAD_FP32_VECTORIZED_INIT(Half, fp16)

 #else // defined(CPU_CAPABILITY_AVX2)
 #define LOAD_FP32_NON_VECTORIZED_INIT(type, name) \
@@ -1707,8 +1707,8 @@ inline void load_fp32_from_##name(const type *data, Vectorized<float>& out1, Vec
   out1 = out1_values; \
   out2 = out2_values; \
 }
-LOAD_FP32_VECTORIZED_INIT(BFloat16, bf16);
-LOAD_FP32_VECTORIZED_INIT(Half, fp16);
+LOAD_FP32_VECTORIZED_INIT(BFloat16, bf16)
+LOAD_FP32_VECTORIZED_INIT(Half, fp16)

 #else // defined(CPU_CAPABILITY_AVX512)
 #define LOAD_FP32_NON_VECTORIZED_INIT(type, name) \
@@ -11,4 +11,4 @@
 //
 // This hack can be removed once PyTorch is out-of-place HIPified, and
 // doesn't pretend CUDA is HIP.
-C10_REGISTER_GUARD_IMPL(CUDA, at::cuda::HIPGuardImplMasqueradingAsCUDA);
+C10_REGISTER_GUARD_IMPL(CUDA, at::cuda::HIPGuardImplMasqueradingAsCUDA)
@@ -55,7 +55,7 @@ class IMpsAllocatorCallback {
 // MPS allocator will execute every registered callback when a block of memory is freed.
 C10_DECLARE_REGISTRY(MPSAllocatorCallbacksRegistry, IMpsAllocatorCallback);
 #define REGISTER_MPS_ALLOCATOR_CALLBACK(name, ...) \
-  C10_REGISTER_CLASS(MPSAllocatorCallbacksRegistry, name, __VA_ARGS__);
+  C10_REGISTER_CLASS(MPSAllocatorCallbacksRegistry, name, __VA_ARGS__)

 IMPSAllocator* getIMPSAllocator(bool sharedAllocator = false);
@@ -176,6 +176,6 @@ struct OptionalMPSGuard {
 };


-C10_REGISTER_GUARD_IMPL(MPS, MPSGuardImpl);
+C10_REGISTER_GUARD_IMPL(MPS, MPSGuardImpl)

 } // namespace at::mps
@@ -1135,108 +1135,108 @@ void unpack_pivots_cpu_kernel(TensorIterator& iter, const int64_t dim_size, cons
 }
 } // anonymous namespace

-REGISTER_ARCH_DISPATCH(cholesky_stub, DEFAULT, &cholesky_kernel);
-REGISTER_AVX512_DISPATCH(cholesky_stub, &cholesky_kernel);
-REGISTER_AVX2_DISPATCH(cholesky_stub, &cholesky_kernel);
-REGISTER_VSX_DISPATCH(cholesky_stub, &cholesky_kernel);
-REGISTER_ZVECTOR_DISPATCH(cholesky_stub, &cholesky_kernel);
-REGISTER_SVE256_DISPATCH(cholesky_stub, &cholesky_kernel);
+REGISTER_ARCH_DISPATCH(cholesky_stub, DEFAULT, &cholesky_kernel)
+REGISTER_AVX512_DISPATCH(cholesky_stub, &cholesky_kernel)
+REGISTER_AVX2_DISPATCH(cholesky_stub, &cholesky_kernel)
+REGISTER_VSX_DISPATCH(cholesky_stub, &cholesky_kernel)
+REGISTER_ZVECTOR_DISPATCH(cholesky_stub, &cholesky_kernel)
+REGISTER_SVE256_DISPATCH(cholesky_stub, &cholesky_kernel)

-REGISTER_ARCH_DISPATCH(cholesky_inverse_stub, DEFAULT, &cholesky_inverse_kernel_impl);
-REGISTER_AVX512_DISPATCH(cholesky_inverse_stub, &cholesky_inverse_kernel_impl);
-REGISTER_AVX2_DISPATCH(cholesky_inverse_stub, &cholesky_inverse_kernel_impl);
-REGISTER_VSX_DISPATCH(cholesky_inverse_stub, &cholesky_inverse_kernel_impl);
-REGISTER_ZVECTOR_DISPATCH(cholesky_inverse_stub, &cholesky_inverse_kernel_impl);
-REGISTER_SVE256_DISPATCH(cholesky_inverse_stub, &cholesky_inverse_kernel_impl);
+REGISTER_ARCH_DISPATCH(cholesky_inverse_stub, DEFAULT, &cholesky_inverse_kernel_impl)
+REGISTER_AVX512_DISPATCH(cholesky_inverse_stub, &cholesky_inverse_kernel_impl)
+REGISTER_AVX2_DISPATCH(cholesky_inverse_stub, &cholesky_inverse_kernel_impl)
+REGISTER_VSX_DISPATCH(cholesky_inverse_stub, &cholesky_inverse_kernel_impl)
+REGISTER_ZVECTOR_DISPATCH(cholesky_inverse_stub, &cholesky_inverse_kernel_impl)
+REGISTER_SVE256_DISPATCH(cholesky_inverse_stub, &cholesky_inverse_kernel_impl)

-REGISTER_ARCH_DISPATCH(linalg_eig_stub, DEFAULT, &linalg_eig_kernel);
-REGISTER_AVX512_DISPATCH(linalg_eig_stub, &linalg_eig_kernel);
-REGISTER_AVX2_DISPATCH(linalg_eig_stub, &linalg_eig_kernel);
-REGISTER_VSX_DISPATCH(linalg_eig_stub, &linalg_eig_kernel);
-REGISTER_ZVECTOR_DISPATCH(linalg_eig_stub, &linalg_eig_kernel);
-REGISTER_SVE256_DISPATCH(linalg_eig_stub, &linalg_eig_kernel);
+REGISTER_ARCH_DISPATCH(linalg_eig_stub, DEFAULT, &linalg_eig_kernel)
+REGISTER_AVX512_DISPATCH(linalg_eig_stub, &linalg_eig_kernel)
+REGISTER_AVX2_DISPATCH(linalg_eig_stub, &linalg_eig_kernel)
+REGISTER_VSX_DISPATCH(linalg_eig_stub, &linalg_eig_kernel)
+REGISTER_ZVECTOR_DISPATCH(linalg_eig_stub, &linalg_eig_kernel)
+REGISTER_SVE256_DISPATCH(linalg_eig_stub, &linalg_eig_kernel)

-REGISTER_ARCH_DISPATCH(linalg_eigh_stub, DEFAULT, &linalg_eigh_kernel);
-REGISTER_AVX512_DISPATCH(linalg_eigh_stub, &linalg_eigh_kernel);
-REGISTER_AVX2_DISPATCH(linalg_eigh_stub, &linalg_eigh_kernel);
-REGISTER_VSX_DISPATCH(linalg_eigh_stub, &linalg_eigh_kernel);
-REGISTER_ZVECTOR_DISPATCH(linalg_eigh_stub, &linalg_eigh_kernel);
-REGISTER_SVE256_DISPATCH(linalg_eigh_stub, &linalg_eigh_kernel);
+REGISTER_ARCH_DISPATCH(linalg_eigh_stub, DEFAULT, &linalg_eigh_kernel)
+REGISTER_AVX512_DISPATCH(linalg_eigh_stub, &linalg_eigh_kernel)
+REGISTER_AVX2_DISPATCH(linalg_eigh_stub, &linalg_eigh_kernel)
+REGISTER_VSX_DISPATCH(linalg_eigh_stub, &linalg_eigh_kernel)
+REGISTER_ZVECTOR_DISPATCH(linalg_eigh_stub, &linalg_eigh_kernel)
+REGISTER_SVE256_DISPATCH(linalg_eigh_stub, &linalg_eigh_kernel)

-REGISTER_ARCH_DISPATCH(geqrf_stub, DEFAULT, &geqrf_kernel);
-REGISTER_AVX512_DISPATCH(geqrf_stub, &geqrf_kernel);
-REGISTER_AVX2_DISPATCH(geqrf_stub, &geqrf_kernel);
-REGISTER_VSX_DISPATCH(geqrf_stub, &geqrf_kernel);
-REGISTER_ZVECTOR_DISPATCH(geqrf_stub, &geqrf_kernel);
-REGISTER_SVE256_DISPATCH(geqrf_stub, &geqrf_kernel);
+REGISTER_ARCH_DISPATCH(geqrf_stub, DEFAULT, &geqrf_kernel)
+REGISTER_AVX512_DISPATCH(geqrf_stub, &geqrf_kernel)
+REGISTER_AVX2_DISPATCH(geqrf_stub, &geqrf_kernel)
+REGISTER_VSX_DISPATCH(geqrf_stub, &geqrf_kernel)
+REGISTER_ZVECTOR_DISPATCH(geqrf_stub, &geqrf_kernel)
+REGISTER_SVE256_DISPATCH(geqrf_stub, &geqrf_kernel)

-REGISTER_ARCH_DISPATCH(orgqr_stub, DEFAULT, &orgqr_kernel_impl);
-REGISTER_AVX512_DISPATCH(orgqr_stub, &orgqr_kernel_impl);
-REGISTER_AVX2_DISPATCH(orgqr_stub, &orgqr_kernel_impl);
-REGISTER_VSX_DISPATCH(orgqr_stub, &orgqr_kernel_impl);
-REGISTER_ZVECTOR_DISPATCH(orgqr_stub, &orgqr_kernel_impl);
-REGISTER_SVE256_DISPATCH(orgqr_stub, &orgqr_kernel_impl);
+REGISTER_ARCH_DISPATCH(orgqr_stub, DEFAULT, &orgqr_kernel_impl)
+REGISTER_AVX512_DISPATCH(orgqr_stub, &orgqr_kernel_impl)
+REGISTER_AVX2_DISPATCH(orgqr_stub, &orgqr_kernel_impl)
+REGISTER_VSX_DISPATCH(orgqr_stub, &orgqr_kernel_impl)
+REGISTER_ZVECTOR_DISPATCH(orgqr_stub, &orgqr_kernel_impl)
+REGISTER_SVE256_DISPATCH(orgqr_stub, &orgqr_kernel_impl)

-REGISTER_ARCH_DISPATCH(ormqr_stub, DEFAULT, &ormqr_kernel);
-REGISTER_AVX512_DISPATCH(ormqr_stub, &ormqr_kernel);
-REGISTER_AVX2_DISPATCH(ormqr_stub, &ormqr_kernel);
-REGISTER_VSX_DISPATCH(ormqr_stub, &ormqr_kernel);
-REGISTER_ZVECTOR_DISPATCH(ormqr_stub, &ormqr_kernel);
-REGISTER_SVE256_DISPATCH(ormqr_stub, &ormqr_kernel);
+REGISTER_ARCH_DISPATCH(ormqr_stub, DEFAULT, &ormqr_kernel)
+REGISTER_AVX512_DISPATCH(ormqr_stub, &ormqr_kernel)
+REGISTER_AVX2_DISPATCH(ormqr_stub, &ormqr_kernel)
+REGISTER_VSX_DISPATCH(ormqr_stub, &ormqr_kernel)
+REGISTER_ZVECTOR_DISPATCH(ormqr_stub, &ormqr_kernel)
+REGISTER_SVE256_DISPATCH(ormqr_stub, &ormqr_kernel)

-REGISTER_ARCH_DISPATCH(lstsq_stub, DEFAULT, &lstsq_kernel);
-REGISTER_AVX512_DISPATCH(lstsq_stub, &lstsq_kernel);
-REGISTER_AVX2_DISPATCH(lstsq_stub, &lstsq_kernel);
-REGISTER_VSX_DISPATCH(lstsq_stub, &lstsq_kernel);
-REGISTER_ZVECTOR_DISPATCH(lstsq_stub, &lstsq_kernel);
-REGISTER_SVE256_DISPATCH(lstsq_stub, &lstsq_kernel);
+REGISTER_ARCH_DISPATCH(lstsq_stub, DEFAULT, &lstsq_kernel)
+REGISTER_AVX512_DISPATCH(lstsq_stub, &lstsq_kernel)
+REGISTER_AVX2_DISPATCH(lstsq_stub, &lstsq_kernel)
+REGISTER_VSX_DISPATCH(lstsq_stub, &lstsq_kernel)
+REGISTER_ZVECTOR_DISPATCH(lstsq_stub, &lstsq_kernel)
+REGISTER_SVE256_DISPATCH(lstsq_stub, &lstsq_kernel)

-REGISTER_ARCH_DISPATCH(triangular_solve_stub, DEFAULT, &triangular_solve_kernel);
-REGISTER_AVX512_DISPATCH(triangular_solve_stub, &triangular_solve_kernel);
-REGISTER_AVX2_DISPATCH(triangular_solve_stub, &triangular_solve_kernel);
-REGISTER_VSX_DISPATCH(triangular_solve_stub, &triangular_solve_kernel);
-REGISTER_ZVECTOR_DISPATCH(triangular_solve_stub, &triangular_solve_kernel);
-REGISTER_SVE256_DISPATCH(triangular_solve_stub, &triangular_solve_kernel);
+REGISTER_ARCH_DISPATCH(triangular_solve_stub, DEFAULT, &triangular_solve_kernel)
+REGISTER_AVX512_DISPATCH(triangular_solve_stub, &triangular_solve_kernel)
+REGISTER_AVX2_DISPATCH(triangular_solve_stub, &triangular_solve_kernel)
+REGISTER_VSX_DISPATCH(triangular_solve_stub, &triangular_solve_kernel)
+REGISTER_ZVECTOR_DISPATCH(triangular_solve_stub, &triangular_solve_kernel)
+REGISTER_SVE256_DISPATCH(triangular_solve_stub, &triangular_solve_kernel)

-REGISTER_ARCH_DISPATCH(lu_factor_stub, DEFAULT, &lu_factor_kernel);
-REGISTER_AVX512_DISPATCH(lu_factor_stub, &lu_factor_kernel);
-REGISTER_AVX2_DISPATCH(lu_factor_stub, &lu_factor_kernel);
-REGISTER_VSX_DISPATCH(lu_factor_stub, &lu_factor_kernel);
-REGISTER_ZVECTOR_DISPATCH(lu_factor_stub, &lu_factor_kernel);
-REGISTER_SVE256_DISPATCH(lu_factor_stub, &lu_factor_kernel);
+REGISTER_ARCH_DISPATCH(lu_factor_stub, DEFAULT, &lu_factor_kernel)
+REGISTER_AVX512_DISPATCH(lu_factor_stub, &lu_factor_kernel)
+REGISTER_AVX2_DISPATCH(lu_factor_stub, &lu_factor_kernel)
+REGISTER_VSX_DISPATCH(lu_factor_stub, &lu_factor_kernel)
+REGISTER_ZVECTOR_DISPATCH(lu_factor_stub, &lu_factor_kernel)
+REGISTER_SVE256_DISPATCH(lu_factor_stub, &lu_factor_kernel)

-REGISTER_ARCH_DISPATCH(ldl_factor_stub, DEFAULT, &ldl_factor_kernel);
-REGISTER_AVX512_DISPATCH(ldl_factor_stub, &ldl_factor_kernel);
-REGISTER_AVX2_DISPATCH(ldl_factor_stub, &ldl_factor_kernel);
-REGISTER_VSX_DISPATCH(ldl_factor_stub, &ldl_factor_kernel);
-REGISTER_ZVECTOR_DISPATCH(ldl_factor_stub, &ldl_factor_kernel);
-REGISTER_SVE256_DISPATCH(ldl_factor_stub, &ldl_factor_kernel);
+REGISTER_ARCH_DISPATCH(ldl_factor_stub, DEFAULT, &ldl_factor_kernel)
+REGISTER_AVX512_DISPATCH(ldl_factor_stub, &ldl_factor_kernel)
+REGISTER_AVX2_DISPATCH(ldl_factor_stub, &ldl_factor_kernel)
+REGISTER_VSX_DISPATCH(ldl_factor_stub, &ldl_factor_kernel)
+REGISTER_ZVECTOR_DISPATCH(ldl_factor_stub, &ldl_factor_kernel)
+REGISTER_SVE256_DISPATCH(ldl_factor_stub, &ldl_factor_kernel)

-REGISTER_ARCH_DISPATCH(ldl_solve_stub, DEFAULT, &ldl_solve_kernel);
-REGISTER_AVX512_DISPATCH(ldl_solve_stub, &ldl_solve_kernel);
-REGISTER_AVX2_DISPATCH(ldl_solve_stub, &ldl_solve_kernel);
-REGISTER_VSX_DISPATCH(ldl_solve_stub, &ldl_solve_kernel);
-REGISTER_ZVECTOR_DISPATCH(ldl_solve_stub, &ldl_solve_kernel);
-REGISTER_SVE256_DISPATCH(ldl_solve_stub, &ldl_solve_kernel);
+REGISTER_ARCH_DISPATCH(ldl_solve_stub, DEFAULT, &ldl_solve_kernel)
+REGISTER_AVX512_DISPATCH(ldl_solve_stub, &ldl_solve_kernel)
+REGISTER_AVX2_DISPATCH(ldl_solve_stub, &ldl_solve_kernel)
+REGISTER_VSX_DISPATCH(ldl_solve_stub, &ldl_solve_kernel)
+REGISTER_ZVECTOR_DISPATCH(ldl_solve_stub, &ldl_solve_kernel)
+REGISTER_SVE256_DISPATCH(ldl_solve_stub, &ldl_solve_kernel)

-REGISTER_ARCH_DISPATCH(lu_solve_stub, DEFAULT, &lu_solve_kernel);
-REGISTER_AVX512_DISPATCH(lu_solve_stub, &lu_solve_kernel);
-REGISTER_AVX2_DISPATCH(lu_solve_stub, &lu_solve_kernel);
-REGISTER_VSX_DISPATCH(lu_solve_stub, &lu_solve_kernel);
-REGISTER_ZVECTOR_DISPATCH(lu_solve_stub, &lu_solve_kernel);
-REGISTER_SVE256_DISPATCH(lu_solve_stub, &lu_solve_kernel);
+REGISTER_ARCH_DISPATCH(lu_solve_stub, DEFAULT, &lu_solve_kernel)
+REGISTER_AVX512_DISPATCH(lu_solve_stub, &lu_solve_kernel)
+REGISTER_AVX2_DISPATCH(lu_solve_stub, &lu_solve_kernel)
+REGISTER_VSX_DISPATCH(lu_solve_stub, &lu_solve_kernel)
+REGISTER_ZVECTOR_DISPATCH(lu_solve_stub, &lu_solve_kernel)
+REGISTER_SVE256_DISPATCH(lu_solve_stub, &lu_solve_kernel)

-REGISTER_ARCH_DISPATCH(svd_stub, DEFAULT, &svd_kernel);
-REGISTER_AVX512_DISPATCH(svd_stub, &svd_kernel);
-REGISTER_AVX2_DISPATCH(svd_stub, &svd_kernel);
-REGISTER_VSX_DISPATCH(svd_stub, &svd_kernel);
-REGISTER_ZVECTOR_DISPATCH(svd_stub, &svd_kernel);
-REGISTER_SVE256_DISPATCH(svd_stub, &svd_kernel);
+REGISTER_ARCH_DISPATCH(svd_stub, DEFAULT, &svd_kernel)
+REGISTER_AVX512_DISPATCH(svd_stub, &svd_kernel)
+REGISTER_AVX2_DISPATCH(svd_stub, &svd_kernel)
+REGISTER_VSX_DISPATCH(svd_stub, &svd_kernel)
+REGISTER_ZVECTOR_DISPATCH(svd_stub, &svd_kernel)
+REGISTER_SVE256_DISPATCH(svd_stub, &svd_kernel)

-REGISTER_ARCH_DISPATCH(unpack_pivots_stub, DEFAULT, &unpack_pivots_cpu_kernel);
-REGISTER_AVX512_DISPATCH(unpack_pivots_stub, &unpack_pivots_cpu_kernel);
-REGISTER_AVX2_DISPATCH(unpack_pivots_stub, &unpack_pivots_cpu_kernel);
-REGISTER_VSX_DISPATCH(unpack_pivots_stub, &unpack_pivots_cpu_kernel);
-REGISTER_ZVECTOR_DISPATCH(unpack_pivots_stub, &unpack_pivots_cpu_kernel);
-REGISTER_SVE256_DISPATCH(unpack_pivots_stub, &unpack_pivots_cpu_kernel);
+REGISTER_ARCH_DISPATCH(unpack_pivots_stub, DEFAULT, &unpack_pivots_cpu_kernel)
+REGISTER_AVX512_DISPATCH(unpack_pivots_stub, &unpack_pivots_cpu_kernel)
+REGISTER_AVX2_DISPATCH(unpack_pivots_stub, &unpack_pivots_cpu_kernel)
+REGISTER_VSX_DISPATCH(unpack_pivots_stub, &unpack_pivots_cpu_kernel)
+REGISTER_ZVECTOR_DISPATCH(unpack_pivots_stub, &unpack_pivots_cpu_kernel)
+REGISTER_SVE256_DISPATCH(unpack_pivots_stub, &unpack_pivots_cpu_kernel)
 } // namespace at::native
@@ -32,7 +32,7 @@
 #endif // USE_FBGEMM

 namespace caffe2 {
-CAFFE_KNOWN_TYPE(c10::intrusive_ptr<LinearPackedParamsBase>);
+CAFFE_KNOWN_TYPE(c10::intrusive_ptr<LinearPackedParamsBase>)
 } // namespace caffe2

 #ifdef USE_FBGEMM
@@ -1412,22 +1412,22 @@ REGISTER_DISPATCH(hardshrink_stub, &hardshrink_kernel)
 REGISTER_DISPATCH(softshrink_stub, &softshrink_kernel)
 REGISTER_DISPATCH(shrink_backward_stub, &shrink_backward_kernel)

-ALSO_REGISTER_AVX512_DISPATCH(log_sigmoid_cpu_stub, &log_sigmoid_cpu_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(log_sigmoid_backward_stub, &log_sigmoid_backward_cpu_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(glu_stub, &glu_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(glu_backward_stub, &glu_backward_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(glu_jvp_stub, &glu_jvp_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(elu_stub, &elu_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(elu_backward_stub, &elu_backward_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(GeluKernel, &GeluKernelImpl);
-ALSO_REGISTER_AVX512_DISPATCH(GeluBackwardKernel, &GeluBackwardKernelImpl);
-ALSO_REGISTER_AVX512_DISPATCH(hardswish_stub, &hardswish_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(hardswish_backward_stub, &hardswish_backward_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(softplus_stub, &softplus_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(softplus_backward_stub, &softplus_backward_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(silu_stub, &silu_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(silu_backward_stub, &silu_backward_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(mish_stub, &mish_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(mish_backward_stub, &mish_backward_kernel);
+ALSO_REGISTER_AVX512_DISPATCH(log_sigmoid_cpu_stub, &log_sigmoid_cpu_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(log_sigmoid_backward_stub, &log_sigmoid_backward_cpu_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(glu_stub, &glu_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(glu_backward_stub, &glu_backward_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(glu_jvp_stub, &glu_jvp_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(elu_stub, &elu_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(elu_backward_stub, &elu_backward_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(GeluKernel, &GeluKernelImpl)
+ALSO_REGISTER_AVX512_DISPATCH(GeluBackwardKernel, &GeluBackwardKernelImpl)
+ALSO_REGISTER_AVX512_DISPATCH(hardswish_stub, &hardswish_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(hardswish_backward_stub, &hardswish_backward_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(softplus_stub, &softplus_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(softplus_backward_stub, &softplus_backward_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(silu_stub, &silu_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(silu_backward_stub, &silu_backward_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(mish_stub, &mish_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(mish_backward_stub, &mish_backward_kernel)

 } // namespace at::native
@@ -1430,17 +1430,17 @@ REGISTER_DISPATCH(chebyshev_polynomial_u_stub, &chebyshev_polynomial_u_kernel)
 REGISTER_DISPATCH(hermite_polynomial_h_stub, &hermite_polynomial_h_kernel)
 REGISTER_DISPATCH(hermite_polynomial_he_stub, &hermite_polynomial_he_kernel)

-ALSO_REGISTER_AVX512_DISPATCH(atan2_stub, &atan2_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(smooth_l1_stub, &smooth_l1_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(huber_stub, &huber_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(sigmoid_backward_stub, &sigmoid_backward_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(logit_backward_stub, &logit_backward_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(tanh_backward_stub, &tanh_backward_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(mse_stub, &mse_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(logaddexp_stub, &logaddexp_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(logaddexp2_stub, &logaddexp2_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(hypot_stub, &hypot_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(igamma_stub, &igamma_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(igammac_stub, &igammac_kernel);
+ALSO_REGISTER_AVX512_DISPATCH(atan2_stub, &atan2_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(smooth_l1_stub, &smooth_l1_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(huber_stub, &huber_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(sigmoid_backward_stub, &sigmoid_backward_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(logit_backward_stub, &logit_backward_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(tanh_backward_stub, &tanh_backward_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(mse_stub, &mse_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(logaddexp_stub, &logaddexp_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(logaddexp2_stub, &logaddexp2_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(hypot_stub, &hypot_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(igamma_stub, &igamma_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(igammac_stub, &igammac_kernel)

 } // namespace at::native
@@ -26,6 +26,6 @@ void polar_kernel(TensorIterator& iter) {
 } // anonymous namespace

 REGISTER_DISPATCH(complex_stub, &complex_kernel)
-ALSO_REGISTER_AVX512_DISPATCH(polar_stub, &polar_kernel);
+ALSO_REGISTER_AVX512_DISPATCH(polar_stub, &polar_kernel)

 } // namespace at::native
@@ -520,6 +520,6 @@ Tensor _convolution_depthwise3x3_winograd(

 } // namespace

-ALSO_REGISTER_AVX512_DISPATCH(convolution_depthwise3x3_winograd_stub, &_convolution_depthwise3x3_winograd);
+ALSO_REGISTER_AVX512_DISPATCH(convolution_depthwise3x3_winograd_stub, &_convolution_depthwise3x3_winograd)

 } // namespace at::native
@@ -1265,7 +1265,7 @@ void flash_attention_backward_kernel_impl(

 } // anonymous namespace

-ALSO_REGISTER_AVX512_DISPATCH(flash_attention_kernel, &flash_attention_kernel_impl);
-ALSO_REGISTER_AVX512_DISPATCH(flash_attention_backward_kernel, &flash_attention_backward_kernel_impl);
+ALSO_REGISTER_AVX512_DISPATCH(flash_attention_kernel, &flash_attention_kernel_impl)
+ALSO_REGISTER_AVX512_DISPATCH(flash_attention_backward_kernel, &flash_attention_backward_kernel_impl)

 } // at::native
@@ -144,7 +144,7 @@ static void pow_tensor_scalar_kernel(

 } // anonymous namespace

-ALSO_REGISTER_AVX512_DISPATCH(pow_tensor_tensor_stub, &CPU_CAPABILITY::pow_tensor_tensor_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(pow_tensor_scalar_stub, &CPU_CAPABILITY::pow_tensor_scalar_kernel);
+ALSO_REGISTER_AVX512_DISPATCH(pow_tensor_tensor_stub, &CPU_CAPABILITY::pow_tensor_tensor_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(pow_tensor_scalar_stub, &CPU_CAPABILITY::pow_tensor_scalar_kernel)

 } // namespace at::native
@@ -1291,8 +1291,8 @@ static void log_softmax_backward_kernel_impl(

 } // anonymous namespace

-ALSO_REGISTER_AVX512_DISPATCH(softmax_lastdim_kernel, &softmax_lastdim_kernel_impl);
-ALSO_REGISTER_AVX512_DISPATCH(log_softmax_lastdim_kernel, &log_softmax_lastdim_kernel_impl);
+ALSO_REGISTER_AVX512_DISPATCH(softmax_lastdim_kernel, &softmax_lastdim_kernel_impl)
+ALSO_REGISTER_AVX512_DISPATCH(log_softmax_lastdim_kernel, &log_softmax_lastdim_kernel_impl)
 ALSO_REGISTER_AVX512_DISPATCH(
     softmax_backward_lastdim_kernel,
     &softmax_backward_lastdim_kernel_impl);
@@ -1300,9 +1300,9 @@ ALSO_REGISTER_AVX512_DISPATCH(
     log_softmax_backward_lastdim_kernel,
     &log_softmax_backward_lastdim_kernel_impl);

-ALSO_REGISTER_AVX512_DISPATCH(softmax_kernel, &softmax_kernel_impl);
-ALSO_REGISTER_AVX512_DISPATCH(log_softmax_kernel, &log_softmax_kernel_impl);
-ALSO_REGISTER_AVX512_DISPATCH(softmax_backward_kernel, &softmax_backward_kernel_impl);
+ALSO_REGISTER_AVX512_DISPATCH(softmax_kernel, &softmax_kernel_impl)
+ALSO_REGISTER_AVX512_DISPATCH(log_softmax_kernel, &log_softmax_kernel_impl)
+ALSO_REGISTER_AVX512_DISPATCH(softmax_backward_kernel, &softmax_backward_kernel_impl)
 ALSO_REGISTER_AVX512_DISPATCH(
     log_softmax_backward_kernel,
     &log_softmax_backward_kernel_impl);
@@ -842,15 +842,15 @@ STATIC_IMPLEMENT_COMPLEX_KERNEL_WITHOUT_AVX512(tan);

 // The following kernels are compute-intensive & are compiled with both AVX512
 // & AVX2
-ALSO_REGISTER_AVX512_DISPATCH(sign_stub, &CPU_CAPABILITY::sign_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(sgn_stub, &CPU_CAPABILITY::sgn_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(reciprocal_stub, &CPU_CAPABILITY::reciprocal_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(exp2_stub, &CPU_CAPABILITY::exp2_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(sigmoid_stub, &CPU_CAPABILITY::sigmoid_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(logit_stub, &CPU_CAPABILITY::logit_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(sinh_stub, &CPU_CAPABILITY::sinh_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(cosh_stub, &CPU_CAPABILITY::cosh_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(atanh_stub, &CPU_CAPABILITY::atanh_kernel);
+ALSO_REGISTER_AVX512_DISPATCH(sign_stub, &CPU_CAPABILITY::sign_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(sgn_stub, &CPU_CAPABILITY::sgn_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(reciprocal_stub, &CPU_CAPABILITY::reciprocal_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(exp2_stub, &CPU_CAPABILITY::exp2_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(sigmoid_stub, &CPU_CAPABILITY::sigmoid_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(logit_stub, &CPU_CAPABILITY::logit_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(sinh_stub, &CPU_CAPABILITY::sinh_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(cosh_stub, &CPU_CAPABILITY::cosh_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(atanh_stub, &CPU_CAPABILITY::atanh_kernel)

 // Might enable AVX512 dispatch after enabling explicit vectorization for them
 REGISTER_DISPATCH(acosh_stub, &CPU_CAPABILITY::acosh_kernel)
@@ -775,8 +775,8 @@ void int4pack_mm_kernel(

 } // anonymous namespace

-ALSO_REGISTER_AVX512_DISPATCH(weight_to_int4pack_stub, &weight_to_int4pack_kernel);
-ALSO_REGISTER_AVX512_DISPATCH(int4pack_mm_stub, &int4pack_mm_kernel);
+ALSO_REGISTER_AVX512_DISPATCH(weight_to_int4pack_stub, &weight_to_int4pack_kernel)
+ALSO_REGISTER_AVX512_DISPATCH(int4pack_mm_stub, &int4pack_mm_kernel)

 } // at::native
 C10_DIAGNOSTIC_POP()
@@ -433,6 +433,6 @@ void int8pack_mm_kernel(

 } // anonymous namespace

-ALSO_REGISTER_AVX512_DISPATCH(int8pack_mm_stub, &int8pack_mm_kernel);
+ALSO_REGISTER_AVX512_DISPATCH(int8pack_mm_stub, &int8pack_mm_kernel)

 } // at::native
@@ -46,6 +46,6 @@ void abs_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(abs_stub, &abs_kernel_cuda);
+REGISTER_DISPATCH(abs_stub, &abs_kernel_cuda)

 } // namespace at::native
@@ -80,7 +80,7 @@ void elu_backward_kernel(
 }
 } // namespace

-REGISTER_DISPATCH(elu_stub, &elu_kernel);
-REGISTER_DISPATCH(elu_backward_stub, &elu_backward_kernel);
+REGISTER_DISPATCH(elu_stub, &elu_kernel)
+REGISTER_DISPATCH(elu_backward_stub, &elu_backward_kernel)

 } // namespace at::native
@@ -135,7 +135,7 @@ void launch_glu_backward_kernel(
   });
 }

-REGISTER_DISPATCH(glu_stub, &glu_kernel);
-REGISTER_DISPATCH(glu_jvp_stub, &glu_jvp_kernel);
+REGISTER_DISPATCH(glu_stub, &glu_kernel)
+REGISTER_DISPATCH(glu_jvp_stub, &glu_jvp_kernel)

 } // namespace at::native
@@ -34,6 +34,6 @@ void hardshrink_kernel(TensorIteratorBase& iter, const Scalar& value) {
 }
 } // namespace

-REGISTER_DISPATCH(hardshrink_stub, &hardshrink_kernel);
+REGISTER_DISPATCH(hardshrink_stub, &hardshrink_kernel)

 } // namespace at::native
@@ -68,7 +68,7 @@ void hardsigmoid_backward_kernel(TensorIteratorBase& iter) {

 } // namespace

-REGISTER_DISPATCH(hardsigmoid_stub, &hardsigmoid_kernel);
-REGISTER_DISPATCH(hardsigmoid_backward_stub, &hardsigmoid_backward_kernel);
+REGISTER_DISPATCH(hardsigmoid_stub, &hardsigmoid_kernel)
+REGISTER_DISPATCH(hardsigmoid_backward_stub, &hardsigmoid_backward_kernel)

 } // namespace at::native
@@ -57,7 +57,7 @@ void hardswish_backward_kernel(TensorIterator& iter) {
 }
 } // namespace

-REGISTER_DISPATCH(hardswish_stub, &hardswish_kernel);
-REGISTER_DISPATCH(hardswish_backward_stub, &hardswish_backward_kernel);
+REGISTER_DISPATCH(hardswish_stub, &hardswish_kernel)
+REGISTER_DISPATCH(hardswish_backward_stub, &hardswish_backward_kernel)

 } // namespace at::native
@@ -40,6 +40,6 @@ void hardtanh_backward_kernel(
 }
 } // namespace

-REGISTER_DISPATCH(hardtanh_backward_stub, &hardtanh_backward_kernel);
+REGISTER_DISPATCH(hardtanh_backward_stub, &hardtanh_backward_kernel)

 } // namespace at::native
@@ -56,7 +56,7 @@ void leaky_relu_backward_kernel(
 }
 } // namespace

-REGISTER_DISPATCH(leaky_relu_stub, &leaky_relu_kernel);
-REGISTER_DISPATCH(leaky_relu_backward_stub, &leaky_relu_backward_kernel);
+REGISTER_DISPATCH(leaky_relu_stub, &leaky_relu_kernel)
+REGISTER_DISPATCH(leaky_relu_backward_stub, &leaky_relu_backward_kernel)

 } // namespace at::native
@@ -59,6 +59,6 @@ void log_sigmoid_backward_kernel(TensorIterator& iter) {
 }
 } // namespace

-REGISTER_DISPATCH(log_sigmoid_backward_stub, &log_sigmoid_backward_kernel);
+REGISTER_DISPATCH(log_sigmoid_backward_stub, &log_sigmoid_backward_kernel)

 } // namespace at::native
@@ -58,7 +58,7 @@ void mish_backward_kernel(TensorIterator& iter) {
 }
 } // namespace

-REGISTER_DISPATCH(mish_stub, &mish_kernel);
-REGISTER_DISPATCH(mish_backward_stub, &mish_backward_kernel);
+REGISTER_DISPATCH(mish_stub, &mish_kernel)
+REGISTER_DISPATCH(mish_backward_stub, &mish_backward_kernel)

 } // namespace at::native
@@ -42,7 +42,7 @@ void prelu_backward_kernel(TensorIterator &iter) {
   });
 }

-REGISTER_DISPATCH(prelu_stub, &prelu_kernel);
-REGISTER_DISPATCH(prelu_backward_stub, &prelu_backward_kernel);
+REGISTER_DISPATCH(prelu_stub, &prelu_kernel)
+REGISTER_DISPATCH(prelu_backward_stub, &prelu_backward_kernel)

 } // namespace at::native
@@ -54,7 +54,7 @@ void silu_backward_kernel(TensorIteratorBase& iter) {
 }
 } // namespace

-REGISTER_DISPATCH(silu_stub, &silu_kernel);
-REGISTER_DISPATCH(silu_backward_stub, &silu_backward_kernel);
+REGISTER_DISPATCH(silu_stub, &silu_kernel)
+REGISTER_DISPATCH(silu_backward_stub, &silu_backward_kernel)

 } // namespace at::native
@@ -68,7 +68,7 @@ void softplus_backward_kernel(

 } // namespace

-REGISTER_DISPATCH(softplus_stub, &softplus_kernel);
-REGISTER_DISPATCH(softplus_backward_stub, &softplus_backward_kernel);
+REGISTER_DISPATCH(softplus_stub, &softplus_kernel)
+REGISTER_DISPATCH(softplus_backward_stub, &softplus_backward_kernel)

 } // namespace at::native
@@ -52,7 +52,7 @@ void shrink_backward_kernel(TensorIteratorBase& iter, const Scalar& value) {
 }
 } // namespace

-REGISTER_DISPATCH(softshrink_stub, &softshrink_kernel);
-REGISTER_DISPATCH(shrink_backward_stub, &shrink_backward_kernel);
+REGISTER_DISPATCH(softshrink_stub, &softshrink_kernel)
+REGISTER_DISPATCH(shrink_backward_stub, &shrink_backward_kernel)

 } // namespace at::native
@@ -47,6 +47,6 @@ static void threshold_kernel_cuda(

 } // namespace

-REGISTER_DISPATCH(threshold_stub, &threshold_kernel_cuda);
+REGISTER_DISPATCH(threshold_stub, &threshold_kernel_cuda)

 } // namespace at::native
@@ -73,9 +73,9 @@ void bitwise_xor_kernel_cuda(TensorIteratorBase& iter) {
   });
 }

-REGISTER_DISPATCH(bitwise_and_stub, &bitwise_and_kernel_cuda);
-REGISTER_DISPATCH(bitwise_or_stub, &bitwise_or_kernel_cuda);
-REGISTER_DISPATCH(bitwise_xor_stub, &bitwise_xor_kernel_cuda);
+REGISTER_DISPATCH(bitwise_and_stub, &bitwise_and_kernel_cuda)
+REGISTER_DISPATCH(bitwise_or_stub, &bitwise_or_kernel_cuda)
+REGISTER_DISPATCH(bitwise_xor_stub, &bitwise_xor_kernel_cuda)


 } // namespace at::native
@@ -78,6 +78,6 @@ void div_floor_kernel_cuda(TensorIteratorBase& iter) {
 }
 } // namespace binary_internal

-REGISTER_DISPATCH(div_floor_stub, &binary_internal::div_floor_kernel_cuda);
+REGISTER_DISPATCH(div_floor_stub, &binary_internal::div_floor_kernel_cuda)

 } // namespace at::native
@@ -56,6 +56,6 @@ void div_true_kernel_cuda(TensorIteratorBase& iter) {
 }
 } // namespace binary_internal

-REGISTER_DISPATCH(div_true_stub, &binary_internal::div_true_kernel_cuda);
+REGISTER_DISPATCH(div_true_stub, &binary_internal::div_true_kernel_cuda)

 } // namespace at::native
@@ -48,6 +48,6 @@ void div_trunc_kernel_cuda(TensorIteratorBase& iter) {
 }
 } // namespace binary_internal

-REGISTER_DISPATCH(div_trunc_stub, &binary_internal::div_trunc_kernel_cuda);
+REGISTER_DISPATCH(div_trunc_stub, &binary_internal::div_trunc_kernel_cuda)

 } // namespace at::native
@@ -33,7 +33,7 @@ void hypot_kernel_cuda(TensorIteratorBase& iter) {
   });
 }

-REGISTER_DISPATCH(atan2_stub, &atan2_kernel_cuda);
-REGISTER_DISPATCH(hypot_stub, &hypot_kernel_cuda);
+REGISTER_DISPATCH(atan2_stub, &atan2_kernel_cuda)
+REGISTER_DISPATCH(hypot_stub, &hypot_kernel_cuda)

 } // namespace at::native
@@ -120,9 +120,9 @@ void logical_xor_kernel_cuda(TensorIterator& iter) {
 }
 }

-REGISTER_DISPATCH(logical_and_stub, &logical_and_kernel_cuda);
-REGISTER_DISPATCH(logical_or_stub, &logical_or_kernel_cuda);
-REGISTER_DISPATCH(logical_xor_stub, &logical_xor_kernel_cuda);
+REGISTER_DISPATCH(logical_and_stub, &logical_and_kernel_cuda)
+REGISTER_DISPATCH(logical_or_stub, &logical_or_kernel_cuda)
+REGISTER_DISPATCH(logical_xor_stub, &logical_xor_kernel_cuda)


 } // namespace at::native
@@ -124,8 +124,8 @@ void tanh_backward_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(sigmoid_backward_stub, &sigmoid_backward_kernel_cuda);
-REGISTER_DISPATCH(logit_backward_stub, &logit_backward_kernel_cuda);
-REGISTER_DISPATCH(tanh_backward_stub, &tanh_backward_kernel_cuda);
+REGISTER_DISPATCH(sigmoid_backward_stub, &sigmoid_backward_kernel_cuda)
+REGISTER_DISPATCH(logit_backward_stub, &logit_backward_kernel_cuda)
+REGISTER_DISPATCH(tanh_backward_stub, &tanh_backward_kernel_cuda)

 } // namespace at::native
@@ -69,11 +69,11 @@ void xlog1py_kernel_cuda(TensorIteratorBase& iter) {
   });
 }

-REGISTER_DISPATCH(smooth_l1_stub, &smooth_l1_kernel_cuda);
-REGISTER_DISPATCH(huber_stub, &huber_kernel_cuda);
-REGISTER_DISPATCH(mse_stub, &mse_kernel_cuda);
-REGISTER_DISPATCH(xlogy_stub, &xlogy_kernel_cuda);
-REGISTER_DISPATCH(xlog1py_stub, &xlog1py_kernel_cuda);
+REGISTER_DISPATCH(smooth_l1_stub, &smooth_l1_kernel_cuda)
+REGISTER_DISPATCH(huber_stub, &huber_kernel_cuda)
+REGISTER_DISPATCH(mse_stub, &mse_kernel_cuda)
+REGISTER_DISPATCH(xlogy_stub, &xlogy_kernel_cuda)
+REGISTER_DISPATCH(xlog1py_stub, &xlog1py_kernel_cuda)

 // DO NOT ADD ANY NEW KERNELS HERE
 // CUDA compilation times grow quickly. It's perfectly acceptable to have a file per kernel.
@@ -43,6 +43,6 @@ void mul_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(mul_stub, &mul_kernel_cuda);
+REGISTER_DISPATCH(mul_stub, &mul_kernel_cuda)

 } // namespace at::native
@@ -55,7 +55,7 @@ void fmod_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(remainder_stub, &remainder_kernel_cuda);
-REGISTER_DISPATCH(fmod_stub, &fmod_kernel_cuda);
+REGISTER_DISPATCH(remainder_stub, &remainder_kernel_cuda)
+REGISTER_DISPATCH(fmod_stub, &fmod_kernel_cuda)

 } // namespace at::native
@@ -38,7 +38,7 @@ void rshift_kernel_cuda(TensorIteratorBase& iter) {
   });
 }

-REGISTER_DISPATCH(lshift_stub, &lshift_kernel_cuda);
-REGISTER_DISPATCH(rshift_stub, &rshift_kernel_cuda);
+REGISTER_DISPATCH(lshift_stub, &lshift_kernel_cuda)
+REGISTER_DISPATCH(rshift_stub, &rshift_kernel_cuda)

 } // namespace at::native
@@ -44,7 +44,7 @@ void ne_kernel_cuda(TensorIteratorBase& iter) {
   compare_eq_ne_kernel(iter, EqOpType::NE);
 }

-REGISTER_DISPATCH(eq_stub, &eq_kernel_cuda);
-REGISTER_DISPATCH(ne_stub, &ne_kernel_cuda);
+REGISTER_DISPATCH(eq_stub, &eq_kernel_cuda)
+REGISTER_DISPATCH(ne_stub, &ne_kernel_cuda)

 } // namespace at::native
@@ -95,9 +95,9 @@ void lt_kernel_cuda(TensorIteratorBase& iter) {
   compare_kernel_with_scalars(iter, OpType::LT);
 }

-REGISTER_DISPATCH(ge_stub, &ge_kernel_cuda);
-REGISTER_DISPATCH(gt_stub, &gt_kernel_cuda);
-REGISTER_DISPATCH(le_stub, &le_kernel_cuda);
-REGISTER_DISPATCH(lt_stub, &lt_kernel_cuda);
+REGISTER_DISPATCH(ge_stub, &ge_kernel_cuda)
+REGISTER_DISPATCH(gt_stub, &gt_kernel_cuda)
+REGISTER_DISPATCH(le_stub, &le_kernel_cuda)
+REGISTER_DISPATCH(lt_stub, &lt_kernel_cuda)

 } // namespace at::native
@@ -30,7 +30,7 @@ void polar_kernel_cuda(TensorIterator& iter) {

 } // anonymous namespace

-REGISTER_DISPATCH(complex_stub, &complex_kernel_cuda);
-REGISTER_DISPATCH(polar_stub, &polar_kernel_cuda);
+REGISTER_DISPATCH(complex_stub, &complex_kernel_cuda)
+REGISTER_DISPATCH(polar_stub, &polar_kernel_cuda)

 } // namespace at::native
@@ -410,6 +410,6 @@ static void copy_kernel_cuda(TensorIterator& iter, bool non_blocking) {
 }
 }

-REGISTER_DISPATCH(copy_stub, &copy_kernel_cuda);
+REGISTER_DISPATCH(copy_stub, &copy_kernel_cuda)

 } // namespace at::native
@@ -28,6 +28,6 @@ void copysign_kernel_cuda(TensorIteratorBase& iter) {
   });
 }

-REGISTER_DISPATCH(copysign_stub, &copysign_kernel_cuda);
+REGISTER_DISPATCH(copysign_stub, &copysign_kernel_cuda)

 } // namespace at::native
@@ -87,6 +87,6 @@ void cross_impl(const Tensor& result, const Tensor& x1, const Tensor& x2, int64_
 }
 }

-REGISTER_DISPATCH(cross_stub, &cross_impl);
+REGISTER_DISPATCH(cross_stub, &cross_impl)

 } // namespace at::native
@@ -357,9 +357,9 @@ void cdist_backward_kernel_impl(Tensor& result, const Tensor& grad, const Tensor

 } // anonymous namespace

-REGISTER_DISPATCH(pdist_forward_stub, &pdist_forward_kernel_impl);
-REGISTER_DISPATCH(pdist_backward_stub, &pdist_backward_kernel_impl);
-REGISTER_DISPATCH(cdist_stub, &cdist_kernel_impl);
-REGISTER_DISPATCH(cdist_backward_stub, &cdist_backward_kernel_impl);
+REGISTER_DISPATCH(pdist_forward_stub, &pdist_forward_kernel_impl)
+REGISTER_DISPATCH(pdist_backward_stub, &pdist_backward_kernel_impl)
+REGISTER_DISPATCH(cdist_stub, &cdist_kernel_impl)
+REGISTER_DISPATCH(cdist_backward_stub, &cdist_backward_kernel_impl)

 } // at::native
@@ -34,7 +34,7 @@ void bernoulli_scalar_kernel(const TensorBase &self, double p, std::optional<Gen
   at::native::templates::cuda::bernoulli_kernel(iter, p, generator);
 }

-REGISTER_DISPATCH(bernoulli_tensor_stub, &bernoulli_tensor_kernel);
-REGISTER_DISPATCH(bernoulli_scalar_stub, &bernoulli_scalar_kernel);
+REGISTER_DISPATCH(bernoulli_tensor_stub, &bernoulli_tensor_kernel)
+REGISTER_DISPATCH(bernoulli_scalar_stub, &bernoulli_scalar_kernel)

 } // namespace at::native
@@ -10,6 +10,6 @@ void cauchy_kernel(TensorIteratorBase& iter, double median, double sigma, std::o
   at::native::templates::cuda::cauchy_kernel(iter, median, sigma, generator);
 }

-REGISTER_DISPATCH(cauchy_stub, &cauchy_kernel);
+REGISTER_DISPATCH(cauchy_stub, &cauchy_kernel)

 } // namespace at::native
@@ -10,6 +10,6 @@ void exponential_kernel(TensorIteratorBase& iter, double lambda, std::optional<G
   at::native::templates::cuda::exponential_kernel(iter, lambda, generator);
 }

-REGISTER_DISPATCH(exponential_stub, &exponential_kernel);
+REGISTER_DISPATCH(exponential_stub, &exponential_kernel)

 } // namespace at::native
@@ -10,6 +10,6 @@ void geometric_kernel(TensorIteratorBase& iter, double p_, std::optional<Generat
   at::native::templates::cuda::geometric_kernel(iter, p_, generator);
 }

-REGISTER_DISPATCH(geometric_stub, &geometric_kernel);
+REGISTER_DISPATCH(geometric_stub, &geometric_kernel)

 } // namespace at::native
@@ -10,6 +10,6 @@ void log_normal_kernel(TensorIteratorBase& iter, double mean, double std, std::o
   at::native::templates::cuda::log_normal_kernel(iter, mean, std, generator);
 }

-REGISTER_DISPATCH(log_normal_stub, &log_normal_kernel);
+REGISTER_DISPATCH(log_normal_stub, &log_normal_kernel)

 } // namespace at::native
@@ -10,6 +10,6 @@ void normal_kernel(const TensorBase &self, double mean, double std, std::optiona
   at::native::templates::cuda::normal_kernel(self, mean, std, generator);
 }

-REGISTER_DISPATCH(normal_stub, &normal_kernel);
+REGISTER_DISPATCH(normal_stub, &normal_kernel)

 } // namespace at::native
@@ -20,8 +20,8 @@ void random_kernel(TensorIteratorBase& iter, std::optional<Generator> gen_) {
   at::native::templates::cuda::random_kernel(iter, gen);
 }

-REGISTER_DISPATCH(random_from_to_stub, &random_from_to_kernel);
-REGISTER_DISPATCH(random_stub, &random_kernel);
-REGISTER_DISPATCH(random_full_64_bits_range_stub, &random_full_64_bits_range_kernel);
+REGISTER_DISPATCH(random_from_to_stub, &random_from_to_kernel)
+REGISTER_DISPATCH(random_stub, &random_kernel)
+REGISTER_DISPATCH(random_full_64_bits_range_stub, &random_full_64_bits_range_kernel)

 } // namespace at::native
@@ -10,6 +10,6 @@ void uniform_kernel(TensorIteratorBase& iter, double from, double to, std::optio
   templates::cuda::uniform_kernel(iter, from, to, generator);
 }

-REGISTER_DISPATCH(uniform_stub, &uniform_kernel);
+REGISTER_DISPATCH(uniform_stub, &uniform_kernel)

 } // namespace at::native
@@ -25,6 +25,6 @@ void fill_kernel_cuda(TensorIterator& iter, const Scalar& value) {
   }), AT_EXPAND(AT_ALL_TYPES_AND_COMPLEX), kComplexHalf, kBool, kHalf, kBFloat16, AT_EXPAND(AT_FLOAT8_TYPES), AT_EXPAND(AT_BAREBONES_UNSIGNED_TYPES));
 }

-REGISTER_DISPATCH(fill_stub, &fill_kernel_cuda);
+REGISTER_DISPATCH(fill_stub, &fill_kernel_cuda)

 } // namespace at::native
@@ -109,6 +109,6 @@ void _compute_linear_combination_cuda_kernel(

 }

-REGISTER_DISPATCH(_compute_linear_combination_stub, &_compute_linear_combination_cuda_kernel);
+REGISTER_DISPATCH(_compute_linear_combination_stub, &_compute_linear_combination_cuda_kernel)

 } // namespace at::native
@@ -52,7 +52,7 @@ void lcm_kernel_cuda(TensorIteratorBase& iter) {
 #endif // AT_USE_JITERATOR()
 }

-REGISTER_DISPATCH(gcd_stub, &gcd_kernel_cuda);
-REGISTER_DISPATCH(lcm_stub, &lcm_kernel_cuda);
+REGISTER_DISPATCH(gcd_stub, &gcd_kernel_cuda)
+REGISTER_DISPATCH(lcm_stub, &lcm_kernel_cuda)

 } // namespace at::native
@@ -545,8 +545,8 @@ void igammac_kernel_cuda(TensorIteratorBase& iter) {
   });
 }

-REGISTER_DISPATCH(igamma_stub, &igamma_kernel_cuda);
-REGISTER_DISPATCH(igammac_stub, &igammac_kernel_cuda);
+REGISTER_DISPATCH(igamma_stub, &igamma_kernel_cuda)
+REGISTER_DISPATCH(igammac_stub, &igammac_kernel_cuda)

 // DO NOT ADD ANY NEW KERNELS HERE
 // CUDA compilation times grow quickly. It's perfectly acceptable to have a file per kernel.
@@ -475,13 +475,13 @@ void flip_kernel(TensorIterator& iter, const bool quantized) {
 }


-REGISTER_DISPATCH(index_stub, &index_kernel);
-REGISTER_DISPATCH(index_fill_stub, &index_fill_kernel);
-REGISTER_DISPATCH(index_copy_stub, &index_copy_kernel);
-REGISTER_DISPATCH(index_put_stub, &index_put_kernel);
-REGISTER_DISPATCH(put_stub, &put_kernel);
-REGISTER_DISPATCH(take_stub, &take_kernel);
-REGISTER_DISPATCH(flip_stub, &flip_kernel);
+REGISTER_DISPATCH(index_stub, &index_kernel)
+REGISTER_DISPATCH(index_fill_stub, &index_fill_kernel)
+REGISTER_DISPATCH(index_copy_stub, &index_copy_kernel)
+REGISTER_DISPATCH(index_put_stub, &index_put_kernel)
+REGISTER_DISPATCH(put_stub, &put_kernel)
+REGISTER_DISPATCH(take_stub, &take_kernel)
+REGISTER_DISPATCH(flip_stub, &flip_kernel)

 REGISTER_CUDA_DISPATCH(index_put_kernel_quantized_stub, &index_put_kernel_quantized_cuda);

@@ -121,7 +121,7 @@ void lerp_scalar_kernel(at::TensorIteratorBase& iter, const c10::Scalar& weight)

 } // anonymous namespace

-REGISTER_DISPATCH(lerp_kernel_tensor_weight, &lerp_tensor_kernel);
-REGISTER_DISPATCH(lerp_kernel_scalar_weight, &lerp_scalar_kernel);
+REGISTER_DISPATCH(lerp_kernel_tensor_weight, &lerp_tensor_kernel)
+REGISTER_DISPATCH(lerp_kernel_scalar_weight, &lerp_scalar_kernel)

 } // namespace at::native
@@ -139,6 +139,6 @@ void unpack_pivots_cuda_kernel(TensorIterator& iter, const int64_t dim_size, con
 }
 } // anonymous namespace

-REGISTER_DISPATCH(unpack_pivots_stub, &unpack_pivots_cuda_kernel);
-REGISTER_DISPATCH(addr_stub, &addr_kernel_cuda);
+REGISTER_DISPATCH(unpack_pivots_stub, &unpack_pivots_cuda_kernel)
+REGISTER_DISPATCH(addr_stub, &addr_kernel_cuda)
 } // namespace at::native
@@ -51,7 +51,7 @@ void logaddexp2_kernel_cuda(TensorIteratorBase& iter) {
   });
 }

-REGISTER_DISPATCH(logaddexp_stub, &logaddexp_kernel_cuda);
-REGISTER_DISPATCH(logaddexp2_stub, &logaddexp2_kernel_cuda);
+REGISTER_DISPATCH(logaddexp_stub, &logaddexp_kernel_cuda)
+REGISTER_DISPATCH(logaddexp2_stub, &logaddexp2_kernel_cuda)

 } // namespace at::native
@@ -90,9 +90,9 @@ void fmin_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(maximum_stub, &maximum_kernel_cuda);
-REGISTER_DISPATCH(minimum_stub, &minimum_kernel_cuda);
-REGISTER_DISPATCH(fmax_stub, &fmax_kernel_cuda);
-REGISTER_DISPATCH(fmin_stub, &fmin_kernel_cuda);
+REGISTER_DISPATCH(maximum_stub, &maximum_kernel_cuda)
+REGISTER_DISPATCH(minimum_stub, &minimum_kernel_cuda)
+REGISTER_DISPATCH(fmax_stub, &fmax_kernel_cuda)
+REGISTER_DISPATCH(fmin_stub, &fmin_kernel_cuda)

 } // namespace at::native
@@ -147,9 +147,9 @@ void mse_backward_cuda_kernel(TensorIterator& iter, const Scalar& value) {
   });
 }

-REGISTER_DISPATCH(addcdiv_stub, &addcdiv_cuda_kernel);
-REGISTER_DISPATCH(addcmul_stub, &addcmul_cuda_kernel);
-REGISTER_DISPATCH(smooth_l1_backward_stub, &smooth_l1_backward_cuda_kernel);
-REGISTER_DISPATCH(huber_backward_stub, &huber_backward_cuda_kernel);
-REGISTER_DISPATCH(mse_backward_stub, &mse_backward_cuda_kernel);
+REGISTER_DISPATCH(addcdiv_stub, &addcdiv_cuda_kernel)
+REGISTER_DISPATCH(addcmul_stub, &addcmul_cuda_kernel)
+REGISTER_DISPATCH(smooth_l1_backward_stub, &smooth_l1_backward_cuda_kernel)
+REGISTER_DISPATCH(huber_backward_stub, &huber_backward_cuda_kernel)
+REGISTER_DISPATCH(mse_backward_stub, &mse_backward_cuda_kernel)
 } // namespace at::native
@@ -203,7 +203,7 @@ void pow_tensor_scalar_kernel(TensorIteratorBase& iter, const Scalar& exp_scalar

 } // anonymous namespace

-REGISTER_DISPATCH(pow_tensor_tensor_stub, &pow_tensor_tensor_kernel);
-REGISTER_DISPATCH(pow_tensor_scalar_stub, &pow_tensor_scalar_kernel);
+REGISTER_DISPATCH(pow_tensor_tensor_stub, &pow_tensor_tensor_kernel)
+REGISTER_DISPATCH(pow_tensor_scalar_stub, &pow_tensor_scalar_kernel)

 } // namespace at::native
@@ -41,6 +41,6 @@ void argmax_kernel_cuda(TensorIterator& iter) {
 }
 }

-REGISTER_DISPATCH(argmax_stub, &argmax_kernel_cuda);
+REGISTER_DISPATCH(argmax_stub, &argmax_kernel_cuda)

 } // namespace at::native
@@ -41,6 +41,6 @@ void argmin_kernel_cuda(TensorIterator& iter) {
 }
 }

-REGISTER_DISPATCH(argmin_stub, &argmin_kernel_cuda);
+REGISTER_DISPATCH(argmin_stub, &argmin_kernel_cuda)

 } // namespace at::native
@@ -32,7 +32,7 @@ void or_kernel_cuda(TensorIterator& iter) {
   });
 }

-REGISTER_DISPATCH(and_stub, &and_kernel_cuda);
-REGISTER_DISPATCH(or_stub, &or_kernel_cuda);
+REGISTER_DISPATCH(and_stub, &and_kernel_cuda)
+REGISTER_DISPATCH(or_stub, &or_kernel_cuda)

 } // namespace at::native
@@ -56,6 +56,6 @@ void max_all_launch_kernel(TensorIterator &iter) {
   });
 }

-REGISTER_DISPATCH(max_values_stub, &max_values_kernel_cuda);
+REGISTER_DISPATCH(max_values_stub, &max_values_kernel_cuda)

 } // namespace at::native
@@ -53,6 +53,6 @@ void min_all_launch_kernel(TensorIterator &iter) {
   });
 }

-REGISTER_DISPATCH(min_values_stub, &min_values_kernel_cuda);
+REGISTER_DISPATCH(min_values_stub, &min_values_kernel_cuda)

 } // namespace at::native
@@ -62,7 +62,7 @@ static void mean_kernel_cuda(TensorIterator& iter) {
 }
 }

-REGISTER_DISPATCH(std_var_stub, &std_var_kernel_cuda);
-REGISTER_DISPATCH(mean_stub, &mean_kernel_cuda);
+REGISTER_DISPATCH(std_var_stub, &std_var_kernel_cuda)
+REGISTER_DISPATCH(mean_stub, &mean_kernel_cuda)

 } // namespace at::native
@@ -208,8 +208,8 @@ static void prod_kernel_cuda(TensorIterator& iter) {
   reduce_dispatch<prod_functor>(iter, general_dispatcher);
 }

-REGISTER_DISPATCH(sum_stub, &sum_kernel_cuda);
-REGISTER_DISPATCH(nansum_stub, &nansum_kernel_cuda);
-REGISTER_DISPATCH(prod_stub, &prod_kernel_cuda);
+REGISTER_DISPATCH(sum_stub, &sum_kernel_cuda)
+REGISTER_DISPATCH(nansum_stub, &nansum_kernel_cuda)
+REGISTER_DISPATCH(prod_stub, &prod_kernel_cuda)

 } // namespace at::native
@@ -24,6 +24,6 @@ void renorm_scale_factor_impl(TensorIteratorBase& iter, double maxnorm) {

 } // namespace (anonymous)

-REGISTER_DISPATCH(renorm_scale_factor_stub, &renorm_scale_factor_impl);
+REGISTER_DISPATCH(renorm_scale_factor_stub, &renorm_scale_factor_impl)

 } // namespace at::native
@@ -562,12 +562,12 @@ void scatter_scalar_reduce_cuda_kernel(const Tensor& self, const int64_t dim, co
 }


-REGISTER_DISPATCH(gather_stub, &gather_cuda_kernel);
-REGISTER_DISPATCH(scatter_stub, &scatter_cuda_kernel);
-REGISTER_DISPATCH(scatter_fill_stub, &scatter_fill_cuda_kernel);
-REGISTER_DISPATCH(scatter_add_stub, &scatter_add_cuda_kernel);
-REGISTER_DISPATCH(scatter_reduce_stub, &scatter_reduce_cuda_kernel);
-REGISTER_DISPATCH(scatter_scalar_reduce_stub, &scatter_scalar_reduce_cuda_kernel);
-REGISTER_DISPATCH(scatter_reduce_two_stub, &scatter_reduce_two_cuda_kernel);
+REGISTER_DISPATCH(gather_stub, &gather_cuda_kernel)
+REGISTER_DISPATCH(scatter_stub, &scatter_cuda_kernel)
+REGISTER_DISPATCH(scatter_fill_stub, &scatter_fill_cuda_kernel)
+REGISTER_DISPATCH(scatter_add_stub, &scatter_add_cuda_kernel)
+REGISTER_DISPATCH(scatter_reduce_stub, &scatter_reduce_cuda_kernel)
+REGISTER_DISPATCH(scatter_scalar_reduce_stub, &scatter_scalar_reduce_cuda_kernel)
+REGISTER_DISPATCH(scatter_reduce_two_stub, &scatter_reduce_two_cuda_kernel)

 } // namespace at::native
@@ -590,8 +590,8 @@ Tensor _segment_reduce_offsets_cuda_kernel(
       reduction, data, offsets, axis, initial, /*is_offsets_like=*/true);
 }

-REGISTER_DISPATCH(_segment_reduce_lengths_stub, &_segment_reduce_lengths_cuda_kernel);
-REGISTER_DISPATCH(_segment_reduce_offsets_stub, &_segment_reduce_offsets_cuda_kernel);
+REGISTER_DISPATCH(_segment_reduce_lengths_stub, &_segment_reduce_lengths_cuda_kernel)
+REGISTER_DISPATCH(_segment_reduce_offsets_stub, &_segment_reduce_offsets_cuda_kernel)
 REGISTER_DISPATCH(
     _segment_reduce_lengths_backward_stub,
     &_segment_reduce_lengths_backward_cuda_kernel);
@@ -119,6 +119,6 @@ void _fft_fill_with_conjugate_symmetry_cuda_(
   });
 }

-REGISTER_DISPATCH(fft_fill_with_conjugate_symmetry_stub, &_fft_fill_with_conjugate_symmetry_cuda_);
+REGISTER_DISPATCH(fft_fill_with_conjugate_symmetry_stub, &_fft_fill_with_conjugate_symmetry_cuda_)

 } // at::native
@@ -27,7 +27,7 @@ void heaviside_kernel_cuda(TensorIteratorBase& iter) {
   });
 }

-REGISTER_DISPATCH(nextafter_stub, &nextafter_kernel_cuda);
-REGISTER_DISPATCH(heaviside_stub, &heaviside_kernel_cuda);
+REGISTER_DISPATCH(nextafter_stub, &nextafter_kernel_cuda)
+REGISTER_DISPATCH(heaviside_stub, &heaviside_kernel_cuda)

 } // namespace at::native
@@ -93,13 +93,13 @@ void clamp_max_scalar_kernel_impl(TensorIteratorBase& iter, Scalar max) {
 } // anonymous namespace


-REGISTER_DISPATCH(where_kernel, &where_kernel_impl);
-REGISTER_DISPATCH(isposinf_stub, &isposinf_kernel_impl);
-REGISTER_DISPATCH(isneginf_stub, &isneginf_kernel_impl);
-REGISTER_DISPATCH(clamp_stub, &clamp_kernel_impl);
-REGISTER_DISPATCH(clamp_scalar_stub, &clamp_scalar_kernel_impl);
-REGISTER_DISPATCH(clamp_min_scalar_stub, &clamp_min_scalar_kernel_impl);
-REGISTER_DISPATCH(clamp_max_scalar_stub, &clamp_max_scalar_kernel_impl);
+REGISTER_DISPATCH(where_kernel, &where_kernel_impl)
+REGISTER_DISPATCH(isposinf_stub, &isposinf_kernel_impl)
+REGISTER_DISPATCH(isneginf_stub, &isneginf_kernel_impl)
+REGISTER_DISPATCH(clamp_stub, &clamp_kernel_impl)
+REGISTER_DISPATCH(clamp_scalar_stub, &clamp_scalar_kernel_impl)
+REGISTER_DISPATCH(clamp_min_scalar_stub, &clamp_min_scalar_kernel_impl)
+REGISTER_DISPATCH(clamp_max_scalar_stub, &clamp_max_scalar_kernel_impl)

 struct Msg {
   static constexpr size_t MAX_MSG_LENGTH = 256;
@@ -96,7 +96,7 @@ void conj_kernel_cuda(TensorIteratorBase& iter) {
   );
 }

-REGISTER_DISPATCH(angle_stub, &angle_kernel_cuda);
-REGISTER_DISPATCH(conj_physical_stub, &conj_kernel_cuda);
+REGISTER_DISPATCH(angle_stub, &angle_kernel_cuda)
+REGISTER_DISPATCH(conj_physical_stub, &conj_kernel_cuda)

 } // namespace at::native
@@ -188,12 +188,12 @@ void trunc_kernel_cuda(TensorIteratorBase& iter) {
   });
 }

-REGISTER_DISPATCH(ceil_stub, &ceil_kernel_cuda);
-REGISTER_DISPATCH(frac_stub, &frac_kernel_cuda);
-REGISTER_DISPATCH(floor_stub, &floor_kernel_cuda);
-REGISTER_DISPATCH(reciprocal_stub, &reciprocal_kernel_cuda);
-REGISTER_DISPATCH(round_stub, &round_kernel_cuda);
-REGISTER_DISPATCH(round_decimals_stub, &round_decimals_kernel_cuda);
-REGISTER_DISPATCH(trunc_stub, &trunc_kernel_cuda);
+REGISTER_DISPATCH(ceil_stub, &ceil_kernel_cuda)
+REGISTER_DISPATCH(frac_stub, &frac_kernel_cuda)
+REGISTER_DISPATCH(floor_stub, &floor_kernel_cuda)
+REGISTER_DISPATCH(reciprocal_stub, &reciprocal_kernel_cuda)
+REGISTER_DISPATCH(round_stub, &round_kernel_cuda)
+REGISTER_DISPATCH(round_decimals_stub, &round_decimals_kernel_cuda)
+REGISTER_DISPATCH(trunc_stub, &trunc_kernel_cuda)

 } // namespace at::native
@@ -125,8 +125,8 @@ void lgamma_kernel_cuda(TensorIteratorBase& iter) {
 #endif
 }

-REGISTER_DISPATCH(digamma_stub, &digamma_kernel_cuda);
-REGISTER_DISPATCH(polygamma_stub, &polygamma_kernel_cuda);
-REGISTER_DISPATCH(lgamma_stub, &lgamma_kernel_cuda);
+REGISTER_DISPATCH(digamma_stub, &digamma_kernel_cuda)
+REGISTER_DISPATCH(polygamma_stub, &polygamma_kernel_cuda)
+REGISTER_DISPATCH(lgamma_stub, &lgamma_kernel_cuda)

 } // namespace at::native
@@ -53,6 +53,6 @@ void acos_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(acos_stub, &acos_kernel_cuda);
+REGISTER_DISPATCH(acos_stub, &acos_kernel_cuda)

 } // namespace at::native
@@ -54,6 +54,6 @@ void acosh_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(acosh_stub, &acosh_kernel_cuda);
+REGISTER_DISPATCH(acosh_stub, &acosh_kernel_cuda)

 } // namespace at::native
@@ -50,6 +50,6 @@ void asin_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(asin_stub, &asin_kernel_cuda);
+REGISTER_DISPATCH(asin_stub, &asin_kernel_cuda)

 } // namespace at::native
@@ -54,6 +54,6 @@ void asinh_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(asinh_stub, &asinh_kernel_cuda);
+REGISTER_DISPATCH(asinh_stub, &asinh_kernel_cuda)

 } // namespace at::native
@@ -53,6 +53,6 @@ void atan_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(atan_stub, &atan_kernel_cuda);
+REGISTER_DISPATCH(atan_stub, &atan_kernel_cuda)

 } // namespace at::native
@@ -53,6 +53,6 @@ void atanh_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(atanh_stub, &atanh_kernel_cuda);
+REGISTER_DISPATCH(atanh_stub, &atanh_kernel_cuda)

 } // namespace at::native
@@ -52,6 +52,6 @@ void cos_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(cos_stub, &cos_kernel_cuda);
+REGISTER_DISPATCH(cos_stub, &cos_kernel_cuda)

 } // namespace at::native
@@ -53,6 +53,6 @@ void cosh_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(cosh_stub, &cosh_kernel_cuda);
+REGISTER_DISPATCH(cosh_stub, &cosh_kernel_cuda)

 } // namespace at::native
@@ -52,6 +52,6 @@ void sin_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(sin_stub, &sin_kernel_cuda);
+REGISTER_DISPATCH(sin_stub, &sin_kernel_cuda)

 } // namespace at::native
@@ -53,6 +53,6 @@ void sinh_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(sinh_stub, &sinh_kernel_cuda);
+REGISTER_DISPATCH(sinh_stub, &sinh_kernel_cuda)

 } // namespace at::native
@@ -52,6 +52,6 @@ void tan_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(tan_stub, &tan_kernel_cuda);
+REGISTER_DISPATCH(tan_stub, &tan_kernel_cuda)

 } // namespace at::native
@@ -53,6 +53,6 @@ void tanh_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(tanh_stub, &tanh_kernel_cuda);
+REGISTER_DISPATCH(tanh_stub, &tanh_kernel_cuda)

 } // namespace at::native
@@ -113,9 +113,9 @@ void log2_kernel_cuda(TensorIteratorBase& iter) {
 }
 }

-REGISTER_DISPATCH(log_stub, &log_kernel_cuda);
-REGISTER_DISPATCH(log10_stub, &log10_kernel_cuda);
-REGISTER_DISPATCH(log2_stub, &log2_kernel_cuda);
-REGISTER_DISPATCH(log1p_stub, &log1p_kernel_cuda);
+REGISTER_DISPATCH(log_stub, &log_kernel_cuda)
+REGISTER_DISPATCH(log10_stub, &log10_kernel_cuda)
+REGISTER_DISPATCH(log2_stub, &log2_kernel_cuda)
+REGISTER_DISPATCH(log1p_stub, &log1p_kernel_cuda)

 } // namespace at::native
|
@ -275,12 +275,12 @@ void frexp_kernel_cuda(TensorIteratorBase& iter) {
|
||||
});
|
||||
}
|
||||
|
||||
REGISTER_DISPATCH(bitwise_not_stub, &bitwise_not_kernel_cuda);
|
||||
REGISTER_DISPATCH(exp_stub, &exp_kernel_cuda);
|
||||
REGISTER_DISPATCH(expm1_stub, &expm1_kernel_cuda);
|
||||
REGISTER_DISPATCH(rsqrt_stub, &rsqrt_kernel_cuda);
|
||||
REGISTER_DISPATCH(sqrt_stub, &sqrt_kernel_cuda);
|
||||
REGISTER_DISPATCH(nan_to_num_stub, &nan_to_num_kernel_cuda);
|
||||
REGISTER_DISPATCH(frexp_stub, &frexp_kernel_cuda);
|
||||
REGISTER_DISPATCH(bitwise_not_stub, &bitwise_not_kernel_cuda)
|
||||
REGISTER_DISPATCH(exp_stub, &exp_kernel_cuda)
|
||||
REGISTER_DISPATCH(expm1_stub, &expm1_kernel_cuda)
|
||||
REGISTER_DISPATCH(rsqrt_stub, &rsqrt_kernel_cuda)
|
||||
REGISTER_DISPATCH(sqrt_stub, &sqrt_kernel_cuda)
|
||||
REGISTER_DISPATCH(nan_to_num_stub, &nan_to_num_kernel_cuda)
|
||||
REGISTER_DISPATCH(frexp_stub, &frexp_kernel_cuda)
|
||||
|
||||
} // namespace at::native
|
||||
|
Some files were not shown because too many files have changed in this diff.
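For context on the macros that dominate this diff: each REGISTER_*_DISPATCH invocation installs one architecture-specific function pointer into a dispatch stub, and the stub selects the best registered pointer at runtime based on detected CPU capability. A stripped-down sketch of that pattern, with illustrative names rather than PyTorch's actual DispatchStub machinery:

#include <cstdio>

using KernelFn = void (*)();

// Illustrative stub: one slot per CPU capability, filled at static-init time.
struct DispatchStub {
  KernelFn default_fn = nullptr;
  KernelFn avx2_fn = nullptr;

  void operator()() const {
    // Real implementations query CPU features once and cache the choice.
    if (avx2_fn != nullptr) { avx2_fn(); } else { default_fn(); }
  }
};

// The macro expands to a complete declaration ending in ';', so call sites
// take no trailing semicolon -- the convention this commit enforces.
#define REGISTER_AVX2_DISPATCH(stub, fn) \
  static const bool stub##_avx2_init = ((stub).avx2_fn = (fn), true);

DispatchStub my_op_stub;
void my_op_default() { std::puts("default kernel"); }
void my_op_avx2() { std::puts("avx2 kernel"); }

REGISTER_AVX2_DISPATCH(my_op_stub, &my_op_avx2)

int main() {
  my_op_stub.default_fn = &my_op_default;
  my_op_stub();  // runs the avx2 variant registered above
}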