From d645088f2f4001d6259b210edba211fe5c185dae Mon Sep 17 00:00:00 2001 From: Serhat Yilmaz Date: Mon, 17 May 2021 13:50:56 -0700 Subject: [PATCH] [torch] Format repeat_interleave op files (#58313) Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/58313 Same as title. I am planning to send a follow-up diff to this op, so sending formatting diff ahead to keep PR simple. Test Plan: Rely on existing signals since this is simple formatting diff. Reviewed By: ngimel Differential Revision: D28447685 fbshipit-source-id: c7cd473b61e40e6f50178aca88b9af197a759099 --- aten/src/ATen/native/Repeat.cpp | 31 ++++++++++---- aten/src/ATen/native/Repeat.h | 14 +++--- aten/src/ATen/native/cuda/Repeat.cu | 66 +++++++++++++++++------------ 3 files changed, 71 insertions(+), 40 deletions(-) diff --git a/aten/src/ATen/native/Repeat.cpp b/aten/src/ATen/native/Repeat.cpp index c9f8c0723aa4..758e7de8dde7 100644 --- a/aten/src/ATen/native/Repeat.cpp +++ b/aten/src/ATen/native/Repeat.cpp @@ -1,9 +1,13 @@ #include -#include #include +#include template -static void compute_cpu(index_t *repeat_ptr, int64_t *cumsum_ptr, index_t *result_ptr, int64_t size) { +static void compute_cpu( + index_t* repeat_ptr, + int64_t* cumsum_ptr, + index_t* result_ptr, + int64_t size) { at::parallel_for(0, size, 1, [&](int64_t i_begin, int64_t i_end) { for (int64_t i = i_begin; i < i_end; i++) { int64_t end = cumsum_ptr[i]; @@ -16,9 +20,10 @@ static void compute_cpu(index_t *repeat_ptr, int64_t *cumsum_ptr, index_t *resul }); } -namespace at { namespace native { +namespace at { +namespace native { -Tensor repeat_interleave_cpu(const Tensor &repeat) { +Tensor repeat_interleave_cpu(const Tensor& repeat) { Tensor output; AT_DISPATCH_INDEX_TYPES(repeat.scalar_type(), "repeat_interleave_cpu", [&]() { output = repeat_interleave_common>(repeat); @@ -27,9 +32,12 @@ Tensor repeat_interleave_cpu(const Tensor &repeat) { return output; } -Tensor repeat_interleave(const Tensor &self, const Tensor &repeats, c10::optional dim) { +Tensor repeat_interleave( + const Tensor& self, + const Tensor& repeats, + c10::optional dim) { Tensor input = self; - if(!dim) { + if (!dim) { input = self.flatten(); dim = 0; } @@ -48,8 +56,13 @@ Tensor repeat_interleave(const Tensor &self, const Tensor &repeats, c10::optiona return input.index_select(dim.value(), at::repeat_interleave(repeats_)); } -Tensor repeat_interleave(const Tensor &self, int64_t repeats, c10::optional dim) { - return at::native::repeat_interleave(self, at::tensor({repeats}, self.options().dtype(kLong)), dim); +Tensor repeat_interleave( + const Tensor& self, + int64_t repeats, + c10::optional dim) { + return at::native::repeat_interleave( + self, at::tensor({repeats}, self.options().dtype(kLong)), dim); } -}} +} // namespace native +} // namespace at diff --git a/aten/src/ATen/native/Repeat.h b/aten/src/ATen/native/Repeat.h index 5af86d95feea..3db3196127ec 100644 --- a/aten/src/ATen/native/Repeat.h +++ b/aten/src/ATen/native/Repeat.h @@ -2,15 +2,18 @@ #include -namespace at { namespace native { +namespace at { +namespace native { template -static inline Tensor repeat_interleave_common(const Tensor &repeats) { - TORCH_CHECK(repeats.dim() == 1, "repeat_interleave only accept 1D vector as repeat"); +static inline Tensor repeat_interleave_common(const Tensor& repeats) { + TORCH_CHECK( + repeats.dim() == 1, "repeat_interleave only accept 1D vector as repeat"); TORCH_CHECK( repeats.scalar_type() == at::kLong || repeats.scalar_type() == at::kInt, "repeats has to be Long or Int tensor"); - TORCH_CHECK((repeats >= 0).all().item(), "repeats can not be negative"); + TORCH_CHECK( + (repeats >= 0).all().item(), "repeats can not be negative"); if (repeats.size(0) == 0) { return at::empty_like(repeats, LEGACY_CONTIGUOUS_MEMORY_FORMAT); } @@ -25,4 +28,5 @@ static inline Tensor repeat_interleave_common(const Tensor &repeats) { return result; } -}} +} // namespace native +} // namespace at diff --git a/aten/src/ATen/native/cuda/Repeat.cu b/aten/src/ATen/native/cuda/Repeat.cu index 005d9467c53b..261e6aae69ec 100644 --- a/aten/src/ATen/native/cuda/Repeat.cu +++ b/aten/src/ATen/native/cuda/Repeat.cu @@ -3,39 +3,53 @@ #include template -__global__ static void compute_cuda_kernel(index_t *repeat_ptr, int64_t *cumsum_ptr, index_t *result_ptr, int64_t size) { - int64_t idx = blockIdx.x * blockDim.x + threadIdx.x; - int64_t stride = (blockDim.x * gridDim.x) / C10_WARP_SIZE; - int warp_id = idx / C10_WARP_SIZE; - int tid_in_warp = idx % C10_WARP_SIZE; - for (int64_t i = warp_id; i < size; i += stride) { - int64_t end = cumsum_ptr[i]; - index_t repeat = repeat_ptr[i]; - int64_t start = end - repeat; - for(int64_t j = start + tid_in_warp; j < end; j += C10_WARP_SIZE) { - result_ptr[j] = i; - } +__global__ static void compute_cuda_kernel( + index_t* repeat_ptr, + int64_t* cumsum_ptr, + index_t* result_ptr, + int64_t size) { + int64_t idx = blockIdx.x * blockDim.x + threadIdx.x; + int64_t stride = (blockDim.x * gridDim.x) / C10_WARP_SIZE; + int warp_id = idx / C10_WARP_SIZE; + int tid_in_warp = idx % C10_WARP_SIZE; + for (int64_t i = warp_id; i < size; i += stride) { + int64_t end = cumsum_ptr[i]; + index_t repeat = repeat_ptr[i]; + int64_t start = end - repeat; + for (int64_t j = start + tid_in_warp; j < end; j += C10_WARP_SIZE) { + result_ptr[j] = i; } + } } template -static void compute_cuda(index_t *repeat_ptr, int64_t *cumsum_ptr, index_t *result_ptr, int64_t size) { - int64_t block = 512; - int64_t warps_per_block = block / C10_WARP_SIZE; - int64_t grid = std::min((size + warps_per_block - 1) / warps_per_block, 2048L); +static void compute_cuda( + index_t* repeat_ptr, + int64_t* cumsum_ptr, + index_t* result_ptr, + int64_t size) { + int64_t block = 512; + int64_t warps_per_block = block / C10_WARP_SIZE; + int64_t grid = + std::min((size + warps_per_block - 1) / warps_per_block, 2048L); - compute_cuda_kernel<<>>(repeat_ptr, cumsum_ptr, result_ptr, size); - C10_CUDA_KERNEL_LAUNCH_CHECK(); + compute_cuda_kernel<<>>( + repeat_ptr, cumsum_ptr, result_ptr, size); + C10_CUDA_KERNEL_LAUNCH_CHECK(); } -namespace at { namespace native { +namespace at { +namespace native { -Tensor repeat_interleave_cuda(const Tensor &repeat) { - Tensor output; - AT_DISPATCH_INDEX_TYPES(repeat.scalar_type(), "repeat_interleave_cuda", [&]() { - output = repeat_interleave_common>(repeat); - }); - return output; +Tensor repeat_interleave_cuda(const Tensor& repeat) { + Tensor output; + AT_DISPATCH_INDEX_TYPES( + repeat.scalar_type(), "repeat_interleave_cuda", [&]() { + output = + repeat_interleave_common>(repeat); + }); + return output; } -}} +} // namespace native +} // namespace at