Files
pytorch/test/cpp_extensions/cuda_extension_kernel.cu
Yixin Bao 840e71f4e6 Check CUDA kernel launches (/fbcode/caffe2/) (#49145)
Summary:
Pull Request resolved: https://github.com/pytorch/pytorch/pull/49145

Pull Request resolved: https://github.com/pytorch/pytorch/pull/49105

(1) Add a safety check `C10_CUDA_KERNEL_LAUNCH_CHECK()` after each kernel launch. This diff only changes files inside the directories /fbsource/fbcode/caffe2/modules/, /fbsource/fbcode/caffe2/fb/, and /fbsource/fbcode/caffe2/test/.

(2) Get rid of old check `AT_CUDA_CHECK(cudaGetLastError())` when necessary.

Test Plan:
Test build:
```
buck build mode/dev-nosan //caffe2/modules/detectron:
buck test mode/dev-nosan //caffe2/modules/detectron:
buck build mode/dev-nosan //caffe2/torch/fb/:
buck test mode/dev-nosan //caffe2/torch/fb/:
```

To check for launches without checks:
```
python3 caffe2/torch/testing/check_kernel_launches.py
```
Make sure none of the updated files are in the returned list.

Reviewed By: r-barnes

Differential Revision: D25452852

fbshipit-source-id: d6657edab612c9e0fa99b29c68460be8b1a20064
2020-12-10 10:43:03 -08:00

26 lines
779 B
Plaintext

#include <cuda.h>
#include <cuda_runtime.h>
#include <c10/cuda/CUDAException.h>
#include <ATen/ATen.h>
// Element-wise CUDA kernel: output[i] = sigmoid(x[i]) + sigmoid(y[i]).
//
// Each thread derives one flat index from its 1-D block and thread
// coordinates and handles at most that single element; a thread whose index
// lands at or past `size` exits without touching memory.
//
//   x, y    input arrays; element i is read by the thread with flat index i
//   output  result array; element i is written by the thread with flat index i
//   size    number of elements to process
//
// All three pointers are __restrict__-qualified, i.e. the caller promises the
// buffers do not alias. The exponential is evaluated with the __expf
// fast-math intrinsic, which gives up some accuracy in exchange for speed.
__global__ void sigmoid_add_kernel(
    const float* __restrict__ x,
    const float* __restrict__ y,
    float* __restrict__ output,
    const int size) {
  const int i = blockIdx.x * blockDim.x + threadIdx.x;

  // Surplus threads in the last block have nothing to do.
  if (i >= size) {
    return;
  }

  // sigmoid(v) = 1 / (1 + e^(-v)), evaluated separately for each operand.
  const float exp_neg_x = __expf(-x[i]);
  const float exp_neg_y = __expf(-y[i]);
  const float sig_x = 1.0f / (1.0f + exp_neg_x);
  const float sig_y = 1.0f / (1.0f + exp_neg_y);

  output[i] = sig_x + sig_y;
}
// Host-side launcher for sigmoid_add_kernel.
//
// Computes output[i] = sigmoid(x[i]) + sigmoid(y[i]) for i in [0, size) by
// launching one thread per element in blocks of 1024 threads, then checks the
// launch with C10_CUDA_KERNEL_LAUNCH_CHECK().
//
//   x, y    input arrays passed straight through to the kernel
//   output  result array passed straight through to the kernel
//   size    number of elements; a non-positive value is treated as
//           "nothing to do" and returns without launching
//
// The launch uses the default <<<grid, block>>> configuration (no explicit
// shared-memory size or stream argument), and no synchronization is performed
// here.
void sigmoid_add_cuda(const float* x, const float* y, float* output, int size) {
  // A grid with zero blocks is an invalid launch configuration, so an empty
  // input must skip the launch rather than surface as an error from the
  // launch check below.
  if (size <= 0) {
    return;
  }

  const int threads = 1024;
  // Ceil-division written without `size + threads - 1`, which overflows a
  // signed int when size is within `threads - 1` of INT_MAX.
  const int blocks = size / threads + (size % threads != 0 ? 1 : 0);

  sigmoid_add_kernel<<<blocks, threads>>>(x, y, output, size);
  C10_CUDA_KERNEL_LAUNCH_CHECK();
}