mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
Summary: Pull Request resolved: https://github.com/pytorch/pytorch/pull/49145 Pull Request resolved: https://github.com/pytorch/pytorch/pull/49105 (1) Add a safety check `C10_CUDA_KERNEL_LAUNCH_CHECK()` after each kernel launch. This diff only changes the files inside the directory /fbsource/fbcode/caffe2/modules/, /fbsource/fbcode/caffe2/fb/, /fbsource/fbcode/caffe2/test/. (2) Get rid of old check `AT_CUDA_CHECK(cudaGetLastError())` when necessary. Test Plan: Test build: ``` buck build mode/dev-nosan //caffe2/modules/detectron: buck test mode/dev-nosan //caffe2/modules/detectron: buck build mode/dev-nosan //caffe2/torch/fb/: buck test mode/dev-nosan //caffe2/torch/fb/: ``` To check for launches without checks: ``` python3 caffe2/torch/testing/check_kernel_launches.py ``` Make sure none of the updated files are in the returned list. Reviewed By: r-barnes Differential Revision: D25452852 fbshipit-source-id: d6657edab612c9e0fa99b29c68460be8b1a20064
26 lines
779 B
Plaintext
26 lines
779 B
Plaintext
#include <cuda.h>
|
|
#include <cuda_runtime.h>
|
|
#include <c10/cuda/CUDAException.h>
|
|
|
|
#include <ATen/ATen.h>
|
|
|
|
// Elementwise kernel: output[i] = sigmoid(x[i]) + sigmoid(y[i]).
//
// Expects a 1-D launch configuration; any grid/block shape that spawns at
// least `size` threads is valid, since every thread bounds-checks its index.
// Uses the fast-math intrinsic __expf, so the result is an approximate
// sigmoid (a few ulps off a precise expf-based one).
__global__ void sigmoid_add_kernel(
    const float* __restrict__ x,
    const float* __restrict__ y,
    float* __restrict__ output,
    const int size) {
  const int i = blockDim.x * blockIdx.x + threadIdx.x;
  // Guard the grid tail: the last block may run past `size`.
  if (i >= size) {
    return;
  }
  const float sig_x = 1.0f / (1.0f + __expf(-x[i]));
  const float sig_y = 1.0f / (1.0f + __expf(-y[i]));
  output[i] = sig_x + sig_y;
}
|
|
|
|
// Host-side launcher for sigmoid_add_kernel:
//   output[i] = sigmoid(x[i]) + sigmoid(y[i]) for i in [0, size).
//
// `x`, `y`, and `output` must be device pointers with room for at least
// `size` floats. The launch goes to the default stream and is asynchronous;
// callers must synchronize before reading `output` on the host.
void sigmoid_add_cuda(const float* x, const float* y, float* output, int size) {
  // A grid of 0 blocks is an invalid launch configuration
  // (cudaErrorInvalidConfiguration), which the launch check below would
  // surface as an error. Empty input is a valid no-op, so return early.
  if (size <= 0) {
    return;
  }
  const int threads = 1024;
  // Ceiling division so the grid covers all `size` elements.
  const int blocks = (size + threads - 1) / threads;
  sigmoid_add_kernel<<<blocks, threads>>>(x, y, output, size);
  C10_CUDA_KERNEL_LAUNCH_CHECK();
}
|