Mirror of https://github.com/pytorch/pytorch.git
* Also pass torch includes to nvcc build
* Export ATen/cuda headers with install
* Refactor flags common to C++ and CUDA
* Improve tests for C++/CUDA extensions
* Export .cuh files under THC
* Refactor and clean cpp_extension.py slightly
* Include ATen in cuda extension test
* Clarifying comment in cuda_extension.cu
* Replace cuda_extension.cu with cuda_extension_kernel.cu in setup.py
* Copy compile args in C++ extension and add second kernel
* Conditionally add -std=c++11 to cuda_flags
* Also export cuDNN headers
* Add comment about deepcopy
cuda_extension.cpp · 20 lines · 718 B · C++
#include <torch/torch.h>

// Declare the function from cuda_extension.cu. It will be compiled
// separately with nvcc and linked with the object file of cuda_extension.cpp
// into one shared library.
void sigmoid_add_cuda(const float* x, const float* y, float* output, int size);

at::Tensor sigmoid_add(at::Tensor x, at::Tensor y) {
  AT_ASSERT(x.type().is_cuda(), "x must be a CUDA tensor");
  AT_ASSERT(y.type().is_cuda(), "y must be a CUDA tensor");
  auto output = at::zeros_like(x);
  sigmoid_add_cuda(
      x.data<float>(), y.data<float>(), output.data<float>(), output.numel());
  return output;
}

PYBIND11_MODULE(TORCH_EXTENSION_NAME, m) {
  m.def("sigmoid_add", &sigmoid_add, "sigmoid(x) + sigmoid(y)");
}
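For reference, sigmoid_add_cuda is only declared in this file; its definition lives in the CUDA source that nvcc compiles separately (cuda_extension_kernel.cu per the commit message above) and that is then linked into the same shared library. Below is a minimal sketch of what such a kernel file could look like; the kernel name, launch configuration, and sigmoid arithmetic are illustrative assumptions, not necessarily the repository's actual implementation.

#include <cuda.h>
#include <cuda_runtime.h>
#include <math.h>

// Illustrative sketch of the device code that cuda_extension.cpp links
// against. The actual cuda_extension_kernel.cu in the repository may differ.
__global__ void sigmoid_add_kernel(
    const float* __restrict__ x,
    const float* __restrict__ y,
    float* __restrict__ output,
    const int size) {
  const int index = blockIdx.x * blockDim.x + threadIdx.x;
  if (index < size) {
    const float sigmoid_x = 1.0f / (1.0f + expf(-x[index]));
    const float sigmoid_y = 1.0f / (1.0f + expf(-y[index]));
    output[index] = sigmoid_x + sigmoid_y;
  }
}

// Host wrapper with the signature declared in cuda_extension.cpp.
// Launch configuration (1024 threads per block) is an assumption.
void sigmoid_add_cuda(const float* x, const float* y, float* output, int size) {
  const int threads = 1024;
  const int blocks = (size + threads - 1) / threads;
  sigmoid_add_kernel<<<blocks, threads>>>(x, y, output, size);
}

Splitting the code this way lets nvcc handle only the device kernel and its launch, while the host-side binding in cuda_extension.cpp is built by the regular C++ compiler, matching the comment at the top of that file.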