Forward declarations of needed curand functions (#10911)

Summary: Needed for FULL_CAFFE2=1 with statically linked CUDA libraries. Waiting on advice from Nvidia.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/10911
Reviewed By: pjh5
Differential Revision: D9636256
Pulled By: orionr
fbshipit-source-id: fcad7945910b6c8fb5f52e81cc87dad5fcfb3c65
2025-10-20 21:14:14 +08:00 · 2018-09-05 16:45:48 -07:00
parent 57728f71e7
commit c0efe6f027
10 changed files with 122 additions and 70 deletions
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@ -89,7 +89,6 @@ option(USE_LITE_PROTO "Use lite protobuf instead of full." OFF)
 option(USE_LMDB "Use LMDB" ON)
 option(USE_METAL "Use Metal for iOS build" ON)
 option(USE_MOBILE_OPENGL "Use OpenGL for mobile code" ON)
-option(USE_MPI "Use MPI" ON)
 option(USE_NATIVE_ARCH "Use -march=native" OFF)
 option(USE_NCCL "Use NCCL" ON)
 option(USE_SYSTEM_NCCL "Use system-wide NCCL" OFF)
@ -117,6 +116,9 @@ option(USE_MKLDNN "Use MKLDNN" OFF)
 option(USE_IDEEP "Use IDEEP interface in MKL BLAS" ON)
 option(USE_MKLML "Use MKLML interface in MKL BLAS" ON)
 option(USE_DISTRIBUTED "Use THD (distributed)" OFF)
+cmake_dependent_option(
+    USE_MPI "Use MPI" ON
+    "USE_DISTRIBUTED" OFF)

 # Used when building Caffe2 through setup.py
 option(BUILDING_WITH_TORCH_LIBS "Tell cmake if Caffe2 is being built alongside torch libs" OFF)
--- a/aten/src/ATen/Config.h.in
+++ b/aten/src/ATen/Config.h.in
@ -8,3 +8,4 @@

 #define AT_MKLDNN_ENABLED() @AT_MKLDNN_ENABLED@
 #define AT_MKL_ENABLED() @AT_MKL_ENABLED@
+#define CAFFE2_STATIC_LINK_CUDA() @CAFFE2_STATIC_LINK_CUDA@
--- a/aten/src/ATen/cuda/_curand_mtgp32_host.h
+++ b/aten/src/ATen/cuda/_curand_mtgp32_host.h
@ -0,0 +1,34 @@
+#include <curand.h>
+
+// Forward declarations of symbols that are defined in libcurand_static.a.
+// This avoids multiple definitions of these symbols when statically linking
+// cuRAND into both Caffe2 and ATen.
+#if CAFFE2_STATIC_LINK_CUDA()
+curandStatus_t curandMakeMTGP32Constants(
+    const mtgp32_params_fast_t params[],
+    mtgp32_kernel_params_t * p);
+void mtgp32_init_state(
+    unsigned int state[],
+    const mtgp32_params_fast_t *para,
+    unsigned int seed);
+curandStatus_t CURANDAPI curandMakeMTGP32KernelState(
+    curandStateMtgp32_t *s,
+    mtgp32_params_fast_t params[],
+    mtgp32_kernel_params_t *k,
+    int n,
+    unsigned long long seed);
+extern mtgp32_params_fast_t mtgp32dc_params_fast_11213[];
+int mtgp32_init_by_array(
+    unsigned int state[],
+    const mtgp32_params_fast_t *para,
+    unsigned int *array, int length);
+int mtgp32_init_by_str(
+    unsigned int state[],
+    const mtgp32_params_fast_t *para,
+    unsigned char *array);
+extern const int mtgpdc_params_11213_num;
+
+#else // CAFFE2_STATIC_LINK_CUDA
+#include <curand_mtgp32_host.h>
+#include <curand_mtgp32dc_p_11213.h>
+#endif // CAFFE2_STATIC_LINK_CUDA
--- a/aten/src/ATen/test/verify_api_visibility.cpp
+++ b/aten/src/ATen/test/verify_api_visibility.cpp
@ -12,4 +12,8 @@
 #error "AT_MKLDNN_ENABLED should not be visible in public headers"
 #endif

+#ifdef CAFFE2_STATIC_LINK_CUDA
+#error "CAFFE2_STATIC_LINK_CUDA should not be visible in public headers"
+#endif
+
 auto main() -> int {}
--- a/aten/src/THC/THCTensorRandom.cu
+++ b/aten/src/THC/THCTensorRandom.cu
@ -6,12 +6,13 @@
 #include "THCReduceApplyUtils.cuh"
 #include "THCTensorRandom.cuh"
 #include "THCGenerator.hpp"
+#include "ATen/Config.h"
+
+#include "ATen/cuda/_curand_mtgp32_host.h"

 #include <thrust/functional.h>
 #include <curand.h>
 #include <curand_kernel.h>
-#include <curand_mtgp32_host.h>
-#include <curand_mtgp32dc_p_11213.h>

 #define MAX_NUM_BLOCKS 200 
 #define BLOCK_SIZE 256
--- a/caffe2/opt/backend_cutting_test.cc
+++ b/caffe2/opt/backend_cutting_test.cc
@ -83,9 +83,9 @@ TEST(BackendCuttingTest, line) {
  EXPECT_EQ(3, net_opt.op_size());
 }

-//  X0 -> CopyIn -> MyConv -\
+//  X0 -> CopyIn -> MyConv -|
 //                           > Concat -> CopyOut -> Y
-//  N2 -> MyConv -> MyRelu -/
+//  N2 -> MyConv -> MyRelu -|
 TEST(BackendCuttingTest, convergedPaths) {
  caffe2::NetDef net;
  net.add_external_input("X0");
@ -118,8 +118,8 @@ TEST(BackendCuttingTest, convergedPaths) {
 };

 //                -> Random -> Relu -> MyConv4
-//              /                             \
-// N0 -> MyConv -> MyRelu -> MyConv2 ---------- > Concat -> CopyOut -> Y
+//                |                           |
+// N0 -> MyConv -> MyRelu -> MyConv2 ----------> Concat -> CopyOut -> Y
 TEST(BackendCuttingTest, skipPath) {
  caffe2::NetDef net;
  net.add_external_input("N0");
--- a/caffe2/utils/fatal_signal_asan_no_sig_test.cc
+++ b/caffe2/utils/fatal_signal_asan_no_sig_test.cc
@ -14,6 +14,7 @@ namespace {
 void* dummy_thread(void*) {
  while (1) {
  }
+  return nullptr;
 }

 bool forkAndPipe(
--- a/cmake/public/cuda.cmake
+++ b/cmake/public/cuda.cmake
@ -178,7 +178,7 @@ add_library(caffe2::cudart INTERFACE IMPORTED)
 if(CAFFE2_STATIC_LINK_CUDA)
    set_property(
        TARGET caffe2::cudart PROPERTY INTERFACE_LINK_LIBRARIES
-        "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcudart_static.a")
+        "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcudart_static.a" rt)
 else()
    set_property(
        TARGET caffe2::cudart PROPERTY INTERFACE_LINK_LIBRARIES
--- a/setup.py
+++ b/setup.py
@ -188,7 +188,10 @@ cwd = os.path.dirname(os.path.abspath(__file__))
 lib_path = os.path.join(cwd, "torch", "lib")
 third_party_path = os.path.join(cwd, "third_party")
 tmp_install_path = lib_path + "/tmp_install"
+caffe2_build_dir = os.path.join(cwd, "build")
+# lib/pythonx.x/site-packages
 rel_site_packages = distutils.sysconfig.get_python_lib(prefix='')
+# full absolute path to the dir above
 full_site_packages = distutils.sysconfig.get_python_lib()


@ -465,6 +468,15 @@ class develop(setuptools.command.develop.develop):
        setuptools.command.develop.develop.run(self)
        self.create_compile_commands()

+        # Copy Caffe2's Python proto files (generated during the build with the
+        # protobuf python compiler) from the build folder to the root folder
+        # cp root/build/caffe2/proto/proto.py root/caffe2/proto/proto.py
+        for src in glob.glob(
+                os.path.join(caffe2_build_dir, 'caffe2', 'proto', '*.py')):
+            dst = os.path.join(
+                cwd, os.path.relpath(src, caffe2_build_dir))
+            self.copy_file(src, dst)
+
    def create_compile_commands(self):
        def load(filename):
            with open(filename) as f:
@ -593,39 +605,40 @@ class build_ext(build_ext_parent):
        # platform dependent build folder created by the "build" command of
        # setuptools. Only the contents of this folder are installed in the
        # "install" command by default.
-        if FULL_CAFFE2:
-            # We only make this copy for Caffe2's pybind extensions
-            caffe2_pybind_exts = [
-                'caffe2.python.caffe2_pybind11_state',
-                'caffe2.python.caffe2_pybind11_state_gpu',
-                'caffe2.python.caffe2_pybind11_state_hip',
-            ]
-            i = 0
-            while i < len(self.extensions):
-                ext = self.extensions[i]
-                if ext.name not in caffe2_pybind_exts:
-                    i += 1
-                    continue
-                fullname = self.get_ext_fullname(ext.name)
-                filename = self.get_ext_filename(fullname)
+        # We only make this copy for Caffe2's pybind extensions
+        caffe2_pybind_exts = [
+            'caffe2.python.caffe2_pybind11_state',
+            'caffe2.python.caffe2_pybind11_state_gpu',
+            'caffe2.python.caffe2_pybind11_state_hip',
+        ]
+        i = 0
+        while i < len(self.extensions):
+            ext = self.extensions[i]
+            if ext.name not in caffe2_pybind_exts:
+                i += 1
+                continue
+            fullname = self.get_ext_fullname(ext.name)
+            filename = self.get_ext_filename(fullname)
+            print("\nCopying extension {}".format(ext.name))

-                src = os.path.join(tmp_install_path, rel_site_packages, filename)
-                if not os.path.exists(src):
-                    print("{} does not exist".format(src))
-                    del self.extensions[i]
-                else:
-                    dst = os.path.join(os.path.realpath(self.build_lib), filename)
-                    dst_dir = os.path.dirname(dst)
-                    if not os.path.exists(dst_dir):
-                        os.makedirs(dst_dir)
-                    self.copy_file(src, dst)
-                    i += 1
+            src = os.path.join(tmp_install_path, rel_site_packages, filename)
+            if not os.path.exists(src):
+                print("{} does not exist".format(src))
+                del self.extensions[i]
+            else:
+                dst = os.path.join(os.path.realpath(self.build_lib), filename)
+                print("Copying {} from {} to {}".format(ext.name, src, dst))
+                dst_dir = os.path.dirname(dst)
+                if not os.path.exists(dst_dir):
+                    os.makedirs(dst_dir)
+                self.copy_file(src, dst)
+                i += 1
        distutils.command.build_ext.build_ext.build_extensions(self)

    def get_outputs(self):
        outputs = distutils.command.build_ext.build_ext.get_outputs(self)
-        if FULL_CAFFE2:
-            outputs.append(os.path.join(self.build_lib, "caffe2"))
+        outputs.append(os.path.join(self.build_lib, "caffe2"))
+        print("setup.py::get_outputs returning {}".format(outputs))
        return outputs


@ -1008,10 +1021,7 @@ def make_relative_rpath(path):
 ################################################################################

 extensions = []
-if FULL_CAFFE2:
-    packages = find_packages(exclude=('tools', 'tools.*'))
-else:
-    packages = find_packages(exclude=('tools', 'tools.*', 'caffe2', 'caffe2.*'))
+packages = find_packages(exclude=('tools', 'tools.*'))
 C = Extension("torch._C",
              libraries=main_libraries,
              sources=main_sources,
@ -1055,19 +1065,18 @@ if USE_CUDA:
                        )
    extensions.append(THNVRTC)

-if FULL_CAFFE2:
-    # If building Caffe2 python as well, these extensions are built by cmake
-    # copied manually in build_extensions() inside the build_ext implementaiton
-    extensions.append(
-        setuptools.Extension(
-            name=str('caffe2.python.caffe2_pybind11_state'),
-            sources=[]),
-    )
-    extensions.append(
-        setuptools.Extension(
-            name=str('caffe2.python.caffe2_pybind11_state_gpu'),
-            sources=[]),
-    )
+# These extensions are built by cmake and copied manually in build_extensions()
+# inside the build_ext implementation
+extensions.append(
+    setuptools.Extension(
+        name=str('caffe2.python.caffe2_pybind11_state'),
+        sources=[]),
+)
+extensions.append(
+    setuptools.Extension(
+        name=str('caffe2.python.caffe2_pybind11_state_gpu'),
+        sources=[]),
+)

 cmdclass = {
    'create_version_file': create_version_file,
@ -1083,14 +1092,12 @@ cmdclass = {
 }
 cmdclass.update(build_dep_cmds)

-entry_points = {}
-if FULL_CAFFE2:
-    entry_points = {
-        'console_scripts': [
-            'convert-caffe2-to-onnx = caffe2.python.onnx.bin.conversion:caffe2_to_onnx',
-            'convert-onnx-to-caffe2 = caffe2.python.onnx.bin.conversion:onnx_to_caffe2',
-        ]
-    }
+entry_points = {
+    'console_scripts': [
+        'convert-caffe2-to-onnx = caffe2.python.onnx.bin.conversion:caffe2_to_onnx',
+        'convert-onnx-to-caffe2 = caffe2.python.onnx.bin.conversion:onnx_to_caffe2',
+    ]
+}

 if __name__ == '__main__':
    setup(
@ -1133,6 +1140,9 @@ if __name__ == '__main__':
                'lib/include/torch/csrc/utils/*.h',
                'lib/include/torch/csrc/cuda/*.h',
                'lib/include/torch/torch.h',
+            ],
+            'caffe2': [
+                rel_site_packages + '/caffe2/**/*.py'
            ]
        },
    )
--- a/tools/build_pytorch_libs.sh
+++ b/tools/build_pytorch_libs.sh
@ -17,6 +17,7 @@ USE_NNPACK=0
 USE_MKLDNN=0
 USE_GLOO_IBVERBS=0
 FULL_CAFFE2=0
+CAFFE2_STATIC_LINK_CUDA=0
 while [[ $# -gt 0 ]]; do
    case "$1" in
      --use-cuda)
@ -170,6 +171,7 @@ function build() {
              -DTH_LIB_PATH="$INSTALL_DIR/lib" \
              -DTH_LIBRARIES="$INSTALL_DIR/lib/libTH$LD_POSTFIX" \
              -DCAFFE2_LIBRARIES="$INSTALL_DIR/lib/libcaffe2$LD_POSTFIX" \
+              -DCAFFE2_STATIC_LINK_CUDA=$CAFFE2_STATIC_LINK_CUDA \
              -DTHNN_LIBRARIES="$INSTALL_DIR/lib/libTHNN$LD_POSTFIX" \
              -DTHCUNN_LIBRARIES="$INSTALL_DIR/lib/libTHCUNN$LD_POSTFIX" \
              -DTHS_LIBRARIES="$INSTALL_DIR/lib/libTHS$LD_POSTFIX" \
@ -238,6 +240,9 @@ function build_nccl() {
 # detected them (to ensure that we have a consistent view between the
 # PyTorch and Caffe2 builds.)
 function build_caffe2() {
+  # pwd is pytorch_root/build
+
+  # TODO change these to CMAKE_ARGS for consistency
  if [[ -z $EXTRA_CAFFE2_CMAKE_FLAGS ]]; then
    EXTRA_CAFFE2_CMAKE_FLAGS=()
  fi
@ -255,7 +260,7 @@ function build_caffe2() {
      -DBUILDING_WITH_TORCH_LIBS=ON \
      -DCMAKE_BUILD_TYPE=$BUILD_TYPE \
      -DBUILD_TORCH=$BUILD_TORCH \
-      -DBUILD_PYTHON=$FULL_CAFFE2 \
+      -DBUILD_PYTHON=ON \
      -DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \
      -DBUILD_BINARY=$FULL_CAFFE2 \
      -DBUILD_TEST=$FULL_CAFFE2 \
@ -286,21 +291,15 @@ function build_caffe2() {

  # This is needed by the aten tests built with caffe2
  if [ -f "${INSTALL_DIR}/lib/libnccl.so" ] && [ ! -f "lib/libnccl.so.1" ]; then
+    # cp root/torch/lib/tmp_install/lib/libnccl.so.1 root/build/lib/libnccl.so.1
    cp "${INSTALL_DIR}/lib/libnccl.so.1" "lib/libnccl.so.1"
  fi

  ${CMAKE_INSTALL} -j"$MAX_JOBS"

-  # Install Python proto files
-  if [[ $FULL_CAFFE2 -ne 0 ]]; then
-    find . -name proto
-    for proto_file in ./caffe2/proto/*.py; do
-      cp $proto_file "../caffe2/proto/"
-    done
-  fi
-
  # Fix rpaths of shared libraries
  if [[ $(uname) == 'Darwin' ]]; then
+    # root/torch/lib/tmp_install/lib
    pushd "$INSTALL_DIR/lib"
    for lib in *.dylib; do
      echo "Updating install_name for $lib"