Forward declarations of needed curand functions (#10911)

Summary:
Needed for FULL_CAFFE2=1 with statically linked CUDA libraries. Waiting on advice from NVIDIA.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/10911

Reviewed By: pjh5

Differential Revision: D9636256

Pulled By: orionr

fbshipit-source-id: fcad7945910b6c8fb5f52e81cc87dad5fcfb3c65
This commit is contained in:
Jesse Hellemn
2018-09-05 16:45:48 -07:00
committed by Facebook Github Bot
parent 57728f71e7
commit c0efe6f027
10 changed files with 122 additions and 70 deletions

View File

@ -89,7 +89,6 @@ option(USE_LITE_PROTO "Use lite protobuf instead of full." OFF)
option(USE_LMDB "Use LMDB" ON)
option(USE_METAL "Use Metal for iOS build" ON)
option(USE_MOBILE_OPENGL "Use OpenGL for mobile code" ON)
option(USE_MPI "Use MPI" ON)
option(USE_NATIVE_ARCH "Use -march=native" OFF)
option(USE_NCCL "Use NCCL" ON)
option(USE_SYSTEM_NCCL "Use system-wide NCCL" OFF)
@ -117,6 +116,9 @@ option(USE_MKLDNN "Use MKLDNN" OFF)
option(USE_IDEEP "Use IDEEP interface in MKL BLAS" ON)
option(USE_MKLML "Use MKLML interface in MKL BLAS" ON)
option(USE_DISTRIBUTED "Use THD (distributed)" OFF)
cmake_dependent_option(
USE_MPI "Use MPI" ON
"USE_DISTRIBUTED" OFF)
# Used when building Caffe2 through setup.py
option(BUILDING_WITH_TORCH_LIBS "Tell cmake if Caffe2 is being built alongside torch libs" OFF)

View File

@ -8,3 +8,4 @@
#define AT_MKLDNN_ENABLED() @AT_MKLDNN_ENABLED@
#define AT_MKL_ENABLED() @AT_MKL_ENABLED@
#define CAFFE2_STATIC_LINK_CUDA() @CAFFE2_STATIC_LINK_CUDA@

View File

@ -0,0 +1,34 @@
#include <curand.h>
// Selects how the MTGP32 host-side helpers are brought into scope, depending
// on whether CUDA is linked statically.
//
// Static linking: only forward declarations are provided here. The symbols
// are defined in libcurand_static.a, and declaring them (rather than pulling
// in the curand headers listed in the #else branch) avoids multiple
// definitions of these symbols when curand is statically linked into both
// Caffe2 and ATen.
//
// Otherwise: the regular curand MTGP32 host headers are included.
//
// NOTE(review): this header evaluates CAFFE2_STATIC_LINK_CUDA() but does not
// itself include the header that defines it — presumably the includer must
// pull in the ATen config header first; confirm.
#if CAFFE2_STATIC_LINK_CUDA()
// Function declarations; the signatures must stay in sync with the
// definitions shipped in libcurand_static.a.
curandStatus_t curandMakeMTGP32Constants(
const mtgp32_params_fast_t params[],
mtgp32_kernel_params_t * p);
void mtgp32_init_state(
unsigned int state[],
const mtgp32_params_fast_t *para,
unsigned int seed);
curandStatus_t CURANDAPI curandMakeMTGP32KernelState(
curandStateMtgp32_t *s,
mtgp32_params_fast_t params[],
mtgp32_kernel_params_t *k,
int n,
unsigned long long seed);
// Parameter table object, declared extern so that no second copy is emitted
// by this translation unit.
extern mtgp32_params_fast_t mtgp32dc_params_fast_11213[];
int mtgp32_init_by_array(
unsigned int state[],
const mtgp32_params_fast_t *para,
unsigned int *array, int length);
int mtgp32_init_by_str(
unsigned int state[],
const mtgp32_params_fast_t *para,
unsigned char *array);
// Declared extern for the same reason as the parameter table above.
extern const int mtgpdc_params_11213_num;
#else // CAFFE2_STATIC_LINK_CUDA
// Not linking CUDA statically: use the stock curand headers directly.
#include <curand_mtgp32_host.h>
#include <curand_mtgp32dc_p_11213.h>
#endif // CAFFE2_STATIC_LINK_CUDA

View File

@ -12,4 +12,8 @@
#error "AT_MKLDNN_ENABLED should not be visible in public headers"
#endif
#ifdef CAFFE2_STATIC_LINK_CUDA
#error "CAFFE2_STATIC_LINK_CUDA should not be visible in public headers"
#endif
auto main() -> int {}

View File

@ -6,12 +6,13 @@
#include "THCReduceApplyUtils.cuh"
#include "THCTensorRandom.cuh"
#include "THCGenerator.hpp"
#include "ATen/Config.h"
#include "ATen/cuda/_curand_mtgp32_host.h"
#include <thrust/functional.h>
#include <curand.h>
#include <curand_kernel.h>
#include <curand_mtgp32_host.h>
#include <curand_mtgp32dc_p_11213.h>
#define MAX_NUM_BLOCKS 200
#define BLOCK_SIZE 256

View File

@ -83,9 +83,9 @@ TEST(BackendCuttingTest, line) {
EXPECT_EQ(3, net_opt.op_size());
}
// X0 -> CopyIn -> MyConv -\
// X0 -> CopyIn -> MyConv -|
// > Concat -> CopyOut -> Y
// N2 -> MyConv -> MyRelu -/
// N2 -> MyConv -> MyRelu -|
TEST(BackendCuttingTest, convergedPaths) {
caffe2::NetDef net;
net.add_external_input("X0");
@ -118,8 +118,8 @@ TEST(BackendCuttingTest, convergedPaths) {
};
// -> Random -> Relu -> MyConv4
// / \
// N0 -> MyConv -> MyRelu -> MyConv2 ---------- > Concat -> CopyOut -> Y
// | |
// N0 -> MyConv -> MyRelu -> MyConv2 ----------> Concat -> CopyOut -> Y
TEST(BackendCuttingTest, skipPath) {
caffe2::NetDef net;
net.add_external_input("N0");

View File

@ -14,6 +14,7 @@ namespace {
// Thread entry point that spins forever and never does any work.
// The unnamed void* argument is ignored.
void* dummy_thread(void*) {
// Intentional busy loop: this thread is never expected to finish on its own.
while (1) {
}
// Unreachable; present so that the function has a return statement matching
// its void* return type.
return nullptr;
}
bool forkAndPipe(

View File

@ -178,7 +178,7 @@ add_library(caffe2::cudart INTERFACE IMPORTED)
if(CAFFE2_STATIC_LINK_CUDA)
set_property(
TARGET caffe2::cudart PROPERTY INTERFACE_LINK_LIBRARIES
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcudart_static.a")
"${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcudart_static.a" rt)
else()
set_property(
TARGET caffe2::cudart PROPERTY INTERFACE_LINK_LIBRARIES

116
setup.py
View File

@ -188,7 +188,10 @@ cwd = os.path.dirname(os.path.abspath(__file__))
lib_path = os.path.join(cwd, "torch", "lib")
third_party_path = os.path.join(cwd, "third_party")
tmp_install_path = lib_path + "/tmp_install"
caffe2_build_dir = os.path.join(cwd, "build")
# lib/pythonx.x/site-packages
rel_site_packages = distutils.sysconfig.get_python_lib(prefix='')
# full absolute path to the dir above
full_site_packages = distutils.sysconfig.get_python_lib()
@ -465,6 +468,15 @@ class develop(setuptools.command.develop.develop):
setuptools.command.develop.develop.run(self)
self.create_compile_commands()
# Copy Caffe2's Python proto files (generated during the build with the
# protobuf python compiler) from the build folder to the root folder
# cp root/build/caffe2/proto/proto.py root/caffe2/proto/proto.py
for src in glob.glob(
os.path.join(caffe2_build_dir, 'caffe2', 'proto', '*.py')):
dst = os.path.join(
cwd, os.path.relpath(src, caffe2_build_dir))
self.copy_file(src, dst)
def create_compile_commands(self):
def load(filename):
with open(filename) as f:
@ -593,39 +605,40 @@ class build_ext(build_ext_parent):
# platform dependent build folder created by the "build" command of
# setuptools. Only the contents of this folder are installed in the
# "install" command by default.
if FULL_CAFFE2:
# We only make this copy for Caffe2's pybind extensions
caffe2_pybind_exts = [
'caffe2.python.caffe2_pybind11_state',
'caffe2.python.caffe2_pybind11_state_gpu',
'caffe2.python.caffe2_pybind11_state_hip',
]
i = 0
while i < len(self.extensions):
ext = self.extensions[i]
if ext.name not in caffe2_pybind_exts:
i += 1
continue
fullname = self.get_ext_fullname(ext.name)
filename = self.get_ext_filename(fullname)
# We only make this copy for Caffe2's pybind extensions
caffe2_pybind_exts = [
'caffe2.python.caffe2_pybind11_state',
'caffe2.python.caffe2_pybind11_state_gpu',
'caffe2.python.caffe2_pybind11_state_hip',
]
i = 0
while i < len(self.extensions):
ext = self.extensions[i]
if ext.name not in caffe2_pybind_exts:
i += 1
continue
fullname = self.get_ext_fullname(ext.name)
filename = self.get_ext_filename(fullname)
print("\nCopying extension {}".format(ext.name))
src = os.path.join(tmp_install_path, rel_site_packages, filename)
if not os.path.exists(src):
print("{} does not exist".format(src))
del self.extensions[i]
else:
dst = os.path.join(os.path.realpath(self.build_lib), filename)
dst_dir = os.path.dirname(dst)
if not os.path.exists(dst_dir):
os.makedirs(dst_dir)
self.copy_file(src, dst)
i += 1
src = os.path.join(tmp_install_path, rel_site_packages, filename)
if not os.path.exists(src):
print("{} does not exist".format(src))
del self.extensions[i]
else:
dst = os.path.join(os.path.realpath(self.build_lib), filename)
print("Copying {} from {} to {}".format(ext.name, src, dst))
dst_dir = os.path.dirname(dst)
if not os.path.exists(dst_dir):
os.makedirs(dst_dir)
self.copy_file(src, dst)
i += 1
distutils.command.build_ext.build_ext.build_extensions(self)
def get_outputs(self):
outputs = distutils.command.build_ext.build_ext.get_outputs(self)
if FULL_CAFFE2:
outputs.append(os.path.join(self.build_lib, "caffe2"))
outputs.append(os.path.join(self.build_lib, "caffe2"))
print("setup.py::get_outputs returning {}".format(outputs))
return outputs
@ -1008,10 +1021,7 @@ def make_relative_rpath(path):
################################################################################
extensions = []
if FULL_CAFFE2:
packages = find_packages(exclude=('tools', 'tools.*'))
else:
packages = find_packages(exclude=('tools', 'tools.*', 'caffe2', 'caffe2.*'))
packages = find_packages(exclude=('tools', 'tools.*'))
C = Extension("torch._C",
libraries=main_libraries,
sources=main_sources,
@ -1055,19 +1065,18 @@ if USE_CUDA:
)
extensions.append(THNVRTC)
if FULL_CAFFE2:
# If building Caffe2 python as well, these extensions are built by cmake and
# copied manually in build_extensions() inside the build_ext implementation
extensions.append(
setuptools.Extension(
name=str('caffe2.python.caffe2_pybind11_state'),
sources=[]),
)
extensions.append(
setuptools.Extension(
name=str('caffe2.python.caffe2_pybind11_state_gpu'),
sources=[]),
)
# These extensions are built by cmake and copied manually in build_extensions()
# inside the build_ext implementation
extensions.append(
setuptools.Extension(
name=str('caffe2.python.caffe2_pybind11_state'),
sources=[]),
)
extensions.append(
setuptools.Extension(
name=str('caffe2.python.caffe2_pybind11_state_gpu'),
sources=[]),
)
cmdclass = {
'create_version_file': create_version_file,
@ -1083,14 +1092,12 @@ cmdclass = {
}
cmdclass.update(build_dep_cmds)
entry_points = {}
if FULL_CAFFE2:
entry_points = {
'console_scripts': [
'convert-caffe2-to-onnx = caffe2.python.onnx.bin.conversion:caffe2_to_onnx',
'convert-onnx-to-caffe2 = caffe2.python.onnx.bin.conversion:onnx_to_caffe2',
]
}
entry_points = {
'console_scripts': [
'convert-caffe2-to-onnx = caffe2.python.onnx.bin.conversion:caffe2_to_onnx',
'convert-onnx-to-caffe2 = caffe2.python.onnx.bin.conversion:onnx_to_caffe2',
]
}
if __name__ == '__main__':
setup(
@ -1133,6 +1140,9 @@ if __name__ == '__main__':
'lib/include/torch/csrc/utils/*.h',
'lib/include/torch/csrc/cuda/*.h',
'lib/include/torch/torch.h',
],
'caffe2': [
rel_site_packages + '/caffe2/**/*.py'
]
},
)

View File

@ -17,6 +17,7 @@ USE_NNPACK=0
USE_MKLDNN=0
USE_GLOO_IBVERBS=0
FULL_CAFFE2=0
CAFFE2_STATIC_LINK_CUDA=0
while [[ $# -gt 0 ]]; do
case "$1" in
--use-cuda)
@ -170,6 +171,7 @@ function build() {
-DTH_LIB_PATH="$INSTALL_DIR/lib" \
-DTH_LIBRARIES="$INSTALL_DIR/lib/libTH$LD_POSTFIX" \
-DCAFFE2_LIBRARIES="$INSTALL_DIR/lib/libcaffe2$LD_POSTFIX" \
-DCAFFE2_STATIC_LINK_CUDA=$CAFFE2_STATIC_LINK_CUDA \
-DTHNN_LIBRARIES="$INSTALL_DIR/lib/libTHNN$LD_POSTFIX" \
-DTHCUNN_LIBRARIES="$INSTALL_DIR/lib/libTHCUNN$LD_POSTFIX" \
-DTHS_LIBRARIES="$INSTALL_DIR/lib/libTHS$LD_POSTFIX" \
@ -238,6 +240,9 @@ function build_nccl() {
# detected them (to ensure that we have a consistent view between the
# PyTorch and Caffe2 builds.)
function build_caffe2() {
# pwd is pytorch_root/build
# TODO change these to CMAKE_ARGS for consistency
if [[ -z $EXTRA_CAFFE2_CMAKE_FLAGS ]]; then
EXTRA_CAFFE2_CMAKE_FLAGS=()
fi
@ -255,7 +260,7 @@ function build_caffe2() {
-DBUILDING_WITH_TORCH_LIBS=ON \
-DCMAKE_BUILD_TYPE=$BUILD_TYPE \
-DBUILD_TORCH=$BUILD_TORCH \
-DBUILD_PYTHON=$FULL_CAFFE2 \
-DBUILD_PYTHON=ON \
-DBUILD_SHARED_LIBS=$BUILD_SHARED_LIBS \
-DBUILD_BINARY=$FULL_CAFFE2 \
-DBUILD_TEST=$FULL_CAFFE2 \
@ -286,21 +291,15 @@ function build_caffe2() {
# This is needed by the aten tests built with caffe2
if [ -f "${INSTALL_DIR}/lib/libnccl.so" ] && [ ! -f "lib/libnccl.so.1" ]; then
# cp root/torch/lib/tmp_install/libnccl root/build/lib/libnccl
cp "${INSTALL_DIR}/lib/libnccl.so.1" "lib/libnccl.so.1"
fi
${CMAKE_INSTALL} -j"$MAX_JOBS"
# Install Python proto files
if [[ $FULL_CAFFE2 -ne 0 ]]; then
find . -name proto
for proto_file in ./caffe2/proto/*.py; do
cp $proto_file "../caffe2/proto/"
done
fi
# Fix rpaths of shared libraries
if [[ $(uname) == 'Darwin' ]]; then
# root/torch/lib/tmp_install/lib
pushd "$INSTALL_DIR/lib"
for lib in *.dylib; do
echo "Updating install_name for $lib"