Fix extension test on Windows (#5548)

* Change cpp_extensions.py to make it work on Windows

* Fix linting

* Show python paths

* Debug

* Debug 1

* set PYTHONPATH

* Add ATen into library

* expose essential libs and functions, and copy _C.lib

* Specify dir in header

* Update check_abi for MSVC

* Activate cl environment to compile cpp extensions

* Change version string

* Redirect stderr to stdout

* Add monkey patch for windows

* Remove unnecessary self

* Fix various issues

* Append necessary flags

* add /MD flag to cuda

* Install ninja

* Use THP_API instead of THP_CLASS

* Beautify the paths

* Revert "Use THP_API instead of THP_CLASS"

This reverts commit dd7e74c44db48e4c5f85bb8e3c698ff9de71ba2d.

* Use THP_API instead of THP_CLASS (new)
This commit is contained in:
peterjc123
2018-04-03 01:53:25 +08:00
committed by Edward Z. Yang
parent 605307f8f3
commit 63af898d46
9 changed files with 240 additions and 35 deletions

1
.gitignore vendored
View File

@ -40,6 +40,7 @@ test/.coverage
*/*.so*
*/**/*.so*
*/**/*.dylib*
*/**/*.pyd
test/data/legacy_serialized.pt
test/data/linear.pt
.mypy_cache

View File

@ -60,6 +60,10 @@ curl https://repo.continuum.io/miniconda/Miniconda3-latest-Windows-x86_64.exe -O
call C:\\Jenkins\\Miniconda3\\Scripts\\activate.bat C:\\Jenkins\\Miniconda3
call conda install -y -q numpy mkl cffi pyyaml boto3
pip install ninja
call "C:\\Program Files (x86)\\Microsoft Visual Studio\\2017\\Community\\VC\\Auxiliary\\Build\\vcvarsall.bat" x86_amd64
set PATH=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0\\bin;C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0\\libnvvp;%PATH%
set CUDA_PATH=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0
set CUDA_PATH_V9_0=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0
@ -67,6 +71,7 @@ set NVTOOLSEXT_PATH=C:\\Program Files\\NVIDIA Corporation\\NvToolsExt
set CUDNN_LIB_DIR=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0\\lib\\x64
set CUDA_TOOLKIT_ROOT_DIR=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0
set CUDNN_ROOT_DIR=C:\\Program Files\\NVIDIA GPU Computing Toolkit\\CUDA\\v9.0
set PYTHONPATH=%CD%\\test;%PYTHONPATH%
cd test/

View File

@ -443,6 +443,23 @@ class build_ext(build_ext_parent):
# It's an old-style class in Python 2.7...
setuptools.command.build_ext.build_ext.run(self)
# Copy the essential export library to compile C++ extensions.
if IS_WINDOWS:
build_temp = self.build_temp
ext_filename = self.get_ext_filename('_C')
lib_filename = '.'.join(ext_filename.split('.')[:-1]) + '.lib'
export_lib = os.path.join(
build_temp, 'torch', 'csrc', lib_filename).replace('\\', '/')
build_lib = self.build_lib
target_lib = os.path.join(
build_lib, 'torch', 'lib', '_C.lib').replace('\\', '/')
self.copy_file(export_lib, target_lib)
class build(distutils.command.build.build):
sub_commands = [

View File

@ -32,7 +32,6 @@ TESTS = [
]
WINDOWS_BLACKLIST = [
'cpp_extensions',
'distributed',
]
@ -79,12 +78,28 @@ def test_cpp_extensions(python, test_module, test_directory, options):
python_path = os.environ.get('PYTHONPATH', '')
try:
cpp_extensions = os.path.join(test_directory, 'cpp_extensions')
install_directory = get_shell_output(
"find {}/install -name *-packages".format(cpp_extensions))
if sys.platform == 'win32':
install_directory = os.path.join(cpp_extensions, 'install')
install_directories = get_shell_output(
"where -r \"{}\" *.pyd".format(install_directory)).split('\r\n')
assert install_directories, 'install_directory must not be empty'
if len(install_directories) >= 1:
install_directory = install_directories[0]
install_directory = os.path.dirname(install_directory)
split_char = ';'
else:
install_directory = get_shell_output(
"find {}/install -name *-packages".format(cpp_extensions))
split_char = ':'
assert install_directory, 'install_directory must not be empty'
install_directory = os.path.join(test_directory, install_directory)
os.environ['PYTHONPATH'] = '{}:{}'.format(install_directory,
python_path)
os.environ['PYTHONPATH'] = '{}{}{}'.format(install_directory,
split_char,
python_path)
return run_test(python, test_module, test_directory, options)
finally:
os.environ['PYTHONPATH'] = python_path

View File

@ -1,16 +1,22 @@
#ifndef THP_EXPORT_H
#define THP_EXPORT_H
#ifdef __cplusplus
# define THP_EXTERNC extern "C"
#else
# define THP_EXTERNC extern
#endif
#ifdef _WIN32
# ifdef _THP_CORE
# define THP_API extern "C" __declspec(dllexport)
# define THP_API THP_EXTERNC __declspec(dllexport)
# define THP_CLASS __declspec(dllexport)
# else
# define THP_API extern "C" __declspec(dllimport)
# define THP_API THP_EXTERNC __declspec(dllimport)
# define THP_CLASS __declspec(dllimport)
# endif
#else
# define THP_API extern "C"
# define THP_API THP_EXTERNC
# define THP_CLASS
#endif

View File

@ -2,6 +2,7 @@
#include <exception>
#include <string>
#include "THP_export.h"
namespace torch {
@ -12,7 +13,7 @@ struct assert_error final : public std::exception {
};
[[noreturn]]
void barf(const char *fmt, ...);
THP_CLASS void barf(const char *fmt, ...);
} // namespace torch

View File

@ -20,7 +20,7 @@ struct THPVariable {
THP_API PyObject *THPVariableClass;
bool THPVariable_initModule(PyObject *module);
PyObject * THPVariable_Wrap(torch::autograd::Variable var);
THP_API PyObject * THPVariable_Wrap(torch::autograd::Variable var);
inline bool THPVariable_Check(PyObject *obj)
{

View File

@ -4,6 +4,7 @@
#include <ATen/ATen.h>
#include <pybind11/pybind11.h>
#include <torch/csrc/THP_export.h>
#include <torch/csrc/utils/pybind.h>
namespace torch {
@ -13,19 +14,19 @@ namespace torch {
/// Returns a `Type` object for the given backend (e.g. `at::kCPU`) and
/// `ScalarType` (e.g. `at::kDouble`).
at::Type& getType(at::Backend backend, at::ScalarType type);
THP_CLASS at::Type& getType(at::Backend backend, at::ScalarType type);
/// Returns a `Type` object for the CPU backend and the given `ScalarType`
/// (e.g. `at::kDouble`). Equivalent to `getType(kCPU, type)`.
at::Type& CPU(at::ScalarType type);
THP_CLASS at::Type& CPU(at::ScalarType type);
/// Returns a `Type` object for the CUDA backend and the given `ScalarType`
/// (e.g. `at::kDouble`). Equivalent to `getType(kCUDA, type)`.
at::Type& CUDA(at::ScalarType type);
THP_CLASS at::Type& CUDA(at::ScalarType type);
/// Sets the `requires_grad` property of the given `Tensor`.
void set_requires_grad(at::Tensor& tensor, bool requires_grad) noexcept;
THP_CLASS void set_requires_grad(at::Tensor& tensor, bool requires_grad) noexcept;
/// Returns the `requires_grad` of the given `Tensor`.
bool requires_grad(const at::Tensor& tensor) noexcept;
THP_CLASS bool requires_grad(const at::Tensor& tensor) noexcept;
} // namespace torch

View File

@ -30,7 +30,7 @@ def _find_cuda_home():
# Guess #3
try:
which = 'where' if sys.platform == 'win32' else 'which'
nvcc = subprocess.check_output([which, 'nvcc']).decode()
nvcc = subprocess.check_output([which, 'nvcc']).decode().rstrip('\r\n')
cuda_home = os.path.dirname(os.path.dirname(nvcc))
except Exception:
cuda_home = None
@ -38,6 +38,7 @@ def _find_cuda_home():
MINIMUM_GCC_VERSION = (4, 9)
MINIMUM_MSVC_VERSION = (19, 0, 24215)
ABI_INCOMPATIBILITY_WARNING = '''
Your compiler ({}) may be ABI-incompatible with PyTorch.
Please use a compiler that is ABI-compatible with GCC 4.9 and above.
@ -58,7 +59,8 @@ def check_compiler_abi_compatibility(compiler):
else True.
'''
try:
info = subprocess.check_output('{} --version'.format(compiler).split())
check_cmd = '{}' if sys.platform == 'win32' else '{} --version'
info = subprocess.check_output(check_cmd.format(compiler).split(), stderr=subprocess.STDOUT)
except Exception:
_, error, _ = sys.exc_info()
warnings.warn('Error checking compiler version: {}'.format(error))
@ -75,6 +77,16 @@ def check_compiler_abi_compatibility(compiler):
else:
# Append the detected version for the warning.
compiler = '{} {}'.format(compiler, version.group(0))
elif 'Microsoft' in info:
info = info.decode().lower()
version = re.search(r'(\d+)\.(\d+)\.(\d+)', info)
if version is not None:
major, minor, revision = version.groups()
if (int(major), int(minor), int(revision)) >= MINIMUM_MSVC_VERSION:
return True
else:
# Append the detected version for the warning.
compiler = '{} {}'.format(compiler, version.group(0))
warnings.warn(ABI_INCOMPATIBILITY_WARNING.format(compiler))
return False
@ -103,9 +115,14 @@ class BuildExtension(build_ext):
# Register .cu and .cuh as valid source extensions.
self.compiler.src_extensions += ['.cu', '.cuh']
# Save the original _compile method for later.
original_compile = self.compiler._compile
if self.compiler.compiler_type == 'msvc':
self.compiler._cpp_extensions += ['.cu', '.cuh']
original_compile = self.compiler.compile
original_spawn = self.compiler.spawn
else:
original_compile = self.compiler._compile
def wrap_compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
def unix_wrap_compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
# Copy before we make any modifications.
cflags = copy.deepcopy(extra_postargs)
try:
@ -128,8 +145,64 @@ class BuildExtension(build_ext):
# Put the original compiler back in place.
self.compiler.set_executable('compiler_so', original_compiler)
def win_wrap_compile(sources, output_dir=None, macros=None,
                     include_dirs=None, debug=0, extra_preargs=None,
                     extra_postargs=None, depends=None):
    # Replacement for MSVCCompiler.compile that lets .cu/.cuh sources be
    # compiled with nvcc while regular C++ sources still go through cl.exe.
    # It works by temporarily monkey-patching the compiler's `spawn` hook,
    # which receives the final command line for every source file.
    # NOTE(review): relies on closure variables from the enclosing
    # build_extensions scope (`self`, `original_compile`, `original_spawn`).
    #
    # Stash the caller's extra flags and suppress them in the delegated
    # compile call; they are re-applied per-file inside `spawn` below so
    # that nvcc and cl can receive different flag sets.
    self.cflags = copy.deepcopy(extra_postargs)
    extra_postargs = None

    def spawn(cmd):
        # `cmd` is the full cl.exe command line for one source file.
        orig_cmd = cmd
        # Using regex to match src, obj and include files
        # /Tp <file> or /Tc <file>: the source file being compiled.
        src_regex = re.compile('/T(p|c)(.*)')
        src_list = [m.group(2) for m in (
            src_regex.match(elem) for elem in cmd) if m]

        # /Fo<file>: the object-file output path.
        obj_regex = re.compile('/Fo(.*)')
        obj_list = [m.group(1) for m in (
            obj_regex.match(elem) for elem in cmd) if m]

        # -I/foo or /I/foo: include directories, forwarded verbatim to nvcc.
        include_regex = re.compile(r'((\-|\/)I.*)')
        include_list = [m.group(1) for m in (
            include_regex.match(elem) for elem in cmd) if m]

        if len(src_list) >= 1 and len(obj_list) >= 1:
            src = src_list[0]
            obj = obj_list[0]
            if _is_cuda_file(src):
                # Rebuild the command line from scratch for nvcc.
                # /wd4819 silences a codepage warning; /MD matches the
                # CRT linkage used by the rest of the build.
                nvcc = _join_cuda_home('bin', 'nvcc')
                if isinstance(self.cflags, dict):
                    cflags = self.cflags['nvcc']
                elif isinstance(self.cflags, list):
                    cflags = self.cflags
                else:
                    cflags = []
                cmd = [nvcc, '-c', src, '-o', obj, '-Xcompiler',
                       '/wd4819', '-Xcompiler', '/MD'] + include_list + cflags
            elif isinstance(self.cflags, dict):
                # Plain C++ file: append the stashed cxx flags to cl's
                # original command line.
                cflags = self.cflags['cxx']
                cmd += cflags
            elif isinstance(self.cflags, list):
                cflags = self.cflags
                cmd += cflags

        return original_spawn(cmd)

    try:
        # Install the patched spawn only for the duration of this compile.
        self.compiler.spawn = spawn
        return original_compile(sources,
                                output_dir, macros, include_dirs, debug,
                                extra_preargs, extra_postargs, depends)
    finally:
        # Always restore the original spawn, even if compilation fails.
        self.compiler.spawn = original_spawn
# Monkey-patch the _compile method.
self.compiler._compile = wrap_compile
if self.compiler.compiler_type == 'msvc':
self.compiler.compile = win_wrap_compile
else:
self.compiler._compile = unix_wrap_compile
build_ext.build_extensions(self)
@ -137,6 +210,8 @@ class BuildExtension(build_ext):
# On some platforms, like Windows, compiler_cxx is not available.
if hasattr(self.compiler, 'compiler_cxx'):
compiler = self.compiler.compiler_cxx[0]
elif sys.platform == 'win32':
compiler = os.environ.get('CXX', 'cl')
else:
compiler = os.environ.get('CXX', 'c++')
check_compiler_abi_compatibility(compiler)
@ -178,6 +253,17 @@ def CppExtension(name, sources, *args, **kwargs):
include_dirs = kwargs.get('include_dirs', [])
include_dirs += include_paths()
kwargs['include_dirs'] = include_dirs
if sys.platform == 'win32':
library_dirs = kwargs.get('library_dirs', [])
library_dirs += library_paths()
kwargs['library_dirs'] = library_dirs
libraries = kwargs.get('libraries', [])
libraries.append('ATen')
libraries.append('_C')
kwargs['libraries'] = libraries
kwargs['language'] = 'c++'
return setuptools.Extension(name, sources, *args, **kwargs)
@ -211,11 +297,14 @@ def CUDAExtension(name, sources, *args, **kwargs):
})
'''
library_dirs = kwargs.get('library_dirs', [])
library_dirs.append(_join_cuda_home('lib64'))
library_dirs += library_paths(cuda=True)
kwargs['library_dirs'] = library_dirs
libraries = kwargs.get('libraries', [])
libraries.append('cudart')
if sys.platform == 'win32':
libraries.append('ATen')
libraries.append('_C')
kwargs['libraries'] = libraries
include_dirs = kwargs.get('include_dirs', [])
@ -252,6 +341,31 @@ def include_paths(cuda=False):
return paths
def library_paths(cuda=False):
    '''
    Get the library paths required to build a C++ or CUDA extension.

    Args:
        cuda: If `True`, includes CUDA-specific library paths.

    Returns:
        A list of library path strings.
    '''
    paths = []
    # On Windows the import libraries live next to this module, under
    # <torch root>/lib; other platforms need no extra torch library dir.
    if sys.platform == 'win32':
        torch_root = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        paths.append(os.path.join(torch_root, 'lib'))
    if cuda:
        # CUDA ships its link libraries in lib/x64 on Windows, lib64 elsewhere.
        cuda_lib_dir = 'lib/x64' if sys.platform == 'win32' else 'lib64'
        paths.append(_join_cuda_home(cuda_lib_dir))
    return paths
def load(name,
sources,
extra_cflags=None,
@ -329,13 +443,30 @@ def load(name,
if build_directory is None:
build_directory = _get_build_directory(name, verbose)
extra_ldflags = extra_ldflags or []
if sys.platform == 'win32':
python_path = os.path.dirname(sys.executable)
python_lib_path = os.path.join(python_path, 'libs')
here = os.path.abspath(__file__)
torch_path = os.path.dirname(os.path.dirname(here))
lib_path = os.path.join(torch_path, 'lib')
extra_ldflags.append('ATen.lib')
extra_ldflags.append('_C.lib')
extra_ldflags.append('/LIBPATH:{}'.format(python_lib_path))
extra_ldflags.append('/LIBPATH:{}'.format(lib_path))
with_cuda = any(map(_is_cuda_file, sources))
if with_cuda:
if verbose:
print('Detected CUDA files, patching ldflags')
extra_ldflags = extra_ldflags or []
extra_ldflags.append('-L{}'.format(_join_cuda_home('lib64')))
extra_ldflags.append('-lcudart')
if sys.platform == 'win32':
extra_ldflags.append('/LIBPATH:{}'.format(_join_cuda_home('lib/x64')))
extra_ldflags.append('cudart.lib')
else:
extra_ldflags.append('-L{}'.format(_join_cuda_home('lib64')))
extra_ldflags.append('-lcudart')
build_file_path = os.path.join(build_directory, 'build.ninja')
if verbose:
@ -442,28 +573,45 @@ def _write_ninja_file(path,
common_cflags += ['-I{}'.format(include) for include in includes]
cflags = common_cflags + ['-fPIC', '-std=c++11'] + extra_cflags
if sys.platform == 'win32':
from distutils.spawn import _nt_quote_args
cflags = _nt_quote_args(cflags)
flags = ['cflags = {}'.format(' '.join(cflags))]
if with_cuda:
cuda_flags = common_cflags
cuda_flags += ['--compiler-options', "'-fPIC'"]
cuda_flags += extra_cuda_cflags
if not any(flag.startswith('-std=') for flag in cuda_flags):
cuda_flags.append('-std=c++11')
if sys.platform == 'win32':
cuda_flags = _nt_quote_args(cuda_flags)
else:
cuda_flags += ['--compiler-options', "'-fPIC'"]
cuda_flags += extra_cuda_cflags
if not any(flag.startswith('-std=') for flag in cuda_flags):
cuda_flags.append('-std=c++11')
flags.append('cuda_flags = {}'.format(' '.join(cuda_flags)))
ldflags = ['-shared'] + extra_ldflags
if sys.platform == 'win32':
ldflags = ['/DLL'] + extra_ldflags
else:
ldflags = ['-shared'] + extra_ldflags
# The darwin linker needs explicit consent to ignore unresolved symbols.
if sys.platform == 'darwin':
ldflags.append('-undefined dynamic_lookup')
elif sys.platform == 'win32':
ldflags = _nt_quote_args(ldflags)
flags.append('ldflags = {}'.format(' '.join(ldflags)))
# See https://ninja-build.org/build.ninja.html for reference.
compile_rule = ['rule compile']
compile_rule.append(
' command = $cxx -MMD -MF $out.d $cflags -c $in -o $out')
compile_rule.append(' depfile = $out.d')
compile_rule.append(' deps = gcc')
if sys.platform == 'win32':
compile_rule.append(
' command = cl /showIncludes $cflags -c $in /Fo$out')
compile_rule.append(' deps = msvc')
else:
compile_rule.append(
' command = $cxx -MMD -MF $out.d $cflags -c $in -o $out')
compile_rule.append(' depfile = $out.d')
compile_rule.append(' deps = gcc')
if with_cuda:
cuda_compile_rule = ['rule cuda_compile']
@ -471,7 +619,15 @@ def _write_ninja_file(path,
' command = $nvcc $cuda_flags -c $in -o $out')
link_rule = ['rule link']
link_rule.append(' command = $cxx $ldflags $in -o $out')
if sys.platform == 'win32':
cl_paths = subprocess.check_output(['where', 'cl']).decode().split('\r\n')
if len(cl_paths) >= 1:
cl_path = os.path.dirname(cl_paths[0]).replace(':', '$:')
else:
raise RuntimeError("MSVC is required to load C++ extensions")
link_rule.append(' command = "{}/link.exe" $in /nologo $ldflags /out:$out'.format(cl_path))
else:
link_rule.append(' command = $cxx $ldflags $in -o $out')
# Emit one build rule per source to enable incremental build.
object_files = []
@ -488,9 +644,12 @@ def _write_ninja_file(path,
rule = 'compile'
target = '{}.o'.format(file_name)
object_files.append(target)
if sys.platform == 'win32':
source_file = source_file.replace(':', '$:')
build.append('build {}: {} {}'.format(target, rule, source_file))
library_target = '{}.so'.format(name)
ext = '.pyd' if sys.platform == 'win32' else '.so'
library_target = '{}{}'.format(name, ext)
link = ['build {}: link {}'.format(library_target, ' '.join(object_files))]
default = ['default {}'.format(library_target)]