pytorch/torch/utils/cpp_extension.py
peterjc123 63af898d46 Fix extension test on Windows (#5548)
* Change cpp_extensions.py to make it work on Windows

* Fix linting

* Show python paths

* Debug

* Debug 1

* set PYTHONPATH

* Add ATen into library

* expose essential libs and functions, and copy _C.lib

* Specify dir in header

* Update check_abi for MSVC

* Activate cl environment to compile cpp extensions

* change version string

* Redirect stderr to stdout

* Add monkey patch for windows

* Remove unnecessary self

* Fix various issues

* Append necessary flags

* add /MD flag to cuda

* Install ninja

* Use THP_API instead of THP_CLASS

* Beautify the paths

* Revert "Use THP_API instead of THP_CLASS"

This reverts commit dd7e74c44db48e4c5f85bb8e3c698ff9de71ba2d.

* Use THP_API instead of THP_CLASS(new)
2018-04-02 13:53:25 -04:00
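
For context, a minimal setup.py exercising this machinery might look like the
following (a sketch; the extension name and source file are hypothetical). On
Windows, the build is expected to run in an environment where cl is on the
PATH (e.g. a Visual Studio developer prompt), since BuildExtension defaults to
cl and the JIT path locates link.exe via `where cl`:

    from setuptools import setup
    from torch.utils.cpp_extension import BuildExtension, CppExtension

    setup(
        name='my_extension',
        ext_modules=[
            CppExtension(
                name='my_extension',
                sources=['my_extension.cpp'])
        ],
        # BuildExtension injects the required compiler flags and, for CUDA
        # sources, routes compilation through nvcc.
        cmdclass={'build_ext': BuildExtension})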


import copy
import glob
import imp
import os
import re
import setuptools
import subprocess
import sys
import sysconfig
import tempfile
import warnings

import torch
from setuptools.command.build_ext import build_ext


def _find_cuda_home():
    '''Finds the CUDA install path.'''
    # Guess #1
    cuda_home = os.environ.get('CUDA_HOME') or os.environ.get('CUDA_PATH')
    if cuda_home is None:
        # Guess #2
        if sys.platform == 'win32':
            # glob.glob returns a list; os.path.exists cannot take a list, so
            # pick the first matching toolkit directory (if any).
            cuda_homes = glob.glob(
                'C:/Program Files/NVIDIA GPU Computing Toolkit/CUDA/v*.*')
            cuda_home = cuda_homes[0] if cuda_homes else ''
        else:
            cuda_home = '/usr/local/cuda'
        if not os.path.exists(cuda_home):
            # Guess #3
            try:
                which = 'where' if sys.platform == 'win32' else 'which'
                nvcc = subprocess.check_output(
                    [which, 'nvcc']).decode().rstrip('\r\n')
                cuda_home = os.path.dirname(os.path.dirname(nvcc))
            except Exception:
                cuda_home = None
    return cuda_home


MINIMUM_GCC_VERSION = (4, 9)
MINIMUM_MSVC_VERSION = (19, 0, 24215)
ABI_INCOMPATIBILITY_WARNING = '''
Your compiler ({}) may be ABI-incompatible with PyTorch.
Please use a compiler that is ABI-compatible with GCC 4.9 and above.
See https://gcc.gnu.org/onlinedocs/libstdc++/manual/abi.html.'''

CUDA_HOME = _find_cuda_home() if torch.cuda.is_available() else None


def check_compiler_abi_compatibility(compiler):
    '''
    Verifies that the given compiler is ABI-compatible with PyTorch.

    Arguments:
        compiler (str): The compiler executable name to check (e.g. ``g++``).
            Must be executable in a shell process.

    Returns:
        False if the compiler is (likely) ABI-incompatible with PyTorch,
        else True.
    '''
    try:
        check_cmd = '{}' if sys.platform == 'win32' else '{} --version'
        info = subprocess.check_output(
            check_cmd.format(compiler).split(), stderr=subprocess.STDOUT)
    except Exception:
        _, error, _ = sys.exc_info()
        warnings.warn('Error checking compiler version: {}'.format(error))
    else:
        info = info.decode().lower()
        if 'gcc' in info:
            # Sometimes the version is given as "major.x" instead of semver.
            version = re.search(r'(\d+)\.(\d+|x)', info)
            if version is not None:
                major, minor = version.groups()
                minor = 0 if minor == 'x' else int(minor)
                if (int(major), minor) >= MINIMUM_GCC_VERSION:
                    return True
                else:
                    # Append the detected version for the warning.
                    compiler = '{} {}'.format(compiler, version.group(0))
        # Note: `info` was already decoded and lower-cased above, so match
        # against 'microsoft' (matching 'Microsoft' could never succeed) and
        # do not decode a second time.
        elif 'microsoft' in info:
            version = re.search(r'(\d+)\.(\d+)\.(\d+)', info)
            if version is not None:
                major, minor, revision = version.groups()
                if (int(major), int(minor),
                        int(revision)) >= MINIMUM_MSVC_VERSION:
                    return True
                else:
                    # Append the detected version for the warning.
                    compiler = '{} {}'.format(compiler, version.group(0))

    warnings.warn(ABI_INCOMPATIBILITY_WARNING.format(compiler))
    return False


class BuildExtension(build_ext):
    '''
    A custom :mod:`setuptools` build extension.

    This :class:`setuptools.build_ext` subclass takes care of passing the
    minimum required compiler flags (e.g. ``-std=c++11``) as well as mixed
    C++/CUDA compilation (and support for CUDA files in general).

    When using :class:`BuildExtension`, it is allowed to supply a dictionary
    for ``extra_compile_args`` (rather than the usual list) that maps from
    languages (``cxx`` or ``cuda``) to a list of additional compiler flags to
    supply to the compiler. This makes it possible to supply different flags to
    the C++ and CUDA compiler during mixed compilation.
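
    Example of per-language flags (a sketch; the extension name, sources and
    flags are hypothetical):
        >>> CUDAExtension(
                name='my_extension',
                sources=['my_extension.cpp', 'my_extension_kernel.cu'],
                extra_compile_args={'cxx': ['-g'],
                                    'nvcc': ['-O2']})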
    '''

    def build_extensions(self):
        self._check_abi()
        for extension in self.extensions:
            self._define_torch_extension_name(extension)

        # Register .cu and .cuh as valid source extensions.
        self.compiler.src_extensions += ['.cu', '.cuh']
        # Save the original _compile method for later.
        if self.compiler.compiler_type == 'msvc':
            self.compiler._cpp_extensions += ['.cu', '.cuh']
            original_compile = self.compiler.compile
            original_spawn = self.compiler.spawn
        else:
            original_compile = self.compiler._compile

        def unix_wrap_compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
            # Copy before we make any modifications.
            cflags = copy.deepcopy(extra_postargs)
            try:
                original_compiler = self.compiler.compiler_so
                if _is_cuda_file(src):
                    nvcc = _join_cuda_home('bin', 'nvcc')
                    self.compiler.set_executable('compiler_so', nvcc)
                    if isinstance(cflags, dict):
                        cflags = cflags['nvcc']
                    cflags += ['--compiler-options', "'-fPIC'"]
                elif isinstance(cflags, dict):
                    cflags = cflags['cxx']
                # NVCC does not allow multiple -std to be passed, so we avoid
                # overriding the option if the user explicitly passed it.
                if not any(flag.startswith('-std=') for flag in cflags):
                    cflags.append('-std=c++11')

                original_compile(obj, src, ext, cc_args, cflags, pp_opts)
            finally:
                # Put the original compiler back in place.
                self.compiler.set_executable('compiler_so', original_compiler)
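
        # On Windows, distutils runs a single `compile` call for all sources,
        # so per-file wrapping (as above for Unix) is not possible. Instead we
        # intercept `spawn` and rewrite the command line whenever the file
        # about to be compiled is a CUDA source.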
        def win_wrap_compile(sources,
                             output_dir=None,
                             macros=None,
                             include_dirs=None,
                             debug=0,
                             extra_preargs=None,
                             extra_postargs=None,
                             depends=None):
            self.cflags = copy.deepcopy(extra_postargs)
            extra_postargs = None

            def spawn(cmd):
                # Using regexes to match src, obj and include files.
                src_regex = re.compile('/T(p|c)(.*)')
                src_list = [m.group(2) for m in (
                    src_regex.match(elem) for elem in cmd) if m]

                obj_regex = re.compile('/Fo(.*)')
                obj_list = [m.group(1) for m in (
                    obj_regex.match(elem) for elem in cmd) if m]

                include_regex = re.compile(r'((\-|\/)I.*)')
                include_list = [m.group(1) for m in (
                    include_regex.match(elem) for elem in cmd) if m]

                if len(src_list) >= 1 and len(obj_list) >= 1:
                    src = src_list[0]
                    obj = obj_list[0]
                    if _is_cuda_file(src):
                        nvcc = _join_cuda_home('bin', 'nvcc')
                        if isinstance(self.cflags, dict):
                            cflags = self.cflags['nvcc']
                        elif isinstance(self.cflags, list):
                            cflags = self.cflags
                        else:
                            cflags = []
                        cmd = [nvcc, '-c', src, '-o', obj,
                               '-Xcompiler', '/wd4819',
                               '-Xcompiler', '/MD'] + include_list + cflags
                    elif isinstance(self.cflags, dict):
                        cflags = self.cflags['cxx']
                        cmd += cflags
                    elif isinstance(self.cflags, list):
                        cflags = self.cflags
                        cmd += cflags

                return original_spawn(cmd)

            try:
                self.compiler.spawn = spawn
                return original_compile(sources, output_dir, macros,
                                        include_dirs, debug, extra_preargs,
                                        extra_postargs, depends)
            finally:
                self.compiler.spawn = original_spawn

        # Monkey-patch the compile method.
        if self.compiler.compiler_type == 'msvc':
            self.compiler.compile = win_wrap_compile
        else:
            self.compiler._compile = unix_wrap_compile

        build_ext.build_extensions(self)

    def _check_abi(self):
        # On some platforms, like Windows, compiler_cxx is not available.
        if hasattr(self.compiler, 'compiler_cxx'):
            compiler = self.compiler.compiler_cxx[0]
        elif sys.platform == 'win32':
            compiler = os.environ.get('CXX', 'cl')
        else:
            compiler = os.environ.get('CXX', 'c++')
        check_compiler_abi_compatibility(compiler)
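
    # Each extension is compiled with -DTORCH_EXTENSION_NAME=<name>, so the
    # C++ side can refer to the module name at compile time (the usual
    # pattern being PYBIND11_MODULE(TORCH_EXTENSION_NAME, m)).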
    def _define_torch_extension_name(self, extension):
        define = '-DTORCH_EXTENSION_NAME={}'.format(extension.name)
        if isinstance(extension.extra_compile_args, dict):
            for args in extension.extra_compile_args.values():
                args.append(define)
        else:
            extension.extra_compile_args.append(define)


def CppExtension(name, sources, *args, **kwargs):
    '''
    Creates a :class:`setuptools.Extension` for C++.

    Convenience method that creates a :class:`setuptools.Extension` with the
    bare minimum (but often sufficient) arguments to build a C++ extension.

    All arguments are forwarded to the :class:`setuptools.Extension`
    constructor.

    Example:
        >>> from setuptools import setup
        >>> from torch.utils.cpp_extension import BuildExtension, CppExtension
        >>> setup(
                name='extension',
                ext_modules=[
                    CppExtension(
                        name='extension',
                        sources=['extension.cpp'],
                        extra_compile_args=['-g']),
                ],
                cmdclass={
                    'build_ext': BuildExtension
                })
    '''
    include_dirs = kwargs.get('include_dirs', [])
    include_dirs += include_paths()
    kwargs['include_dirs'] = include_dirs
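
    # On Windows, symbols are not exported from a DLL implicitly, so
    # extensions must link explicitly against the import libraries of the
    # core PyTorch DLLs (ATen.lib and _C.lib, found via library_paths()).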
    if sys.platform == 'win32':
        library_dirs = kwargs.get('library_dirs', [])
        library_dirs += library_paths()
        kwargs['library_dirs'] = library_dirs

        libraries = kwargs.get('libraries', [])
        libraries.append('ATen')
        libraries.append('_C')
        kwargs['libraries'] = libraries

    kwargs['language'] = 'c++'
    return setuptools.Extension(name, sources, *args, **kwargs)


def CUDAExtension(name, sources, *args, **kwargs):
    '''
    Creates a :class:`setuptools.Extension` for CUDA/C++.

    Convenience method that creates a :class:`setuptools.Extension` with the
    bare minimum (but often sufficient) arguments to build a CUDA/C++
    extension. This includes the CUDA include path, library path and runtime
    library.

    All arguments are forwarded to the :class:`setuptools.Extension`
    constructor.

    Example:
        >>> from setuptools import setup
        >>> from torch.utils.cpp_extension import BuildExtension, CUDAExtension
        >>> setup(
                name='cuda_extension',
                ext_modules=[
                    CUDAExtension(
                        name='cuda_extension',
                        sources=['extension.cpp', 'extension_kernel.cu'],
                        extra_compile_args={'cxx': ['-g'],
                                            'nvcc': ['-O2']})
                ],
                cmdclass={
                    'build_ext': BuildExtension
                })
    '''
    library_dirs = kwargs.get('library_dirs', [])
    library_dirs += library_paths(cuda=True)
    kwargs['library_dirs'] = library_dirs

    libraries = kwargs.get('libraries', [])
    libraries.append('cudart')
    if sys.platform == 'win32':
        libraries.append('ATen')
        libraries.append('_C')
    kwargs['libraries'] = libraries

    include_dirs = kwargs.get('include_dirs', [])
    include_dirs += include_paths(cuda=True)
    kwargs['include_dirs'] = include_dirs

    kwargs['language'] = 'c++'
    return setuptools.Extension(name, sources, *args, **kwargs)


def include_paths(cuda=False):
    '''
    Get the include paths required to build a C++ or CUDA extension.

    Args:
        cuda: If `True`, includes CUDA-specific include paths.

    Returns:
        A list of include path strings.
    '''
    here = os.path.abspath(__file__)
    torch_path = os.path.dirname(os.path.dirname(here))
    lib_include = os.path.join(torch_path, 'lib', 'include')
    # Some internal (old) Torch headers don't properly prefix their includes,
    # so we need to pass -Itorch/lib/include/TH as well.
    paths = [
        lib_include,
        os.path.join(lib_include, 'TH'),
        os.path.join(lib_include, 'THC')
    ]
    if cuda:
        paths.append(_join_cuda_home('include'))
    return paths


def library_paths(cuda=False):
    '''
    Get the library paths required to build a C++ or CUDA extension.

    Args:
        cuda: If `True`, includes CUDA-specific library paths.

    Returns:
        A list of library path strings.
    '''
    paths = []
    if sys.platform == 'win32':
        here = os.path.abspath(__file__)
        torch_path = os.path.dirname(os.path.dirname(here))
        lib_path = os.path.join(torch_path, 'lib')
        paths.append(lib_path)
    if cuda:
        lib_dir = 'lib/x64' if sys.platform == 'win32' else 'lib64'
        paths.append(_join_cuda_home(lib_dir))
    return paths


def load(name,
         sources,
         extra_cflags=None,
         extra_cuda_cflags=None,
         extra_ldflags=None,
         extra_include_paths=None,
         build_directory=None,
         verbose=False):
    '''
    Loads a PyTorch C++ extension just-in-time (JIT).

    To load an extension, a Ninja build file is emitted, which is used to
    compile the given sources into a dynamic library. This library is
    subsequently loaded into the current Python process as a module and
    returned from this function, ready for use.

    By default, the directory to which the build file is emitted and the
    resulting library compiled to is ``<tmp>/torch_extensions/<name>``, where
    ``<tmp>`` is the temporary folder on the current platform and ``<name>``
    the name of the extension. This location can be overridden in two ways.
    First, if the ``TORCH_EXTENSIONS_DIR`` environment variable is set, it
    replaces ``<tmp>/torch_extensions`` and all extensions will be compiled
    into subfolders of this directory. Second, if the ``build_directory``
    argument to this function is supplied, it overrides the entire path, i.e.
    the library will be compiled into that folder directly.

    To compile the sources, the default system compiler (``c++``) is used,
    which can be overridden by setting the ``CXX`` environment variable. To
    pass additional arguments to the compilation process, ``extra_cflags`` or
    ``extra_ldflags`` can be provided. For example, to compile your extension
    with optimizations, pass ``extra_cflags=['-O3']``. You can also use
    ``extra_cflags`` to pass further include directories.

    CUDA support with mixed compilation is provided. Simply pass CUDA source
    files (``.cu`` or ``.cuh``) along with other sources. Such files will be
    detected and compiled with nvcc rather than the C++ compiler. This includes
    passing the CUDA lib64 directory as a library directory, and linking
    ``cudart``. You can pass additional flags to nvcc via
    ``extra_cuda_cflags``, just like with ``extra_cflags`` for C++. Various
    heuristics for finding the CUDA install directory are used, which usually
    work fine. If not, setting the ``CUDA_HOME`` environment variable is the
    safest option.

    Args:
        name: The name of the extension to build. This MUST be the same as the
            name of the pybind11 module!
        sources: A list of relative or absolute paths to C++ source files.
        extra_cflags: optional list of compiler flags to forward to the build.
        extra_cuda_cflags: optional list of compiler flags to forward to nvcc
            when building CUDA sources.
        extra_ldflags: optional list of linker flags to forward to the build.
        extra_include_paths: optional list of include directories to forward
            to the build.
        build_directory: optional path to use as build workspace.
        verbose: If ``True``, turns on verbose logging of load steps.

    Returns:
        The loaded PyTorch extension as a Python module.

    Example:
        >>> from torch.utils.cpp_extension import load
        >>> module = load(
                name='extension',
                sources=['extension.cpp', 'extension_kernel.cu'],
                extra_cflags=['-O2'],
                verbose=True)
    '''
    verify_ninja_availability()

    # Allows sources to be a single path or a list of paths.
    if isinstance(sources, str):
        sources = [sources]

    if build_directory is None:
        build_directory = _get_build_directory(name, verbose)

    extra_ldflags = extra_ldflags or []
    if sys.platform == 'win32':
        python_path = os.path.dirname(sys.executable)
        python_lib_path = os.path.join(python_path, 'libs')

        here = os.path.abspath(__file__)
        torch_path = os.path.dirname(os.path.dirname(here))
        lib_path = os.path.join(torch_path, 'lib')

        extra_ldflags.append('ATen.lib')
        extra_ldflags.append('_C.lib')
        extra_ldflags.append('/LIBPATH:{}'.format(python_lib_path))
        extra_ldflags.append('/LIBPATH:{}'.format(lib_path))

    with_cuda = any(map(_is_cuda_file, sources))
    if with_cuda:
        if verbose:
            print('Detected CUDA files, patching ldflags')
        if sys.platform == 'win32':
            extra_ldflags.append('/LIBPATH:{}'.format(
                _join_cuda_home('lib/x64')))
            extra_ldflags.append('cudart.lib')
        else:
            extra_ldflags.append('-L{}'.format(_join_cuda_home('lib64')))
            extra_ldflags.append('-lcudart')

    build_file_path = os.path.join(build_directory, 'build.ninja')
    if verbose:
        print('Emitting ninja build file {}...'.format(build_file_path))
    # NOTE: Emitting a new ninja build file does not cause re-compilation if
    # the sources did not change, so it's ok to re-emit (and it's fast).
    _write_ninja_file(
        path=build_file_path,
        name=name,
        sources=sources,
        extra_cflags=extra_cflags or [],
        extra_cuda_cflags=extra_cuda_cflags or [],
        extra_ldflags=extra_ldflags or [],
        extra_include_paths=extra_include_paths or [],
        with_cuda=with_cuda)

    if verbose:
        print('Building extension module {}...'.format(name))
    _build_extension_module(name, build_directory)

    if verbose:
        print('Loading extension module {}...'.format(name))
    return _import_module_from_library(name, build_directory)


def verify_ninja_availability():
    '''
    Raises ``RuntimeError`` if the `ninja <https://ninja-build.org/>`_ build
    system is not available on the system.
    '''
    with open(os.devnull, 'wb') as devnull:
        try:
            subprocess.check_call('ninja --version'.split(), stdout=devnull)
        except OSError:
            raise RuntimeError("Ninja is required to load C++ extensions")


def _get_build_directory(name, verbose):
    root_extensions_directory = os.environ.get('TORCH_EXTENSIONS_DIR')
    if root_extensions_directory is None:
        # tempfile.gettempdir() will be /tmp on UNIX and \TEMP on Windows.
        root_extensions_directory = os.path.join(tempfile.gettempdir(),
                                                 'torch_extensions')

    if verbose:
        print('Using {} as PyTorch extensions root...'.format(
            root_extensions_directory))

    build_directory = os.path.join(root_extensions_directory, name)
    if not os.path.exists(build_directory):
        if verbose:
            print('Creating extension directory {}...'.format(build_directory))
        # This is like mkdir -p, i.e. will also create parent directories.
        os.makedirs(build_directory)

    return build_directory


def _build_extension_module(name, build_directory):
    try:
        subprocess.check_output(
            ['ninja', '-v'], stderr=subprocess.STDOUT, cwd=build_directory)
    except subprocess.CalledProcessError:
        # Python 2 and 3 compatible way of getting the error object.
        _, error, _ = sys.exc_info()
        # error.output contains the stdout and stderr of the build attempt.
        raise RuntimeError("Error building extension '{}': {}".format(
            name, error.output.decode()))


def _import_module_from_library(module_name, path):
    # https://stackoverflow.com/questions/67631/how-to-import-a-module-given-the-full-path
    file, path, description = imp.find_module(module_name, [path])
    # Close the .so file after load.
    with file:
        return imp.load_module(module_name, file, path, description)


def _write_ninja_file(path,
                      name,
                      sources,
                      extra_cflags,
                      extra_cuda_cflags,
                      extra_ldflags,
                      extra_include_paths,
                      with_cuda=False):
    # Version 1.3 is required for the `deps` directive.
    config = ['ninja_required_version = 1.3']
    config.append('cxx = {}'.format(os.environ.get('CXX', 'c++')))
    if with_cuda:
        config.append('nvcc = {}'.format(_join_cuda_home('bin', 'nvcc')))

    # Turn into absolute paths so we can emit them into the ninja build
    # file wherever it is.
    sources = [os.path.abspath(file) for file in sources]
    includes = [os.path.abspath(file) for file in extra_include_paths]

    # include_paths() gives us the location of torch/torch.h
    includes += include_paths(cuda=with_cuda)
    # sysconfig.get_paths()['include'] gives us the location of Python.h
    includes.append(sysconfig.get_paths()['include'])

    common_cflags = ['-DTORCH_EXTENSION_NAME={}'.format(name)]
    common_cflags += ['-I{}'.format(include) for include in includes]

    cflags = common_cflags + ['-fPIC', '-std=c++11'] + extra_cflags
    if sys.platform == 'win32':
        from distutils.spawn import _nt_quote_args
        cflags = _nt_quote_args(cflags)
    flags = ['cflags = {}'.format(' '.join(cflags))]

    if with_cuda:
        cuda_flags = common_cflags
        if sys.platform == 'win32':
            cuda_flags = _nt_quote_args(cuda_flags)
        else:
            cuda_flags += ['--compiler-options', "'-fPIC'"]
        cuda_flags += extra_cuda_cflags
        if not any(flag.startswith('-std=') for flag in cuda_flags):
            cuda_flags.append('-std=c++11')
        flags.append('cuda_flags = {}'.format(' '.join(cuda_flags)))

    if sys.platform == 'win32':
        ldflags = ['/DLL'] + extra_ldflags
    else:
        ldflags = ['-shared'] + extra_ldflags
    # The darwin linker needs explicit consent to ignore unresolved symbols.
    if sys.platform == 'darwin':
        ldflags.append('-undefined dynamic_lookup')
    elif sys.platform == 'win32':
        ldflags = _nt_quote_args(ldflags)
    flags.append('ldflags = {}'.format(' '.join(ldflags)))

    # See https://ninja-build.org/build.ninja.html for reference.
    compile_rule = ['rule compile']
    if sys.platform == 'win32':
        compile_rule.append(
            ' command = cl /showIncludes $cflags -c $in /Fo$out')
        compile_rule.append(' deps = msvc')
    else:
        compile_rule.append(
            ' command = $cxx -MMD -MF $out.d $cflags -c $in -o $out')
        compile_rule.append(' depfile = $out.d')
        compile_rule.append(' deps = gcc')
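
    # In both branches the `deps` directive tells ninja how to collect header
    # dependencies (MSVC's /showIncludes output vs. a gcc-style depfile), so
    # editing a header triggers recompilation of the sources that include it.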

    if with_cuda:
        cuda_compile_rule = ['rule cuda_compile']
        cuda_compile_rule.append(
            ' command = $nvcc $cuda_flags -c $in -o $out')

    link_rule = ['rule link']
    if sys.platform == 'win32':
        cl_paths = subprocess.check_output(
            ['where', 'cl']).decode().split('\r\n')
        if len(cl_paths) >= 1:
            cl_path = os.path.dirname(cl_paths[0]).replace(':', '$:')
        else:
            raise RuntimeError("MSVC is required to load C++ extensions")
        link_rule.append(
            ' command = "{}/link.exe" $in /nologo $ldflags /out:$out'.format(
                cl_path))
    else:
        link_rule.append(' command = $cxx $ldflags $in -o $out')

    # Emit one build rule per source to enable incremental build.
    object_files = []
    build = []
    for source_file in sources:
        # '/path/to/file.cpp' -> 'file'
        file_name = os.path.splitext(os.path.basename(source_file))[0]
        if _is_cuda_file(source_file):
            rule = 'cuda_compile'
            # Use a different object filename in case a C++ and CUDA file have
            # the same filename but different extension (.cpp vs. .cu).
            target = '{}.cuda.o'.format(file_name)
        else:
            rule = 'compile'
            target = '{}.o'.format(file_name)
        object_files.append(target)
        if sys.platform == 'win32':
            source_file = source_file.replace(':', '$:')
        build.append('build {}: {} {}'.format(target, rule, source_file))

    ext = '.pyd' if sys.platform == 'win32' else '.so'
    library_target = '{}{}'.format(name, ext)
    link = ['build {}: link {}'.format(library_target,
                                       ' '.join(object_files))]

    default = ['default {}'.format(library_target)]

    # 'Blocks' should be separated by newlines, for visual benefit.
    blocks = [config, flags, compile_rule]
    if with_cuda:
        blocks.append(cuda_compile_rule)
    blocks += [link_rule, build, link, default]
    with open(path, 'w') as build_file:
        for block in blocks:
            lines = '\n'.join(block)
            build_file.write('{}\n\n'.format(lines))
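
# For reference, the emitted build.ninja has roughly the following shape (a
# sketch for a single C++ source named extension.cpp on Linux):
#
#   ninja_required_version = 1.3
#   cxx = c++
#
#   cflags = -DTORCH_EXTENSION_NAME=extension -I... -fPIC -std=c++11
#   ldflags = -shared
#
#   rule compile
#     command = $cxx -MMD -MF $out.d $cflags -c $in -o $out
#     depfile = $out.d
#     deps = gcc
#
#   rule link
#     command = $cxx $ldflags $in -o $out
#
#   build extension.o: compile /absolute/path/to/extension.cpp
#   build extension.so: link extension.o
#   default extension.so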


def _join_cuda_home(*paths):
    '''
    Joins paths with CUDA_HOME, or raises an error if CUDA_HOME is not set.

    This is basically a lazy way of raising an error for missing $CUDA_HOME
    only once we need to get any CUDA-specific path.
    '''
    if CUDA_HOME is None:
        raise EnvironmentError('CUDA_HOME environment variable is not set. '
                               'Please set it to your CUDA install root.')
    return os.path.join(CUDA_HOME, *paths)


def _is_cuda_file(path):
    return os.path.splitext(path)[1] in ['.cu', '.cuh']