Compare commits

...

17 Commits

SHA1 Message Date
a24163a95e fix lint 2018-07-25 17:28:04 -07:00
f08f222db3 add _GLIBCXX_USE_CXX11_ABI=0 to cpp_extensions when a binary-built PyTorch is detected 2018-07-25 17:22:33 -07:00
8f916179f8 fix for cpp_extensions TEST_CUDNN logic 2018-07-25 18:48:44 -04:00
7b7e6dbfa7 ATen tests are failing on CUDA 9.2; disable compiling them by setting ATEN_NO_TEST to ON 2018-07-25 10:17:23 -07:00
84b8c1c357 fix cffi tests under the CUDA setting 2018-07-24 21:21:48 -07:00
b595c3e9ca skip cudnn cpp_extension tests if cudnn header not found in CUDA_HOME 2018-07-24 21:21:31 -07:00
6ecc275272 build fixes for static cuda linkage 2018-07-24 21:21:24 -07:00
f34528a723 Revert "Fix dataloader hang when it is not completely iterated (#9655)"
This reverts commit 9ee513365121cd387e11987c66db6599ac53ded7.
2018-07-24 22:52:47 -04:00
2edf053549 Fix dataloader hang when it is not completely iterated (#9655)
Summary:
second trial of https://github.com/pytorch/pytorch/pull/7140

cc csarofeen Let's see if this works. It passes everything locally.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9655

Differential Revision: D8940177

Pulled By: SsnL

fbshipit-source-id: 8d6340fc9f7355c71e1e26b262da166402faa158
2018-07-22 23:54:05 -04:00
76c16a5a64 Fixed a missing '=' in LPPoolNd repr function (#9629)
Summary:
In the repr function of the LPPoolNd(..) class, there was a missing '=' (`kernel_size{kernel_size}`).

Link to line in the code: https://github.com/pytorch/pytorch/blob/master/torch/nn/modules/pooling.py#L694

Original:

       return 'norm_type={norm_type}, kernel_size{kernel_size}, stride={stride}, ' \
              'ceil_mode={ceil_mode}'.format(**self.__dict__)

Fixed:

       return 'norm_type={norm_type}, kernel_size={kernel_size}, stride={stride}, ' \
              'ceil_mode={ceil_mode}'.format(**self.__dict__)
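With the fix, every keyword in the repr renders as `name=value`. A quick sanity check (a sketch; note that `stride` defaults to `kernel_size` when omitted):
```py
import torch.nn as nn

# Before the fix this printed "... kernel_size3 ..."; after it, the
# missing '=' is restored:
print(nn.LPPool2d(norm_type=2, kernel_size=3))
# LPPool2d(norm_type=2, kernel_size=3, stride=3, ceil_mode=False)
```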
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9629

Differential Revision: D8932913

Pulled By: soumith

fbshipit-source-id: 9030dff6b14659b5c7b6992d87ef53ec8891f674
2018-07-22 11:57:01 -04:00
f6fac92692 Fix integral type dispatch error message (#9625)
Summary:
This fix will prevent errors like (found in `bincount`)
```
RuntimeError: %s not implemented for '%s'bincounttorch.FloatTensor
```
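The garbling happens because the variadic `AT_ERROR` concatenates its arguments rather than interpolating printf-style placeholders, so the literal `"%s not implemented for '%s'"` and the two would-be arguments were simply glued together. A minimal Python analogy of the concatenating behavior (the `at_error` helper here is hypothetical, not the real C++ macro):
```py
def at_error(*args):
    # Like the variadic AT_ERROR: stringify and join, no %-interpolation.
    raise RuntimeError("".join(str(a) for a in args))

# Old call shape -> "%s not implemented for '%s'bincounttorch.FloatTensor"
# New call shape builds the message from plain fragments instead:
at_error("bincount", " not implemented for '", "torch.FloatTensor", "'")
# RuntimeError: bincount not implemented for 'torch.FloatTensor'
```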
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9625

Differential Revision: D8932945

Pulled By: soumith

fbshipit-source-id: 794e3b58d662779402ab318e274661826a5db8b2
2018-07-22 11:57:01 -04:00
bb60c97805 Add scatter_add_ doc (#9630)
Summary:
fixes #4176 cc vishwakftw

I didn't do `:math:` and `\neg` because I am using double ticks so they render more similarly to `:attr:`.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9630

Differential Revision: D8933022

Pulled By: SsnL

fbshipit-source-id: 31d8551f415b624c2ff66b25d886f20789846508
2018-07-22 11:57:01 -04:00
886a367247 docs fixes (#9607)
Summary:
fixes #9589 #9507 #9502 #9390
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9607

Reviewed By: ezyang, soumith

Differential Revision: D8923575

Pulled By: SsnL

fbshipit-source-id: cb61d990333b700d813ce781040c3d0325999b8c
2018-07-22 11:57:01 -04:00
416c8ef1d1 Use int64_t for im2col and col2im (#9590)
Summary:
Fixes #9404
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9590

Differential Revision: D8916020

Pulled By: SsnL

fbshipit-source-id: ac6758326bbb09b48642b149f4eb8f466ef7044e
2018-07-22 11:56:57 -04:00
2fbbe42a30 Use _six for inf and nan (#9500)
Summary:
Things like `float('inf')` are actually quite expensive.
```py
In [1]: import math

In [2]: %timeit -n 200 math.inf
49.3 ns ± 1.42 ns per loop (mean ± std. dev. of 7 runs, 200 loops each)

In [3]: %timeit -n 200 float('inf')
194 ns ± 39.1 ns per loop (mean ± std. dev. of 7 runs, 200 loops each)
```
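The saving comes from replacing string parsing with a cached attribute lookup; `torch._six` re-exports the constants, so call sites import them once (a sketch of the pattern, assuming the 0.4.1-era `torch._six` module):
```py
# Instead of constructing the constant on every call ...
x = float('inf')

# ... import the cached float object once and reuse it:
from torch._six import inf, nan
x = inf
```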
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9500

Reviewed By: soumith

Differential Revision: D8876229

Pulled By: SsnL

fbshipit-source-id: 78602b76bb53d5588910b58270930c0bd413d2d7
2018-07-22 11:56:45 -04:00
f07e550b08 Fix CUDA 8 build on Windows (#9618)
* Remove CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS and Fix CUDA 8 build on Windows

* Add symbols 1

* Add symbols 2

* Add symbols 3

* Add symbols 4

* Add symbols 5

* Add symbols 6

* Finalize changes

* Fix lint

* Export GetEmptyStringAlreadyInited

* Remove trailing whitespace

* Minor fixes
2018-07-20 08:21:05 -04:00
3684cc4e52 cherry pick #9500 and #9590 into 0.4.1 (#9599)
* Use _six for inf and nan (#9500)

Summary:
Things like `float('inf')` are actually quite expensive.
```py
In [1]: import math

In [2]: %timeit -n 200 math.inf
49.3 ns ± 1.42 ns per loop (mean ± std. dev. of 7 runs, 200 loops each)

In [3]: %timeit -n 200 float('inf')
194 ns ± 39.1 ns per loop (mean ± std. dev. of 7 runs, 200 loops each)
```
Pull Request resolved: https://github.com/pytorch/pytorch/pull/9500

Reviewed By: soumith

Differential Revision: D8876229

Pulled By: SsnL

fbshipit-source-id: 78602b76bb53d5588910b58270930c0bd413d2d7

* use int64_t for im2col
2018-07-19 17:33:33 -04:00
60 changed files with 542 additions and 424 deletions

View File

@@ -151,10 +151,6 @@ endif()
 # ---[ CMake scripts + modules
 list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
-if (MSVC AND ${BUILD_SHARED_LIBS})
-  set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
-endif()
 # ---[ CMake build directories
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)

View File

@@ -13,7 +13,7 @@ else()
 cmake_dependent_option(
     USE_CUDNN "Use cuDNN" ON
     "USE_CUDA" OFF)
-option(ATEN_NO_TEST "Do not build ATen test binaries" OFF)
+option(ATEN_NO_TEST "Do not build ATen test binaries" ON)
 # Flag for shared dependencies
 set(BUILD_ATEN ON)

View File

@@ -1,4 +1,5 @@
 #include <ATen/optional.h>
+#include <ATen/Backtrace.h>
 #include <functional>
 #include <memory>

View File

@@ -4,9 +4,11 @@
 #include <string>
 #include <typeinfo>
+#include <ATen/ATenGeneral.h>
 namespace at {
 /// Utility to demangle a C++ symbol name.
-std::string demangle(const char* name);
+AT_API std::string demangle(const char* name);
 /// Returns the printable name of the type.
 template <typename T>
@@ -19,7 +21,7 @@ inline const char* demangle_type() {
 #endif // __GXX_RTTI
 }
-std::string get_backtrace(
+AT_API std::string get_backtrace(
     size_t frames_to_skip = 0,
     size_t maximum_number_of_frames = 64,
     bool skip_python_frames = true);

View File

@@ -250,6 +250,7 @@ IF(USE_CUDA AND NOT USE_ROCM)
   ENDIF(USE_MAGMA)
   IF ($ENV{ATEN_STATIC_CUDA})
     list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a")
+    list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcudart_static.a")
   ENDIF($ENV{ATEN_STATIC_CUDA})
 ENDIF()
@@ -405,11 +406,11 @@ ENDFOREACH()
 INSTALL(FILES ${CMAKE_BINARY_DIR}/aten/src/ATen/Declarations.yaml
   DESTINATION ${AT_INSTALL_SHARE_DIR}/ATen)
-if(ATEN_NO_TEST)
-  message("disable test because ATEN_NO_TEST is set")
-else()
-  add_subdirectory(test)
-endif()
+# if(ATEN_NO_TEST)
+#   message("disable test because ATEN_NO_TEST is set")
+# else()
+#   add_subdirectory(test)
+# endif()
 if (NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
   foreach(test_src ${ATen_CPU_TEST_SRCS})

View File

@@ -3,6 +3,8 @@
 #include <cstdint>
 #include <utility>
+#include <ATen/ATenGeneral.h>
 /*
  * A CUDA stream interface with no CUDA build dependency.
  *
@@ -25,27 +27,27 @@ namespace detail {
 // Pointer-based API (for internal use)
 // Note: ATen/Context is preferred to work with streams safely
-CUDAStreamInternals* CUDAStream_getDefaultStreamOnDevice(int64_t device);
-CUDAStreamInternals* CUDAStream_getDefaultStream();
-CUDAStreamInternals* CUDAStream_createAndRetainWithOptions(int32_t flags, int32_t priority);
-CUDAStreamInternals* CUDAStream_getAndRetainCurrentStreamOnDevice(int64_t device);
-CUDAStreamInternals* CUDAStream_getAndRetainCurrentStream();
+AT_API CUDAStreamInternals* CUDAStream_getDefaultStreamOnDevice(int64_t device);
+AT_API CUDAStreamInternals* CUDAStream_getDefaultStream();
+AT_API CUDAStreamInternals* CUDAStream_createAndRetainWithOptions(int32_t flags, int32_t priority);
+AT_API CUDAStreamInternals* CUDAStream_getAndRetainCurrentStreamOnDevice(int64_t device);
+AT_API CUDAStreamInternals* CUDAStream_getAndRetainCurrentStream();
 // Note: these Unsafe gets should NEVER be used and are only here for legacy
 // purposes. Once those uses are gone they should be removed.
-CUDAStreamInternals* CUDAStream_getCurrentStreamOnDeviceUnsafe(int64_t device);
-CUDAStreamInternals* CUDAStream_getCurrentStreamUnsafe();
-void CUDAStream_setStreamOnDevice(int64_t device, CUDAStreamInternals* internals);
-void CUDAStream_setStream(CUDAStreamInternals* internals);
-cudaStream_t CUDAStream_stream(CUDAStreamInternals*);
-int64_t CUDAStream_device(CUDAStreamInternals*);
-bool CUDAStream_retain(CUDAStreamInternals*);
-void CUDAStream_free(CUDAStreamInternals*&);
+AT_API CUDAStreamInternals* CUDAStream_getCurrentStreamOnDeviceUnsafe(int64_t device);
+AT_API CUDAStreamInternals* CUDAStream_getCurrentStreamUnsafe();
+AT_API void CUDAStream_setStreamOnDevice(int64_t device, CUDAStreamInternals* internals);
+AT_API void CUDAStream_setStream(CUDAStreamInternals* internals);
+AT_API cudaStream_t CUDAStream_stream(CUDAStreamInternals*);
+AT_API int64_t CUDAStream_device(CUDAStreamInternals*);
+AT_API bool CUDAStream_retain(CUDAStreamInternals*);
+AT_API void CUDAStream_free(CUDAStreamInternals*&);
 } // namespace detail
@@ -64,10 +66,10 @@ struct CUDAStream {
   ~CUDAStream() { detail::CUDAStream_free(internals_); }
   // Copy constructor
-  CUDAStream(const CUDAStream& other);
+  AT_API CUDAStream(const CUDAStream& other);
   // Move constructor
-  CUDAStream(CUDAStream&& other);
+  AT_API CUDAStream(CUDAStream&& other);
   // Assignment operator
   CUDAStream& operator=(CUDAStream other) {

View File

@@ -111,8 +111,8 @@ struct Device {
 };
 } // namespace at
-std::ostream& operator<<(std::ostream& stream, at::Device::Type type);
-std::ostream& operator<<(std::ostream& stream, const at::Device& device);
+AT_API std::ostream& operator<<(std::ostream& stream, at::Device::Type type);
+AT_API std::ostream& operator<<(std::ostream& stream, const at::Device& device);
 namespace std {
 template<> struct hash<at::Device>

View File

@@ -43,7 +43,7 @@
     AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__)  \
     AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__) \
     default: \
-      AT_ERROR("%s not implemented for '%s'", (NAME), the_type.toString()); \
+      AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
   } \
 }()

View File

@@ -35,8 +35,8 @@ namespace at {
 namespace detail {
-float halfbits2float(unsigned short bits);
-unsigned short float2halfbits(float value);
+AT_API float halfbits2float(unsigned short bits);
+AT_API unsigned short float2halfbits(float value);
 }

View File

@@ -33,6 +33,8 @@
 #include <type_traits>
 #include <utility>
+#include <ATen/ATenGeneral.h>
 #if __GNUG__ && __GNUC__ < 5
 #define AT_IS_TRIVIALLY_COPYABLE(T) __has_trivial_copy(T)
 #else
@@ -57,7 +59,7 @@ static inline uint64_t NextPowerOf2(uint64_t A) {
 }
 /// This is all the non-templated stuff common to all SmallVectors.
-class SmallVectorBase {
+class AT_API SmallVectorBase {
 protected:
   void *BeginX, *EndX, *CapacityX;

View File

@@ -5,7 +5,7 @@
 #include "ATen/Error.h"
 namespace at {
-struct SparseTensorImpl : public TensorImpl {
+struct AT_API SparseTensorImpl : public TensorImpl {
   // Stored in COO format, indices + values.
   // Ideal INVARIANTS:

View File

@@ -19,7 +19,7 @@ namespace at {
 /// `torch::TensorOptions` subclass of this `TensorOptions`, which changes
 /// `type()` to return a variable type instead of a tensor type, such that
 /// variables are created inside factory methods, instead of tensors.
-struct TensorOptions {
+struct AT_API TensorOptions {
   TensorOptions() : TensorOptions(/*use_thread_local_default_options=*/true) {}
   /// Constructs the `TensorOptions` with defaults taken from the thread local

View File

@@ -143,7 +143,7 @@ static inline ${return_type} ${api_name}(${formals}) {
 """)
 # add a native declaration for a native function
 NATIVE_DECLARATION = CodeTemplate("""\
-${return_type} ${native_type_method_dispatch}(${formals_with_defaults});
+AT_API ${return_type} ${native_type_method_dispatch}(${formals_with_defaults});
 """)
 # special method definition for factory functions in Functions.h

View File

@@ -35,11 +35,14 @@
 #ifdef _WIN32
 # if defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)
 #  define TH_API TH_EXTERNC __declspec(dllexport)
+#  define TH_CPP_API extern __declspec(dllexport)
 # else
 #  define TH_API TH_EXTERNC __declspec(dllimport)
+#  define TH_CPP_API extern __declspec(dllimport)
 # endif
 #else
 # define TH_API TH_EXTERNC
+# define TH_CPP_API extern
 #endif
 #ifdef _WIN32

View File

@@ -69,18 +69,18 @@ TH_API THStorage* THStorage_newWithSize(at::ScalarType scalar_type, ptrdiff_t si
 TH_API THStorage* THStorage_newWithAllocator(at::ScalarType scalar_type, ptrdiff_t size,
                                              at::Allocator *allocator);
-ptrdiff_t THStorage_size(const THStorage *self);
-size_t THStorage_elementSize();
-THStorage* THStorage_newWithMapping(at::ScalarType scalar_type, const char *filename, ptrdiff_t size, int flags);
-void THStorage_setFlag(THStorage *storage, const char flag);
-void THStorage_clearFlag(THStorage *storage, const char flag);
-void THStorage_retain(THStorage *storage);
-THStorage* THStorage_newWithDataAndAllocator(at::ScalarType scalar_type,
+TH_API ptrdiff_t THStorage_size(const THStorage *self);
+TH_API size_t THStorage_elementSize();
+TH_API THStorage* THStorage_newWithMapping(at::ScalarType scalar_type, const char *filename, ptrdiff_t size, int flags);
+TH_API void THStorage_setFlag(THStorage *storage, const char flag);
+TH_API void THStorage_clearFlag(THStorage *storage, const char flag);
+TH_API void THStorage_retain(THStorage *storage);
+TH_API THStorage* THStorage_newWithDataAndAllocator(at::ScalarType scalar_type,
                                              at::DataPtr&& data, ptrdiff_t size,
                                              at::Allocator* allocator);
-void THStorage_resize(THStorage *storage, ptrdiff_t size);
-void THStorage_swap(THStorage *storage1, THStorage *storage2);
-void THStorage_weakRetain(THStorage *weak_storage);
-void THStorage_weakFree(THStorage *weak_storage);
-THStorage* THStorage_weakLock(THStorage *weak_storage);
+TH_API void THStorage_resize(THStorage *storage, ptrdiff_t size);
+TH_API void THStorage_swap(THStorage *storage1, THStorage *storage2);
+TH_API void THStorage_weakRetain(THStorage *weak_storage);
+TH_API void THStorage_weakFree(THStorage *weak_storage);
+TH_API THStorage* THStorage_weakLock(THStorage *weak_storage);

View File

@@ -83,5 +83,5 @@ struct THTensor
 #include "THGenerateAllTypes.h"
 TH_API void THTensor_free(THTensor *self);
-at::optional<std::vector<int64_t>> THTensor_compute_stride(at::IntList oldshape, at::IntList oldstride,
+TH_CPP_API at::optional<std::vector<int64_t>> THTensor_compute_stride(at::IntList oldshape, at::IntList oldstride,
                                                            at::IntList newshape);

View File

@@ -6,9 +6,9 @@ static inline void THNN_(Col2Im_shapeCheck)(
     THCState *state,
     THCTensor *input,
     THCTensor *gradOutput,
-    int outputHeight, int outputWidth,
-    int kH, int kW, int dH, int dW,
-    int padH, int padW, int sH, int sW) {
+    int64_t outputHeight, int64_t outputWidth,
+    int64_t kH, int64_t kW, int64_t dH, int64_t dW,
+    int64_t padH, int64_t padW, int64_t sH, int64_t sW) {
   THArgCheck(kW > 0 && kH > 0, 6,
              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
@@ -17,7 +17,7 @@ static inline void THNN_(Col2Im_shapeCheck)(
   THArgCheck(dW > 0 && dH > 0, 8,
              "dilation should be greater than zero, but got dH: %d dW: %d", dH, dW);
-  int ndim = THCTensor_(nDimension)(state, input);
+  int64_t ndim = THCTensor_(nDimension)(state, input);
   THCUNN_argCheck(state, !input->is_empty() && (ndim == 2 || ndim == 3), 2, input,
                   "Expected non-empty 2D or 3D input tensor, but got input of shape %s");
@@ -54,11 +54,11 @@ void THNN_(Col2Im_updateOutput)(
     THCState *state,
     THCTensor *input,
    THCTensor *output,
-    int outputHeight, int outputWidth,
-    int kH, int kW,
-    int dH, int dW,
-    int padH, int padW,
-    int sH, int sW) {
+    int64_t outputHeight, int64_t outputWidth,
+    int64_t kH, int64_t kW,
+    int64_t dH, int64_t dW,
+    int64_t padH, int64_t padW,
+    int64_t sH, int64_t sW) {
   THCUNN_assertSameGPU(state, 2, input, output);
@@ -84,10 +84,10 @@ void THNN_(Col2Im_updateOutput)(
   THCTensor *input_n = THCTensor_(new)(state);
   THCTensor *output_n = THCTensor_(new)(state);
-  int height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
-  int width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
-  for (int elt = 0; elt < batchSize; elt++) {
+  int64_t height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
+  int64_t width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
+  for (int64_t elt = 0; elt < batchSize; elt++) {
     THCTensor_(select)(state, input_n, input, 0, elt);
     THCTensor_(select)(state, output_n, output, 0, elt);
@@ -116,10 +116,10 @@ void THNN_(Col2Im_updateGradInput)(
     THCState *state,
     THCTensor *gradOutput,
     THCTensor *gradInput,
-    int kH, int kW,
-    int dH, int dW,
-    int padH, int padW,
-    int sH, int sW) {
+    int64_t kH, int64_t kW,
+    int64_t dH, int64_t dW,
+    int64_t padH, int64_t padW,
+    int64_t sH, int64_t sW) {
   THNN_(Im2Col_updateOutput)(state, gradOutput, gradInput,
                              kH, kW, dH, dW, padH, padW, sH, sW);

View File

@@ -6,8 +6,8 @@ static inline void THNN_(Im2Col_shapeCheck)(
     THCState *state,
     THCTensor *input,
     THCTensor *gradOutput,
-    int kH, int kW, int dH, int dW,
-    int padH, int padW, int sH, int sW) {
+    int64_t kH, int64_t kW, int64_t dH, int64_t dW,
+    int64_t padH, int64_t padW, int64_t sH, int64_t sW) {
   THArgCheck(kW > 0 && kH > 0, 4,
              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
@@ -18,7 +18,7 @@ static inline void THNN_(Im2Col_shapeCheck)(
   THArgCheck(sW > 0 && sH > 0, 10,
              "stride should be greater than zero, but got sH: %d sW: %d", sH, sW);
-  int ndim = THCTensor_(nDimension)(state, input);
+  int64_t ndim = THCTensor_(nDimension)(state, input);
   THCUNN_argCheck(state, !input->is_empty() && (ndim == 3 || ndim == 4), 2, input,
                   "Expected non-empty 3D or 4D input tensor, but got input of shape %s");
@@ -26,11 +26,11 @@ static inline void THNN_(Im2Col_shapeCheck)(
   if (ndim == 3) {
     dim_batch = -1;
   }
-  int nInputPlane = THCTensor_(size)(state, input, dim_batch + 1);
-  int inputHeight = THCTensor_(size)(state, input, dim_batch + 2);
-  int inputWidth = THCTensor_(size)(state, input, dim_batch + 3);
-  int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
-  int outputWidth = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
+  int64_t nInputPlane = THCTensor_(size)(state, input, dim_batch + 1);
+  int64_t inputHeight = THCTensor_(size)(state, input, dim_batch + 2);
+  int64_t inputWidth = THCTensor_(size)(state, input, dim_batch + 3);
+  int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
+  int64_t outputWidth = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
   if (outputHeight < 1 || outputWidth < 1) {
     THError("Given input with spatial size (%d, %d), kernel_size=(%d, %d), "
@@ -46,10 +46,10 @@ void THNN_(Im2Col_updateOutput)(
     THCState *state,
     THCTensor *input,
     THCTensor *output,
-    int kH, int kW,
-    int dH, int dW,
-    int padH, int padW,
-    int sH, int sW) {
+    int64_t kH, int64_t kW,
+    int64_t dH, int64_t dW,
+    int64_t padH, int64_t padW,
+    int64_t sH, int64_t sW) {
   THCUNN_assertSameGPU(state, 2, input, output);
@@ -62,15 +62,15 @@ void THNN_(Im2Col_updateOutput)(
     THCTensor_(resize4d)(state, input, 1, input->size[0], input->size[1], input->size[2]);
   }
-  int batchSize = THCTensor_(size)(state, input, 0);
-  int nInputPlane = THCTensor_(size)(state, input, 1);
-  int inputHeight = THCTensor_(size)(state, input, 2);
-  int inputWidth = THCTensor_(size)(state, input, 3);
-  int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
-  int outputWidth = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
-  int nOutputPlane = nInputPlane * kW * kH;
-  int outputLength = outputHeight * outputWidth;
+  int64_t batchSize = THCTensor_(size)(state, input, 0);
+  int64_t nInputPlane = THCTensor_(size)(state, input, 1);
+  int64_t inputHeight = THCTensor_(size)(state, input, 2);
+  int64_t inputWidth = THCTensor_(size)(state, input, 3);
+  int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
+  int64_t outputWidth = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
+  int64_t nOutputPlane = nInputPlane * kW * kH;
+  int64_t outputLength = outputHeight * outputWidth;
   THCTensor_(resize3d)(state, output, batchSize, nOutputPlane, outputLength);
   THCTensor_(zero)(state, output);
@@ -78,7 +78,7 @@ void THNN_(Im2Col_updateOutput)(
   THCTensor *input_n = THCTensor_(new)(state);
   THCTensor *output_n = THCTensor_(new)(state);
-  for (int elt = 0; elt < batchSize; elt++) {
+  for (int64_t elt = 0; elt < batchSize; elt++) {
     THCTensor_(select)(state, input_n, input, 0, elt);
     THCTensor_(select)(state, output_n, output, 0, elt);
@@ -104,11 +104,11 @@ void THNN_(Im2Col_updateGradInput)(
     THCState *state,
     THCTensor *gradOutput,
     THCTensor *gradInput,
-    int inputHeight, int inputWidth,
-    int kH, int kW,
-    int dH, int dW,
-    int padH, int padW,
-    int sH, int sW) {
+    int64_t inputHeight, int64_t inputWidth,
+    int64_t kH, int64_t kW,
+    int64_t dH, int64_t dW,
+    int64_t padH, int64_t padW,
+    int64_t sH, int64_t sW) {
   THNN_(Col2Im_updateOutput)(state, gradOutput, gradInput,
                              inputHeight, inputWidth,

View File

@@ -183,39 +183,39 @@ THC_API void THNN_(Im2Col_updateOutput)(
     THCState *state,
     THCTensor *input,
     THCTensor *output,
-    int kH, int kW,
-    int dH, int dW,
-    int padH, int padW,
-    int sH, int sW);
+    int64_t kH, int64_t kW,
+    int64_t dH, int64_t dW,
+    int64_t padH, int64_t padW,
+    int64_t sH, int64_t sW);
 THC_API void THNN_(Im2Col_updateGradInput)(
     THCState *state,
     THCTensor *gradOutput,
     THCTensor *gradInput,
-    int inputHeight, int inputWidth,
-    int kH, int kW,
-    int dH, int dW,
-    int padH, int padW,
-    int sH, int sW);
+    int64_t inputHeight, int64_t inputWidth,
+    int64_t kH, int64_t kW,
+    int64_t dH, int64_t dW,
+    int64_t padH, int64_t padW,
+    int64_t sH, int64_t sW);
 THC_API void THNN_(Col2Im_updateOutput)(
     THCState *state,
     THCTensor *input,
     THCTensor *output,
-    int outputHeight, int outputWidth,
-    int kH, int kW,
-    int dH, int dW,
-    int padH, int padW,
-    int sH, int sW);
+    int64_t outputHeight, int64_t outputWidth,
+    int64_t kH, int64_t kW,
+    int64_t dH, int64_t dW,
+    int64_t padH, int64_t padW,
+    int64_t sH, int64_t sW);
 THC_API void THNN_(Col2Im_updateGradInput)(
     THCState *state,
     THCTensor *gradOutput,
     THCTensor *gradInput,
-    int kH, int kW,
-    int dH, int dW,
-    int padH, int padW,
-    int sH, int sW);
+    int64_t kH, int64_t kW,
+    int64_t dH, int64_t dW,
+    int64_t padH, int64_t padW,
+    int64_t sH, int64_t sW);
 THC_API void THNN_(LeakyReLU_updateOutput)(
     THCState *state,

View File

@@ -8,28 +8,28 @@
 // (borrowed from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu)
 template <typename Dtype>
 __launch_bounds__(CUDA_NUM_THREADS)
-__global__ void im2col_kernel(const int n, const Dtype* data_im,
-                              const int height, const int width,
-                              const int ksize_h, const int ksize_w,
-                              const int pad_h, const int pad_w,
-                              const int stride_h, const int stride_w,
-                              const int dilation_h, const int dilation_w,
-                              const int height_col, const int width_col,
+__global__ void im2col_kernel(const int64_t n, const Dtype* data_im,
+                              const int64_t height, const int64_t width,
+                              const int64_t ksize_h, const int64_t ksize_w,
+                              const int64_t pad_h, const int64_t pad_w,
+                              const int64_t stride_h, const int64_t stride_w,
+                              const int64_t dilation_h, const int64_t dilation_w,
+                              const int64_t height_col, const int64_t width_col,
                               Dtype* data_col) {
   CUDA_KERNEL_LOOP(index, n) {
-    int w_out = index % width_col;
+    int64_t w_out = index % width_col;
     index /= width_col;
-    int h_out = index % height_col;
-    int channel_in = index / height_col;
-    int channel_out = channel_in * ksize_h * ksize_w;
-    int h_in = h_out * stride_h - pad_h;
-    int w_in = w_out * stride_w - pad_w;
+    int64_t h_out = index % height_col;
+    int64_t channel_in = index / height_col;
+    int64_t channel_out = channel_in * ksize_h * ksize_w;
+    int64_t h_in = h_out * stride_h - pad_h;
+    int64_t w_in = w_out * stride_w - pad_w;
     data_col += (channel_out * height_col + h_out) * width_col + w_out;
     data_im += (channel_in * height + h_in) * width + w_in;
-    for (int i = 0; i < ksize_h; ++i) {
-      for (int j = 0; j < ksize_w; ++j) {
-        int h = h_in + i * dilation_h;
-        int w = w_in + j * dilation_w;
+    for (int64_t i = 0; i < ksize_h; ++i) {
+      for (int64_t j = 0; j < ksize_w; ++j) {
+        int64_t h = h_in + i * dilation_h;
+        int64_t w = w_in + j * dilation_w;
         *data_col = (h >= 0 && w >= 0 && h < height && w < width) ?
           data_im[i * dilation_h * width + j * dilation_w] : ScalarConvert<int, Dtype>::to(0);
         data_col += height_col * width_col;
@@ -39,15 +39,15 @@ __global__ void im2col_kernel(const int n, const Dtype* data_im,
 }
 template <typename Dtype>
-void im2col(cudaStream_t stream, const Dtype* data_im, const int channels,
-            const int height, const int width,
-            const int height_col, const int width_col,
-            const int ksize_h, const int ksize_w, const int pad_h,
-            const int pad_w, const int stride_h, const int stride_w,
-            const int dilation_h, const int dilation_w, Dtype* data_col) {
+void im2col(cudaStream_t stream, const Dtype* data_im, const int64_t channels,
+            const int64_t height, const int64_t width,
+            const int64_t height_col, const int64_t width_col,
+            const int64_t ksize_h, const int64_t ksize_w, const int64_t pad_h,
+            const int64_t pad_w, const int64_t stride_h, const int64_t stride_w,
+            const int64_t dilation_h, const int64_t dilation_w, Dtype* data_col) {
   // We are going to launch channels * height_col * width_col kernels, each
   // kernel responsible for copying a single-channel grid.
-  int num_kernels = channels * height_col * width_col;
+  int64_t num_kernels = channels * height_col * width_col;
   // Launch
   im2col_kernel <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>> (
       num_kernels, data_im, height, width, ksize_h, ksize_w,
@@ -60,37 +60,37 @@ void im2col(cudaStream_t stream, const Dtype* data_im, const int channels,
 template <typename Dtype, typename Acctype>
 __launch_bounds__(CUDA_NUM_THREADS)
-__global__ void col2im_kernel(const int n, const Dtype* data_col,
-                              const int height, const int width, const int channels,
-                              const int kernel_h, const int kernel_w,
-                              const int pad_h, const int pad_w,
-                              const int stride_h, const int stride_w,
-                              const int dilation_h, const int dilation_w,
-                              const int height_col, const int width_col,
+__global__ void col2im_kernel(const int64_t n, const Dtype* data_col,
+                              const int64_t height, const int64_t width, const int64_t channels,
+                              const int64_t kernel_h, const int64_t kernel_w,
+                              const int64_t pad_h, const int64_t pad_w,
+                              const int64_t stride_h, const int64_t stride_w,
+                              const int64_t dilation_h, const int64_t dilation_w,
+                              const int64_t height_col, const int64_t width_col,
                               Dtype* data_im) {
   CUDA_KERNEL_LOOP(index, n) {
     Acctype val = Acctype(0);
-    const int w_im = index % width + pad_w;
-    const int h_im = (index / width) % height + pad_h;
-    const int c_im = index / (width * height);
-    int kernel_extent_w = (kernel_w - 1) * dilation_w + 1;
-    int kernel_extent_h = (kernel_h - 1) * dilation_h + 1;
+    const int64_t w_im = index % width + pad_w;
+    const int64_t h_im = (index / width) % height + pad_h;
+    const int64_t c_im = index / (width * height);
+    int64_t kernel_extent_w = (kernel_w - 1) * dilation_w + 1;
+    int64_t kernel_extent_h = (kernel_h - 1) * dilation_h + 1;
     // compute the start and end of the output
-    const int w_col_start =
+    const int64_t w_col_start =
       (w_im < kernel_extent_w) ? 0 : (w_im - kernel_extent_w) / stride_w + 1;
-    const int w_col_end = min(w_im / stride_w + 1, width_col);
-    const int h_col_start =
+    const int64_t w_col_end = min(w_im / stride_w + 1, width_col);
+    const int64_t h_col_start =
      (h_im < kernel_extent_h) ? 0 : (h_im - kernel_extent_h) / stride_h + 1;
-    const int h_col_end = min(h_im / stride_h + 1, height_col);
+    const int64_t h_col_end = min(h_im / stride_h + 1, height_col);
     // TODO: use LCM of stride and dilation to avoid unnecessary loops
-    for (int h_col = h_col_start; h_col < h_col_end; h_col += 1) {
-      for (int w_col = w_col_start; w_col < w_col_end; w_col += 1) {
-        int h_k = (h_im - h_col * stride_h);
-        int w_k = (w_im - w_col * stride_w);
+    for (int64_t h_col = h_col_start; h_col < h_col_end; h_col += 1) {
+      for (int64_t w_col = w_col_start; w_col < w_col_end; w_col += 1) {
+        int64_t h_k = (h_im - h_col * stride_h);
+        int64_t w_k = (w_im - w_col * stride_w);
         if (h_k % dilation_h == 0 && w_k % dilation_w == 0) {
           h_k /= dilation_h;
           w_k /= dilation_w;
-          int data_col_index = (((c_im * kernel_h + h_k) * kernel_w + w_k) *
+          int64_t data_col_index = (((c_im * kernel_h + h_k) * kernel_w + w_k) *
                                 height_col + h_col) * width_col + w_col;
           val += data_col[data_col_index];
         }
@@ -101,21 +101,21 @@ __global__ void col2im_kernel(const int n, const Dtype* data_col,
 }
 template <typename Dtype, typename Acctype>
-void col2im(cudaStream_t stream, const Dtype* data_col, const int channels,
-            const int height, const int width,
-            const int output_height, const int output_width,
-            const int patch_h, const int patch_w, const int pad_h,
-            const int pad_w, const int stride_h, const int stride_w,
-            const int dilation_h, const int dilation_w, Dtype* data_im);
+void col2im(cudaStream_t stream, const Dtype* data_col, const int64_t channels,
+            const int64_t height, const int64_t width,
+            const int64_t output_height, const int64_t output_width,
+            const int64_t patch_h, const int64_t patch_w, const int64_t pad_h,
+            const int64_t pad_w, const int64_t stride_h, const int64_t stride_w,
+            const int64_t dilation_h, const int64_t dilation_w, Dtype* data_im);
 template <typename Dtype, typename Acctype>
-void col2im(cudaStream_t stream, const Dtype* data_col, const int channels,
-            const int height, const int width,
-            const int output_height, const int output_width,
-            const int patch_h, const int patch_w, const int pad_h,
-            const int pad_w, const int stride_h, const int stride_w,
-            const int dilation_h, const int dilation_w, Dtype* data_im) {
-  int num_kernels = channels * height * width;
+void col2im(cudaStream_t stream, const Dtype* data_col, const int64_t channels,
+            const int64_t height, const int64_t width,
+            const int64_t output_height, const int64_t output_width,
+            const int64_t patch_h, const int64_t patch_w, const int64_t pad_h,
+            const int64_t pad_w, const int64_t stride_h, const int64_t stride_w,
+            const int64_t dilation_h, const int64_t dilation_w, Dtype* data_im) {
+  int64_t num_kernels = channels * height * width;
   // To avoid involving atomic operations, we will launch one kernel per
   // bottom dimension, and then in the kernel add up the top dimensions.
   col2im_kernel<Dtype, Acctype> <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>> (

View File

@@ -54,25 +54,25 @@
 //
 // ALSO do vol2col
-static void THNN_(im2col)(const real* data_im, const int channels,
-      const int height, const int width,
-      const int output_height, const int output_width,
-      const int kernel_h, const int kernel_w,
-      const int pad_h, const int pad_w,
-      const int stride_h, const int stride_w,
-      const int dilation_h, const int dilation_w,
+static void THNN_(im2col)(const real* data_im, const int64_t channels,
+      const int64_t height, const int64_t width,
+      const int64_t output_height, const int64_t output_width,
+      const int64_t kernel_h, const int64_t kernel_w,
+      const int64_t pad_h, const int64_t pad_w,
+      const int64_t stride_h, const int64_t stride_w,
+      const int64_t dilation_h, const int64_t dilation_w,
       real* data_col) {
-  const int height_col = output_height;
-  const int width_col = output_width;
-  const int channels_col = channels * kernel_h * kernel_w;
-  for (int c_col = 0; c_col < channels_col; ++c_col) {
-    int w_offset = c_col % kernel_w;
-    int h_offset = (c_col / kernel_w) % kernel_h;
-    int c_im = c_col / kernel_h / kernel_w;
-    for (int h_col = 0; h_col < height_col; ++h_col) {
-      int h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
-      for (int w_col = 0; w_col < width_col; ++w_col) {
-        int w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
+  const int64_t height_col = output_height;
+  const int64_t width_col = output_width;
+  const int64_t channels_col = channels * kernel_h * kernel_w;
+  for (int64_t c_col = 0; c_col < channels_col; ++c_col) {
+    int64_t w_offset = c_col % kernel_w;
+    int64_t h_offset = (c_col / kernel_w) % kernel_h;
+    int64_t c_im = c_col / kernel_h / kernel_w;
+    for (int64_t h_col = 0; h_col < height_col; ++h_col) {
+      int64_t h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
+      for (int64_t w_col = 0; w_col < width_col; ++w_col) {
+        int64_t w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
         data_col[(c_col * height_col + h_col) * width_col + w_col] =
           (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
           data_im[(c_im * height + h_im) * width + w_im] : 0;
@@ -81,26 +81,26 @@ static void THNN_(im2col)(const real* data_im, const int channels,
   }
 }
-static void THNN_(col2im)(const real* data_col, const int channels,
-      const int height, const int width,
-      const int output_height, const int output_width,
-      const int kernel_h, const int kernel_w,
-      const int pad_h, const int pad_w,
-      const int stride_h, const int stride_w,
-      const int dilation_h, const int dilation_w,
+static void THNN_(col2im)(const real* data_col, const int64_t channels,
+      const int64_t height, const int64_t width,
+      const int64_t output_height, const int64_t output_width,
+      const int64_t kernel_h, const int64_t kernel_w,
+      const int64_t pad_h, const int64_t pad_w,
+      const int64_t stride_h, const int64_t stride_w,
+      const int64_t dilation_h, const int64_t dilation_w,
       real* data_im) {
   memset(data_im, 0, sizeof(real) * height * width * channels);
-  const int height_col = output_height;
-  const int width_col = output_width;
-  const int channels_col = channels * kernel_h * kernel_w;
-  for (int c_col = 0; c_col < channels_col; ++c_col) {
-    int w_offset = c_col % kernel_w;
-    int h_offset = (c_col / kernel_w) % kernel_h;
-    int c_im = c_col / kernel_h / kernel_w;
-    for (int h_col = 0; h_col < height_col; ++h_col) {
-      int h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
-      for (int w_col = 0; w_col < width_col; ++w_col) {
-        int w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
+  const int64_t height_col = output_height;
+  const int64_t width_col = output_width;
+  const int64_t channels_col = channels * kernel_h * kernel_w;
+  for (int64_t c_col = 0; c_col < channels_col; ++c_col) {
+    int64_t w_offset = c_col % kernel_w;
+    int64_t h_offset = (c_col / kernel_w) % kernel_h;
+    int64_t c_im = c_col / kernel_h / kernel_w;
+    for (int64_t h_col = 0; h_col < height_col; ++h_col) {
+      int64_t h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
+      for (int64_t w_col = 0; w_col < width_col; ++w_col) {
+        int64_t w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
         if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width)
           data_im[(c_im * height + h_im) * width + w_im] +=
             data_col[(c_col * height_col + h_col) * width_col + w_col];
@@ -113,9 +113,9 @@ static inline void THNN_(Col2Im_shapeCheck)(
     THNNState *state,
     THTensor *input,
     THTensor *gradOutput,
-    int outputHeight, int outputWidth,
-    int kH, int kW, int dH, int dW,
-    int padH, int padW, int sH, int sW) {
+    int64_t outputHeight, int64_t outputWidth,
+    int64_t kH, int64_t kW, int64_t dH, int64_t dW,
+    int64_t padH, int64_t padW, int64_t sH, int64_t sW) {
   THArgCheck(kW > 0 && kH > 0, 6,
              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
@@ -124,11 +124,11 @@ static inline void THNN_(Col2Im_shapeCheck)(
   THArgCheck(dW > 0 && dH > 0, 8,
             "dilation should be greater than zero, but got dH: %d dW: %d", dH, dW);
-  int ndim = THTensor_(nDimension)(input);
+  int64_t ndim = THTensor_(nDimension)(input);
   THNN_ARGCHECK(!input->is_empty() && (ndim == 2 || ndim == 3), 2, input,
                 "Expected non-empty 2D or 3D input tensor, but got input of shape %s");
-  int batch_dim = (ndim == 3) ? 0 : -1;
+  int64_t batch_dim = (ndim == 3) ? 0 : -1;
   int64_t nInputPlane = input->size[batch_dim + 1];
   if (nInputPlane % (kW * kH) != 0) {
@@ -161,11 +161,11 @@ void THNN_(Col2Im_updateOutput)(
     THNNState *state,
     THTensor *input,
     THTensor *output,
-    int outputHeight, int outputWidth,
-    int kH, int kW,
-    int dH, int dW,
-    int padH, int padW,
-    int sH, int sW) {
+    int64_t outputHeight, int64_t outputWidth,
+    int64_t kH, int64_t kW,
+    int64_t dH, int64_t dW,
+    int64_t padH, int64_t padW,
+    int64_t sH, int64_t sW) {
   THNN_(Col2Im_shapeCheck)(state, input, NULL, outputHeight, outputWidth,
                            kH, kW, dH, dW, padH, padW, sH, sW);
@@ -189,10 +189,10 @@ void THNN_(Col2Im_updateOutput)(
   THTensor *input_n = THTensor_(new)();
   THTensor *output_n = THTensor_(new)();
-  int height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
-  int width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
-  for (int elt = 0; elt < batchSize; elt++) {
+  int64_t height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
+  int64_t width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
+  for (int64_t elt = 0; elt < batchSize; elt++) {
     THTensor_(select)(input_n, input, 0, elt);
     THTensor_(select)(output_n, output, 0, elt);
@@ -220,10 +220,10 @@ void THNN_(Col2Im_updateGradInput)(
     THNNState *state,
     THTensor *gradOutput,
     THTensor *gradInput,
-    int kH, int kW,
-    int dH, int dW,
-    int padH, int padW,
-    int sH, int sW) {
+    int64_t kH, int64_t kW,
+    int64_t dH, int64_t dW,
+    int64_t padH, int64_t padW,
+    int64_t sH, int64_t sW) {
   THNN_(Im2Col_updateOutput)(state, gradOutput, gradInput,
                              kH, kW, dH, dW, padH, padW, sH, sW);

View File

@@ -6,8 +6,8 @@ static inline void THNN_(Im2Col_shapeCheck)(
     THNNState *state,
     THTensor *input,
     THTensor *gradOutput,
-    int kH, int kW, int dH, int dW,
-    int padH, int padW, int sH, int sW) {
+    int64_t kH, int64_t kW, int64_t dH, int64_t dW,
+    int64_t padH, int64_t padW, int64_t sH, int64_t sW) {
   THArgCheck(kW > 0 && kH > 0, 4,
              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
@@ -16,21 +16,21 @@ static inline void THNN_(Im2Col_shapeCheck)(
   THArgCheck(sW > 0 && sH > 0, 10,
              "stride should be greater than zero, but got sH: %d sW: %d", sH, sW);
-  int ndim = THTensor_(nDimension)(input);
+  int64_t ndim = THTensor_(nDimension)(input);
   THNN_ARGCHECK(!input->is_empty() && (ndim == 3 || ndim == 4), 2, input,
                 "Expected non-empty 3D or 4D input tensor, but got input of shape %s");
-  int dim_batch = 0;
+  int64_t dim_batch = 0;
   if (ndim == 3) {
     dim_batch = -1;
   }
-  int nInputPlane = THTensor_(size)(input, dim_batch + 1);
-  int inputHeight = THTensor_(size)(input, dim_batch + 2);
-  int inputWidth = THTensor_(size)(input, dim_batch + 3);
-  int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
-  int outputWidth = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
-  int nOutputPlane = nInputPlane * kW * kH;
-  int outputLength = outputHeight * outputWidth;
+  int64_t nInputPlane = THTensor_(size)(input, dim_batch + 1);
+  int64_t inputHeight = THTensor_(size)(input, dim_batch + 2);
+  int64_t inputWidth = THTensor_(size)(input, dim_batch + 3);
+  int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
+  int64_t outputWidth = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
+  int64_t nOutputPlane = nInputPlane * kW * kH;
+  int64_t outputLength = outputHeight * outputWidth;
   if (outputHeight < 1 || outputWidth < 1) {
     THError("Given input with spatial size (%d, %d), kernel_size=(%d, %d), "
@@ -46,10 +46,10 @@ void THNN_(Im2Col_updateOutput)(
     THNNState *state,
     THTensor *input,
     THTensor *output,
-    int kH, int kW,
-    int dH, int dW,
-    int padH, int padW,
-    int sH, int sW) {
+    int64_t kH, int64_t kW,
+    int64_t dH, int64_t dW,
+    int64_t padH, int64_t padW,
+    int64_t sH, int64_t sW) {
   THNN_(Im2Col_shapeCheck)(state, input, NULL, kH, kW, dH, dW, padH, padW, sH, sW);
@@ -60,15 +60,15 @@ void THNN_(Im2Col_updateOutput)(
     THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
   }
-  int batchSize = THTensor_(size)(input, 0);
-  int nInputPlane = THTensor_(size)(input, 1);
-  int inputHeight = THTensor_(size)(input, 2);
-  int inputWidth = THTensor_(size)(input, 3);
-  int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
-  int outputWidth = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
-  int nOutputPlane = nInputPlane * kW * kH;
-  int outputLength = outputHeight * outputWidth;
+  int64_t batchSize = THTensor_(size)(input, 0);
+  int64_t nInputPlane = THTensor_(size)(input, 1);
+  int64_t inputHeight = THTensor_(size)(input, 2);
+  int64_t inputWidth = THTensor_(size)(input, 3);
+  int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
+  int64_t outputWidth = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
+  int64_t nOutputPlane = nInputPlane * kW * kH;
+  int64_t outputLength = outputHeight * outputWidth;
   THTensor_(resize3d)(output, batchSize, nOutputPlane, outputLength);
   THTensor_(zero)(output);
@@ -76,7 +76,7 @@ void THNN_(Im2Col_updateOutput)(
   THTensor *input_n = THTensor_(new)();
   THTensor *output_n = THTensor_(new)();
-  for (int elt = 0; elt < batchSize; elt++) {
+  for (int64_t elt = 0; elt < batchSize; elt++) {
     THTensor_(select)(input_n, input, 0, elt);
     THTensor_(select)(output_n, output, 0, elt);
@@ -102,11 +102,11 @@ void THNN_(Im2Col_updateGradInput)(
     THNNState *state,
     THTensor *gradOutput,
     THTensor *gradInput,
-    int inputHeight, int inputWidth,
-    int kH, int kW,
-    int dH, int dW,
-    int padH, int padW,
-    int sH, int sW) {
+    int64_t inputHeight, int64_t inputWidth,
+    int64_t kH, int64_t kW,
+    int64_t dH, int64_t dW,
+    int64_t padH, int64_t padW,
+    int64_t sH, int64_t sW) {
   THNN_(Col2Im_updateOutput)(state, gradOutput, gradInput,

View File

@@ -220,8 +220,8 @@ void THNN_(SpatialDilatedConvolution_updateGradInput)(
        dilationH, dilationW, 0);
   // Params
-  int nInputPlane = weight->size[1];
-  int nOutputPlane = weight->size[0];
+  int64_t nInputPlane = weight->size[1];
+  int64_t nOutputPlane = weight->size[0];
   input = THTensor_(newContiguous)(input);
   weight = THTensor_(newContiguous)(weight);

View File

@@ -221,8 +221,8 @@ void THNN_(SpatialFullDilatedConvolution_updateGradInput)(
     (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW,
      dilationH, dilationW, adjH, adjW, 0);
-  int nInputPlane = THTensor_(size)(weight,0);
-  int nOutputPlane = THTensor_(size)(weight,1);
+  int64_t nInputPlane = THTensor_(size)(weight,0);
+  int64_t nOutputPlane = THTensor_(size)(weight,1);
   input = THTensor_(newContiguous)(input);
   gradOutput = THTensor_(newContiguous)(gradOutput);
@@ -328,7 +328,7 @@ void THNN_(SpatialFullDilatedConvolution_accGradParameters)(
     (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW,
      dilationH, dilationW, adjH, adjW, 1);
-  int nOutputPlane;
+  int64_t nOutputPlane;
   if (gradWeight) {
     nOutputPlane = THTensor_(size)(gradWeight, 1);
   } else if (gradBias) {

View File

@ -147,39 +147,39 @@ TH_API void THNN_(Im2Col_updateOutput)(
THNNState *state, THNNState *state,
THTensor *input, THTensor *input,
THTensor *output, THTensor *output,
int kH, int kW, int64_t kH, int64_t kW,
int dH, int dW, int64_t dH, int64_t dW,
int padH, int padW, int64_t padH, int64_t padW,
int sH, int sW); int64_t sH, int64_t sW);
TH_API void THNN_(Im2Col_updateGradInput)( TH_API void THNN_(Im2Col_updateGradInput)(
THNNState *state, THNNState *state,
THTensor *gradOutput, THTensor *gradOutput,
THTensor *gradInput, THTensor *gradInput,
int inputHeight, int inputWidth, int64_t inputHeight, int64_t inputWidth,
int kH, int kW, int64_t kH, int64_t kW,
int dH, int dW, int64_t dH, int64_t dW,
int padH, int padW, int64_t padH, int64_t padW,
int sH, int sW); int64_t sH, int64_t sW);
TH_API void THNN_(Col2Im_updateOutput)( TH_API void THNN_(Col2Im_updateOutput)(
THNNState *state, THNNState *state,
THTensor *input, THTensor *input,
THTensor *output, THTensor *output,
int outputHeight, int outputWidth, int64_t outputHeight, int64_t outputWidth,
int kH, int kW, int64_t kH, int64_t kW,
int dH, int dW, int64_t dH, int64_t dW,
int padH, int padW, int64_t padH, int64_t padW,
int sH, int sW); int64_t sH, int64_t sW);
TH_API void THNN_(Col2Im_updateGradInput)( TH_API void THNN_(Col2Im_updateGradInput)(
THNNState *state, THNNState *state,
THTensor *gradOutput, THTensor *gradOutput,
THTensor *gradInput, THTensor *gradInput,
int kH, int kW, int64_t kH, int64_t kW,
int dH, int dW, int64_t dH, int64_t dW,
int padH, int padW, int64_t padH, int64_t padW,
int sH, int sW); int64_t sH, int64_t sW);
TH_API void THNN_(L1Cost_updateOutput)( TH_API void THNN_(L1Cost_updateOutput)(
THNNState *state, // library's state THNNState *state, // library's state

View File

@@ -224,11 +224,7 @@ if(USE_CUDA)
   # it. We will then manually add the cudart library as interface libs.
   set(__tmp ${CUDA_LIBRARIES})
   set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES})
-  if(CAFFE2_STATIC_LINK_CUDA)
-    torch_cuda_based_add_library(caffe2_gpu STATIC ${Caffe2_GPU_SRCS})
-  else()
-    torch_cuda_based_add_library(caffe2_gpu ${Caffe2_GPU_SRCS})
-  endif()
+  torch_cuda_based_add_library(caffe2_gpu ${Caffe2_GPU_SRCS})
   set(CUDA_LIBRARIES ${__tmp})
   target_link_libraries(caffe2_gpu INTERFACE caffe2::cudart)

View File

@@ -1,4 +1,5 @@
 #include "caffe2/utils/proto_wrap.h"
+#include "caffe2/core/common.h"
 #include <google/protobuf/stubs/common.h>
 #include <google/protobuf/generated_message_util.h>
@@ -8,7 +9,7 @@ namespace caffe {
 // Caffe wrapper functions for protobuf's GetEmptyStringAlreadyInited() function
 // used to avoid duplicated global variable in the case when protobuf
 // is built with hidden visibility.
-const ::std::string& GetEmptyStringAlreadyInited() {
+CAFFE2_API const ::std::string& GetEmptyStringAlreadyInited() {
   return ::google::protobuf::internal::GetEmptyStringAlreadyInited();
 }
@@ -19,7 +20,7 @@ namespace ONNX_NAMESPACE {
 // ONNX wrapper functions for protobuf's GetEmptyStringAlreadyInited() function
 // used to avoid duplicated global variable in the case when protobuf
 // is built with hidden visibility.
-const ::std::string& GetEmptyStringAlreadyInited() {
+CAFFE2_API const ::std::string& GetEmptyStringAlreadyInited() {
   return ::google::protobuf::internal::GetEmptyStringAlreadyInited();
 }
@@ -30,7 +31,7 @@ namespace caffe2 {
 // Caffe2 wrapper functions for protobuf's GetEmptyStringAlreadyInited() function
 // used to avoid duplicated global variable in the case when protobuf
 // is built with hidden visibility.
-const ::std::string& GetEmptyStringAlreadyInited() {
+CAFFE2_API const ::std::string& GetEmptyStringAlreadyInited() {
   return ::google::protobuf::internal::GetEmptyStringAlreadyInited();
 }

@@ -1102,6 +1102,11 @@ Linear functions
 .. autofunction:: linear

+:hidden:`bilinear`
+~~~~~~~~~~~~~~~~
+
+.. autofunction:: bilinear
+
 Dropout functions
 -----------------

@@ -337,6 +337,7 @@ view of a storage and defines numeric operations on it.
    .. automethod:: rsqrt
    .. automethod:: rsqrt_
    .. automethod:: scatter_
+   .. automethod:: scatter_add_
    .. automethod:: select
    .. automethod:: set_
    .. automethod:: share_memory_

@@ -251,6 +251,7 @@ Spectral Ops
 Other Operations
 ~~~~~~~~~~~~~~~~~~~~~~
+.. autofunction:: bincount
 .. autofunction:: cross
 .. autofunction:: diag
 .. autofunction:: diagflat
@@ -258,6 +259,7 @@ Other Operations
 .. autofunction:: einsum
 .. autofunction:: flip
 .. autofunction:: histc
+.. autofunction:: meshgrid
 .. autofunction:: renorm
 .. autofunction:: trace
 .. autofunction:: tril

@@ -152,6 +152,8 @@ IS_LINUX = (platform.system() == 'Linux')
 FULL_CAFFE2 = check_env_flag('FULL_CAFFE2')
 BUILD_PYTORCH = check_env_flag('BUILD_PYTORCH')
+USE_CUDA_STATIC_LINK = check_env_flag('USE_CUDA_STATIC_LINK')
+
 NUM_JOBS = multiprocessing.cpu_count()
 max_jobs = os.getenv("MAX_JOBS")
 if max_jobs is not None:
@@ -318,6 +320,8 @@ def build_libs(libs):
     if USE_CUDA:
         my_env["CUDA_BIN_PATH"] = CUDA_HOME
         build_libs_cmd += ['--use-cuda']
+    if USE_CUDA_STATIC_LINK:
+        build_libs_cmd += ['--cuda-static-link']
     if USE_ROCM:
         build_libs_cmd += ['--use-rocm']
     if USE_NNPACK:
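For orientation, `check_env_flag` is the setup helper that turns an environment variable into a boolean; a minimal sketch of how such a helper plausibly behaves (the accepted spellings here are an assumption for illustration, not taken from this diff):

    import os

    def check_env_flag(name):
        # Hypothetical re-implementation; the real helper lives in PyTorch's
        # build tooling and may accept a different set of spellings.
        return os.getenv(name, '').upper() in ('1', 'TRUE', 'ON', 'YES', 'Y')

With the flag set (e.g. `USE_CUDA_STATIC_LINK=1 python setup.py install`), the build passes `--cuda-static-link` through to `build_pytorch_libs.sh`, shown further below.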

@@ -28,7 +28,7 @@ import errno
 import torch
 import torch.cuda
 from torch._utils_internal import get_writable_path
-from torch._six import string_classes
+from torch._six import string_classes, inf
 import torch.backends.cudnn
 import torch.backends.mkl
@@ -353,7 +353,7 @@ class TestCase(unittest.TestCase):
         elif isinstance(x, bool) and isinstance(y, bool):
             super(TestCase, self).assertEqual(x, y, message)
         elif isinstance(x, Number) and isinstance(y, Number):
-            if abs(x) == float('inf') or abs(y) == float('inf'):
+            if abs(x) == inf or abs(y) == inf:
                 if allow_inf:
                     super(TestCase, self).assertEqual(x, y, message)
                 else:

@@ -10,6 +10,7 @@ from collections import OrderedDict
 from itertools import product
 from operator import mul, itemgetter
 from functools import reduce, wraps
+from torch._six import inf, nan
 from torch.autograd.gradcheck import gradgradcheck, gradcheck
 from torch.autograd.function import once_differentiable
 from torch.autograd.profiler import profile
@@ -1524,12 +1525,12 @@
         pyscalar = -12345.1
         f[0] = pyscalar
         self.assertEqual(float(f), pyscalar)
-        f[0] = float('nan')
+        f[0] = nan
         self.assertTrue(math.isnan(float(f)))
-        f[0] = float('inf')
-        self.assertEqual(float(f), float('inf'), allow_inf=True)
-        f[0] = float('-inf')
-        self.assertEqual(float(f), float('-inf'), allow_inf=True)
+        f[0] = inf
+        self.assertEqual(float(f), inf, allow_inf=True)
+        f[0] = -inf
+        self.assertEqual(float(f), -inf, allow_inf=True)

         # integral -> floating point
         # check we can convert something that loses precision
@@ -1539,11 +1540,11 @@
         self.assertEqual(float(l), float(pyscalar))

         # floating point -> integral
-        f[0] = float('nan')
+        f[0] = nan
         self.assertRaises(ValueError, lambda: integral_conv(f[0]))
-        f[0] = float('inf')
+        f[0] = inf
         self.assertRaises(OverflowError, lambda: integral_conv(f[0]))
-        f[0] = float('-inf')
+        f[0] = -inf
         self.assertRaises(OverflowError, lambda: integral_conv(f[0]))
         f[0] = sys.float_info.max
         self.assertEqual(integral_conv(f), sys.float_info.max)
@@ -1558,9 +1559,9 @@
         test_nonzero(l, -2, True)
         test_nonzero(f, 0.0, False)
         test_nonzero(f, sys.float_info.min, True)
-        test_nonzero(f, float('nan'), bool(float('nan')))
-        test_nonzero(f, float('inf'), bool(float('inf')))
-        test_nonzero(f, float('-inf'), bool(float('-inf')))
+        test_nonzero(f, nan, bool(nan))
+        test_nonzero(f, inf, bool(inf))
+        test_nonzero(f, -inf, bool(-inf))

     def test_pyscalar_conversions(self):
         self._test_pyscalar_conversions(lambda x: x, lambda x: int(x))
@@ -2825,7 +2826,7 @@ method_tests = [
     ('std', (S,), (0, True, True), 'keepdim_dim_1d', [0]),
     ('renorm', (S, S, S), (2, 1, 0.5), 'dim', [1]),
     ('renorm', (S, S, S), (1, 2, 3), 'norm_1'),
-    ('renorm', (S, S, S), (float('inf'), 2, 0.5), 'norm_inf'),
+    ('renorm', (S, S, S), (inf, 2, 0.5), 'norm_inf'),
     ('repeat', (S,), (2,), 'single_number'),
     ('repeat', (), (2, 3), 'scalar'),
     ('repeat', (2, 2), (3, 2)),
@@ -2917,7 +2918,7 @@ method_tests = [
     ('norm', (S, S), (0.5,), '0_5'),
     ('norm', (S, S), (1,), '1'),
     ('norm', (S, S), (3,), '3'),
-    ('norm', (S, S), (float('inf'),), 'inf'),
+    ('norm', (S, S), (inf,), 'inf'),
     ('norm', (S, S), (-1,), 'neg_1'),
     ('norm', (S, S), (-0.5,), 'neg_0_5'),
     ('norm', (S, S), (-1.5,), 'neg_1_5'),

@@ -1,3 +1,4 @@
+import os
 import unittest
 import sys
@@ -15,7 +16,10 @@ import common
 from torch.utils.cpp_extension import CUDA_HOME

 TEST_CUDA = torch.cuda.is_available() and CUDA_HOME is not None
-TEST_CUDNN = TEST_CUDA and torch.backends.cudnn.is_available()
+TEST_CUDNN = False
+if TEST_CUDA:
+    CUDNN_HEADER_EXISTS = os.path.isfile(os.path.join(CUDA_HOME, 'include/cudnn.h'))
+    TEST_CUDNN = TEST_CUDA and CUDNN_HEADER_EXISTS and torch.backends.cudnn.is_available()

 class TestCppExtension(common.TestCase):

@@ -12,6 +12,7 @@ import torch
 import torch.cuda
 import torch.cuda.comm as comm
 from torch import multiprocessing as mp
+from torch._six import inf, nan

 from test_torch import TestTorch
 from common import TestCase, get_gpu_type, to_gpu, freeze_rng_state, run_tests, \
@@ -782,7 +783,7 @@ class TestCuda(TestCase):
             if not end0:
                 gen1_max_times = torch.LongTensor(1).random_(0, 3)[0]
             else:
-                gen1_max_times = float('inf')
+                gen1_max_times = inf
             t = 0
             while t < gen1_max_times and not end1:
                 end1 = advance(gen1, end1)
@@ -901,7 +902,7 @@
                  (lambda x: x.max(0)[0], 'max_dim')]
         for f, name in tests:
             a = torch.arange(25.0).view(5, 5)
-            a[2, 2] = float('nan')
+            a[2, 2] = nan
             actual = f(a.cuda()).cpu()
             expected = f(a).cpu()
             self.assertEqual(torch.isnan(actual), torch.isnan(expected), 'nans for {}'.format(name))
@@ -1503,9 +1504,9 @@
     def test_multinomial_invalid_probs_cuda(self):
         test_method = TestCuda._test_multinomial_invalid_probs_cuda
         self._spawn_method(test_method, torch.Tensor([0, -1]))
-        self._spawn_method(test_method, torch.Tensor([0, float('inf')]))
-        self._spawn_method(test_method, torch.Tensor([0, float('-inf')]))
-        self._spawn_method(test_method, torch.Tensor([0, float('nan')]))
+        self._spawn_method(test_method, torch.Tensor([0, inf]))
+        self._spawn_method(test_method, torch.Tensor([0, -inf]))
+        self._spawn_method(test_method, torch.Tensor([0, nan]))

     def test_broadcast(self):
         TestTorch._test_broadcast(self, lambda t: t.cuda())
@@ -1686,7 +1687,6 @@
         cpu_tensor = torch.tensor([-0.999999994, -1.999999994, -2.0000000111,
                                    -100.99999994, -1931.99999994, 0.000000111,
                                    -0.000000111, 0, -1, -2, -931])
-        nan = float('nan')
         expected_errors = torch.tensor([0, 0, 0, 0, 0, 0, 0, nan, nan, nan, nan])
         gpu_tensor = cpu_tensor.cuda()
         cpu_out = cpu_tensor.digamma()

@@ -30,6 +30,7 @@ from itertools import product
 from random import shuffle

 import torch
+from torch._six import inf
 from common import TestCase, run_tests, set_rng_seed, TEST_WITH_UBSAN
 from common_cuda import TEST_CUDA
 from torch.autograd import grad, gradcheck
@@ -782,7 +783,7 @@ class TestDistributions(TestCase):
         s = 0.3
         self.assertEqual(Geometric(p).sample((8,)).size(), (8, 3))
         self.assertEqual(Geometric(1).sample(), 0)
-        self.assertEqual(Geometric(1).log_prob(torch.tensor(1.)), -float('inf'), allow_inf=True)
+        self.assertEqual(Geometric(1).log_prob(torch.tensor(1.)), -inf, allow_inf=True)
         self.assertEqual(Geometric(1).log_prob(torch.tensor(0.)), 0)
         self.assertFalse(Geometric(p).sample().requires_grad)
         self.assertEqual(Geometric(r).sample((8,)).size(), (8,))
@@ -1162,8 +1163,8 @@ class TestDistributions(TestCase):
         uniform = Uniform(low_1d, high_1d)
         above_high = torch.tensor([4.0])
         below_low = torch.tensor([-1.0])
-        self.assertEqual(uniform.log_prob(above_high).item(), -float('inf'), allow_inf=True)
-        self.assertEqual(uniform.log_prob(below_low).item(), -float('inf'), allow_inf=True)
+        self.assertEqual(uniform.log_prob(above_high).item(), -inf, allow_inf=True)
+        self.assertEqual(uniform.log_prob(below_low).item(), -inf, allow_inf=True)

         # check cdf computation when value outside range
         self.assertEqual(uniform.cdf(below_low).item(), 0)
@@ -1190,7 +1191,7 @@ class TestDistributions(TestCase):
         loc_1d = torch.zeros(1, requires_grad=True)
         scale_1d = torch.ones(1, requires_grad=True)
         self.assertTrue(is_all_nan(Cauchy(loc_1d, scale_1d).mean))
-        self.assertEqual(Cauchy(loc_1d, scale_1d).variance, float('inf'), allow_inf=True)
+        self.assertEqual(Cauchy(loc_1d, scale_1d).variance, inf, allow_inf=True)
         self.assertEqual(Cauchy(loc, scale).sample().size(), (5, 5))
         self.assertEqual(Cauchy(loc, scale).sample((7,)).size(), (7, 5, 5))
         self.assertEqual(Cauchy(loc_1d, scale_1d).sample().size(), (1,))
@@ -1216,7 +1217,7 @@ class TestDistributions(TestCase):
         scale = torch.ones(5, 5, requires_grad=True)
         scale_1d = torch.ones(1, requires_grad=True)
         self.assertTrue(is_all_nan(HalfCauchy(scale_1d).mean))
-        self.assertEqual(HalfCauchy(scale_1d).variance, float('inf'), allow_inf=True)
+        self.assertEqual(HalfCauchy(scale_1d).variance, inf, allow_inf=True)
         self.assertEqual(HalfCauchy(scale).sample().size(), (5, 5))
         self.assertEqual(HalfCauchy(scale).sample((7,)).size(), (7, 5, 5))
         self.assertEqual(HalfCauchy(scale_1d).sample().size(), (1,))
@@ -1714,8 +1715,8 @@ class TestDistributions(TestCase):
         alpha = torch.tensor(torch.randn(2, 3).abs(), requires_grad=True)
         scale_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True)
         alpha_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True)
-        self.assertEqual(Pareto(scale_1d, 0.5).mean, float('inf'), allow_inf=True)
-        self.assertEqual(Pareto(scale_1d, 0.5).variance, float('inf'), allow_inf=True)
+        self.assertEqual(Pareto(scale_1d, 0.5).mean, inf, allow_inf=True)
+        self.assertEqual(Pareto(scale_1d, 0.5).variance, inf, allow_inf=True)
         self.assertEqual(Pareto(scale, alpha).sample().size(), (2, 3))
         self.assertEqual(Pareto(scale, alpha).sample((5,)).size(), (5, 2, 3))
         self.assertEqual(Pareto(scale_1d, alpha_1d).sample((1,)).size(), (1, 1))
@@ -1832,7 +1833,7 @@ class TestDistributions(TestCase):
         df_1d = torch.tensor(torch.exp(torch.randn(1)), requires_grad=True)
         self.assertTrue(is_all_nan(StudentT(1).mean))
         self.assertTrue(is_all_nan(StudentT(1).variance))
-        self.assertEqual(StudentT(2).variance, float('inf'), allow_inf=True)
+        self.assertEqual(StudentT(2).variance, inf, allow_inf=True)
         self.assertEqual(StudentT(df).sample().size(), (2, 3))
         self.assertEqual(StudentT(df).sample((5,)).size(), (5, 2, 3))
         self.assertEqual(StudentT(df_1d).sample((1,)).size(), (1, 1))
@@ -2962,7 +2963,7 @@ class TestKL(TestCase):
     def test_kl_infinite(self):
         for p, q in self.infinite_examples:
-            self.assertTrue((kl_divergence(p, q) == float('inf')).all(),
+            self.assertTrue((kl_divergence(p, q) == inf).all(),
                             'Incorrect KL({}, {})'.format(type(p).__name__, type(q).__name__))

     def test_kl_edgecases(self):
@@ -2996,7 +2997,7 @@ class TestKL(TestCase):
                 continue
             x = dist.sample(sample_shape=(60000,))
             expected = -dist.log_prob(x).mean(0)
-            ignore = (expected == float('inf'))
+            ignore = (expected == inf)
             expected[ignore] = actual[ignore]
             self.assertEqual(actual, expected, prec=0.2, message='\n'.join([
                 '{} example {}/{}, incorrect .entropy().'.format(Dist.__name__, i + 1, len(params)),
@@ -3157,12 +3158,12 @@ class TestNumericalStability(TestCase):
     def test_categorical_log_prob_with_logits(self):
         for dtype in ([torch.float, torch.double]):
-            p = torch.tensor([-float('inf'), 0], dtype=dtype, requires_grad=True)
+            p = torch.tensor([-inf, 0], dtype=dtype, requires_grad=True)
             categorical = OneHotCategorical(logits=p)
             log_pdf_prob_1 = categorical.log_prob(torch.tensor([0, 1], dtype=dtype))
             self.assertEqual(log_pdf_prob_1.item(), 0)
             log_pdf_prob_0 = categorical.log_prob(torch.tensor([1, 0], dtype=dtype))
-            self.assertEqual(log_pdf_prob_0.item(), -float('inf'), allow_inf=True)
+            self.assertEqual(log_pdf_prob_0.item(), -inf, allow_inf=True)

     def test_multinomial_log_prob(self):
         for dtype in ([torch.float, torch.double]):
@@ -3174,12 +3175,12 @@ class TestNumericalStability(TestCase):
     def test_multinomial_log_prob_with_logits(self):
         for dtype in ([torch.float, torch.double]):
-            p = torch.tensor([-float('inf'), 0], dtype=dtype, requires_grad=True)
+            p = torch.tensor([-inf, 0], dtype=dtype, requires_grad=True)
             multinomial = Multinomial(10, logits=p)
             log_pdf_prob_1 = multinomial.log_prob(torch.tensor([0, 10], dtype=dtype))
             self.assertEqual(log_pdf_prob_1.item(), 0)
             log_pdf_prob_0 = multinomial.log_prob(torch.tensor([10, 0], dtype=dtype))
-            self.assertEqual(log_pdf_prob_0.item(), -float('inf'), allow_inf=True)
+            self.assertEqual(log_pdf_prob_0.item(), -inf, allow_inf=True)

 class TestLazyLogitsInitialization(TestCase):

@@ -15,6 +15,7 @@ import hashlib
 import os

 import torch
+from torch._six import inf, nan
 import torch.backends.cudnn as cudnn
 import torch.nn as nn
 import torch.nn.functional as F
@@ -1465,7 +1466,7 @@ class TestNN(NNTestCase):
         def compute_norm(norm_type):
             norm_type = float(norm_type)
-            if norm_type != float('inf'):
+            if norm_type != inf:
                 total_norm = 0
                 for p in l.parameters():
                     total_norm += p.grad.data.abs().pow(norm_type).sum()
@@ -1560,8 +1561,6 @@ class TestNN(NNTestCase):
     # We don't want to make propagating NaN a hard requirement on ops, but for
     # these easy ones, we should make them do so.
     def _test_nonlinearity_propagate_nan(self, device):
-        nan = float('nan')
-
         def test(nonlinearity, *args, **kwargs):
             x = torch.tensor([nan], device=device)
             fn = getattr(F, nonlinearity)
@@ -2547,7 +2546,7 @@ class TestNN(NNTestCase):
             for num_dim in [1, 2, 3]:
                 fn_name = '{}max_pool{}d'.format(adaptive, num_dim)
                 fn = getattr(F, fn_name)
-                x = torch.full([1, 1] + num_dim * [3], float('nan'))
+                x = torch.full([1, 1] + num_dim * [3], nan)
                 res = fn(x, 1 if adaptive else 3)
                 self.assertTrue(math.isnan(res.item()))

@@ -3,6 +3,7 @@ import unittest
 import functools
 from copy import deepcopy
 import torch
+from torch._six import inf
 import torch.optim as optim
 import torch.legacy.optim as old_optim
 import torch.nn.functional as F
@@ -478,8 +479,8 @@ class TestOptim(TestCase):
     @unittest.skipIf(TEST_WITH_UBSAN, "division-by-zero error with UBSAN")
     def test_lbfgs_return_type(self):
         params = [torch.randn(10, 5), torch.randn(10)]
-        opt1 = optim.LBFGS(params, 0.01, tolerance_grad=float('inf'))
-        opt2 = optim.LBFGS(params, 0.01, tolerance_grad=-float('inf'))
+        opt1 = optim.LBFGS(params, 0.01, tolerance_grad=inf)
+        opt2 = optim.LBFGS(params, 0.01, tolerance_grad=-inf)

         def closure():
             return torch.Tensor([10])

@@ -16,6 +16,7 @@ import gzip
 from torch._utils_internal import get_file_path, get_file_path_2
 from torch.utils.dlpack import from_dlpack, to_dlpack
 from torch._utils import _rebuild_tensor
+from torch._six import inf, nan
 from itertools import product, combinations
 from functools import reduce
 from torch import multiprocessing as mp
@@ -241,17 +242,17 @@ class TestTorch(TestCase):
         self.assertTrue(torch.allclose(x, y, rtol=0.01, atol=0.0))
         self.assertFalse(torch.allclose(x, y))
         self.assertTrue(torch.allclose(torch.tensor([0.0]), torch.tensor([1e-8])))
-        x = torch.tensor([2.0, 3.0, float('nan')])
-        y = torch.tensor([2.01, 3.01, float('nan')])
+        x = torch.tensor([2.0, 3.0, nan])
+        y = torch.tensor([2.01, 3.01, nan])
         self.assertFalse(torch.allclose(x, y, rtol=1e-2))
         self.assertTrue(torch.allclose(x, y, rtol=1e-2, equal_nan=True))
         self.assertFalse(torch.allclose(x, y, rtol=1e-3, equal_nan=True))
-        inf = torch.tensor([float('inf')])
-        self.assertTrue(torch.allclose(inf, inf))
-        self.assertTrue(torch.allclose(-inf, -inf))
-        self.assertFalse(torch.allclose(inf, -inf))
-        self.assertFalse(torch.allclose(inf, torch.tensor([1e20])))
-        self.assertFalse(torch.allclose(-inf, torch.tensor([-1e20])))
+        inf_t = torch.tensor([inf])
+        self.assertTrue(torch.allclose(inf_t, inf_t))
+        self.assertTrue(torch.allclose(-inf_t, -inf_t))
+        self.assertFalse(torch.allclose(inf_t, -inf_t))
+        self.assertFalse(torch.allclose(inf_t, torch.tensor([1e20])))
+        self.assertFalse(torch.allclose(-inf_t, torch.tensor([-1e20])))

     def test_linear_algebra_scalar_raises(self):
         m = torch.randn(5, 5)
@@ -359,13 +360,13 @@
             try:
                 return math.sinh(x)
             except OverflowError:
-                return float('inf') if x > 0 else float('-inf')
+                return inf if x > 0 else -inf

         self._test_math(torch.sinh, sinh)

     def test_lgamma(self):
         def lgamma(x):
             if x <= 0 and x == int(x):
-                return float('inf')
+                return inf
             return math.lgamma(x)

         self._test_math(torch.lgamma, lgamma)
@@ -392,14 +393,14 @@
         # scipy 1.1.0 changed when it returns +/-inf vs. NaN
         def torch_digamma_without_inf(inp):
             res = torch.digamma(inp)
-            res[(res == float('-inf')) | (res == float('inf'))] = float('nan')
+            res[(res == -inf) | (res == inf)] = nan
             return res

         def scipy_digamma_without_inf(inp):
             res = digamma(inp)
             if np.isscalar(res):
-                return res if np.isfinite(res) else float('nan')
-            res[np.isinf(res)] = float('nan')
+                return res if np.isfinite(res) else nan
+            res[np.isinf(res)] = nan
             return res

         self._test_math(torch_digamma_without_inf, scipy_digamma_without_inf, self._digamma_input())
@@ -413,7 +414,7 @@
                         self._digamma_input(test_poles=False))

     def test_asin(self):
-        self._test_math(torch.asin, lambda x: math.asin(x) if abs(x) <= 1 else float('nan'))
+        self._test_math(torch.asin, lambda x: math.asin(x) if abs(x) <= 1 else nan)

     def test_cos(self):
         self._test_math_by_name('cos')
@@ -425,11 +426,11 @@
             except OverflowError:
                 # Return inf on overflow.
                 # See http://en.cppreference.com/w/cpp/numeric/math/cosh
-                return float('inf')
+                return inf

         self._test_math(torch.cosh, cosh)

     def test_acos(self):
-        self._test_math(torch.acos, lambda x: math.acos(x) if abs(x) <= 1 else float('nan'))
+        self._test_math(torch.acos, lambda x: math.acos(x) if abs(x) <= 1 else nan)

     def test_tan(self):
         self._test_math_by_name('tan')
@@ -443,36 +444,36 @@
     def test_log(self):
         def log(x):
             if x == 0:
-                return float('-inf')
+                return -inf
             elif x < 0:
-                return float('nan')
+                return nan
             return math.log(x)

         self._test_math(torch.log, log)

     def test_log10(self):
         def log10(x):
             if x == 0:
-                return float('-inf')
+                return -inf
             elif x < 0:
-                return float('nan')
+                return nan
             return math.log10(x)

         self._test_math(torch.log10, log10)

     def test_log1p(self):
         def log1p(x):
             if x == -1:
-                return float('-inf')
+                return -inf
             elif x < -1:
-                return float('nan')
+                return nan
             return math.log1p(x)

         self._test_math(torch.log1p, log1p)

     def test_log2(self):
         def log2(x):
             if x == 0:
-                return float('-inf')
+                return -inf
             elif x < 0:
-                return float('nan')
+                return nan
             try:
                 return math.log2(x)
             except AttributeError:
@@ -480,7 +481,7 @@
         self._test_math(torch.log2, log2)

     def test_sqrt(self):
-        self._test_math(torch.sqrt, lambda x: math.sqrt(x) if x >= 0 else float('nan'))
+        self._test_math(torch.sqrt, lambda x: math.sqrt(x) if x >= 0 else nan)

     def test_erf(self):
         self._test_math_by_name('erf')
@@ -493,9 +494,9 @@
             inputValues = torch.randn(4, 4, out=tensor()).clamp(-2., 2.)
             self.assertEqual(tensor(inputValues).erf().erfinv(), tensor(inputValues))
             # test inf
-            self.assertTrue(torch.equal(tensor([-1, 1]).erfinv(), tensor([float('-inf'), float('inf')])))
+            self.assertTrue(torch.equal(tensor([-1, 1]).erfinv(), tensor([-inf, inf])))
             # test nan
-            self.assertEqual(tensor([-2, 2]).erfinv(), tensor([float('nan'), float('nan')]))
+            self.assertEqual(tensor([-2, 2]).erfinv(), tensor([nan, nan]))

         checkType(torch.FloatTensor)
         checkType(torch.DoubleTensor)
@@ -505,7 +506,7 @@
             try:
                 return math.exp(x)
             except OverflowError:
-                return float('inf')
+                return inf

         self._test_math(torch.exp, exp)

     def test_expm1(self):
@@ -513,7 +514,7 @@
             try:
                 return math.expm1(x)
             except OverflowError:
-                return float('inf')
+                return inf

         self._test_math(torch.expm1, expm1)

     def test_floor(self):
@@ -525,9 +526,9 @@
     def test_rsqrt(self):
         def rsqrt(x):
             if x == 0:
-                return float('inf')
+                return inf
             elif x < 0:
-                return float('nan')
+                return nan
             return 1.0 / math.sqrt(x)

         self._test_math(torch.rsqrt, rsqrt)
@@ -615,7 +616,7 @@
         # NaNs
         for index in (0, 4, 99):
             m1 = torch.randn(100)
-            m1[index] = float('nan')
+            m1[index] = nan
             res1val, res1ind = torch.max(m1, 0)
             self.assertTrue(math.isnan(res1val))
             self.assertEqual(res1ind, index)
@@ -633,14 +634,14 @@
         # full reduction
         x = torch.randn(5, device=device)
         xn = x.cpu().numpy()
-        for p in [0, 1, 2, 3, 4, float('inf')]:
+        for p in [0, 1, 2, 3, 4, inf]:
             res = x.norm(p).item()
             expected = np.linalg.norm(xn, p)
             self.assertEqual(res, expected, "full reduction failed for {}-norm".format(p))

         # one dimension
         x = torch.randn(5, 5, device=device)
         xn = x.cpu().numpy()
-        for p in [0, 1, 2, 3, 4, float('inf')]:
+        for p in [0, 1, 2, 3, 4, inf]:
             res = x.norm(p, 1).cpu().numpy()
             expected = np.linalg.norm(xn, p, 1)
             self.assertEqual(res.shape, expected.shape)
@@ -808,10 +809,10 @@
             ('prod', lambda *args, **kwargs: torch.prod(*args, **kwargs), 1),
             ('sum', lambda *args, **kwargs: torch.sum(*args, **kwargs), 0),
             ('norm', lambda *args, **kwargs: torch.norm(*args, p=2, **kwargs), 0),
-            ('mean', lambda *args, **kwargs: torch.mean(*args, **kwargs), float('nan')),
-            ('var', lambda *args, **kwargs: torch.var(*args, **kwargs), float('nan')),
-            ('std', lambda *args, **kwargs: torch.std(*args, **kwargs), float('nan')),
-            ('logsumexp', lambda *args, **kwargs: torch.logsumexp(*args, **kwargs), float('-inf')),
+            ('mean', lambda *args, **kwargs: torch.mean(*args, **kwargs), nan),
+            ('var', lambda *args, **kwargs: torch.var(*args, **kwargs), nan),
+            ('std', lambda *args, **kwargs: torch.std(*args, **kwargs), nan),
+            ('logsumexp', lambda *args, **kwargs: torch.logsumexp(*args, **kwargs), -inf),
         ]

         devices = ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda']
@@ -878,8 +879,8 @@
     def test_logsumexp(self):
         from scipy.special import logsumexp
         a = torch.randn(5, 4)
-        a[0, 0] = float('inf')
-        a[1, :] = float('-inf')
+        a[0, 0] = inf
+        a[1, :] = -inf
         actual = a.logsumexp(1)
         expected = logsumexp(a.numpy(), 1)
         self.assertEqual(expected.shape, actual.shape)
@@ -1540,7 +1541,7 @@
         self._test_cop(torch.mul, lambda x, y: x * y)

     def test_cpow(self):
-        self._test_cop(torch.pow, lambda x, y: float('nan') if x < 0 else math.pow(x, y))
+        self._test_cop(torch.pow, lambda x, y: nan if x < 0 else math.pow(x, y))

     @unittest.skipIf(not TEST_NUMPY, 'Numpy not found')
     def test_einsum(self):
@@ -2416,7 +2417,7 @@
         # full reduction
         x = torch.randn(5, 5)
         xn = x.numpy()
-        for p in [1, 2, 3, 4, float('inf')]:
+        for p in [1, 2, 3, 4, inf]:
             res = x.renorm(p, 1, 1)
             expected = x / x.norm(p, 0, keepdim=True).clamp(min=1)
             self.assertEqual(res.numpy(), expected.numpy(), "renorm failed for {}-norm".format(p))
@@ -2532,9 +2533,9 @@
     def test_multinomial_invalid_probs(self):
         test_method = TestTorch._test_multinomial_invalid_probs
         self._spawn_method(test_method, torch.Tensor([0, -1]))
-        self._spawn_method(test_method, torch.Tensor([0, float('inf')]))
-        self._spawn_method(test_method, torch.Tensor([0, float('-inf')]))
-        self._spawn_method(test_method, torch.Tensor([0, float('nan')]))
+        self._spawn_method(test_method, torch.Tensor([0, inf]))
+        self._spawn_method(test_method, torch.Tensor([0, -inf]))
+        self._spawn_method(test_method, torch.Tensor([0, nan]))

     @suppress_warnings
     def test_range(self):
@@ -4672,15 +4673,15 @@
         self.assertEqual(x.nelement(), all.long().sum())

     def test_isfinite(self):
-        x = torch.Tensor([1, float('inf'), 2, float('-inf'), float('nan'), -10])
+        x = torch.Tensor([1, inf, 2, -inf, nan, -10])
         self.assertEqual(torch.isfinite(x), torch.ByteTensor([1, 0, 1, 0, 0, 1]))

     def test_isinf(self):
-        x = torch.Tensor([1, float('inf'), 2, float('-inf'), float('nan')])
+        x = torch.Tensor([1, inf, 2, -inf, nan])
        self.assertEqual(torch.isinf(x), torch.ByteTensor([0, 1, 0, 1, 0]))

     def test_isnan(self):
-        x = torch.Tensor([1, float('nan'), 2])
+        x = torch.Tensor([1, nan, 2])
         self.assertEqual(torch.isnan(x), torch.ByteTensor([0, 1, 0]))

     def test_RNGState(self):
@@ -7418,7 +7419,7 @@
         self.assertExpected(str(x), subname='negint')

         # test inf and nan
-        x = torch.tensor([4, float('inf'), 1.5, float('-inf'), 0, float('nan'), 1])
+        x = torch.tensor([4, inf, 1.5, -inf, 0, nan, 1])
         self.assertEqual(x.__repr__(), str(x))
         self.assertExpected(str(x), subname='nonfinite')

@@ -413,6 +413,7 @@ class TestFFI(TestCase):
     @unittest.skipIf(not HAS_CFFI or not HAS_CUDA, "ffi tests require cffi package")
     @unittest.skipIf(IS_WINDOWS, "ffi doesn't currently work on Windows")
     def test_gpu(self):
+        from torch.utils.cpp_extension import CUDA_HOME
         create_extension(
             name='gpulib',
             headers=[test_dir + '/ffi/src/cuda/cudalib.h'],
@@ -421,6 +422,7 @@ class TestFFI(TestCase):
             ],
             with_cuda=True,
             verbose=False,
+            include_dirs=[os.path.join(CUDA_HOME, 'include')],
         ).build()
         import gpulib
         tensor = torch.ones(2, 2).float()

@@ -41,6 +41,9 @@ while [[ $# -gt 0 ]]; do
       --full-caffe2)
          FULL_CAFFE2=1
          ;;
+      --cuda-static-link)
+         CAFFE2_STATIC_LINK_CUDA=1
+         ;;
       *)
          break
          ;;
@@ -261,6 +264,7 @@ function build_caffe2() {
       -DBUILD_SHARED_LIBS=ON \
       -DONNX_NAMESPACE=$ONNX_NAMESPACE \
       -DUSE_CUDA=$USE_CUDA \
+      -DCAFFE2_STATIC_LINK_CUDA=$CAFFE2_STATIC_LINK_CUDA \
       -DUSE_ROCM=$USE_ROCM \
       -DUSE_NNPACK=$USE_NNPACK \
       -DCUDNN_INCLUDE_DIR=$CUDNN_INCLUDE_DIR \

@@ -25,6 +25,13 @@ import sys
 PY2 = sys.version_info[0] == 2
 PY3 = sys.version_info[0] == 3

+if PY2:
+    inf = float('inf')
+    nan = float('nan')
+else:
+    import math
+    inf = math.inf
+    nan = math.nan

 if PY2:
     string_classes = basestring
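These constants exist because `float('inf')` parses a string and constructs a new float on every call, whereas `torch._six.inf` (aliasing `math.inf` on Python 3) is bound once at import time; in hot paths such as the test-suite comparison helpers the difference adds up. A quick micro-benchmark sketch (absolute timings will vary by machine):

    import timeit

    # String parsing on every call vs. a plain attribute lookup.
    print(timeit.timeit("float('inf')", number=10**6))
    print(timeit.timeit("math.inf", setup="import math", number=10**6))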

@@ -1743,8 +1743,8 @@ scatter_(dim, index, src) -> Tensor
 Writes all values from the tensor :attr:`src` into :attr:`self` at the indices
 specified in the :attr:`index` tensor. For each value in :attr:`src`, its output
-index is specified by its index in :attr:`src` for dimension != :attr:`dim` and
-by the corresponding value in :attr:`index` for dimension = :attr:`dim`.
+index is specified by its index in :attr:`src` for ``dimension != dim`` and by
+the corresponding value in :attr:`index` for ``dimension = dim``.

 For a 3-D tensor, :attr:`self` is updated as::
@@ -1754,14 +1754,14 @@ For a 3-D tensor, :attr:`self` is updated as::

 This is the reverse operation of the manner described in :meth:`~Tensor.gather`.

-:attr:`self`, :attr:`index` and :attr:`src` should have same number of
-dimensions. It is also required that `index.size(d) <= src.size(d)` for all
-dimensions `d`, and that `index.size(d) <= self.size(d)` for all dimensions
-`d != dim`.
+:attr:`self`, :attr:`index` and :attr:`src` (if it is a Tensor) should have same
+number of dimensions. It is also required that ``index.size(d) <= src.size(d)``
+for all dimensions ``d``, and that ``index.size(d) <= self.size(d)`` for all
+dimensions ``d != dim``.

 Moreover, as for :meth:`~Tensor.gather`, the values of :attr:`index` must be
-between `0` and `(self.size(dim) -1)` inclusive, and all values in a row along
-the specified dimension :attr:`dim` must be unique.
+between ``0`` and ``self.size(dim) - 1`` inclusive, and all values in a row
+along the specified dimension :attr:`dim` must be unique.

 Args:
     dim (int): the axis along which to index
@@ -1785,6 +1785,50 @@ Example::
            [ 0.0000, 0.0000, 0.0000, 1.2300]])
 """)

+add_docstr_all('scatter_add_',
+               r"""
+scatter_add_(dim, index, other) -> Tensor
+
+Adds all values from the tensor :attr:`other` into :attr:`self` at the indices
+specified in the :attr:`index` tensor in a similar fashion as
+:meth:`~torch.Tensor.scatter_`. For each value in :attr:`other`, it is added to
+an index in :attr:`self` which is specified by its index in :attr:`other`
+for ``dimension != dim`` and by the corresponding value in :attr:`index` for
+``dimension = dim``.
+
+For a 3-D tensor, :attr:`self` is updated as::
+
+    self[index[i][j][k]][j][k] += other[i][j][k]  # if dim == 0
+    self[i][index[i][j][k]][k] += other[i][j][k]  # if dim == 1
+    self[i][j][index[i][j][k]] += other[i][j][k]  # if dim == 2
+
+:attr:`self`, :attr:`index` and :attr:`other` should have same number of
+dimensions. It is also required that ``index.size(d) <= other.size(d)`` for all
+dimensions ``d``, and that ``index.size(d) <= self.size(d)`` for all dimensions
+``d != dim``.
+
+Moreover, as for :meth:`~Tensor.gather`, the values of :attr:`index` must be
+between ``0`` and ``self.size(dim) - 1`` inclusive, and all values in a row along
+the specified dimension :attr:`dim` must be unique.
+
+Args:
+    dim (int): the axis along which to index
+    index (LongTensor): the indices of elements to scatter and add
+    other (Tensor): the source elements to scatter and add
+
+Example::
+
+    >>> x = torch.rand(2, 5)
+    >>> x
+    tensor([[0.7404, 0.0427, 0.6480, 0.3806, 0.8328],
+            [0.7953, 0.2009, 0.9154, 0.6782, 0.9620]])
+    >>> torch.ones(3, 5).scatter_add_(0, torch.tensor([[0, 1, 2, 0, 0], [2, 0, 0, 1, 2]]), x)
+    tensor([[1.7404, 1.2009, 1.9154, 1.3806, 1.8328],
+            [1.0000, 1.0427, 1.0000, 1.6782, 1.0000],
+            [1.7953, 1.0000, 1.6480, 1.0000, 1.9620]])
+
+""")
+
 add_docstr_all('select',
                r"""
 select(dim, index) -> Tensor

@@ -2,6 +2,7 @@ import math
 import torch
 from functools import reduce
 from sys import float_info
+from torch._six import inf, nan

 class __PrinterOptions(object):
@@ -50,7 +51,7 @@ def set_printoptions(
         PRINT_OPTS.linewidth = 80
     elif profile == "full":
         PRINT_OPTS.precision = 4
-        PRINT_OPTS.threshold = float('inf')
+        PRINT_OPTS.threshold = inf
         PRINT_OPTS.edgeitems = 3
         PRINT_OPTS.linewidth = 80
@@ -101,8 +102,8 @@ class _Formatter(object):
         else:
             copy_abs = copy.abs()
-            pos_inf_mask = copy_abs.eq(float('inf'))
-            neg_inf_mask = copy_abs.eq(float('-inf'))
+            pos_inf_mask = copy_abs.eq(inf)
+            neg_inf_mask = copy_abs.eq(-inf)
             nan_mask = copy_abs.ne(copy)
             invalid_value_mask = pos_inf_mask + neg_inf_mask + nan_mask
             if invalid_value_mask.all():
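A reminder of the two comparison idioms used for non-finite handling here and elsewhere in this changeset: NaN is the only floating-point value unequal to itself, and the infinities are ordinary values that compare equal to `inf`/`-inf`. A small standalone sketch, separate from the formatter code itself:

    import torch
    from torch._six import inf, nan

    t = torch.tensor([1.0, inf, -inf, nan])
    print(t.ne(t))           # nonzero only at the NaN slot: NaN != NaN
    print(t.eq(inf))         # flags +inf only
    print(t.abs().eq(inf))   # flags both infinities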

@@ -1,4 +1,5 @@
 import torch
+from torch._six import nan
 from torch.distributions import constraints
 from torch.distributions.distribution import Distribution
 from torch.distributions.utils import probs_to_logits, logits_to_probs, lazy_property, broadcast_all
@@ -72,11 +73,11 @@ class Categorical(Distribution):
     @property
     def mean(self):
-        return self.probs.new_tensor(float('nan')).expand(self._extended_shape())
+        return self.probs.new_tensor(nan).expand(self._extended_shape())

     @property
     def variance(self):
-        return self.probs.new_tensor(float('nan')).expand(self._extended_shape())
+        return self.probs.new_tensor(nan).expand(self._extended_shape())

     def sample(self, sample_shape=torch.Size()):
         sample_shape = self._extended_shape(sample_shape)

@@ -1,4 +1,5 @@
 import math
+from torch._six import inf, nan
 from numbers import Number

 import torch
@@ -37,11 +38,11 @@ class Cauchy(Distribution):
     @property
     def mean(self):
-        return self.loc.new_tensor(float('nan')).expand(self._extended_shape())
+        return self.loc.new_tensor(nan).expand(self._extended_shape())

     @property
     def variance(self):
-        return self.loc.new_tensor(float('inf')).expand(self._extended_shape())
+        return self.loc.new_tensor(inf).expand(self._extended_shape())

     def rsample(self, sample_shape=torch.Size()):
         shape = self._extended_shape(sample_shape)
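The `nan` mean and `inf` variance above encode a real property of the distribution rather than a placeholder: the Cauchy density's tails are so heavy that its moment integrals diverge. In LaTeX:

    % For X ~ Cauchy(x_0, \gamma):
    \mathbb{E}\bigl[\lvert X \rvert\bigr]
      = \int_{-\infty}^{\infty} \lvert x \rvert \,
        \frac{\gamma/\pi}{(x - x_0)^2 + \gamma^2} \, dx = \infty
    % hence the mean is undefined (NaN) and the variance is infinite.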

@@ -1,6 +1,7 @@
 from numbers import Number
 import torch
 import math
+from torch._six import nan
 from torch.distributions import constraints
 from torch.distributions.distribution import Distribution
 from torch.distributions.gamma import Gamma
@@ -39,13 +40,13 @@ class FisherSnedecor(Distribution):
     @property
     def mean(self):
         df2 = self.df2.clone()
-        df2[df2 <= 2] = float('nan')
+        df2[df2 <= 2] = nan
         return df2 / (df2 - 2)

     @property
     def variance(self):
         df2 = self.df2.clone()
-        df2[df2 <= 4] = float('nan')
+        df2[df2 <= 4] = nan
         return 2 * df2.pow(2) * (self.df1 + df2 - 2) / (self.df1 * (df2 - 2).pow(2) * (df2 - 4))

     def rsample(self, sample_shape=torch.Size(())):

@@ -1,5 +1,6 @@
 import math

+from torch._six import inf
 from torch.distributions import constraints
 from torch.distributions.transforms import AbsTransform
 from torch.distributions.cauchy import Cauchy
@@ -44,7 +45,7 @@ class HalfCauchy(TransformedDistribution):
     def log_prob(self, value):
         log_prob = self.base_dist.log_prob(value) + math.log(2)
-        log_prob[value.expand(log_prob.shape) < 0] = -float('inf')
+        log_prob[value.expand(log_prob.shape) < 0] = -inf
         return log_prob

     def cdf(self, value):

@@ -1,5 +1,6 @@
 import math

+from torch._six import inf
 from torch.distributions import constraints
 from torch.distributions.transforms import AbsTransform
 from torch.distributions.normal import Normal
@@ -44,7 +45,7 @@ class HalfNormal(TransformedDistribution):
     def log_prob(self, value):
         log_prob = self.base_dist.log_prob(value) + math.log(2)
-        log_prob[value.expand(log_prob.shape) < 0] = -float('inf')
+        log_prob[value.expand(log_prob.shape) < 0] = -inf
         return log_prob

     def cdf(self, value):

@@ -3,6 +3,7 @@ import warnings
 from functools import total_ordering

 import torch
+from torch._six import inf

 from .bernoulli import Bernoulli
 from .beta import Beta
@@ -113,7 +114,7 @@ def _infinite_like(tensor):
     """
     Helper function for obtaining infinite KL Divergence throughout
     """
-    return tensor.new_tensor(float('inf')).expand_as(tensor)
+    return tensor.new_tensor(inf).expand_as(tensor)

@@ -173,10 +174,10 @@ _euler_gamma = 0.57721566490153286060
 @register_kl(Bernoulli, Bernoulli)
 def _kl_bernoulli_bernoulli(p, q):
     t1 = p.probs * (p.probs / q.probs).log()
-    t1[q.probs == 0] = float('inf')
+    t1[q.probs == 0] = inf
     t1[p.probs == 0] = 0
     t2 = (1 - p.probs) * ((1 - p.probs) / (1 - q.probs)).log()
-    t2[q.probs == 1] = float('inf')
+    t2[q.probs == 1] = inf
     t2[p.probs == 1] = 0
     return t1 + t2
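For reference, `t1` and `t2` above are the two terms of the closed-form Bernoulli KL divergence, and the masked assignments implement the standard conventions 0 log(0/q) = 0 and p log(p/0) = inf for p > 0:

    D_{\mathrm{KL}}\bigl(\mathrm{Ber}(p) \,\Vert\, \mathrm{Ber}(q)\bigr)
      = \underbrace{p \log \tfrac{p}{q}}_{t_1}
      + \underbrace{(1 - p) \log \tfrac{1 - p}{1 - q}}_{t_2}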
@@ -208,7 +209,7 @@ def _kl_binomial_binomial(p, q):
 @register_kl(Categorical, Categorical)
 def _kl_categorical_categorical(p, q):
     t = p.probs * (p.logits - q.logits)
-    t[q.probs == 0] = float('inf')
+    t[q.probs == 0] = inf
     t[p.probs == 0] = 0
     return t.sum(-1)
@@ -322,7 +323,7 @@ def _kl_pareto_pareto(p, q):
     t1 = q.alpha * scale_ratio.log()
     t2 = -alpha_ratio.log()
     result = t1 + t2 + alpha_ratio - 1
-    result[p.support.lower_bound < q.support.lower_bound] = float('inf')
+    result[p.support.lower_bound < q.support.lower_bound] = inf
     return result
@@ -346,7 +347,7 @@ def _kl_transformed_transformed(p, q):
 @register_kl(Uniform, Uniform)
 def _kl_uniform_uniform(p, q):
     result = ((q.high - q.low) / (p.high - p.low)).log()
-    result[(q.low > p.low) | (q.high < p.high)] = float('inf')
+    result[(q.low > p.low) | (q.high < p.high)] = inf
     return result
@@ -392,7 +393,7 @@ def _kl_beta_normal(p, q):
 @register_kl(Beta, Uniform)
 def _kl_beta_uniform(p, q):
     result = -p.entropy() + (q.high - q.low).log()
-    result[(q.low > p.support.lower_bound) | (q.high < p.support.upper_bound)] = float('inf')
+    result[(q.low > p.support.lower_bound) | (q.high < p.support.upper_bound)] = inf
     return result
@@ -543,7 +544,7 @@ def _kl_pareto_exponential(p, q):
     t2 = p.alpha.reciprocal()
     t3 = p.alpha * scale_rate_prod / (p.alpha - 1)
     result = t1 - t2 + t3 - 1
-    result[p.alpha <= 1] = float('inf')
+    result[p.alpha <= 1] = inf
     return result
@@ -555,7 +556,7 @@ def _kl_pareto_gamma(p, q):
     t3 = (1 - q.concentration) * common_term
     t4 = q.rate * p.alpha * p.scale / (p.alpha - 1)
     result = t1 + t2 + t3 + t4 - 1
-    result[p.alpha <= 1] = float('inf')
+    result[p.alpha <= 1] = inf
     return result

 # TODO: Add Pareto-Laplace KL Divergence
@@ -570,7 +571,7 @@ def _kl_pareto_normal(p, q):
     t3 = p.alpha * common_term.pow(2) / (p.alpha - 2)
     t4 = (p.alpha * common_term - q.loc).pow(2)
     result = t1 - t2 + (t3 + t4) / var_normal - 1
-    result[p.alpha <= 2] = float('inf')
+    result[p.alpha <= 2] = inf
     return result
@@ -588,14 +589,14 @@ def _kl_uniform_beta(p, q):
     t3 = (q.concentration0 - 1) * (_x_log_x((1 - p.high)) - _x_log_x((1 - p.low)) + common_term) / common_term
     t4 = q.concentration1.lgamma() + q.concentration0.lgamma() - (q.concentration1 + q.concentration0).lgamma()
     result = t3 + t4 - t1 - t2
-    result[(p.high > q.support.upper_bound) | (p.low < q.support.lower_bound)] = float('inf')
+    result[(p.high > q.support.upper_bound) | (p.low < q.support.lower_bound)] = inf
     return result

 @register_kl(Uniform, Exponential)
 def _kl_uniform_exponetial(p, q):
     result = q.rate * (p.high + p.low) / 2 - ((p.high - p.low) * q.rate).log()
-    result[p.low < q.support.lower_bound] = float('inf')
+    result[p.low < q.support.lower_bound] = inf
     return result
@@ -607,7 +608,7 @@ def _kl_uniform_gamma(p, q):
     t3 = (1 - q.concentration) * (_x_log_x(p.high) - _x_log_x(p.low) - common_term) / common_term
     t4 = q.rate * (p.high + p.low) / 2
     result = -t1 + t2 + t3 + t4
-    result[p.low < q.support.lower_bound] = float('inf')
+    result[p.low < q.support.lower_bound] = inf
     return result
@@ -638,5 +639,5 @@ def _kl_uniform_pareto(p, q):
     t1 = (q.alpha * q.scale.pow(q.alpha) * (support_uniform)).log()
     t2 = (_x_log_x(p.high) - _x_log_x(p.low) - support_uniform) / support_uniform
     result = t2 * (q.alpha + 1) - t1
-    result[p.low < q.support.lower_bound] = float('inf')
+    result[p.low < q.support.lower_bound] = inf
     return result

@@ -1,4 +1,5 @@
 import torch
+from torch._six import inf
 from torch.distributions.distribution import Distribution
 from torch.distributions import Categorical
 from numbers import Number
@@ -93,6 +94,6 @@ class Multinomial(Distribution):
 logits, value = broadcast_all(self.logits.clone(), value)
 log_factorial_n = torch.lgamma(value.sum(-1) + 1)
 log_factorial_xs = torch.lgamma(value + 1).sum(-1)
-logits[(value == 0) & (logits == -float('inf'))] = 0
+logits[(value == 0) & (logits == -inf)] = 0
 log_powers = (logits * value).sum(-1)
 return log_factorial_n - log_factorial_xs + log_powers
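
Note: zeroing logits where the count is 0 and the logit is -inf matters because IEEE arithmetic gives 0 * -inf = nan, which would otherwise poison the sum in log_powers. A quick illustration (toy values):

    import torch
    from torch._six import inf

    logits = torch.tensor([-inf, -0.7])
    value = torch.tensor([0., 3.])
    print((logits * value).sum())  # nan: 0 * -inf is nan
    logits[(value == 0) & (logits == -inf)] = 0
    print((logits * value).sum())  # -2.1, the intended log-power term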

View File

@@ -1,5 +1,6 @@
 from numbers import Number
 import torch
+from torch._six import inf, nan
 import math
 from torch.distributions import constraints
 from torch.distributions.distribution import Distribution
@@ -27,15 +28,15 @@ class StudentT(Distribution):
 @property
 def mean(self):
 m = self.loc.clone()
-m[self.df <= 1] = float('nan')
+m[self.df <= 1] = nan
 return m
 @property
 def variance(self):
 m = self.df.clone()
 m[self.df > 2] = self.scale[self.df > 2].pow(2) * self.df[self.df > 2] / (self.df[self.df > 2] - 2)
-m[(self.df <= 2) & (self.df > 1)] = float('inf')
-m[self.df <= 1] = float('nan')
+m[(self.df <= 2) & (self.df > 1)] = inf
+m[self.df <= 1] = nan
 return m
 def __init__(self, df, loc=0., scale=1., validate_args=None):
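
Note: these properties encode the textbook Student's t moments: the mean is undefined for df <= 1 (hence nan), and the variance is scale^2 * df / (df - 2) for df > 2, infinite for 1 < df <= 2, and undefined for df <= 1. A quick check (illustrative values):

    import torch
    from torch.distributions import StudentT

    d = StudentT(df=torch.tensor([0.5, 1.5, 3.0]))
    print(d.mean)      # tensor([nan, 0., 0.])
    print(d.variance)  # tensor([nan, inf, 3.])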

View File

@@ -1,5 +1,6 @@
 import torch
 import torch.nn.functional as F
+from torch._six import inf
 from operator import mul
 from functools import reduce
 import math
@@ -155,7 +156,7 @@ def isfinite(tensor):
 """
 if not isinstance(tensor, torch.Tensor):
 raise ValueError("The argument is not a tensor", str(tensor))
-return (tensor == tensor) & (tensor.abs() != float('inf'))
+return (tensor == tensor) & (tensor.abs() != inf)
 def isinf(tensor):
@@ -174,7 +175,7 @@ def isinf(tensor):
 """
 if not isinstance(tensor, torch.Tensor):
 raise ValueError("The argument is not a tensor", str(tensor))
-return tensor.abs() == float('inf')
+return tensor.abs() == inf
 def stft(input, n_fft, hop_length=None, win_length=None, window=None,
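
Note: isfinite leans on the IEEE-754 rule that nan is the only value unequal to itself, so (tensor == tensor) is False exactly at nan positions; anding that with a finite-magnitude test gives finiteness. Illustration (toy tensor):

    import torch

    t = torch.tensor([1.0, float('inf'), float('nan')])
    print(t == t)                                # [1, 1, 0]: False only at nan
    print((t == t) & (t.abs() != float('inf')))  # [1, 0, 0]: finite entries only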

View File

@@ -1,4 +1,5 @@
 import torch
+from torch._six import inf
 from .Module import Module
 from .utils import clear
@@ -34,7 +35,7 @@ class Normalize(Module):
 self._output.resize_as_(input)
 # specialization for the infinity norm
-if self.p == float('inf'):
+if self.p == inf:
 if not self._indices:
 self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \
 else torch.LongTensor()
@@ -72,7 +73,7 @@ class Normalize(Module):
 self.cross = input.new()
 # compute diagonal term with gradOutput
 self._gradInput.resize_(n, d)
-if self.p == float('inf'):
+if self.p == inf:
 # specialization for the inf case
 torch.mul(self.norm.view(n, 1, 1).expand(n, d, 1), gradOutput, out=self._gradInput)
 self.buffer.resize_as_(input).zero_()
@@ -113,7 +114,7 @@ class Normalize(Module):
 self._gradInput.add_(-1, self.buffer)
 # reuse cross buffer for normalization
-if self.p == float('inf'):
+if self.p == inf:
 torch.mul(self.norm, self.norm, out=self.cross)
 else:
 torch.mul(self.normp, self.norm, out=self.cross)
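
Note: the repeated p == inf branches exist because the L-infinity norm is max(|x_i|) rather than a power sum, so both the forward normalization and its backward pass take a different shape. A minimal sketch of the forward specialization only (illustrative, not this module's exact code):

    import torch

    def linf_normalize(x, eps=1e-10):
        # L-inf norm per row: the largest absolute entry
        norm, _ = x.abs().max(dim=1, keepdim=True)
        return x / (norm + eps)

    x = torch.tensor([[3.0, -4.0], [0.5, 0.25]])
    print(linf_normalize(x))  # each row scaled so max |entry| is ~1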

View File

@@ -1,10 +1,11 @@
 import math
 INFINITY = float('inf')
+NAN = float('nan')
 def sqrt_nothrow(x):
-return math.sqrt(x) if x >= 0 else float('nan')
+return math.sqrt(x) if x >= 0 else NAN
 def cg(opfunc, x, config, state=None):
@@ -145,7 +146,7 @@ def cg(opfunc, x, config, state=None):
 A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3)
 B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2)
 _denom = (B + sqrt_nothrow(B * B - A * d2 * z3 * z3))
-z2 = -d2 * z3 * z3 / _denom if _denom != 0 else float('nan')
+z2 = -d2 * z3 * z3 / _denom if _denom != 0 else NAN
 if z2 != z2 or z2 == INFINITY or z2 == -INFINITY or z2 < 0:
 if limit < -0.5:
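
Note: sqrt_nothrow and the _denom guard convert domain errors and division by zero into nan instead of raising, and the follow-up test z2 != z2 relies on nan being the only value unequal to itself, so one branch catches every failure mode of the cubic interpolation step. Illustration:

    import math

    NAN = float('nan')

    def sqrt_nothrow(x):
        # negative input yields nan rather than raising ValueError
        return math.sqrt(x) if x >= 0 else NAN

    z2 = sqrt_nothrow(-1.0)
    print(z2 != z2)  # True: the classic pre-math.isnan nan test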

View File

@@ -523,7 +523,7 @@ class BCEWithLogitsLoss(_Loss):
 :math:`p_n > 1` increases the recall, :math:`p_n < 1` increases the precision.
 For example, if a dataset contains 100 positive and 300 negative examples of a single class,
-then `pos_weight` for the class should be equal to math:`\frac{300}{100}=3`.
+then `pos_weight` for the class should be equal to :math:`\frac{300}{100}=3`.
 The loss would act as if the dataset contains math:`3\times 100=300` positive examples.
 Args:
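
Note: pos_weight rescales the positive term of the binary cross-entropy per class, which is how the docstring's 100-positive/300-negative example yields a weight of 300/100 = 3. A usage sketch (assuming the pos_weight argument available in this version of BCEWithLogitsLoss):

    import torch
    import torch.nn as nn

    # 100 positives vs. 300 negatives -> pos_weight = 300 / 100 = 3
    criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor([3.0]))
    logits = torch.randn(8, 1)
    targets = torch.randint(0, 2, (8, 1)).float()
    print(criterion(logits, targets))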

View File

@@ -691,7 +691,7 @@ class _LPPoolNd(Module):
 self.ceil_mode = ceil_mode
 def extra_repr(self):
-return 'norm_type={norm_type}, kernel_size{kernel_size}, stride={stride}, ' \
+return 'norm_type={norm_type}, kernel_size={kernel_size}, stride={stride}, ' \
 'ceil_mode={ceil_mode}'.format(**self.__dict__)
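
Note: the missing '=' produced runs like 'kernel_size3' in printed module summaries. With the fix, the repr reads as expected (sketch; toy arguments):

    import torch.nn as nn

    print(nn.LPPool2d(norm_type=2, kernel_size=3, stride=2))
    # before: LPPool2d(norm_type=2, kernel_size3, stride=2, ceil_mode=False)
    # after:  LPPool2d(norm_type=2, kernel_size=3, stride=2, ceil_mode=False)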

View File

@@ -1,5 +1,6 @@
 import warnings
 import torch
+from torch._six import inf
 def clip_grad_norm_(parameters, max_norm, norm_type=2):
@@ -23,7 +24,7 @@ def clip_grad_norm_(parameters, max_norm, norm_type=2):
 parameters = list(filter(lambda p: p.grad is not None, parameters))
 max_norm = float(max_norm)
 norm_type = float(norm_type)
-if norm_type == float('inf'):
+if norm_type == inf:
 total_norm = max(p.grad.data.abs().max() for p in parameters)
 else:
 total_norm = 0
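
Note: the inf norm is a special case because the L-infinity norm of all gradients concatenated is simply the largest absolute entry, while finite norms need a power sum across parameters. A self-contained sketch of the whole clipping computation under that reading (approximate; the else-branch continuation is elided in the diff above):

    import torch
    from torch._six import inf

    def clip_grad_norm_sketch(parameters, max_norm, norm_type=2.0):
        parameters = [p for p in parameters if p.grad is not None]
        if norm_type == inf:
            total_norm = max(p.grad.data.abs().max() for p in parameters)
        else:
            total_norm = sum(p.grad.data.norm(norm_type) ** norm_type
                             for p in parameters) ** (1.0 / norm_type)
        clip_coef = max_norm / (total_norm + 1e-6)
        if clip_coef < 1:
            for p in parameters:
                p.grad.data.mul_(clip_coef)
        return total_norm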

View File

@@ -1,4 +1,6 @@
 import math
+import torch
+from torch._six import inf
 from bisect import bisect_right
 from functools import partial
 from .optimizer import Optimizer
@@ -367,9 +369,9 @@ class ReduceLROnPlateau(object):
 raise ValueError('threshold mode ' + threshold_mode + ' is unknown!')
 if mode == 'min':
-self.mode_worse = float('inf')
+self.mode_worse = inf
 else: # mode == 'max':
-self.mode_worse = (-float('inf'))
+self.mode_worse = -inf
 self.is_better = partial(self._cmp, mode, threshold_mode, threshold)
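
Note: mode_worse is the scheduler's "worst possible metric" sentinel — +inf when minimizing and -inf when maximizing — so the first value passed to step() always counts as an improvement. Usage sketch (toy loop):

    import torch
    from torch.optim import SGD
    from torch.optim.lr_scheduler import ReduceLROnPlateau

    opt = SGD([torch.nn.Parameter(torch.zeros(1))], lr=0.1)
    sched = ReduceLROnPlateau(opt, mode='min', patience=2)
    for val_loss in [1.0, 0.9, 0.9, 0.9, 0.9]:
        sched.step(val_loss)  # lr drops after `patience` epochs without improvement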

View File

@@ -65,6 +65,10 @@ CUDA_HOME = _find_cuda_home()
 BUILT_FROM_SOURCE_VERSION_PATTERN = re.compile(r'\d+\.\d+\.\d+\w+\+\w+')
+def is_binary_build():
+return not BUILT_FROM_SOURCE_VERSION_PATTERN.match(torch.version.__version__)
 def check_compiler_abi_compatibility(compiler):
 '''
 Verifies that the given compiler is ABI-compatible with PyTorch.
@@ -77,7 +81,7 @@ def check_compiler_abi_compatibility(compiler):
 False if the compiler is (likely) ABI-incompatible with PyTorch,
 else True.
 '''
-if BUILT_FROM_SOURCE_VERSION_PATTERN.match(torch.version.__version__):
+if not is_binary_build():
 return True
 try:
 check_cmd = '{}' if sys.platform == 'win32' else '{} --version'
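
Note: the version-pattern heuristic treats any version string carrying a '+githash'-style suffix (the form source builds produce) as built-from-source, and everything else as a binary build. A quick check of the regex itself (illustrative version strings):

    import re

    BUILT_FROM_SOURCE_VERSION_PATTERN = re.compile(r'\d+\.\d+\.\d+\w+\+\w+')
    print(bool(BUILT_FROM_SOURCE_VERSION_PATTERN.match('0.5.0a0+ab12cd')))  # True: source build
    print(bool(BUILT_FROM_SOURCE_VERSION_PATTERN.match('0.4.1')))           # False: binary build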
@@ -134,6 +138,7 @@ class BuildExtension(build_ext):
 self._check_abi()
 for extension in self.extensions:
 self._define_torch_extension_name(extension)
+self._add_gnu_abi_flag_if_binary(extension)
 # Register .cu and .cuh as valid source extensions.
 self.compiler.src_extensions += ['.cu', '.cuh']
@@ -266,6 +271,21 @@ class BuildExtension(build_ext):
 else:
 extension.extra_compile_args.append(define)
+def _add_gnu_abi_flag_if_binary(self, extension):
+# If the version string looks like a binary build,
+# we know that PyTorch was compiled with gcc 4.9.2.
+# if the extension is compiled with gcc >= 5.1,
+# then we have to define _GLIBCXX_USE_CXX11_ABI=0
+# so that the std::string in the API is resolved to
+# non-C++11 symbols
+define = '-D_GLIBCXX_USE_CXX11_ABI=0'
+if is_binary_build():
+if isinstance(extension.extra_compile_args, dict):
+for args in extension.extra_compile_args.values():
+args.append(define)
+else:
+extension.extra_compile_args.append(define)
 def CppExtension(name, sources, *args, **kwargs):
 '''
@@ -785,6 +805,9 @@ def _write_ninja_file(path,
 common_cflags = ['-DTORCH_EXTENSION_NAME={}'.format(name)]
 common_cflags += ['-I{}'.format(include) for include in includes]
+if is_binary_build():
+common_cflags += ['-D_GLIBCXX_USE_CXX11_ABI=0']
 cflags = common_cflags + ['-fPIC', '-std=c++11'] + extra_cflags
 if sys.platform == 'win32':
 from distutils.spawn import _nt_quote_args
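
Note: _GLIBCXX_USE_CXX11_ABI=0 asks libstdc++ (gcc >= 5.1) to emit the old pre-C++11 ABI for std::string and friends, so an extension compiled with a modern gcc can still link against binary PyTorch wheels built with gcc 4.9.2; these two hunks make both the setuptools and the ninja/JIT build paths add the define automatically. Before this change a user had to pass it by hand, roughly like this (setup.py sketch; names illustrative):

    from setuptools import setup
    from torch.utils.cpp_extension import CppExtension, BuildExtension

    setup(
        name='my_extension',
        ext_modules=[CppExtension(
            'my_extension',
            ['my_extension.cpp'],
            extra_compile_args=['-D_GLIBCXX_USE_CXX11_ABI=0'],
        )],
        cmdclass={'build_ext': BuildExtension},
    )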