mirror of
				https://github.com/pytorch/pytorch.git
				synced 2025-10-26 00:24:53 +08:00 
			
		
		
		
	Compare commits
	
		
			17 Commits
		
	
	
		
			v1.12.0-rc
			...
			v0.4.1
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| a24163a95e | |||
| f08f222db3 | |||
| 8f916179f8 | |||
| 7b7e6dbfa7 | |||
| 84b8c1c357 | |||
| b595c3e9ca | |||
| 6ecc275272 | |||
| f34528a723 | |||
| 2edf053549 | |||
| 76c16a5a64 | |||
| f6fac92692 | |||
| bb60c97805 | |||
| 886a367247 | |||
| 416c8ef1d1 | |||
| 2fbbe42a30 | |||
| f07e550b08 | |||
| 3684cc4e52 | 
| @ -151,10 +151,6 @@ endif() | |||||||
| # ---[ CMake scripts + modules | # ---[ CMake scripts + modules | ||||||
| list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules) | list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules) | ||||||
|  |  | ||||||
| if (MSVC AND ${BUILD_SHARED_LIBS}) |  | ||||||
|   set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) |  | ||||||
| endif() |  | ||||||
|  |  | ||||||
| # ---[ CMake build directories | # ---[ CMake build directories | ||||||
| set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) | set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) | ||||||
| set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) | set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) | ||||||
|  | |||||||
| @ -13,7 +13,7 @@ else() | |||||||
|   cmake_dependent_option( |   cmake_dependent_option( | ||||||
|       USE_CUDNN "Use cuDNN" ON |       USE_CUDNN "Use cuDNN" ON | ||||||
|       "USE_CUDA" OFF) |       "USE_CUDA" OFF) | ||||||
|   option(ATEN_NO_TEST "Do not build ATen test binaries" OFF) |   option(ATEN_NO_TEST "Do not build ATen test binaries" ON) | ||||||
|  |  | ||||||
|   # Flag for shared dependencies |   # Flag for shared dependencies | ||||||
|   set(BUILD_ATEN ON) |   set(BUILD_ATEN ON) | ||||||
|  | |||||||
| @ -1,4 +1,5 @@ | |||||||
| #include <ATen/optional.h> | #include <ATen/optional.h> | ||||||
|  | #include <ATen/Backtrace.h> | ||||||
|  |  | ||||||
| #include <functional> | #include <functional> | ||||||
| #include <memory> | #include <memory> | ||||||
|  | |||||||
| @ -4,9 +4,11 @@ | |||||||
| #include <string> | #include <string> | ||||||
| #include <typeinfo> | #include <typeinfo> | ||||||
|  |  | ||||||
|  | #include <ATen/ATenGeneral.h> | ||||||
|  |  | ||||||
| namespace at { | namespace at { | ||||||
| /// Utility to demangle a C++ symbol name. | /// Utility to demangle a C++ symbol name. | ||||||
| std::string demangle(const char* name); | AT_API std::string demangle(const char* name); | ||||||
|  |  | ||||||
| /// Returns the printable name of the type. | /// Returns the printable name of the type. | ||||||
| template <typename T> | template <typename T> | ||||||
| @ -19,7 +21,7 @@ inline const char* demangle_type() { | |||||||
| #endif // __GXX_RTTI | #endif // __GXX_RTTI | ||||||
| } | } | ||||||
|  |  | ||||||
| std::string get_backtrace( | AT_API std::string get_backtrace( | ||||||
|     size_t frames_to_skip = 0, |     size_t frames_to_skip = 0, | ||||||
|     size_t maximum_number_of_frames = 64, |     size_t maximum_number_of_frames = 64, | ||||||
|     bool skip_python_frames = true); |     bool skip_python_frames = true); | ||||||
|  | |||||||
| @ -250,6 +250,7 @@ IF(USE_CUDA AND NOT USE_ROCM) | |||||||
|   ENDIF(USE_MAGMA) |   ENDIF(USE_MAGMA) | ||||||
|   IF ($ENV{ATEN_STATIC_CUDA}) |   IF ($ENV{ATEN_STATIC_CUDA}) | ||||||
|     list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a") |     list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a") | ||||||
|  |     list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcudart_static.a") | ||||||
|   ENDIF($ENV{ATEN_STATIC_CUDA}) |   ENDIF($ENV{ATEN_STATIC_CUDA}) | ||||||
| ENDIF() | ENDIF() | ||||||
|  |  | ||||||
| @ -405,11 +406,11 @@ ENDFOREACH() | |||||||
| INSTALL(FILES ${CMAKE_BINARY_DIR}/aten/src/ATen/Declarations.yaml | INSTALL(FILES ${CMAKE_BINARY_DIR}/aten/src/ATen/Declarations.yaml | ||||||
|   DESTINATION ${AT_INSTALL_SHARE_DIR}/ATen) |   DESTINATION ${AT_INSTALL_SHARE_DIR}/ATen) | ||||||
|  |  | ||||||
| if(ATEN_NO_TEST) | # if(ATEN_NO_TEST) | ||||||
|   message("disable test because ATEN_NO_TEST is set") | #   message("disable test because ATEN_NO_TEST is set") | ||||||
| else() | # else() | ||||||
|   add_subdirectory(test) | #   add_subdirectory(test) | ||||||
| endif() | # endif() | ||||||
|  |  | ||||||
| if (NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) | if (NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) | ||||||
|   foreach(test_src ${ATen_CPU_TEST_SRCS}) |   foreach(test_src ${ATen_CPU_TEST_SRCS}) | ||||||
|  | |||||||
| @ -3,6 +3,8 @@ | |||||||
| #include <cstdint> | #include <cstdint> | ||||||
| #include <utility> | #include <utility> | ||||||
|  |  | ||||||
|  | #include <ATen/ATenGeneral.h> | ||||||
|  |  | ||||||
| /* | /* | ||||||
| * A CUDA stream interface with no CUDA build dependency. | * A CUDA stream interface with no CUDA build dependency. | ||||||
| *  | *  | ||||||
| @ -25,27 +27,27 @@ namespace detail { | |||||||
|  |  | ||||||
| // Pointer-based API (for internal use) | // Pointer-based API (for internal use) | ||||||
| // Note: ATen/Context is preferred to work with streams safely | // Note: ATen/Context is preferred to work with streams safely | ||||||
| CUDAStreamInternals* CUDAStream_getDefaultStreamOnDevice(int64_t device); | AT_API CUDAStreamInternals* CUDAStream_getDefaultStreamOnDevice(int64_t device); | ||||||
| CUDAStreamInternals* CUDAStream_getDefaultStream(); | AT_API CUDAStreamInternals* CUDAStream_getDefaultStream(); | ||||||
|  |  | ||||||
| CUDAStreamInternals* CUDAStream_createAndRetainWithOptions(int32_t flags, int32_t priority); | AT_API CUDAStreamInternals* CUDAStream_createAndRetainWithOptions(int32_t flags, int32_t priority); | ||||||
|  |  | ||||||
| CUDAStreamInternals* CUDAStream_getAndRetainCurrentStreamOnDevice(int64_t device); | AT_API CUDAStreamInternals* CUDAStream_getAndRetainCurrentStreamOnDevice(int64_t device); | ||||||
| CUDAStreamInternals* CUDAStream_getAndRetainCurrentStream(); | AT_API CUDAStreamInternals* CUDAStream_getAndRetainCurrentStream(); | ||||||
|  |  | ||||||
| // Note: these Unsafe gets should NEVER be used and are only here for legacy | // Note: these Unsafe gets should NEVER be used and are only here for legacy | ||||||
| // purposes. Once those uses are gone they should be removed. | // purposes. Once those uses are gone they should be removed. | ||||||
| CUDAStreamInternals* CUDAStream_getCurrentStreamOnDeviceUnsafe(int64_t device); | AT_API CUDAStreamInternals* CUDAStream_getCurrentStreamOnDeviceUnsafe(int64_t device); | ||||||
| CUDAStreamInternals* CUDAStream_getCurrentStreamUnsafe(); | AT_API CUDAStreamInternals* CUDAStream_getCurrentStreamUnsafe(); | ||||||
|  |  | ||||||
| void CUDAStream_setStreamOnDevice(int64_t device, CUDAStreamInternals* internals); | AT_API void CUDAStream_setStreamOnDevice(int64_t device, CUDAStreamInternals* internals); | ||||||
| void CUDAStream_setStream(CUDAStreamInternals* internals); | AT_API void CUDAStream_setStream(CUDAStreamInternals* internals); | ||||||
|  |  | ||||||
| cudaStream_t CUDAStream_stream(CUDAStreamInternals*); | AT_API cudaStream_t CUDAStream_stream(CUDAStreamInternals*); | ||||||
| int64_t CUDAStream_device(CUDAStreamInternals*); | AT_API int64_t CUDAStream_device(CUDAStreamInternals*); | ||||||
|  |  | ||||||
| bool CUDAStream_retain(CUDAStreamInternals*); | AT_API bool CUDAStream_retain(CUDAStreamInternals*); | ||||||
| void CUDAStream_free(CUDAStreamInternals*&); | AT_API void CUDAStream_free(CUDAStreamInternals*&); | ||||||
|  |  | ||||||
| } // namespace detail | } // namespace detail | ||||||
|  |  | ||||||
| @ -64,10 +66,10 @@ struct CUDAStream { | |||||||
|   ~CUDAStream() { detail::CUDAStream_free(internals_); } |   ~CUDAStream() { detail::CUDAStream_free(internals_); } | ||||||
|  |  | ||||||
|   // Copy constructor |   // Copy constructor | ||||||
|   CUDAStream(const CUDAStream& other); |   AT_API CUDAStream(const CUDAStream& other); | ||||||
|  |  | ||||||
|   // Move constructor |   // Move constructor | ||||||
|   CUDAStream(CUDAStream&& other);   |   AT_API CUDAStream(CUDAStream&& other); | ||||||
|  |  | ||||||
|   // Assignment operator |   // Assignment operator | ||||||
|   CUDAStream& operator=(CUDAStream other) { |   CUDAStream& operator=(CUDAStream other) { | ||||||
|  | |||||||
| @ -111,8 +111,8 @@ struct Device { | |||||||
| }; | }; | ||||||
| } // namespace at | } // namespace at | ||||||
|  |  | ||||||
| std::ostream& operator<<(std::ostream& stream, at::Device::Type type); | AT_API std::ostream& operator<<(std::ostream& stream, at::Device::Type type); | ||||||
| std::ostream& operator<<(std::ostream& stream, const at::Device& device); | AT_API std::ostream& operator<<(std::ostream& stream, const at::Device& device); | ||||||
|  |  | ||||||
| namespace std { | namespace std { | ||||||
|   template<> struct hash<at::Device> |   template<> struct hash<at::Device> | ||||||
|  | |||||||
| @ -43,7 +43,7 @@ | |||||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__)        \ |       AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__)        \ | ||||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__)       \ |       AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__)       \ | ||||||
|       default:                                                                \ |       default:                                                                \ | ||||||
|         AT_ERROR("%s not implemented for '%s'", (NAME), the_type.toString()); \ |         AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \ | ||||||
|     }                                                                         \ |     }                                                                         \ | ||||||
|   }() |   }() | ||||||
|  |  | ||||||
|  | |||||||
| @ -35,8 +35,8 @@ namespace at { | |||||||
|  |  | ||||||
| namespace detail { | namespace detail { | ||||||
|  |  | ||||||
| float halfbits2float(unsigned short bits); | AT_API float halfbits2float(unsigned short bits); | ||||||
| unsigned short float2halfbits(float value); | AT_API unsigned short float2halfbits(float value); | ||||||
|  |  | ||||||
| } | } | ||||||
|  |  | ||||||
|  | |||||||
| @ -33,6 +33,8 @@ | |||||||
| #include <type_traits> | #include <type_traits> | ||||||
| #include <utility> | #include <utility> | ||||||
|  |  | ||||||
|  | #include <ATen/ATenGeneral.h> | ||||||
|  |  | ||||||
| #if __GNUG__ && __GNUC__ < 5 | #if __GNUG__ && __GNUC__ < 5 | ||||||
| #define AT_IS_TRIVIALLY_COPYABLE(T) __has_trivial_copy(T) | #define AT_IS_TRIVIALLY_COPYABLE(T) __has_trivial_copy(T) | ||||||
| #else | #else | ||||||
| @ -57,7 +59,7 @@ static inline uint64_t NextPowerOf2(uint64_t A) { | |||||||
| } | } | ||||||
|  |  | ||||||
| /// This is all the non-templated stuff common to all SmallVectors. | /// This is all the non-templated stuff common to all SmallVectors. | ||||||
| class SmallVectorBase { | class AT_API SmallVectorBase { | ||||||
| protected: | protected: | ||||||
|   void *BeginX, *EndX, *CapacityX; |   void *BeginX, *EndX, *CapacityX; | ||||||
|  |  | ||||||
|  | |||||||
| @ -5,7 +5,7 @@ | |||||||
| #include "ATen/Error.h" | #include "ATen/Error.h" | ||||||
|  |  | ||||||
| namespace at { | namespace at { | ||||||
| struct SparseTensorImpl : public TensorImpl { | struct AT_API SparseTensorImpl : public TensorImpl { | ||||||
|   // Stored in COO format, indices + values. |   // Stored in COO format, indices + values. | ||||||
|  |  | ||||||
|   // Ideal INVARIANTS: |   // Ideal INVARIANTS: | ||||||
|  | |||||||
| @ -19,7 +19,7 @@ namespace at { | |||||||
| /// `torch::TensorOptions` subclass of this `TensorOptions`, which changes | /// `torch::TensorOptions` subclass of this `TensorOptions`, which changes | ||||||
| /// `type()` to return a variable type instead of a tensor type, such that | /// `type()` to return a variable type instead of a tensor type, such that | ||||||
| /// variables are created inside factory methods, instead of tensors. | /// variables are created inside factory methods, instead of tensors. | ||||||
| struct TensorOptions { | struct AT_API TensorOptions { | ||||||
|   TensorOptions() : TensorOptions(/*use_thread_local_default_options=*/true) {} |   TensorOptions() : TensorOptions(/*use_thread_local_default_options=*/true) {} | ||||||
|  |  | ||||||
|   /// Constructs the `TensorOptions` with defaults taken from the thread local |   /// Constructs the `TensorOptions` with defaults taken from the thread local | ||||||
|  | |||||||
| @ -143,7 +143,7 @@ static inline ${return_type} ${api_name}(${formals}) { | |||||||
| """) | """) | ||||||
| # add a native declaration for a native function | # add a native declaration for a native function | ||||||
| NATIVE_DECLARATION = CodeTemplate("""\ | NATIVE_DECLARATION = CodeTemplate("""\ | ||||||
| ${return_type} ${native_type_method_dispatch}(${formals_with_defaults}); | AT_API ${return_type} ${native_type_method_dispatch}(${formals_with_defaults}); | ||||||
| """) | """) | ||||||
|  |  | ||||||
| # special method definition for factory functions in Functions.h | # special method definition for factory functions in Functions.h | ||||||
|  | |||||||
| @ -35,11 +35,14 @@ | |||||||
| #ifdef _WIN32 | #ifdef _WIN32 | ||||||
| # if defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS) | # if defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS) | ||||||
| #  define TH_API TH_EXTERNC __declspec(dllexport) | #  define TH_API TH_EXTERNC __declspec(dllexport) | ||||||
|  | #  define TH_CPP_API extern __declspec(dllexport) | ||||||
| # else | # else | ||||||
| #  define TH_API TH_EXTERNC __declspec(dllimport) | #  define TH_API TH_EXTERNC __declspec(dllimport) | ||||||
|  | #  define TH_CPP_API extern __declspec(dllimport) | ||||||
| # endif | # endif | ||||||
| #else | #else | ||||||
| # define TH_API TH_EXTERNC | # define TH_API TH_EXTERNC | ||||||
|  | # define TH_CPP_API extern | ||||||
| #endif | #endif | ||||||
|  |  | ||||||
| #ifdef _WIN32 | #ifdef _WIN32 | ||||||
|  | |||||||
| @ -69,18 +69,18 @@ TH_API THStorage* THStorage_newWithSize(at::ScalarType scalar_type, ptrdiff_t si | |||||||
| TH_API THStorage* THStorage_newWithAllocator(at::ScalarType scalar_type, ptrdiff_t size, | TH_API THStorage* THStorage_newWithAllocator(at::ScalarType scalar_type, ptrdiff_t size, | ||||||
|                                              at::Allocator *allocator); |                                              at::Allocator *allocator); | ||||||
|  |  | ||||||
| ptrdiff_t THStorage_size(const THStorage *self); | TH_API ptrdiff_t THStorage_size(const THStorage *self); | ||||||
| size_t THStorage_elementSize(); | TH_API size_t THStorage_elementSize(); | ||||||
| THStorage* THStorage_newWithMapping(at::ScalarType scalar_type, const char *filename, ptrdiff_t size, int flags); | TH_API THStorage* THStorage_newWithMapping(at::ScalarType scalar_type, const char *filename, ptrdiff_t size, int flags); | ||||||
| void THStorage_setFlag(THStorage *storage, const char flag); | TH_API void THStorage_setFlag(THStorage *storage, const char flag); | ||||||
| void THStorage_clearFlag(THStorage *storage, const char flag); | TH_API void THStorage_clearFlag(THStorage *storage, const char flag); | ||||||
| void THStorage_retain(THStorage *storage); | TH_API void THStorage_retain(THStorage *storage); | ||||||
| THStorage* THStorage_newWithDataAndAllocator(at::ScalarType scalar_type, | TH_API THStorage* THStorage_newWithDataAndAllocator(at::ScalarType scalar_type, | ||||||
|                                              at::DataPtr&& data, ptrdiff_t size, |                                                     at::DataPtr&& data, ptrdiff_t size, | ||||||
|                                              at::Allocator* allocator); |                                                     at::Allocator* allocator); | ||||||
| void THStorage_resize(THStorage *storage, ptrdiff_t size); | TH_API void THStorage_resize(THStorage *storage, ptrdiff_t size); | ||||||
| void THStorage_swap(THStorage *storage1, THStorage *storage2); | TH_API void THStorage_swap(THStorage *storage1, THStorage *storage2); | ||||||
|  |  | ||||||
| void THStorage_weakRetain(THStorage *weak_storage); | TH_API void THStorage_weakRetain(THStorage *weak_storage); | ||||||
| void THStorage_weakFree(THStorage *weak_storage); | TH_API void THStorage_weakFree(THStorage *weak_storage); | ||||||
| THStorage* THStorage_weakLock(THStorage *weak_storage); | TH_API THStorage* THStorage_weakLock(THStorage *weak_storage); | ||||||
|  | |||||||
| @ -83,5 +83,5 @@ struct THTensor | |||||||
| #include "THGenerateAllTypes.h" | #include "THGenerateAllTypes.h" | ||||||
|  |  | ||||||
| TH_API void THTensor_free(THTensor *self); | TH_API void THTensor_free(THTensor *self); | ||||||
| at::optional<std::vector<int64_t>> THTensor_compute_stride(at::IntList oldshape, at::IntList oldstride, | TH_CPP_API at::optional<std::vector<int64_t>> THTensor_compute_stride(at::IntList oldshape, at::IntList oldstride, | ||||||
|                                                            at::IntList newshape); |                                                                       at::IntList newshape); | ||||||
|  | |||||||
| @ -6,9 +6,9 @@ static inline void THNN_(Col2Im_shapeCheck)( | |||||||
|                          THCState *state, |                          THCState *state, | ||||||
|                          THCTensor *input, |                          THCTensor *input, | ||||||
|                          THCTensor *gradOutput, |                          THCTensor *gradOutput, | ||||||
|                          int outputHeight, int outputWidth, |                          int64_t outputHeight, int64_t outputWidth, | ||||||
|                          int kH, int kW, int dH, int dW, |                          int64_t kH, int64_t kW, int64_t dH, int64_t dW, | ||||||
|                          int padH, int padW, int sH, int sW) { |                          int64_t padH, int64_t padW, int64_t sH, int64_t sW) { | ||||||
|  |  | ||||||
|   THArgCheck(kW > 0 && kH > 0, 6, |   THArgCheck(kW > 0 && kH > 0, 6, | ||||||
|              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); |              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); | ||||||
| @ -17,7 +17,7 @@ static inline void THNN_(Col2Im_shapeCheck)( | |||||||
|   THArgCheck(dW > 0 && dH > 0, 8, |   THArgCheck(dW > 0 && dH > 0, 8, | ||||||
|              "dilation should be greater than zero, but got dH: %d dW: %d", dH, dW); |              "dilation should be greater than zero, but got dH: %d dW: %d", dH, dW); | ||||||
|  |  | ||||||
|   int ndim = THCTensor_(nDimension)(state, input); |   int64_t ndim = THCTensor_(nDimension)(state, input); | ||||||
|   THCUNN_argCheck(state, !input->is_empty() && (ndim == 2 || ndim == 3), 2, input, |   THCUNN_argCheck(state, !input->is_empty() && (ndim == 2 || ndim == 3), 2, input, | ||||||
|                   "Expected non-empty 2D or 3D input tensor, but got input of shape %s"); |                   "Expected non-empty 2D or 3D input tensor, but got input of shape %s"); | ||||||
|  |  | ||||||
| @ -54,11 +54,11 @@ void THNN_(Col2Im_updateOutput)( | |||||||
|            THCState *state, |            THCState *state, | ||||||
|            THCTensor *input, |            THCTensor *input, | ||||||
|            THCTensor *output, |            THCTensor *output, | ||||||
|            int outputHeight, int outputWidth, |            int64_t outputHeight, int64_t outputWidth, | ||||||
|            int kH, int kW, |            int64_t kH, int64_t kW, | ||||||
|            int dH, int dW, |            int64_t dH, int64_t dW, | ||||||
|            int padH, int padW, |            int64_t padH, int64_t padW, | ||||||
|            int sH, int sW) { |            int64_t sH, int64_t sW) { | ||||||
|  |  | ||||||
|   THCUNN_assertSameGPU(state, 2, input, output); |   THCUNN_assertSameGPU(state, 2, input, output); | ||||||
|  |  | ||||||
| @ -84,10 +84,10 @@ void THNN_(Col2Im_updateOutput)( | |||||||
|   THCTensor *input_n = THCTensor_(new)(state); |   THCTensor *input_n = THCTensor_(new)(state); | ||||||
|   THCTensor *output_n = THCTensor_(new)(state); |   THCTensor *output_n = THCTensor_(new)(state); | ||||||
|  |  | ||||||
|   int height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; |   int64_t height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||||
|   int width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; |   int64_t width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||||
|  |  | ||||||
|   for (int elt = 0; elt < batchSize; elt++) { |   for (int64_t elt = 0; elt < batchSize; elt++) { | ||||||
|     THCTensor_(select)(state, input_n, input, 0, elt); |     THCTensor_(select)(state, input_n, input, 0, elt); | ||||||
|     THCTensor_(select)(state, output_n, output, 0, elt); |     THCTensor_(select)(state, output_n, output, 0, elt); | ||||||
|  |  | ||||||
| @ -116,10 +116,10 @@ void THNN_(Col2Im_updateGradInput)( | |||||||
|            THCState *state, |            THCState *state, | ||||||
|            THCTensor *gradOutput, |            THCTensor *gradOutput, | ||||||
|            THCTensor *gradInput, |            THCTensor *gradInput, | ||||||
|            int kH, int kW, |            int64_t kH, int64_t kW, | ||||||
|            int dH, int dW, |            int64_t dH, int64_t dW, | ||||||
|            int padH, int padW, |            int64_t padH, int64_t padW, | ||||||
|            int sH, int sW) { |            int64_t sH, int64_t sW) { | ||||||
|  |  | ||||||
|   THNN_(Im2Col_updateOutput)(state, gradOutput, gradInput, |   THNN_(Im2Col_updateOutput)(state, gradOutput, gradInput, | ||||||
|                              kH, kW, dH, dW, padH, padW, sH, sW); |                              kH, kW, dH, dW, padH, padW, sH, sW); | ||||||
|  | |||||||
| @ -6,8 +6,8 @@ static inline void THNN_(Im2Col_shapeCheck)( | |||||||
|                          THCState *state, |                          THCState *state, | ||||||
|                          THCTensor *input, |                          THCTensor *input, | ||||||
|                          THCTensor *gradOutput, |                          THCTensor *gradOutput, | ||||||
|                          int kH, int kW, int dH, int dW, |                          int64_t kH, int64_t kW, int64_t dH, int64_t dW, | ||||||
|                          int padH, int padW, int sH, int sW) { |                          int64_t padH, int64_t padW, int64_t sH, int64_t sW) { | ||||||
|  |  | ||||||
|   THArgCheck(kW > 0 && kH > 0, 4, |   THArgCheck(kW > 0 && kH > 0, 4, | ||||||
|              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); |              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); | ||||||
| @ -18,7 +18,7 @@ static inline void THNN_(Im2Col_shapeCheck)( | |||||||
|   THArgCheck(sW > 0 && sH > 0, 10, |   THArgCheck(sW > 0 && sH > 0, 10, | ||||||
|              "stride should be greater than zero, but got sH: %d sW: %d", sH, sW); |              "stride should be greater than zero, but got sH: %d sW: %d", sH, sW); | ||||||
|  |  | ||||||
|   int ndim = THCTensor_(nDimension)(state, input); |   int64_t ndim = THCTensor_(nDimension)(state, input); | ||||||
|   THCUNN_argCheck(state, !input->is_empty() && (ndim == 3 || ndim == 4), 2, input, |   THCUNN_argCheck(state, !input->is_empty() && (ndim == 3 || ndim == 4), 2, input, | ||||||
|                 "Expected non-empty 3D or 4D input tensor, but got input of shape %s"); |                 "Expected non-empty 3D or 4D input tensor, but got input of shape %s"); | ||||||
|  |  | ||||||
| @ -26,11 +26,11 @@ static inline void THNN_(Im2Col_shapeCheck)( | |||||||
|   if (ndim == 3) { |   if (ndim == 3) { | ||||||
|     dim_batch = -1; |     dim_batch = -1; | ||||||
|   } |   } | ||||||
|   int nInputPlane  = THCTensor_(size)(state, input, dim_batch + 1); |   int64_t nInputPlane  = THCTensor_(size)(state, input, dim_batch + 1); | ||||||
|   int inputHeight  = THCTensor_(size)(state, input, dim_batch + 2); |   int64_t inputHeight  = THCTensor_(size)(state, input, dim_batch + 2); | ||||||
|   int inputWidth   = THCTensor_(size)(state, input, dim_batch + 3); |   int64_t inputWidth   = THCTensor_(size)(state, input, dim_batch + 3); | ||||||
|   int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; |   int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||||
|   int outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; |   int64_t outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||||
|  |  | ||||||
|   if (outputHeight < 1 || outputWidth < 1) { |   if (outputHeight < 1 || outputWidth < 1) { | ||||||
|     THError("Given input with spatial size (%d, %d), kernel_size=(%d, %d), " |     THError("Given input with spatial size (%d, %d), kernel_size=(%d, %d), " | ||||||
| @ -46,10 +46,10 @@ void THNN_(Im2Col_updateOutput)( | |||||||
|            THCState *state, |            THCState *state, | ||||||
|            THCTensor *input, |            THCTensor *input, | ||||||
|            THCTensor *output, |            THCTensor *output, | ||||||
|            int kH, int kW, |            int64_t kH, int64_t kW, | ||||||
|            int dH, int dW, |            int64_t dH, int64_t dW, | ||||||
|            int padH, int padW, |            int64_t padH, int64_t padW, | ||||||
|            int sH, int sW) { |            int64_t sH, int64_t sW) { | ||||||
|  |  | ||||||
|   THCUNN_assertSameGPU(state, 2, input, output); |   THCUNN_assertSameGPU(state, 2, input, output); | ||||||
|  |  | ||||||
| @ -62,15 +62,15 @@ void THNN_(Im2Col_updateOutput)( | |||||||
|     THCTensor_(resize4d)(state, input, 1, input->size[0], input->size[1], input->size[2]); |     THCTensor_(resize4d)(state, input, 1, input->size[0], input->size[1], input->size[2]); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   int batchSize    = THCTensor_(size)(state, input, 0); |   int64_t batchSize    = THCTensor_(size)(state, input, 0); | ||||||
|   int nInputPlane  = THCTensor_(size)(state, input, 1); |   int64_t nInputPlane  = THCTensor_(size)(state, input, 1); | ||||||
|   int inputHeight  = THCTensor_(size)(state, input, 2); |   int64_t inputHeight  = THCTensor_(size)(state, input, 2); | ||||||
|   int inputWidth   = THCTensor_(size)(state, input, 3); |   int64_t inputWidth   = THCTensor_(size)(state, input, 3); | ||||||
|  |  | ||||||
|   int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; |   int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||||
|   int outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; |   int64_t outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||||
|   int nOutputPlane = nInputPlane * kW * kH; |   int64_t nOutputPlane = nInputPlane * kW * kH; | ||||||
|   int outputLength = outputHeight * outputWidth; |   int64_t outputLength = outputHeight * outputWidth; | ||||||
|  |  | ||||||
|   THCTensor_(resize3d)(state, output, batchSize, nOutputPlane, outputLength); |   THCTensor_(resize3d)(state, output, batchSize, nOutputPlane, outputLength); | ||||||
|   THCTensor_(zero)(state, output); |   THCTensor_(zero)(state, output); | ||||||
| @ -78,7 +78,7 @@ void THNN_(Im2Col_updateOutput)( | |||||||
|   THCTensor *input_n = THCTensor_(new)(state); |   THCTensor *input_n = THCTensor_(new)(state); | ||||||
|   THCTensor *output_n = THCTensor_(new)(state); |   THCTensor *output_n = THCTensor_(new)(state); | ||||||
|  |  | ||||||
|   for (int elt = 0; elt < batchSize; elt++) { |   for (int64_t elt = 0; elt < batchSize; elt++) { | ||||||
|     THCTensor_(select)(state, input_n, input, 0, elt); |     THCTensor_(select)(state, input_n, input, 0, elt); | ||||||
|     THCTensor_(select)(state, output_n, output, 0, elt); |     THCTensor_(select)(state, output_n, output, 0, elt); | ||||||
|  |  | ||||||
| @ -104,11 +104,11 @@ void THNN_(Im2Col_updateGradInput)( | |||||||
|            THCState *state, |            THCState *state, | ||||||
|            THCTensor *gradOutput, |            THCTensor *gradOutput, | ||||||
|            THCTensor *gradInput, |            THCTensor *gradInput, | ||||||
|            int inputHeight, int inputWidth, |            int64_t inputHeight, int64_t inputWidth, | ||||||
|            int kH, int kW, |            int64_t kH, int64_t kW, | ||||||
|            int dH, int dW, |            int64_t dH, int64_t dW, | ||||||
|            int padH, int padW, |            int64_t padH, int64_t padW, | ||||||
|            int sH, int sW) { |            int64_t sH, int64_t sW) { | ||||||
|  |  | ||||||
|   THNN_(Col2Im_updateOutput)(state, gradOutput, gradInput, |   THNN_(Col2Im_updateOutput)(state, gradOutput, gradInput, | ||||||
|                              inputHeight, inputWidth, |                              inputHeight, inputWidth, | ||||||
|  | |||||||
| @ -183,39 +183,39 @@ THC_API void THNN_(Im2Col_updateOutput)( | |||||||
|                   THCState *state, |                   THCState *state, | ||||||
|                   THCTensor *input, |                   THCTensor *input, | ||||||
|                   THCTensor *output, |                   THCTensor *output, | ||||||
|                   int kH, int kW, |                   int64_t kH, int64_t kW, | ||||||
|                   int dH, int dW, |                   int64_t dH, int64_t dW, | ||||||
|                   int padH, int padW, |                   int64_t padH, int64_t padW, | ||||||
|                   int sH, int sW); |                   int64_t sH, int64_t sW); | ||||||
|  |  | ||||||
| THC_API void THNN_(Im2Col_updateGradInput)( | THC_API void THNN_(Im2Col_updateGradInput)( | ||||||
|                   THCState *state, |                   THCState *state, | ||||||
|                   THCTensor *gradOutput, |                   THCTensor *gradOutput, | ||||||
|                   THCTensor *gradInput, |                   THCTensor *gradInput, | ||||||
|                   int inputHeight, int inputWidth, |                   int64_t inputHeight, int64_t inputWidth, | ||||||
|                   int kH, int kW, |                   int64_t kH, int64_t kW, | ||||||
|                   int dH, int dW, |                   int64_t dH, int64_t dW, | ||||||
|                   int padH, int padW, |                   int64_t padH, int64_t padW, | ||||||
|                   int sH, int sW); |                   int64_t sH, int64_t sW); | ||||||
|  |  | ||||||
| THC_API void THNN_(Col2Im_updateOutput)( | THC_API void THNN_(Col2Im_updateOutput)( | ||||||
|                   THCState *state, |                   THCState *state, | ||||||
|                   THCTensor *input, |                   THCTensor *input, | ||||||
|                   THCTensor *output, |                   THCTensor *output, | ||||||
|                   int outputHeight, int outputWidth, |                   int64_t outputHeight, int64_t outputWidth, | ||||||
|                   int kH, int kW, |                   int64_t kH, int64_t kW, | ||||||
|                   int dH, int dW, |                   int64_t dH, int64_t dW, | ||||||
|                   int padH, int padW, |                   int64_t padH, int64_t padW, | ||||||
|                   int sH, int sW); |                   int64_t sH, int64_t sW); | ||||||
|  |  | ||||||
|  THC_API void THNN_(Col2Im_updateGradInput)( |  THC_API void THNN_(Col2Im_updateGradInput)( | ||||||
|                   THCState *state, |                   THCState *state, | ||||||
|                   THCTensor *gradOutput, |                   THCTensor *gradOutput, | ||||||
|                   THCTensor *gradInput, |                   THCTensor *gradInput, | ||||||
|                   int kH, int kW, |                   int64_t kH, int64_t kW, | ||||||
|                   int dH, int dW, |                   int64_t dH, int64_t dW, | ||||||
|                   int padH, int padW, |                   int64_t padH, int64_t padW, | ||||||
|                   int sH, int sW); |                   int64_t sH, int64_t sW); | ||||||
|  |  | ||||||
| THC_API void THNN_(LeakyReLU_updateOutput)( | THC_API void THNN_(LeakyReLU_updateOutput)( | ||||||
|                   THCState *state, |                   THCState *state, | ||||||
|  | |||||||
| @ -8,28 +8,28 @@ | |||||||
| // (borrowed from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu) | // (borrowed from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu) | ||||||
| template <typename Dtype> | template <typename Dtype> | ||||||
| __launch_bounds__(CUDA_NUM_THREADS) | __launch_bounds__(CUDA_NUM_THREADS) | ||||||
| __global__ void im2col_kernel(const int n, const Dtype* data_im, | __global__ void im2col_kernel(const int64_t n, const Dtype* data_im, | ||||||
|                               const int height, const int width, |                               const int64_t height, const int64_t width, | ||||||
|                               const int ksize_h, const int ksize_w, |                               const int64_t ksize_h, const int64_t ksize_w, | ||||||
|                               const int pad_h, const int pad_w, |                               const int64_t pad_h, const int64_t pad_w, | ||||||
|                               const int stride_h, const int stride_w, |                               const int64_t stride_h, const int64_t stride_w, | ||||||
|                               const int dilation_h, const int dilation_w, |                               const int64_t dilation_h, const int64_t dilation_w, | ||||||
|                               const int height_col, const int width_col, |                               const int64_t height_col, const int64_t width_col, | ||||||
|     Dtype* data_col) { |     Dtype* data_col) { | ||||||
|   CUDA_KERNEL_LOOP(index, n) { |   CUDA_KERNEL_LOOP(index, n) { | ||||||
|     int w_out = index % width_col; |     int64_t w_out = index % width_col; | ||||||
|     index /= width_col; |     index /= width_col; | ||||||
|     int h_out = index % height_col; |     int64_t h_out = index % height_col; | ||||||
|     int channel_in = index / height_col; |     int64_t channel_in = index / height_col; | ||||||
|     int channel_out = channel_in * ksize_h * ksize_w; |     int64_t channel_out = channel_in * ksize_h * ksize_w; | ||||||
|     int h_in = h_out * stride_h - pad_h; |     int64_t h_in = h_out * stride_h - pad_h; | ||||||
|     int w_in = w_out * stride_w - pad_w; |     int64_t w_in = w_out * stride_w - pad_w; | ||||||
|     data_col += (channel_out * height_col + h_out) * width_col + w_out; |     data_col += (channel_out * height_col + h_out) * width_col + w_out; | ||||||
|     data_im += (channel_in * height + h_in) * width + w_in; |     data_im += (channel_in * height + h_in) * width + w_in; | ||||||
|     for (int i = 0; i < ksize_h; ++i) { |     for (int64_t i = 0; i < ksize_h; ++i) { | ||||||
|       for (int j = 0; j < ksize_w; ++j) { |       for (int64_t j = 0; j < ksize_w; ++j) { | ||||||
|         int h = h_in + i * dilation_h; |         int64_t h = h_in + i * dilation_h; | ||||||
|         int w = w_in + j * dilation_w; |         int64_t w = w_in + j * dilation_w; | ||||||
|         *data_col = (h >= 0 && w >= 0 && h < height && w < width) ? |         *data_col = (h >= 0 && w >= 0 && h < height && w < width) ? | ||||||
|           data_im[i * dilation_h * width + j * dilation_w] : ScalarConvert<int, Dtype>::to(0); |           data_im[i * dilation_h * width + j * dilation_w] : ScalarConvert<int, Dtype>::to(0); | ||||||
|         data_col += height_col * width_col; |         data_col += height_col * width_col; | ||||||
| @ -39,15 +39,15 @@ __global__ void im2col_kernel(const int n, const Dtype* data_im, | |||||||
| } | } | ||||||
|  |  | ||||||
| template <typename Dtype> | template <typename Dtype> | ||||||
| void im2col(cudaStream_t stream, const Dtype* data_im, const int channels, | void im2col(cudaStream_t stream, const Dtype* data_im, const int64_t channels, | ||||||
|             const int height, const int width, |             const int64_t height, const int64_t width, | ||||||
|             const int height_col, const int width_col, |             const int64_t height_col, const int64_t width_col, | ||||||
|             const int ksize_h, const int ksize_w, const int pad_h, |             const int64_t ksize_h, const int64_t ksize_w, const int64_t pad_h, | ||||||
|             const int pad_w, const int stride_h, const int stride_w, |             const int64_t pad_w, const int64_t stride_h, const int64_t stride_w, | ||||||
|             const int dilation_h, const int dilation_w, Dtype* data_col) { |             const int64_t dilation_h, const int64_t dilation_w, Dtype* data_col) { | ||||||
|   // We are going to launch channels * height_col * width_col kernels, each |   // We are going to launch channels * height_col * width_col kernels, each | ||||||
|   // kernel responsible for copying a single-channel grid. |   // kernel responsible for copying a single-channel grid. | ||||||
|   int num_kernels = channels * height_col * width_col; |   int64_t num_kernels = channels * height_col * width_col; | ||||||
|   // Launch |   // Launch | ||||||
|   im2col_kernel <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>> ( |   im2col_kernel <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>> ( | ||||||
|       num_kernels, data_im, height, width, ksize_h, ksize_w, |       num_kernels, data_im, height, width, ksize_h, ksize_w, | ||||||
| @ -60,37 +60,37 @@ void im2col(cudaStream_t stream, const Dtype* data_im, const int channels, | |||||||
|  |  | ||||||
| template <typename Dtype, typename Acctype> | template <typename Dtype, typename Acctype> | ||||||
| __launch_bounds__(CUDA_NUM_THREADS) | __launch_bounds__(CUDA_NUM_THREADS) | ||||||
| __global__ void col2im_kernel(const int n, const Dtype* data_col, | __global__ void col2im_kernel(const int64_t n, const Dtype* data_col, | ||||||
|                                   const int height, const int width, const int channels, |                                   const int64_t height, const int64_t width, const int64_t channels, | ||||||
|                                   const int kernel_h, const int kernel_w, |                                   const int64_t kernel_h, const int64_t kernel_w, | ||||||
|                                   const int pad_h, const int pad_w, |                                   const int64_t pad_h, const int64_t pad_w, | ||||||
|                                   const int stride_h, const int stride_w, |                                   const int64_t stride_h, const int64_t stride_w, | ||||||
|                                   const int dilation_h, const int dilation_w, |                                   const int64_t dilation_h, const int64_t dilation_w, | ||||||
|                                   const int height_col, const int width_col, |                                   const int64_t height_col, const int64_t width_col, | ||||||
|                                   Dtype* data_im) { |                                   Dtype* data_im) { | ||||||
|   CUDA_KERNEL_LOOP(index, n) { |   CUDA_KERNEL_LOOP(index, n) { | ||||||
|     Acctype val = Acctype(0); |     Acctype val = Acctype(0); | ||||||
|     const int w_im = index % width + pad_w; |     const int64_t w_im = index % width + pad_w; | ||||||
|     const int h_im = (index / width) % height + pad_h; |     const int64_t h_im = (index / width) % height + pad_h; | ||||||
|     const int c_im = index / (width * height); |     const int64_t c_im = index / (width * height); | ||||||
|     int kernel_extent_w = (kernel_w - 1) * dilation_w + 1; |     int64_t kernel_extent_w = (kernel_w - 1) * dilation_w + 1; | ||||||
|     int kernel_extent_h = (kernel_h - 1) * dilation_h + 1; |     int64_t kernel_extent_h = (kernel_h - 1) * dilation_h + 1; | ||||||
|     // compute the start and end of the output |     // compute the start and end of the output | ||||||
|     const int w_col_start = |     const int64_t w_col_start = | ||||||
|       (w_im < kernel_extent_w) ? 0 : (w_im - kernel_extent_w) / stride_w + 1; |       (w_im < kernel_extent_w) ? 0 : (w_im - kernel_extent_w) / stride_w + 1; | ||||||
|     const int w_col_end = min(w_im / stride_w + 1, width_col); |     const int64_t w_col_end = min(w_im / stride_w + 1, width_col); | ||||||
|     const int h_col_start = |     const int64_t h_col_start = | ||||||
|       (h_im < kernel_extent_h) ? 0 : (h_im - kernel_extent_h) / stride_h + 1; |       (h_im < kernel_extent_h) ? 0 : (h_im - kernel_extent_h) / stride_h + 1; | ||||||
|     const int h_col_end = min(h_im / stride_h + 1, height_col); |     const int64_t h_col_end = min(h_im / stride_h + 1, height_col); | ||||||
|     // TODO: use LCM of stride and dilation to avoid unnecessary loops |     // TODO: use LCM of stride and dilation to avoid unnecessary loops | ||||||
|     for (int h_col = h_col_start; h_col < h_col_end; h_col += 1) { |     for (int64_t h_col = h_col_start; h_col < h_col_end; h_col += 1) { | ||||||
|       for (int w_col = w_col_start; w_col < w_col_end; w_col += 1) { |       for (int64_t w_col = w_col_start; w_col < w_col_end; w_col += 1) { | ||||||
|         int h_k = (h_im - h_col * stride_h); |         int64_t h_k = (h_im - h_col * stride_h); | ||||||
|         int w_k = (w_im - w_col * stride_w); |         int64_t w_k = (w_im - w_col * stride_w); | ||||||
|         if (h_k % dilation_h == 0 && w_k % dilation_w == 0) { |         if (h_k % dilation_h == 0 && w_k % dilation_w == 0) { | ||||||
|           h_k /= dilation_h; |           h_k /= dilation_h; | ||||||
|           w_k /= dilation_w; |           w_k /= dilation_w; | ||||||
|           int data_col_index = (((c_im * kernel_h + h_k) * kernel_w + w_k) * |           int64_t data_col_index = (((c_im * kernel_h + h_k) * kernel_w + w_k) * | ||||||
|                                 height_col + h_col) * width_col + w_col; |                                 height_col + h_col) * width_col + w_col; | ||||||
|           val += data_col[data_col_index]; |           val += data_col[data_col_index]; | ||||||
|         } |         } | ||||||
| @ -101,21 +101,21 @@ __global__ void col2im_kernel(const int n, const Dtype* data_col, | |||||||
| } | } | ||||||
|  |  | ||||||
| template <typename Dtype, typename Acctype> | template <typename Dtype, typename Acctype> | ||||||
| void col2im(cudaStream_t stream, const Dtype* data_col, const int channels, | void col2im(cudaStream_t stream, const Dtype* data_col, const int64_t channels, | ||||||
|             const int height, const int width, |             const int64_t height, const int64_t width, | ||||||
|             const int output_height, const int output_width, |             const int64_t output_height, const int64_t output_width, | ||||||
|             const int patch_h, const int patch_w, const int pad_h, |             const int64_t patch_h, const int64_t patch_w, const int64_t pad_h, | ||||||
|             const int pad_w, const int stride_h, const int stride_w, |             const int64_t pad_w, const int64_t stride_h, const int64_t stride_w, | ||||||
|             const int dilation_h, const int dilation_w, Dtype* data_im); |             const int64_t dilation_h, const int64_t dilation_w, Dtype* data_im); | ||||||
|  |  | ||||||
| template <typename Dtype, typename Acctype> | template <typename Dtype, typename Acctype> | ||||||
| void col2im(cudaStream_t stream, const Dtype* data_col, const int channels, | void col2im(cudaStream_t stream, const Dtype* data_col, const int64_t channels, | ||||||
|             const int height, const int width, |             const int64_t height, const int64_t width, | ||||||
|             const int output_height, const int output_width, |             const int64_t output_height, const int64_t output_width, | ||||||
|             const int patch_h, const int patch_w, const int pad_h, |             const int64_t patch_h, const int64_t patch_w, const int64_t pad_h, | ||||||
|             const int pad_w, const int stride_h, const int stride_w, |             const int64_t pad_w, const int64_t stride_h, const int64_t stride_w, | ||||||
|             const int dilation_h, const int dilation_w, Dtype* data_im) { |             const int64_t dilation_h, const int64_t dilation_w, Dtype* data_im) { | ||||||
|   int num_kernels = channels * height * width; |   int64_t num_kernels = channels * height * width; | ||||||
|   // To avoid involving atomic operations, we will launch one kernel per |   // To avoid involving atomic operations, we will launch one kernel per | ||||||
|   // bottom dimension, and then in the kernel add up the top dimensions. |   // bottom dimension, and then in the kernel add up the top dimensions. | ||||||
|   col2im_kernel<Dtype, Acctype> <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>> ( |   col2im_kernel<Dtype, Acctype> <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>> ( | ||||||
|  | |||||||
| @ -54,25 +54,25 @@ | |||||||
| // | // | ||||||
| // ALSO do vol2col | // ALSO do vol2col | ||||||
|  |  | ||||||
| static void THNN_(im2col)(const real* data_im, const int channels, | static void THNN_(im2col)(const real* data_im, const int64_t channels, | ||||||
|       const int height, const int width, |       const int64_t height, const int64_t width, | ||||||
|       const int output_height, const int output_width, |       const int64_t output_height, const int64_t output_width, | ||||||
|       const int kernel_h, const int kernel_w, |       const int64_t kernel_h, const int64_t kernel_w, | ||||||
|       const int pad_h, const int pad_w, |       const int64_t pad_h, const int64_t pad_w, | ||||||
|       const int stride_h, const int stride_w, |       const int64_t stride_h, const int64_t stride_w, | ||||||
|       const int dilation_h, const int dilation_w, |       const int64_t dilation_h, const int64_t dilation_w, | ||||||
|       real* data_col) { |       real* data_col) { | ||||||
|   const int height_col = output_height; |   const int64_t height_col = output_height; | ||||||
|   const int width_col = output_width; |   const int64_t width_col = output_width; | ||||||
|   const int channels_col = channels * kernel_h * kernel_w; |   const int64_t channels_col = channels * kernel_h * kernel_w; | ||||||
|   for (int c_col = 0; c_col < channels_col; ++c_col) { |   for (int64_t c_col = 0; c_col < channels_col; ++c_col) { | ||||||
|     int w_offset = c_col % kernel_w; |     int64_t w_offset = c_col % kernel_w; | ||||||
|     int h_offset = (c_col / kernel_w) % kernel_h; |     int64_t h_offset = (c_col / kernel_w) % kernel_h; | ||||||
|     int c_im = c_col / kernel_h / kernel_w; |     int64_t c_im = c_col / kernel_h / kernel_w; | ||||||
|     for (int h_col = 0; h_col < height_col; ++h_col) { |     for (int64_t h_col = 0; h_col < height_col; ++h_col) { | ||||||
|       int h_im = h_col * stride_h - pad_h + h_offset * dilation_h; |       int64_t h_im = h_col * stride_h - pad_h + h_offset * dilation_h; | ||||||
|       for (int w_col = 0; w_col < width_col; ++w_col) { |       for (int64_t w_col = 0; w_col < width_col; ++w_col) { | ||||||
|         int w_im = w_col * stride_w - pad_w + w_offset * dilation_w; |         int64_t w_im = w_col * stride_w - pad_w + w_offset * dilation_w; | ||||||
|         data_col[(c_col * height_col + h_col) * width_col + w_col] = |         data_col[(c_col * height_col + h_col) * width_col + w_col] = | ||||||
|           (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ? |           (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ? | ||||||
|           data_im[(c_im * height + h_im) * width + w_im] : 0; |           data_im[(c_im * height + h_im) * width + w_im] : 0; | ||||||
| @ -81,26 +81,26 @@ static void THNN_(im2col)(const real* data_im, const int channels, | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| static void THNN_(col2im)(const real* data_col, const int channels, | static void THNN_(col2im)(const real* data_col, const int64_t channels, | ||||||
|       const int height, const int width, |       const int64_t height, const int64_t width, | ||||||
|       const int output_height, const int output_width, |       const int64_t output_height, const int64_t output_width, | ||||||
|       const int kernel_h, const int kernel_w, |       const int64_t kernel_h, const int64_t kernel_w, | ||||||
|       const int pad_h, const int pad_w, |       const int64_t pad_h, const int64_t pad_w, | ||||||
|       const int stride_h, const int stride_w, |       const int64_t stride_h, const int64_t stride_w, | ||||||
|       const int dilation_h, const int dilation_w, |       const int64_t dilation_h, const int64_t dilation_w, | ||||||
|       real* data_im) { |       real* data_im) { | ||||||
|   memset(data_im, 0, sizeof(real) * height * width * channels); |   memset(data_im, 0, sizeof(real) * height * width * channels); | ||||||
|   const int height_col = output_height; |   const int64_t height_col = output_height; | ||||||
|   const int width_col = output_width; |   const int64_t width_col = output_width; | ||||||
|   const int channels_col = channels * kernel_h * kernel_w; |   const int64_t channels_col = channels * kernel_h * kernel_w; | ||||||
|   for (int c_col = 0; c_col < channels_col; ++c_col) { |   for (int64_t c_col = 0; c_col < channels_col; ++c_col) { | ||||||
|     int w_offset = c_col % kernel_w; |     int64_t w_offset = c_col % kernel_w; | ||||||
|     int h_offset = (c_col / kernel_w) % kernel_h; |     int64_t h_offset = (c_col / kernel_w) % kernel_h; | ||||||
|     int c_im = c_col / kernel_h / kernel_w; |     int64_t c_im = c_col / kernel_h / kernel_w; | ||||||
|     for (int h_col = 0; h_col < height_col; ++h_col) { |     for (int64_t h_col = 0; h_col < height_col; ++h_col) { | ||||||
|       int h_im = h_col * stride_h - pad_h + h_offset * dilation_h; |       int64_t h_im = h_col * stride_h - pad_h + h_offset * dilation_h; | ||||||
|       for (int w_col = 0; w_col < width_col; ++w_col) { |       for (int64_t w_col = 0; w_col < width_col; ++w_col) { | ||||||
|         int w_im = w_col * stride_w - pad_w + w_offset * dilation_w; |         int64_t w_im = w_col * stride_w - pad_w + w_offset * dilation_w; | ||||||
|         if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width) |         if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width) | ||||||
|           data_im[(c_im * height + h_im) * width + w_im] += |           data_im[(c_im * height + h_im) * width + w_im] += | ||||||
|             data_col[(c_col * height_col + h_col) * width_col + w_col]; |             data_col[(c_col * height_col + h_col) * width_col + w_col]; | ||||||
| @ -113,9 +113,9 @@ static inline void THNN_(Col2Im_shapeCheck)( | |||||||
|                          THNNState *state, |                          THNNState *state, | ||||||
|                          THTensor *input, |                          THTensor *input, | ||||||
|                          THTensor *gradOutput, |                          THTensor *gradOutput, | ||||||
|                          int outputHeight, int outputWidth, |                          int64_t outputHeight, int64_t outputWidth, | ||||||
|                          int kH, int kW, int dH, int dW, |                          int64_t kH, int64_t kW, int64_t dH, int64_t dW, | ||||||
|                          int padH, int padW, int sH, int sW) { |                          int64_t padH, int64_t padW, int64_t sH, int64_t sW) { | ||||||
|  |  | ||||||
|   THArgCheck(kW > 0 && kH > 0, 6, |   THArgCheck(kW > 0 && kH > 0, 6, | ||||||
|              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); |              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); | ||||||
| @ -124,11 +124,11 @@ static inline void THNN_(Col2Im_shapeCheck)( | |||||||
|   THArgCheck(dW > 0 && dH > 0, 8, |   THArgCheck(dW > 0 && dH > 0, 8, | ||||||
|              "dilation should be greater than zero, but got dH: %d dW: %d", dH, dW); |              "dilation should be greater than zero, but got dH: %d dW: %d", dH, dW); | ||||||
|  |  | ||||||
|   int ndim = THTensor_(nDimension)(input); |   int64_t ndim = THTensor_(nDimension)(input); | ||||||
|   THNN_ARGCHECK(!input->is_empty() && (ndim == 2 || ndim == 3), 2, input, |   THNN_ARGCHECK(!input->is_empty() && (ndim == 2 || ndim == 3), 2, input, | ||||||
|                 "Expected non-empty 2D or 3D input tensor, but got input of shape %s"); |                 "Expected non-empty 2D or 3D input tensor, but got input of shape %s"); | ||||||
|  |  | ||||||
|   int batch_dim = (ndim == 3) ? 0 : -1; |   int64_t batch_dim = (ndim == 3) ? 0 : -1; | ||||||
|   int64_t nInputPlane  = input->size[batch_dim + 1]; |   int64_t nInputPlane  = input->size[batch_dim + 1]; | ||||||
|  |  | ||||||
|   if (nInputPlane % (kW * kH) != 0) { |   if (nInputPlane % (kW * kH) != 0) { | ||||||
| @ -161,11 +161,11 @@ void THNN_(Col2Im_updateOutput)( | |||||||
|            THNNState *state, |            THNNState *state, | ||||||
|            THTensor *input, |            THTensor *input, | ||||||
|            THTensor *output, |            THTensor *output, | ||||||
|            int outputHeight, int outputWidth, |            int64_t outputHeight, int64_t outputWidth, | ||||||
|            int kH, int kW, |            int64_t kH, int64_t kW, | ||||||
|            int dH, int dW, |            int64_t dH, int64_t dW, | ||||||
|            int padH, int padW, |            int64_t padH, int64_t padW, | ||||||
|            int sH, int sW) { |            int64_t sH, int64_t sW) { | ||||||
|  |  | ||||||
|   THNN_(Col2Im_shapeCheck)(state, input, NULL, outputHeight, outputWidth, |   THNN_(Col2Im_shapeCheck)(state, input, NULL, outputHeight, outputWidth, | ||||||
|                            kH, kW, dH, dW, padH, padW, sH, sW); |                            kH, kW, dH, dW, padH, padW, sH, sW); | ||||||
| @ -189,10 +189,10 @@ void THNN_(Col2Im_updateOutput)( | |||||||
|   THTensor *input_n = THTensor_(new)(); |   THTensor *input_n = THTensor_(new)(); | ||||||
|   THTensor *output_n = THTensor_(new)(); |   THTensor *output_n = THTensor_(new)(); | ||||||
|  |  | ||||||
|   int height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; |   int64_t height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||||
|   int width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; |   int64_t width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||||
|  |  | ||||||
|   for (int elt = 0; elt < batchSize; elt++) { |   for (int64_t elt = 0; elt < batchSize; elt++) { | ||||||
|     THTensor_(select)(input_n, input, 0, elt); |     THTensor_(select)(input_n, input, 0, elt); | ||||||
|     THTensor_(select)(output_n, output, 0, elt); |     THTensor_(select)(output_n, output, 0, elt); | ||||||
|  |  | ||||||
| @ -220,10 +220,10 @@ void THNN_(Col2Im_updateGradInput)( | |||||||
|            THNNState *state, |            THNNState *state, | ||||||
|            THTensor *gradOutput, |            THTensor *gradOutput, | ||||||
|            THTensor *gradInput, |            THTensor *gradInput, | ||||||
|            int kH, int kW, |            int64_t kH, int64_t kW, | ||||||
|            int dH, int dW, |            int64_t dH, int64_t dW, | ||||||
|            int padH, int padW, |            int64_t padH, int64_t padW, | ||||||
|            int sH, int sW) { |            int64_t sH, int64_t sW) { | ||||||
|  |  | ||||||
|   THNN_(Im2Col_updateOutput)(state, gradOutput, gradInput, |   THNN_(Im2Col_updateOutput)(state, gradOutput, gradInput, | ||||||
|                              kH, kW, dH, dW, padH, padW, sH, sW); |                              kH, kW, dH, dW, padH, padW, sH, sW); | ||||||
|  | |||||||
| @ -6,8 +6,8 @@ static inline void THNN_(Im2Col_shapeCheck)( | |||||||
|                          THNNState *state, |                          THNNState *state, | ||||||
|                          THTensor *input, |                          THTensor *input, | ||||||
|                          THTensor *gradOutput, |                          THTensor *gradOutput, | ||||||
|                          int kH, int kW, int dH, int dW, |                          int64_t kH, int64_t kW, int64_t dH, int64_t dW, | ||||||
|                          int padH, int padW, int sH, int sW) { |                          int64_t padH, int64_t padW, int64_t sH, int64_t sW) { | ||||||
|  |  | ||||||
|   THArgCheck(kW > 0 && kH > 0, 4, |   THArgCheck(kW > 0 && kH > 0, 4, | ||||||
|              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); |              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); | ||||||
| @ -16,21 +16,21 @@ static inline void THNN_(Im2Col_shapeCheck)( | |||||||
|   THArgCheck(sW > 0 && sH > 0, 10, |   THArgCheck(sW > 0 && sH > 0, 10, | ||||||
|              "stride should be greater than zero, but got sH: %d sW: %d", sH, sW); |              "stride should be greater than zero, but got sH: %d sW: %d", sH, sW); | ||||||
|  |  | ||||||
|   int ndim = THTensor_(nDimension)(input); |   int64_t ndim = THTensor_(nDimension)(input); | ||||||
|   THNN_ARGCHECK(!input->is_empty() && (ndim == 3 || ndim == 4), 2, input, |   THNN_ARGCHECK(!input->is_empty() && (ndim == 3 || ndim == 4), 2, input, | ||||||
|                 "Expected non-empty 3D or 4D input tensor, but got input of shape %s"); |                 "Expected non-empty 3D or 4D input tensor, but got input of shape %s"); | ||||||
|  |  | ||||||
|   int dim_batch = 0; |   int64_t dim_batch = 0; | ||||||
|   if (ndim == 3) { |   if (ndim == 3) { | ||||||
|     dim_batch = -1; |     dim_batch = -1; | ||||||
|   } |   } | ||||||
|   int nInputPlane  = THTensor_(size)(input, dim_batch + 1); |   int64_t nInputPlane  = THTensor_(size)(input, dim_batch + 1); | ||||||
|   int inputHeight  = THTensor_(size)(input, dim_batch + 2); |   int64_t inputHeight  = THTensor_(size)(input, dim_batch + 2); | ||||||
|   int inputWidth   = THTensor_(size)(input, dim_batch + 3); |   int64_t inputWidth   = THTensor_(size)(input, dim_batch + 3); | ||||||
|   int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; |   int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||||
|   int outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; |   int64_t outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||||
|   int nOutputPlane = nInputPlane * kW * kH; |   int64_t nOutputPlane = nInputPlane * kW * kH; | ||||||
|   int outputLength = outputHeight * outputWidth; |   int64_t outputLength = outputHeight * outputWidth; | ||||||
|  |  | ||||||
|   if (outputHeight < 1 || outputWidth < 1) { |   if (outputHeight < 1 || outputWidth < 1) { | ||||||
|     THError("Given input with spatial size (%d, %d), kernel_size=(%d, %d), " |     THError("Given input with spatial size (%d, %d), kernel_size=(%d, %d), " | ||||||
| @ -46,10 +46,10 @@ void THNN_(Im2Col_updateOutput)( | |||||||
|            THNNState *state, |            THNNState *state, | ||||||
|            THTensor *input, |            THTensor *input, | ||||||
|            THTensor *output, |            THTensor *output, | ||||||
|            int kH, int kW, |            int64_t kH, int64_t kW, | ||||||
|            int dH, int dW, |            int64_t dH, int64_t dW, | ||||||
|            int padH, int padW, |            int64_t padH, int64_t padW, | ||||||
|            int sH, int sW) { |            int64_t sH, int64_t sW) { | ||||||
|  |  | ||||||
|   THNN_(Im2Col_shapeCheck)(state, input, NULL, kH, kW, dH, dW, padH, padW, sH, sW); |   THNN_(Im2Col_shapeCheck)(state, input, NULL, kH, kW, dH, dW, padH, padW, sH, sW); | ||||||
|  |  | ||||||
| @ -60,15 +60,15 @@ void THNN_(Im2Col_updateOutput)( | |||||||
|     THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]); |     THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   int batchSize    = THTensor_(size)(input, 0); |   int64_t batchSize    = THTensor_(size)(input, 0); | ||||||
|   int nInputPlane  = THTensor_(size)(input, 1); |   int64_t nInputPlane  = THTensor_(size)(input, 1); | ||||||
|   int inputHeight  = THTensor_(size)(input, 2); |   int64_t inputHeight  = THTensor_(size)(input, 2); | ||||||
|   int inputWidth   = THTensor_(size)(input, 3); |   int64_t inputWidth   = THTensor_(size)(input, 3); | ||||||
|  |  | ||||||
|   int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; |   int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||||
|   int outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; |   int64_t outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||||
|   int nOutputPlane = nInputPlane * kW * kH; |   int64_t nOutputPlane = nInputPlane * kW * kH; | ||||||
|   int outputLength = outputHeight * outputWidth; |   int64_t outputLength = outputHeight * outputWidth; | ||||||
|  |  | ||||||
|   THTensor_(resize3d)(output, batchSize, nOutputPlane, outputLength); |   THTensor_(resize3d)(output, batchSize, nOutputPlane, outputLength); | ||||||
|   THTensor_(zero)(output); |   THTensor_(zero)(output); | ||||||
| @ -76,7 +76,7 @@ void THNN_(Im2Col_updateOutput)( | |||||||
|   THTensor *input_n = THTensor_(new)(); |   THTensor *input_n = THTensor_(new)(); | ||||||
|   THTensor *output_n = THTensor_(new)(); |   THTensor *output_n = THTensor_(new)(); | ||||||
|  |  | ||||||
|   for (int elt = 0; elt < batchSize; elt++) { |   for (int64_t elt = 0; elt < batchSize; elt++) { | ||||||
|     THTensor_(select)(input_n, input, 0, elt); |     THTensor_(select)(input_n, input, 0, elt); | ||||||
|     THTensor_(select)(output_n, output, 0, elt); |     THTensor_(select)(output_n, output, 0, elt); | ||||||
|  |  | ||||||
| @ -102,11 +102,11 @@ void THNN_(Im2Col_updateGradInput)( | |||||||
|            THNNState *state, |            THNNState *state, | ||||||
|            THTensor *gradOutput, |            THTensor *gradOutput, | ||||||
|            THTensor *gradInput, |            THTensor *gradInput, | ||||||
|            int inputHeight, int inputWidth, |            int64_t inputHeight, int64_t inputWidth, | ||||||
|            int kH, int kW, |            int64_t kH, int64_t kW, | ||||||
|            int dH, int dW, |            int64_t dH, int64_t dW, | ||||||
|            int padH, int padW, |            int64_t padH, int64_t padW, | ||||||
|            int sH, int sW) { |            int64_t sH, int64_t sW) { | ||||||
|  |  | ||||||
|  |  | ||||||
|   THNN_(Col2Im_updateOutput)(state, gradOutput, gradInput, |   THNN_(Col2Im_updateOutput)(state, gradOutput, gradInput, | ||||||
|  | |||||||
| @ -220,8 +220,8 @@ void THNN_(SpatialDilatedConvolution_updateGradInput)( | |||||||
|      dilationH, dilationW, 0); |      dilationH, dilationW, 0); | ||||||
|  |  | ||||||
|   // Params |   // Params | ||||||
|   int nInputPlane = weight->size[1]; |   int64_t nInputPlane = weight->size[1]; | ||||||
|   int nOutputPlane = weight->size[0]; |   int64_t nOutputPlane = weight->size[0]; | ||||||
|  |  | ||||||
|   input = THTensor_(newContiguous)(input); |   input = THTensor_(newContiguous)(input); | ||||||
|   weight = THTensor_(newContiguous)(weight); |   weight = THTensor_(newContiguous)(weight); | ||||||
|  | |||||||
| @ -221,8 +221,8 @@ void THNN_(SpatialFullDilatedConvolution_updateGradInput)( | |||||||
|     (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW, |     (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW, | ||||||
|      dilationH, dilationW, adjH, adjW, 0); |      dilationH, dilationW, adjH, adjW, 0); | ||||||
|  |  | ||||||
|   int nInputPlane = THTensor_(size)(weight,0); |   int64_t nInputPlane = THTensor_(size)(weight,0); | ||||||
|   int nOutputPlane = THTensor_(size)(weight,1); |   int64_t nOutputPlane = THTensor_(size)(weight,1); | ||||||
|  |  | ||||||
|   input = THTensor_(newContiguous)(input); |   input = THTensor_(newContiguous)(input); | ||||||
|   gradOutput = THTensor_(newContiguous)(gradOutput); |   gradOutput = THTensor_(newContiguous)(gradOutput); | ||||||
| @ -328,7 +328,7 @@ void THNN_(SpatialFullDilatedConvolution_accGradParameters)( | |||||||
|     (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW, |     (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW, | ||||||
|      dilationH, dilationW, adjH, adjW, 1); |      dilationH, dilationW, adjH, adjW, 1); | ||||||
|  |  | ||||||
|   int nOutputPlane; |   int64_t nOutputPlane; | ||||||
|   if (gradWeight) { |   if (gradWeight) { | ||||||
|     nOutputPlane = THTensor_(size)(gradWeight, 1); |     nOutputPlane = THTensor_(size)(gradWeight, 1); | ||||||
|   } else if (gradBias) { |   } else if (gradBias) { | ||||||
|  | |||||||
| @ -147,39 +147,39 @@ TH_API void THNN_(Im2Col_updateOutput)( | |||||||
|           THNNState *state, |           THNNState *state, | ||||||
|           THTensor *input, |           THTensor *input, | ||||||
|           THTensor *output, |           THTensor *output, | ||||||
|           int kH, int kW, |           int64_t kH, int64_t kW, | ||||||
|           int dH, int dW, |           int64_t dH, int64_t dW, | ||||||
|           int padH, int padW, |           int64_t padH, int64_t padW, | ||||||
|           int sH, int sW); |           int64_t sH, int64_t sW); | ||||||
|  |  | ||||||
| TH_API void THNN_(Im2Col_updateGradInput)( | TH_API void THNN_(Im2Col_updateGradInput)( | ||||||
|           THNNState *state, |           THNNState *state, | ||||||
|           THTensor *gradOutput, |           THTensor *gradOutput, | ||||||
|           THTensor *gradInput, |           THTensor *gradInput, | ||||||
|           int inputHeight, int inputWidth, |           int64_t inputHeight, int64_t inputWidth, | ||||||
|           int kH, int kW, |           int64_t kH, int64_t kW, | ||||||
|           int dH, int dW, |           int64_t dH, int64_t dW, | ||||||
|           int padH, int padW, |           int64_t padH, int64_t padW, | ||||||
|           int sH, int sW); |           int64_t sH, int64_t sW); | ||||||
|  |  | ||||||
| TH_API void THNN_(Col2Im_updateOutput)( | TH_API void THNN_(Col2Im_updateOutput)( | ||||||
|           THNNState *state, |           THNNState *state, | ||||||
|           THTensor *input, |           THTensor *input, | ||||||
|           THTensor *output, |           THTensor *output, | ||||||
|           int outputHeight, int outputWidth, |           int64_t outputHeight, int64_t outputWidth, | ||||||
|           int kH, int kW, |           int64_t kH, int64_t kW, | ||||||
|           int dH, int dW, |           int64_t dH, int64_t dW, | ||||||
|           int padH, int padW, |           int64_t padH, int64_t padW, | ||||||
|           int sH, int sW); |           int64_t sH, int64_t sW); | ||||||
|  |  | ||||||
| TH_API void THNN_(Col2Im_updateGradInput)( | TH_API void THNN_(Col2Im_updateGradInput)( | ||||||
|           THNNState *state, |           THNNState *state, | ||||||
|           THTensor *gradOutput, |           THTensor *gradOutput, | ||||||
|           THTensor *gradInput, |           THTensor *gradInput, | ||||||
|           int kH, int kW, |           int64_t kH, int64_t kW, | ||||||
|           int dH, int dW, |           int64_t dH, int64_t dW, | ||||||
|           int padH, int padW, |           int64_t padH, int64_t padW, | ||||||
|           int sH, int sW); |           int64_t sH, int64_t sW); | ||||||
|  |  | ||||||
| TH_API void THNN_(L1Cost_updateOutput)( | TH_API void THNN_(L1Cost_updateOutput)( | ||||||
|           THNNState *state,            // library's state |           THNNState *state,            // library's state | ||||||
|  | |||||||
| @ -224,11 +224,7 @@ if(USE_CUDA) | |||||||
|   # it. We will then manually add the cudart library as interface libs. |   # it. We will then manually add the cudart library as interface libs. | ||||||
|   set(__tmp ${CUDA_LIBRARIES}) |   set(__tmp ${CUDA_LIBRARIES}) | ||||||
|   set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES}) |   set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES}) | ||||||
|   if(CAFFE2_STATIC_LINK_CUDA) |   torch_cuda_based_add_library(caffe2_gpu ${Caffe2_GPU_SRCS}) | ||||||
|     torch_cuda_based_add_library(caffe2_gpu STATIC ${Caffe2_GPU_SRCS}) |  | ||||||
|   else() |  | ||||||
|     torch_cuda_based_add_library(caffe2_gpu ${Caffe2_GPU_SRCS}) |  | ||||||
|   endif() |  | ||||||
|   set(CUDA_LIBRARIES ${__tmp}) |   set(CUDA_LIBRARIES ${__tmp}) | ||||||
|   target_link_libraries(caffe2_gpu INTERFACE caffe2::cudart) |   target_link_libraries(caffe2_gpu INTERFACE caffe2::cudart) | ||||||
|  |  | ||||||
|  | |||||||
| @ -1,4 +1,5 @@ | |||||||
| #include "caffe2/utils/proto_wrap.h" | #include "caffe2/utils/proto_wrap.h" | ||||||
|  | #include "caffe2/core/common.h" | ||||||
|  |  | ||||||
| #include <google/protobuf/stubs/common.h> | #include <google/protobuf/stubs/common.h> | ||||||
| #include <google/protobuf/generated_message_util.h> | #include <google/protobuf/generated_message_util.h> | ||||||
| @ -8,7 +9,7 @@ namespace caffe { | |||||||
| // Caffe wrapper functions for protobuf's GetEmptyStringAlreadyInited() function | // Caffe wrapper functions for protobuf's GetEmptyStringAlreadyInited() function | ||||||
| // used to avoid duplicated global variable in the case when protobuf | // used to avoid duplicated global variable in the case when protobuf | ||||||
| // is built with hidden visibility. | // is built with hidden visibility. | ||||||
| const ::std::string& GetEmptyStringAlreadyInited() { | CAFFE2_API const ::std::string& GetEmptyStringAlreadyInited() { | ||||||
|   return ::google::protobuf::internal::GetEmptyStringAlreadyInited(); |   return ::google::protobuf::internal::GetEmptyStringAlreadyInited(); | ||||||
| } | } | ||||||
|  |  | ||||||
| @ -19,7 +20,7 @@ namespace ONNX_NAMESPACE { | |||||||
| // ONNX wrapper functions for protobuf's GetEmptyStringAlreadyInited() function | // ONNX wrapper functions for protobuf's GetEmptyStringAlreadyInited() function | ||||||
| // used to avoid duplicated global variable in the case when protobuf | // used to avoid duplicated global variable in the case when protobuf | ||||||
| // is built with hidden visibility. | // is built with hidden visibility. | ||||||
| const ::std::string& GetEmptyStringAlreadyInited() { | CAFFE2_API const ::std::string& GetEmptyStringAlreadyInited() { | ||||||
|   return ::google::protobuf::internal::GetEmptyStringAlreadyInited(); |   return ::google::protobuf::internal::GetEmptyStringAlreadyInited(); | ||||||
| } | } | ||||||
|  |  | ||||||
| @ -30,7 +31,7 @@ namespace caffe2 { | |||||||
| // Caffe2 wrapper functions for protobuf's GetEmptyStringAlreadyInited() function | // Caffe2 wrapper functions for protobuf's GetEmptyStringAlreadyInited() function | ||||||
| // used to avoid duplicated global variable in the case when protobuf | // used to avoid duplicated global variable in the case when protobuf | ||||||
| // is built with hidden visibility. | // is built with hidden visibility. | ||||||
| const ::std::string& GetEmptyStringAlreadyInited() { | CAFFE2_API const ::std::string& GetEmptyStringAlreadyInited() { | ||||||
|   return ::google::protobuf::internal::GetEmptyStringAlreadyInited(); |   return ::google::protobuf::internal::GetEmptyStringAlreadyInited(); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | |||||||
| @ -1102,6 +1102,11 @@ Linear functions | |||||||
|  |  | ||||||
| .. autofunction:: linear | .. autofunction:: linear | ||||||
|  |  | ||||||
|  | :hidden:`bilinear` | ||||||
|  | ~~~~~~~~~~~~~~~~ | ||||||
|  |  | ||||||
|  | .. autofunction:: bilinear | ||||||
|  |  | ||||||
| Dropout functions | Dropout functions | ||||||
| ----------------- | ----------------- | ||||||
|  |  | ||||||
|  | |||||||
| @ -337,6 +337,7 @@ view of a storage and defines numeric operations on it. | |||||||
|    .. automethod:: rsqrt |    .. automethod:: rsqrt | ||||||
|    .. automethod:: rsqrt_ |    .. automethod:: rsqrt_ | ||||||
|    .. automethod:: scatter_ |    .. automethod:: scatter_ | ||||||
|  |    .. automethod:: scatter_add_ | ||||||
|    .. automethod:: select |    .. automethod:: select | ||||||
|    .. automethod:: set_ |    .. automethod:: set_ | ||||||
|    .. automethod:: share_memory_ |    .. automethod:: share_memory_ | ||||||
|  | |||||||
| @ -251,6 +251,7 @@ Spectral Ops | |||||||
|  |  | ||||||
| Other Operations | Other Operations | ||||||
| ~~~~~~~~~~~~~~~~~~~~~~ | ~~~~~~~~~~~~~~~~~~~~~~ | ||||||
|  | .. autofunction:: bincount | ||||||
| .. autofunction:: cross | .. autofunction:: cross | ||||||
| .. autofunction:: diag | .. autofunction:: diag | ||||||
| .. autofunction:: diagflat | .. autofunction:: diagflat | ||||||
| @ -258,6 +259,7 @@ Other Operations | |||||||
| .. autofunction:: einsum | .. autofunction:: einsum | ||||||
| .. autofunction:: flip | .. autofunction:: flip | ||||||
| .. autofunction:: histc | .. autofunction:: histc | ||||||
|  | .. autofunction:: meshgrid | ||||||
| .. autofunction:: renorm | .. autofunction:: renorm | ||||||
| .. autofunction:: trace | .. autofunction:: trace | ||||||
| .. autofunction:: tril | .. autofunction:: tril | ||||||
|  | |||||||
							
								
								
									
										4
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								setup.py
									
									
									
									
									
								
							| @ -152,6 +152,8 @@ IS_LINUX = (platform.system() == 'Linux') | |||||||
| FULL_CAFFE2 = check_env_flag('FULL_CAFFE2') | FULL_CAFFE2 = check_env_flag('FULL_CAFFE2') | ||||||
| BUILD_PYTORCH = check_env_flag('BUILD_PYTORCH') | BUILD_PYTORCH = check_env_flag('BUILD_PYTORCH') | ||||||
|  |  | ||||||
|  | USE_CUDA_STATIC_LINK = check_env_flag('USE_CUDA_STATIC_LINK') | ||||||
|  |  | ||||||
| NUM_JOBS = multiprocessing.cpu_count() | NUM_JOBS = multiprocessing.cpu_count() | ||||||
| max_jobs = os.getenv("MAX_JOBS") | max_jobs = os.getenv("MAX_JOBS") | ||||||
| if max_jobs is not None: | if max_jobs is not None: | ||||||
| @ -318,6 +320,8 @@ def build_libs(libs): | |||||||
|     if USE_CUDA: |     if USE_CUDA: | ||||||
|         my_env["CUDA_BIN_PATH"] = CUDA_HOME |         my_env["CUDA_BIN_PATH"] = CUDA_HOME | ||||||
|         build_libs_cmd += ['--use-cuda'] |         build_libs_cmd += ['--use-cuda'] | ||||||
|  |     if USE_CUDA_STATIC_LINK: | ||||||
|  |         build_libs_cmd += ['--cuda-static-link'] | ||||||
|     if USE_ROCM: |     if USE_ROCM: | ||||||
|         build_libs_cmd += ['--use-rocm'] |         build_libs_cmd += ['--use-rocm'] | ||||||
|     if USE_NNPACK: |     if USE_NNPACK: | ||||||
|  | |||||||
| @ -28,7 +28,7 @@ import errno | |||||||
| import torch | import torch | ||||||
| import torch.cuda | import torch.cuda | ||||||
| from torch._utils_internal import get_writable_path | from torch._utils_internal import get_writable_path | ||||||
| from torch._six import string_classes | from torch._six import string_classes, inf | ||||||
| import torch.backends.cudnn | import torch.backends.cudnn | ||||||
| import torch.backends.mkl | import torch.backends.mkl | ||||||
|  |  | ||||||
| @ -353,7 +353,7 @@ class TestCase(unittest.TestCase): | |||||||
|         elif isinstance(x, bool) and isinstance(y, bool): |         elif isinstance(x, bool) and isinstance(y, bool): | ||||||
|             super(TestCase, self).assertEqual(x, y, message) |             super(TestCase, self).assertEqual(x, y, message) | ||||||
|         elif isinstance(x, Number) and isinstance(y, Number): |         elif isinstance(x, Number) and isinstance(y, Number): | ||||||
|             if abs(x) == float('inf') or abs(y) == float('inf'): |             if abs(x) == inf or abs(y) == inf: | ||||||
|                 if allow_inf: |                 if allow_inf: | ||||||
|                     super(TestCase, self).assertEqual(x, y, message) |                     super(TestCase, self).assertEqual(x, y, message) | ||||||
|                 else: |                 else: | ||||||
|  | |||||||
| @ -10,6 +10,7 @@ from collections import OrderedDict | |||||||
| from itertools import product | from itertools import product | ||||||
| from operator import mul, itemgetter | from operator import mul, itemgetter | ||||||
| from functools import reduce, wraps | from functools import reduce, wraps | ||||||
|  | from torch._six import inf, nan | ||||||
| from torch.autograd.gradcheck import gradgradcheck, gradcheck | from torch.autograd.gradcheck import gradgradcheck, gradcheck | ||||||
| from torch.autograd.function import once_differentiable | from torch.autograd.function import once_differentiable | ||||||
| from torch.autograd.profiler import profile | from torch.autograd.profiler import profile | ||||||
| @ -1524,12 +1525,12 @@ class TestAutograd(TestCase): | |||||||
|         pyscalar = -12345.1 |         pyscalar = -12345.1 | ||||||
|         f[0] = pyscalar |         f[0] = pyscalar | ||||||
|         self.assertEqual(float(f), pyscalar) |         self.assertEqual(float(f), pyscalar) | ||||||
|         f[0] = float('nan') |         f[0] = nan | ||||||
|         self.assertTrue(math.isnan(float(f))) |         self.assertTrue(math.isnan(float(f))) | ||||||
|         f[0] = float('inf') |         f[0] = inf | ||||||
|         self.assertEqual(float(f), float('inf'), allow_inf=True) |         self.assertEqual(float(f), inf, allow_inf=True) | ||||||
|         f[0] = float('-inf') |         f[0] = -inf | ||||||
|         self.assertEqual(float(f), float('-inf'), allow_inf=True) |         self.assertEqual(float(f), -inf, allow_inf=True) | ||||||
|  |  | ||||||
|         # integral -> floating point |         # integral -> floating point | ||||||
|         # check we can convert something that loses precision |         # check we can convert something that loses precision | ||||||
| @ -1539,11 +1540,11 @@ class TestAutograd(TestCase): | |||||||
|         self.assertEqual(float(l), float(pyscalar)) |         self.assertEqual(float(l), float(pyscalar)) | ||||||
|  |  | ||||||
|         # floating point -> integral |         # floating point -> integral | ||||||
|         f[0] = float('nan') |         f[0] = nan | ||||||
|         self.assertRaises(ValueError, lambda: integral_conv(f[0])) |         self.assertRaises(ValueError, lambda: integral_conv(f[0])) | ||||||
|         f[0] = float('inf') |         f[0] = inf | ||||||
|         self.assertRaises(OverflowError, lambda: integral_conv(f[0])) |         self.assertRaises(OverflowError, lambda: integral_conv(f[0])) | ||||||
|         f[0] = float('-inf') |         f[0] = -inf | ||||||
|         self.assertRaises(OverflowError, lambda: integral_conv(f[0])) |         self.assertRaises(OverflowError, lambda: integral_conv(f[0])) | ||||||
|         f[0] = sys.float_info.max |         f[0] = sys.float_info.max | ||||||
|         self.assertEqual(integral_conv(f), sys.float_info.max) |         self.assertEqual(integral_conv(f), sys.float_info.max) | ||||||
| @ -1558,9 +1559,9 @@ class TestAutograd(TestCase): | |||||||
|         test_nonzero(l, -2, True) |         test_nonzero(l, -2, True) | ||||||
|         test_nonzero(f, 0.0, False) |         test_nonzero(f, 0.0, False) | ||||||
|         test_nonzero(f, sys.float_info.min, True) |         test_nonzero(f, sys.float_info.min, True) | ||||||
|         test_nonzero(f, float('nan'), bool(float('nan'))) |         test_nonzero(f, nan, bool(nan)) | ||||||
|         test_nonzero(f, float('inf'), bool(float('inf'))) |         test_nonzero(f, inf, bool(inf)) | ||||||
|         test_nonzero(f, float('-inf'), bool(float('-inf'))) |         test_nonzero(f, -inf, bool(-inf)) | ||||||
|  |  | ||||||
|     def test_pyscalar_conversions(self): |     def test_pyscalar_conversions(self): | ||||||
|         self._test_pyscalar_conversions(lambda x: x, lambda x: int(x)) |         self._test_pyscalar_conversions(lambda x: x, lambda x: int(x)) | ||||||
| @ -2825,7 +2826,7 @@ method_tests = [ | |||||||
|     ('std', (S,), (0, True, True), 'keepdim_dim_1d', [0]), |     ('std', (S,), (0, True, True), 'keepdim_dim_1d', [0]), | ||||||
|     ('renorm', (S, S, S), (2, 1, 0.5), 'dim', [1]), |     ('renorm', (S, S, S), (2, 1, 0.5), 'dim', [1]), | ||||||
|     ('renorm', (S, S, S), (1, 2, 3), 'norm_1'), |     ('renorm', (S, S, S), (1, 2, 3), 'norm_1'), | ||||||
|     ('renorm', (S, S, S), (float('inf'), 2, 0.5), 'norm_inf'), |     ('renorm', (S, S, S), (inf, 2, 0.5), 'norm_inf'), | ||||||
|     ('repeat', (S,), (2,), 'single_number'), |     ('repeat', (S,), (2,), 'single_number'), | ||||||
|     ('repeat', (), (2, 3), 'scalar'), |     ('repeat', (), (2, 3), 'scalar'), | ||||||
|     ('repeat', (2, 2), (3, 2)), |     ('repeat', (2, 2), (3, 2)), | ||||||
| @ -2917,7 +2918,7 @@ method_tests = [ | |||||||
|     ('norm', (S, S), (0.5,), '0_5'), |     ('norm', (S, S), (0.5,), '0_5'), | ||||||
|     ('norm', (S, S), (1,), '1'), |     ('norm', (S, S), (1,), '1'), | ||||||
|     ('norm', (S, S), (3,), '3'), |     ('norm', (S, S), (3,), '3'), | ||||||
|     ('norm', (S, S), (float('inf'),), 'inf'), |     ('norm', (S, S), (inf,), 'inf'), | ||||||
|     ('norm', (S, S), (-1,), 'neg_1'), |     ('norm', (S, S), (-1,), 'neg_1'), | ||||||
|     ('norm', (S, S), (-0.5,), 'neg_0_5'), |     ('norm', (S, S), (-0.5,), 'neg_0_5'), | ||||||
|     ('norm', (S, S), (-1.5,), 'neg_1_5'), |     ('norm', (S, S), (-1.5,), 'neg_1_5'), | ||||||
|  | |||||||
| @ -1,3 +1,4 @@ | |||||||
|  | import os | ||||||
| import unittest | import unittest | ||||||
| import sys | import sys | ||||||
|  |  | ||||||
| @ -15,7 +16,10 @@ import common | |||||||
|  |  | ||||||
| from torch.utils.cpp_extension import CUDA_HOME | from torch.utils.cpp_extension import CUDA_HOME | ||||||
| TEST_CUDA = torch.cuda.is_available() and CUDA_HOME is not None | TEST_CUDA = torch.cuda.is_available() and CUDA_HOME is not None | ||||||
| TEST_CUDNN = TEST_CUDA and torch.backends.cudnn.is_available() | TEST_CUDNN = False | ||||||
|  | if TEST_CUDA: | ||||||
|  |     CUDNN_HEADER_EXISTS = os.path.isfile(os.path.join(CUDA_HOME, 'include/cudnn.h')) | ||||||
|  |     TEST_CUDNN = TEST_CUDA and CUDNN_HEADER_EXISTS and torch.backends.cudnn.is_available() | ||||||
|  |  | ||||||
|  |  | ||||||
| class TestCppExtension(common.TestCase): | class TestCppExtension(common.TestCase): | ||||||
|  | |||||||
| @ -12,6 +12,7 @@ import torch | |||||||
| import torch.cuda | import torch.cuda | ||||||
| import torch.cuda.comm as comm | import torch.cuda.comm as comm | ||||||
| from torch import multiprocessing as mp | from torch import multiprocessing as mp | ||||||
|  | from torch._six import inf, nan | ||||||
|  |  | ||||||
| from test_torch import TestTorch | from test_torch import TestTorch | ||||||
| from common import TestCase, get_gpu_type, to_gpu, freeze_rng_state, run_tests, \ | from common import TestCase, get_gpu_type, to_gpu, freeze_rng_state, run_tests, \ | ||||||
| @ -782,7 +783,7 @@ class TestCuda(TestCase): | |||||||
|             if not end0: |             if not end0: | ||||||
|                 gen1_max_times = torch.LongTensor(1).random_(0, 3)[0] |                 gen1_max_times = torch.LongTensor(1).random_(0, 3)[0] | ||||||
|             else: |             else: | ||||||
|                 gen1_max_times = float('inf') |                 gen1_max_times = inf | ||||||
|             t = 0 |             t = 0 | ||||||
|             while t < gen1_max_times and not end1: |             while t < gen1_max_times and not end1: | ||||||
|                 end1 = advance(gen1, end1) |                 end1 = advance(gen1, end1) | ||||||
| @ -901,7 +902,7 @@ class TestCuda(TestCase): | |||||||
|                  (lambda x: x.max(0)[0], 'max_dim')] |                  (lambda x: x.max(0)[0], 'max_dim')] | ||||||
|         for f, name in tests: |         for f, name in tests: | ||||||
|             a = torch.arange(25.0).view(5, 5) |             a = torch.arange(25.0).view(5, 5) | ||||||
|             a[2, 2] = float('nan') |             a[2, 2] = nan | ||||||
|             actual = f(a.cuda()).cpu() |             actual = f(a.cuda()).cpu() | ||||||
|             expected = f(a).cpu() |             expected = f(a).cpu() | ||||||
|             self.assertEqual(torch.isnan(actual), torch.isnan(expected), 'nans for {}'.format(name)) |             self.assertEqual(torch.isnan(actual), torch.isnan(expected), 'nans for {}'.format(name)) | ||||||
| @ -1503,9 +1504,9 @@ class TestCuda(TestCase): | |||||||
|     def test_multinomial_invalid_probs_cuda(self): |     def test_multinomial_invalid_probs_cuda(self): | ||||||
|         test_method = TestCuda._test_multinomial_invalid_probs_cuda |         test_method = TestCuda._test_multinomial_invalid_probs_cuda | ||||||
|         self._spawn_method(test_method, torch.Tensor([0, -1])) |         self._spawn_method(test_method, torch.Tensor([0, -1])) | ||||||
|         self._spawn_method(test_method, torch.Tensor([0, float('inf')])) |         self._spawn_method(test_method, torch.Tensor([0, inf])) | ||||||
|         self._spawn_method(test_method, torch.Tensor([0, float('-inf')])) |         self._spawn_method(test_method, torch.Tensor([0, -inf])) | ||||||
|         self._spawn_method(test_method, torch.Tensor([0, float('nan')])) |         self._spawn_method(test_method, torch.Tensor([0, nan])) | ||||||
|  |  | ||||||
|     def test_broadcast(self): |     def test_broadcast(self): | ||||||
|         TestTorch._test_broadcast(self, lambda t: t.cuda()) |         TestTorch._test_broadcast(self, lambda t: t.cuda()) | ||||||
| @ -1686,7 +1687,6 @@ class TestCuda(TestCase): | |||||||
|         cpu_tensor = torch.tensor([-0.999999994, -1.999999994, -2.0000000111, |         cpu_tensor = torch.tensor([-0.999999994, -1.999999994, -2.0000000111, | ||||||
|                                   -100.99999994, -1931.99999994, 0.000000111, |                                   -100.99999994, -1931.99999994, 0.000000111, | ||||||
|                                   -0.000000111, 0, -1, -2, -931]) |                                   -0.000000111, 0, -1, -2, -931]) | ||||||
|         nan = float('nan') |  | ||||||
|         expected_errors = torch.tensor([0, 0, 0, 0, 0, 0, 0, nan, nan, nan, nan]) |         expected_errors = torch.tensor([0, 0, 0, 0, 0, 0, 0, nan, nan, nan, nan]) | ||||||
|         gpu_tensor = cpu_tensor.cuda() |         gpu_tensor = cpu_tensor.cuda() | ||||||
|         cpu_out = cpu_tensor.digamma() |         cpu_out = cpu_tensor.digamma() | ||||||
|  | |||||||
| @ -30,6 +30,7 @@ from itertools import product | |||||||
| from random import shuffle | from random import shuffle | ||||||
|  |  | ||||||
| import torch | import torch | ||||||
|  | from torch._six import inf | ||||||
| from common import TestCase, run_tests, set_rng_seed, TEST_WITH_UBSAN | from common import TestCase, run_tests, set_rng_seed, TEST_WITH_UBSAN | ||||||
| from common_cuda import TEST_CUDA | from common_cuda import TEST_CUDA | ||||||
| from torch.autograd import grad, gradcheck | from torch.autograd import grad, gradcheck | ||||||
| @ -782,7 +783,7 @@ class TestDistributions(TestCase): | |||||||
|         s = 0.3 |         s = 0.3 | ||||||
|         self.assertEqual(Geometric(p).sample((8,)).size(), (8, 3)) |         self.assertEqual(Geometric(p).sample((8,)).size(), (8, 3)) | ||||||
|         self.assertEqual(Geometric(1).sample(), 0) |         self.assertEqual(Geometric(1).sample(), 0) | ||||||
|         self.assertEqual(Geometric(1).log_prob(torch.tensor(1.)), -float('inf'), allow_inf=True) |         self.assertEqual(Geometric(1).log_prob(torch.tensor(1.)), -inf, allow_inf=True) | ||||||
|         self.assertEqual(Geometric(1).log_prob(torch.tensor(0.)), 0) |         self.assertEqual(Geometric(1).log_prob(torch.tensor(0.)), 0) | ||||||
|         self.assertFalse(Geometric(p).sample().requires_grad) |         self.assertFalse(Geometric(p).sample().requires_grad) | ||||||
|         self.assertEqual(Geometric(r).sample((8,)).size(), (8,)) |         self.assertEqual(Geometric(r).sample((8,)).size(), (8,)) | ||||||
| @ -1162,8 +1163,8 @@ class TestDistributions(TestCase): | |||||||
|         uniform = Uniform(low_1d, high_1d) |         uniform = Uniform(low_1d, high_1d) | ||||||
|         above_high = torch.tensor([4.0]) |         above_high = torch.tensor([4.0]) | ||||||
|         below_low = torch.tensor([-1.0]) |         below_low = torch.tensor([-1.0]) | ||||||
|         self.assertEqual(uniform.log_prob(above_high).item(), -float('inf'), allow_inf=True) |         self.assertEqual(uniform.log_prob(above_high).item(), -inf, allow_inf=True) | ||||||
|         self.assertEqual(uniform.log_prob(below_low).item(), -float('inf'), allow_inf=True) |         self.assertEqual(uniform.log_prob(below_low).item(), -inf, allow_inf=True) | ||||||
|  |  | ||||||
|         # check cdf computation when value outside range |         # check cdf computation when value outside range | ||||||
|         self.assertEqual(uniform.cdf(below_low).item(), 0) |         self.assertEqual(uniform.cdf(below_low).item(), 0) | ||||||
| @ -1190,7 +1191,7 @@ class TestDistributions(TestCase): | |||||||
|         loc_1d = torch.zeros(1, requires_grad=True) |         loc_1d = torch.zeros(1, requires_grad=True) | ||||||
|         scale_1d = torch.ones(1, requires_grad=True) |         scale_1d = torch.ones(1, requires_grad=True) | ||||||
|         self.assertTrue(is_all_nan(Cauchy(loc_1d, scale_1d).mean)) |         self.assertTrue(is_all_nan(Cauchy(loc_1d, scale_1d).mean)) | ||||||
|         self.assertEqual(Cauchy(loc_1d, scale_1d).variance, float('inf'), allow_inf=True) |         self.assertEqual(Cauchy(loc_1d, scale_1d).variance, inf, allow_inf=True) | ||||||
|         self.assertEqual(Cauchy(loc, scale).sample().size(), (5, 5)) |         self.assertEqual(Cauchy(loc, scale).sample().size(), (5, 5)) | ||||||
|         self.assertEqual(Cauchy(loc, scale).sample((7,)).size(), (7, 5, 5)) |         self.assertEqual(Cauchy(loc, scale).sample((7,)).size(), (7, 5, 5)) | ||||||
|         self.assertEqual(Cauchy(loc_1d, scale_1d).sample().size(), (1,)) |         self.assertEqual(Cauchy(loc_1d, scale_1d).sample().size(), (1,)) | ||||||
| @ -1216,7 +1217,7 @@ class TestDistributions(TestCase): | |||||||
|         scale = torch.ones(5, 5, requires_grad=True) |         scale = torch.ones(5, 5, requires_grad=True) | ||||||
|         scale_1d = torch.ones(1, requires_grad=True) |         scale_1d = torch.ones(1, requires_grad=True) | ||||||
|         self.assertTrue(is_all_nan(HalfCauchy(scale_1d).mean)) |         self.assertTrue(is_all_nan(HalfCauchy(scale_1d).mean)) | ||||||
|         self.assertEqual(HalfCauchy(scale_1d).variance, float('inf'), allow_inf=True) |         self.assertEqual(HalfCauchy(scale_1d).variance, inf, allow_inf=True) | ||||||
|         self.assertEqual(HalfCauchy(scale).sample().size(), (5, 5)) |         self.assertEqual(HalfCauchy(scale).sample().size(), (5, 5)) | ||||||
|         self.assertEqual(HalfCauchy(scale).sample((7,)).size(), (7, 5, 5)) |         self.assertEqual(HalfCauchy(scale).sample((7,)).size(), (7, 5, 5)) | ||||||
|         self.assertEqual(HalfCauchy(scale_1d).sample().size(), (1,)) |         self.assertEqual(HalfCauchy(scale_1d).sample().size(), (1,)) | ||||||
| @ -1714,8 +1715,8 @@ class TestDistributions(TestCase): | |||||||
|         alpha = torch.tensor(torch.randn(2, 3).abs(), requires_grad=True) |         alpha = torch.tensor(torch.randn(2, 3).abs(), requires_grad=True) | ||||||
|         scale_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True) |         scale_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True) | ||||||
|         alpha_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True) |         alpha_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True) | ||||||
|         self.assertEqual(Pareto(scale_1d, 0.5).mean, float('inf'), allow_inf=True) |         self.assertEqual(Pareto(scale_1d, 0.5).mean, inf, allow_inf=True) | ||||||
|         self.assertEqual(Pareto(scale_1d, 0.5).variance, float('inf'), allow_inf=True) |         self.assertEqual(Pareto(scale_1d, 0.5).variance, inf, allow_inf=True) | ||||||
|         self.assertEqual(Pareto(scale, alpha).sample().size(), (2, 3)) |         self.assertEqual(Pareto(scale, alpha).sample().size(), (2, 3)) | ||||||
|         self.assertEqual(Pareto(scale, alpha).sample((5,)).size(), (5, 2, 3)) |         self.assertEqual(Pareto(scale, alpha).sample((5,)).size(), (5, 2, 3)) | ||||||
|         self.assertEqual(Pareto(scale_1d, alpha_1d).sample((1,)).size(), (1, 1)) |         self.assertEqual(Pareto(scale_1d, alpha_1d).sample((1,)).size(), (1, 1)) | ||||||
| @ -1832,7 +1833,7 @@ class TestDistributions(TestCase): | |||||||
|         df_1d = torch.tensor(torch.exp(torch.randn(1)), requires_grad=True) |         df_1d = torch.tensor(torch.exp(torch.randn(1)), requires_grad=True) | ||||||
|         self.assertTrue(is_all_nan(StudentT(1).mean)) |         self.assertTrue(is_all_nan(StudentT(1).mean)) | ||||||
|         self.assertTrue(is_all_nan(StudentT(1).variance)) |         self.assertTrue(is_all_nan(StudentT(1).variance)) | ||||||
|         self.assertEqual(StudentT(2).variance, float('inf'), allow_inf=True) |         self.assertEqual(StudentT(2).variance, inf, allow_inf=True) | ||||||
|         self.assertEqual(StudentT(df).sample().size(), (2, 3)) |         self.assertEqual(StudentT(df).sample().size(), (2, 3)) | ||||||
|         self.assertEqual(StudentT(df).sample((5,)).size(), (5, 2, 3)) |         self.assertEqual(StudentT(df).sample((5,)).size(), (5, 2, 3)) | ||||||
|         self.assertEqual(StudentT(df_1d).sample((1,)).size(), (1, 1)) |         self.assertEqual(StudentT(df_1d).sample((1,)).size(), (1, 1)) | ||||||
| @ -2962,7 +2963,7 @@ class TestKL(TestCase): | |||||||
|  |  | ||||||
|     def test_kl_infinite(self): |     def test_kl_infinite(self): | ||||||
|         for p, q in self.infinite_examples: |         for p, q in self.infinite_examples: | ||||||
|             self.assertTrue((kl_divergence(p, q) == float('inf')).all(), |             self.assertTrue((kl_divergence(p, q) == inf).all(), | ||||||
|                             'Incorrect KL({}, {})'.format(type(p).__name__, type(q).__name__)) |                             'Incorrect KL({}, {})'.format(type(p).__name__, type(q).__name__)) | ||||||
|  |  | ||||||
|     def test_kl_edgecases(self): |     def test_kl_edgecases(self): | ||||||
| @ -2996,7 +2997,7 @@ class TestKL(TestCase): | |||||||
|                     continue |                     continue | ||||||
|                 x = dist.sample(sample_shape=(60000,)) |                 x = dist.sample(sample_shape=(60000,)) | ||||||
|                 expected = -dist.log_prob(x).mean(0) |                 expected = -dist.log_prob(x).mean(0) | ||||||
|                 ignore = (expected == float('inf')) |                 ignore = (expected == inf) | ||||||
|                 expected[ignore] = actual[ignore] |                 expected[ignore] = actual[ignore] | ||||||
|                 self.assertEqual(actual, expected, prec=0.2, message='\n'.join([ |                 self.assertEqual(actual, expected, prec=0.2, message='\n'.join([ | ||||||
|                     '{} example {}/{}, incorrect .entropy().'.format(Dist.__name__, i + 1, len(params)), |                     '{} example {}/{}, incorrect .entropy().'.format(Dist.__name__, i + 1, len(params)), | ||||||
| @ -3157,12 +3158,12 @@ class TestNumericalStability(TestCase): | |||||||
|  |  | ||||||
|     def test_categorical_log_prob_with_logits(self): |     def test_categorical_log_prob_with_logits(self): | ||||||
|         for dtype in ([torch.float, torch.double]): |         for dtype in ([torch.float, torch.double]): | ||||||
|             p = torch.tensor([-float('inf'), 0], dtype=dtype, requires_grad=True) |             p = torch.tensor([-inf, 0], dtype=dtype, requires_grad=True) | ||||||
|             categorical = OneHotCategorical(logits=p) |             categorical = OneHotCategorical(logits=p) | ||||||
|             log_pdf_prob_1 = categorical.log_prob(torch.tensor([0, 1], dtype=dtype)) |             log_pdf_prob_1 = categorical.log_prob(torch.tensor([0, 1], dtype=dtype)) | ||||||
|             self.assertEqual(log_pdf_prob_1.item(), 0) |             self.assertEqual(log_pdf_prob_1.item(), 0) | ||||||
|             log_pdf_prob_0 = categorical.log_prob(torch.tensor([1, 0], dtype=dtype)) |             log_pdf_prob_0 = categorical.log_prob(torch.tensor([1, 0], dtype=dtype)) | ||||||
|             self.assertEqual(log_pdf_prob_0.item(), -float('inf'), allow_inf=True) |             self.assertEqual(log_pdf_prob_0.item(), -inf, allow_inf=True) | ||||||
|  |  | ||||||
|     def test_multinomial_log_prob(self): |     def test_multinomial_log_prob(self): | ||||||
|         for dtype in ([torch.float, torch.double]): |         for dtype in ([torch.float, torch.double]): | ||||||
| @ -3174,12 +3175,12 @@ class TestNumericalStability(TestCase): | |||||||
|  |  | ||||||
|     def test_multinomial_log_prob_with_logits(self): |     def test_multinomial_log_prob_with_logits(self): | ||||||
|         for dtype in ([torch.float, torch.double]): |         for dtype in ([torch.float, torch.double]): | ||||||
|             p = torch.tensor([-float('inf'), 0], dtype=dtype, requires_grad=True) |             p = torch.tensor([-inf, 0], dtype=dtype, requires_grad=True) | ||||||
|             multinomial = Multinomial(10, logits=p) |             multinomial = Multinomial(10, logits=p) | ||||||
|             log_pdf_prob_1 = multinomial.log_prob(torch.tensor([0, 10], dtype=dtype)) |             log_pdf_prob_1 = multinomial.log_prob(torch.tensor([0, 10], dtype=dtype)) | ||||||
|             self.assertEqual(log_pdf_prob_1.item(), 0) |             self.assertEqual(log_pdf_prob_1.item(), 0) | ||||||
|             log_pdf_prob_0 = multinomial.log_prob(torch.tensor([10, 0], dtype=dtype)) |             log_pdf_prob_0 = multinomial.log_prob(torch.tensor([10, 0], dtype=dtype)) | ||||||
|             self.assertEqual(log_pdf_prob_0.item(), -float('inf'), allow_inf=True) |             self.assertEqual(log_pdf_prob_0.item(), -inf, allow_inf=True) | ||||||
|  |  | ||||||
|  |  | ||||||
| class TestLazyLogitsInitialization(TestCase): | class TestLazyLogitsInitialization(TestCase): | ||||||
|  | |||||||
| @ -15,6 +15,7 @@ import hashlib | |||||||
| import os | import os | ||||||
|  |  | ||||||
| import torch | import torch | ||||||
|  | from torch._six import inf, nan | ||||||
| import torch.backends.cudnn as cudnn | import torch.backends.cudnn as cudnn | ||||||
| import torch.nn as nn | import torch.nn as nn | ||||||
| import torch.nn.functional as F | import torch.nn.functional as F | ||||||
| @ -1465,7 +1466,7 @@ class TestNN(NNTestCase): | |||||||
|  |  | ||||||
|         def compute_norm(norm_type): |         def compute_norm(norm_type): | ||||||
|             norm_type = float(norm_type) |             norm_type = float(norm_type) | ||||||
|             if norm_type != float('inf'): |             if norm_type != inf: | ||||||
|                 total_norm = 0 |                 total_norm = 0 | ||||||
|                 for p in l.parameters(): |                 for p in l.parameters(): | ||||||
|                     total_norm += p.grad.data.abs().pow(norm_type).sum() |                     total_norm += p.grad.data.abs().pow(norm_type).sum() | ||||||
| @ -1560,8 +1561,6 @@ class TestNN(NNTestCase): | |||||||
|     # We don't want to make propagating NaN a hard requirement on ops, but for |     # We don't want to make propagating NaN a hard requirement on ops, but for | ||||||
|     # these easy ones, we should make them do so. |     # these easy ones, we should make them do so. | ||||||
|     def _test_nonlinearity_propagate_nan(self, device): |     def _test_nonlinearity_propagate_nan(self, device): | ||||||
|         nan = float('nan') |  | ||||||
|  |  | ||||||
|         def test(nonlinearity, *args, **kwargs): |         def test(nonlinearity, *args, **kwargs): | ||||||
|             x = torch.tensor([nan], device=device) |             x = torch.tensor([nan], device=device) | ||||||
|             fn = getattr(F, nonlinearity) |             fn = getattr(F, nonlinearity) | ||||||
| @ -2547,7 +2546,7 @@ class TestNN(NNTestCase): | |||||||
|             for num_dim in [1, 2, 3]: |             for num_dim in [1, 2, 3]: | ||||||
|                 fn_name = '{}max_pool{}d'.format(adaptive, num_dim) |                 fn_name = '{}max_pool{}d'.format(adaptive, num_dim) | ||||||
|                 fn = getattr(F, fn_name) |                 fn = getattr(F, fn_name) | ||||||
|                 x = torch.full([1, 1] + num_dim * [3], float('nan')) |                 x = torch.full([1, 1] + num_dim * [3], nan) | ||||||
|                 res = fn(x, 1 if adaptive else 3) |                 res = fn(x, 1 if adaptive else 3) | ||||||
|                 self.assertTrue(math.isnan(res.item())) |                 self.assertTrue(math.isnan(res.item())) | ||||||
|  |  | ||||||
|  | |||||||
| @ -3,6 +3,7 @@ import unittest | |||||||
| import functools | import functools | ||||||
| from copy import deepcopy | from copy import deepcopy | ||||||
| import torch | import torch | ||||||
|  | from torch._six import inf | ||||||
| import torch.optim as optim | import torch.optim as optim | ||||||
| import torch.legacy.optim as old_optim | import torch.legacy.optim as old_optim | ||||||
| import torch.nn.functional as F | import torch.nn.functional as F | ||||||
| @ -478,8 +479,8 @@ class TestOptim(TestCase): | |||||||
|     @unittest.skipIf(TEST_WITH_UBSAN, "division-by-zero error with UBSAN") |     @unittest.skipIf(TEST_WITH_UBSAN, "division-by-zero error with UBSAN") | ||||||
|     def test_lbfgs_return_type(self): |     def test_lbfgs_return_type(self): | ||||||
|         params = [torch.randn(10, 5), torch.randn(10)] |         params = [torch.randn(10, 5), torch.randn(10)] | ||||||
|         opt1 = optim.LBFGS(params, 0.01, tolerance_grad=float('inf')) |         opt1 = optim.LBFGS(params, 0.01, tolerance_grad=inf) | ||||||
|         opt2 = optim.LBFGS(params, 0.01, tolerance_grad=-float('inf')) |         opt2 = optim.LBFGS(params, 0.01, tolerance_grad=-inf) | ||||||
|  |  | ||||||
|         def closure(): |         def closure(): | ||||||
|             return torch.Tensor([10]) |             return torch.Tensor([10]) | ||||||
|  | |||||||
| @ -16,6 +16,7 @@ import gzip | |||||||
| from torch._utils_internal import get_file_path, get_file_path_2 | from torch._utils_internal import get_file_path, get_file_path_2 | ||||||
| from torch.utils.dlpack import from_dlpack, to_dlpack | from torch.utils.dlpack import from_dlpack, to_dlpack | ||||||
| from torch._utils import _rebuild_tensor | from torch._utils import _rebuild_tensor | ||||||
|  | from torch._six import inf, nan | ||||||
| from itertools import product, combinations | from itertools import product, combinations | ||||||
| from functools import reduce | from functools import reduce | ||||||
| from torch import multiprocessing as mp | from torch import multiprocessing as mp | ||||||
| @ -241,17 +242,17 @@ class TestTorch(TestCase): | |||||||
|         self.assertTrue(torch.allclose(x, y, rtol=0.01, atol=0.0)) |         self.assertTrue(torch.allclose(x, y, rtol=0.01, atol=0.0)) | ||||||
|         self.assertFalse(torch.allclose(x, y)) |         self.assertFalse(torch.allclose(x, y)) | ||||||
|         self.assertTrue(torch.allclose(torch.tensor([0.0]), torch.tensor([1e-8]))) |         self.assertTrue(torch.allclose(torch.tensor([0.0]), torch.tensor([1e-8]))) | ||||||
|         x = torch.tensor([2.0, 3.0, float('nan')]) |         x = torch.tensor([2.0, 3.0, nan]) | ||||||
|         y = torch.tensor([2.01, 3.01, float('nan')]) |         y = torch.tensor([2.01, 3.01, nan]) | ||||||
|         self.assertFalse(torch.allclose(x, y, rtol=1e-2)) |         self.assertFalse(torch.allclose(x, y, rtol=1e-2)) | ||||||
|         self.assertTrue(torch.allclose(x, y, rtol=1e-2, equal_nan=True)) |         self.assertTrue(torch.allclose(x, y, rtol=1e-2, equal_nan=True)) | ||||||
|         self.assertFalse(torch.allclose(x, y, rtol=1e-3, equal_nan=True)) |         self.assertFalse(torch.allclose(x, y, rtol=1e-3, equal_nan=True)) | ||||||
|         inf = torch.tensor([float('inf')]) |         inf_t = torch.tensor([inf]) | ||||||
|         self.assertTrue(torch.allclose(inf, inf)) |         self.assertTrue(torch.allclose(inf_t, inf_t)) | ||||||
|         self.assertTrue(torch.allclose(-inf, -inf)) |         self.assertTrue(torch.allclose(-inf_t, -inf_t)) | ||||||
|         self.assertFalse(torch.allclose(inf, -inf)) |         self.assertFalse(torch.allclose(inf_t, -inf_t)) | ||||||
|         self.assertFalse(torch.allclose(inf, torch.tensor([1e20]))) |         self.assertFalse(torch.allclose(inf_t, torch.tensor([1e20]))) | ||||||
|         self.assertFalse(torch.allclose(-inf, torch.tensor([-1e20]))) |         self.assertFalse(torch.allclose(-inf_t, torch.tensor([-1e20]))) | ||||||
|  |  | ||||||
|     def test_linear_algebra_scalar_raises(self): |     def test_linear_algebra_scalar_raises(self): | ||||||
|         m = torch.randn(5, 5) |         m = torch.randn(5, 5) | ||||||
| @ -359,13 +360,13 @@ class TestTorch(TestCase): | |||||||
|             try: |             try: | ||||||
|                 return math.sinh(x) |                 return math.sinh(x) | ||||||
|             except OverflowError: |             except OverflowError: | ||||||
|                 return float('inf') if x > 0 else float('-inf') |                 return inf if x > 0 else -inf | ||||||
|         self._test_math(torch.sinh, sinh) |         self._test_math(torch.sinh, sinh) | ||||||
|  |  | ||||||
|     def test_lgamma(self): |     def test_lgamma(self): | ||||||
|         def lgamma(x): |         def lgamma(x): | ||||||
|             if x <= 0 and x == int(x): |             if x <= 0 and x == int(x): | ||||||
|                 return float('inf') |                 return inf | ||||||
|             return math.lgamma(x) |             return math.lgamma(x) | ||||||
|         self._test_math(torch.lgamma, lgamma) |         self._test_math(torch.lgamma, lgamma) | ||||||
|  |  | ||||||
| @ -392,14 +393,14 @@ class TestTorch(TestCase): | |||||||
|         # scipy 1.1.0 changed when it returns +/-inf vs. NaN |         # scipy 1.1.0 changed when it returns +/-inf vs. NaN | ||||||
|         def torch_digamma_without_inf(inp): |         def torch_digamma_without_inf(inp): | ||||||
|             res = torch.digamma(inp) |             res = torch.digamma(inp) | ||||||
|             res[(res == float('-inf')) | (res == float('inf'))] = float('nan') |             res[(res == -inf) | (res == inf)] = nan | ||||||
|             return res |             return res | ||||||
|  |  | ||||||
|         def scipy_digamma_without_inf(inp): |         def scipy_digamma_without_inf(inp): | ||||||
|             res = digamma(inp) |             res = digamma(inp) | ||||||
|             if np.isscalar(res): |             if np.isscalar(res): | ||||||
|                 return res if np.isfinite(res) else float('nan') |                 return res if np.isfinite(res) else nan | ||||||
|             res[np.isinf(res)] = float('nan') |             res[np.isinf(res)] = nan | ||||||
|             return res |             return res | ||||||
|  |  | ||||||
|         self._test_math(torch_digamma_without_inf, scipy_digamma_without_inf, self._digamma_input()) |         self._test_math(torch_digamma_without_inf, scipy_digamma_without_inf, self._digamma_input()) | ||||||
| @ -413,7 +414,7 @@ class TestTorch(TestCase): | |||||||
|                             self._digamma_input(test_poles=False)) |                             self._digamma_input(test_poles=False)) | ||||||
|  |  | ||||||
|     def test_asin(self): |     def test_asin(self): | ||||||
|         self._test_math(torch.asin, lambda x: math.asin(x) if abs(x) <= 1 else float('nan')) |         self._test_math(torch.asin, lambda x: math.asin(x) if abs(x) <= 1 else nan) | ||||||
|  |  | ||||||
|     def test_cos(self): |     def test_cos(self): | ||||||
|         self._test_math_by_name('cos') |         self._test_math_by_name('cos') | ||||||
| @ -425,11 +426,11 @@ class TestTorch(TestCase): | |||||||
|             except OverflowError: |             except OverflowError: | ||||||
|                 # Return inf on overflow. |                 # Return inf on overflow. | ||||||
|                 # See http://en.cppreference.com/w/cpp/numeric/math/cosh |                 # See http://en.cppreference.com/w/cpp/numeric/math/cosh | ||||||
|                 return float('inf') |                 return inf | ||||||
|         self._test_math(torch.cosh, cosh) |         self._test_math(torch.cosh, cosh) | ||||||
|  |  | ||||||
|     def test_acos(self): |     def test_acos(self): | ||||||
|         self._test_math(torch.acos, lambda x: math.acos(x) if abs(x) <= 1 else float('nan')) |         self._test_math(torch.acos, lambda x: math.acos(x) if abs(x) <= 1 else nan) | ||||||
|  |  | ||||||
|     def test_tan(self): |     def test_tan(self): | ||||||
|         self._test_math_by_name('tan') |         self._test_math_by_name('tan') | ||||||
| @ -443,36 +444,36 @@ class TestTorch(TestCase): | |||||||
|     def test_log(self): |     def test_log(self): | ||||||
|         def log(x): |         def log(x): | ||||||
|             if x == 0: |             if x == 0: | ||||||
|                 return float('-inf') |                 return -inf | ||||||
|             elif x < 0: |             elif x < 0: | ||||||
|                 return float('nan') |                 return nan | ||||||
|             return math.log(x) |             return math.log(x) | ||||||
|         self._test_math(torch.log, log) |         self._test_math(torch.log, log) | ||||||
|  |  | ||||||
|     def test_log10(self): |     def test_log10(self): | ||||||
|         def log10(x): |         def log10(x): | ||||||
|             if x == 0: |             if x == 0: | ||||||
|                 return float('-inf') |                 return -inf | ||||||
|             elif x < 0: |             elif x < 0: | ||||||
|                 return float('nan') |                 return nan | ||||||
|             return math.log10(x) |             return math.log10(x) | ||||||
|         self._test_math(torch.log10, log10) |         self._test_math(torch.log10, log10) | ||||||
|  |  | ||||||
|     def test_log1p(self): |     def test_log1p(self): | ||||||
|         def log1p(x): |         def log1p(x): | ||||||
|             if x == -1: |             if x == -1: | ||||||
|                 return float('-inf') |                 return -inf | ||||||
|             elif x < -1: |             elif x < -1: | ||||||
|                 return float('nan') |                 return nan | ||||||
|             return math.log1p(x) |             return math.log1p(x) | ||||||
|         self._test_math(torch.log1p, log1p) |         self._test_math(torch.log1p, log1p) | ||||||
|  |  | ||||||
|     def test_log2(self): |     def test_log2(self): | ||||||
|         def log2(x): |         def log2(x): | ||||||
|             if x == 0: |             if x == 0: | ||||||
|                 return float('-inf') |                 return -inf | ||||||
|             elif x < 0: |             elif x < 0: | ||||||
|                 return float('nan') |                 return nan | ||||||
|             try: |             try: | ||||||
|                 return math.log2(x) |                 return math.log2(x) | ||||||
|             except AttributeError: |             except AttributeError: | ||||||
| @ -480,7 +481,7 @@ class TestTorch(TestCase): | |||||||
|         self._test_math(torch.log2, log2) |         self._test_math(torch.log2, log2) | ||||||
|  |  | ||||||
|     def test_sqrt(self): |     def test_sqrt(self): | ||||||
|         self._test_math(torch.sqrt, lambda x: math.sqrt(x) if x >= 0 else float('nan')) |         self._test_math(torch.sqrt, lambda x: math.sqrt(x) if x >= 0 else nan) | ||||||
|  |  | ||||||
|     def test_erf(self): |     def test_erf(self): | ||||||
|         self._test_math_by_name('erf') |         self._test_math_by_name('erf') | ||||||
| @ -493,9 +494,9 @@ class TestTorch(TestCase): | |||||||
|             inputValues = torch.randn(4, 4, out=tensor()).clamp(-2., 2.) |             inputValues = torch.randn(4, 4, out=tensor()).clamp(-2., 2.) | ||||||
|             self.assertEqual(tensor(inputValues).erf().erfinv(), tensor(inputValues)) |             self.assertEqual(tensor(inputValues).erf().erfinv(), tensor(inputValues)) | ||||||
|             # test inf |             # test inf | ||||||
|             self.assertTrue(torch.equal(tensor([-1, 1]).erfinv(), tensor([float('-inf'), float('inf')]))) |             self.assertTrue(torch.equal(tensor([-1, 1]).erfinv(), tensor([-inf, inf]))) | ||||||
|             # test nan |             # test nan | ||||||
|             self.assertEqual(tensor([-2, 2]).erfinv(), tensor([float('nan'), float('nan')])) |             self.assertEqual(tensor([-2, 2]).erfinv(), tensor([nan, nan])) | ||||||
|  |  | ||||||
|         checkType(torch.FloatTensor) |         checkType(torch.FloatTensor) | ||||||
|         checkType(torch.DoubleTensor) |         checkType(torch.DoubleTensor) | ||||||
| @ -505,7 +506,7 @@ class TestTorch(TestCase): | |||||||
|             try: |             try: | ||||||
|                 return math.exp(x) |                 return math.exp(x) | ||||||
|             except OverflowError: |             except OverflowError: | ||||||
|                 return float('inf') |                 return inf | ||||||
|         self._test_math(torch.exp, exp) |         self._test_math(torch.exp, exp) | ||||||
|  |  | ||||||
|     def test_expm1(self): |     def test_expm1(self): | ||||||
| @ -513,7 +514,7 @@ class TestTorch(TestCase): | |||||||
|             try: |             try: | ||||||
|                 return math.expm1(x) |                 return math.expm1(x) | ||||||
|             except OverflowError: |             except OverflowError: | ||||||
|                 return float('inf') |                 return inf | ||||||
|         self._test_math(torch.expm1, expm1) |         self._test_math(torch.expm1, expm1) | ||||||
|  |  | ||||||
|     def test_floor(self): |     def test_floor(self): | ||||||
| @ -525,9 +526,9 @@ class TestTorch(TestCase): | |||||||
|     def test_rsqrt(self): |     def test_rsqrt(self): | ||||||
|         def rsqrt(x): |         def rsqrt(x): | ||||||
|             if x == 0: |             if x == 0: | ||||||
|                 return float('inf') |                 return inf | ||||||
|             elif x < 0: |             elif x < 0: | ||||||
|                 return float('nan') |                 return nan | ||||||
|             return 1.0 / math.sqrt(x) |             return 1.0 / math.sqrt(x) | ||||||
|  |  | ||||||
|         self._test_math(torch.rsqrt, rsqrt) |         self._test_math(torch.rsqrt, rsqrt) | ||||||
| @ -615,7 +616,7 @@ class TestTorch(TestCase): | |||||||
|         # NaNs |         # NaNs | ||||||
|         for index in (0, 4, 99): |         for index in (0, 4, 99): | ||||||
|             m1 = torch.randn(100) |             m1 = torch.randn(100) | ||||||
|             m1[index] = float('nan') |             m1[index] = nan | ||||||
|             res1val, res1ind = torch.max(m1, 0) |             res1val, res1ind = torch.max(m1, 0) | ||||||
|             self.assertTrue(math.isnan(res1val)) |             self.assertTrue(math.isnan(res1val)) | ||||||
|             self.assertEqual(res1ind, index) |             self.assertEqual(res1ind, index) | ||||||
| @ -633,14 +634,14 @@ class TestTorch(TestCase): | |||||||
|         # full reduction |         # full reduction | ||||||
|         x = torch.randn(5, device=device) |         x = torch.randn(5, device=device) | ||||||
|         xn = x.cpu().numpy() |         xn = x.cpu().numpy() | ||||||
|         for p in [0, 1, 2, 3, 4, float('inf')]: |         for p in [0, 1, 2, 3, 4, inf]: | ||||||
|             res = x.norm(p).item() |             res = x.norm(p).item() | ||||||
|             expected = np.linalg.norm(xn, p) |             expected = np.linalg.norm(xn, p) | ||||||
|             self.assertEqual(res, expected, "full reduction failed for {}-norm".format(p)) |             self.assertEqual(res, expected, "full reduction failed for {}-norm".format(p)) | ||||||
|         # one dimension |         # one dimension | ||||||
|         x = torch.randn(5, 5, device=device) |         x = torch.randn(5, 5, device=device) | ||||||
|         xn = x.cpu().numpy() |         xn = x.cpu().numpy() | ||||||
|         for p in [0, 1, 2, 3, 4, float('inf')]: |         for p in [0, 1, 2, 3, 4, inf]: | ||||||
|             res = x.norm(p, 1).cpu().numpy() |             res = x.norm(p, 1).cpu().numpy() | ||||||
|             expected = np.linalg.norm(xn, p, 1) |             expected = np.linalg.norm(xn, p, 1) | ||||||
|             self.assertEqual(res.shape, expected.shape) |             self.assertEqual(res.shape, expected.shape) | ||||||
| @ -808,10 +809,10 @@ class TestTorch(TestCase): | |||||||
|             ('prod', lambda *args, **kwargs: torch.prod(*args, **kwargs), 1), |             ('prod', lambda *args, **kwargs: torch.prod(*args, **kwargs), 1), | ||||||
|             ('sum', lambda *args, **kwargs: torch.sum(*args, **kwargs), 0), |             ('sum', lambda *args, **kwargs: torch.sum(*args, **kwargs), 0), | ||||||
|             ('norm', lambda *args, **kwargs: torch.norm(*args, p=2, **kwargs), 0), |             ('norm', lambda *args, **kwargs: torch.norm(*args, p=2, **kwargs), 0), | ||||||
|             ('mean', lambda *args, **kwargs: torch.mean(*args, **kwargs), float('nan')), |             ('mean', lambda *args, **kwargs: torch.mean(*args, **kwargs), nan), | ||||||
|             ('var', lambda *args, **kwargs: torch.var(*args, **kwargs), float('nan')), |             ('var', lambda *args, **kwargs: torch.var(*args, **kwargs), nan), | ||||||
|             ('std', lambda *args, **kwargs: torch.std(*args, **kwargs), float('nan')), |             ('std', lambda *args, **kwargs: torch.std(*args, **kwargs), nan), | ||||||
|             ('logsumexp', lambda *args, **kwargs: torch.logsumexp(*args, **kwargs), float('-inf')), |             ('logsumexp', lambda *args, **kwargs: torch.logsumexp(*args, **kwargs), -inf), | ||||||
|         ] |         ] | ||||||
|  |  | ||||||
|         devices = ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda'] |         devices = ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda'] | ||||||
| @ -878,8 +879,8 @@ class TestTorch(TestCase): | |||||||
|     def test_logsumexp(self): |     def test_logsumexp(self): | ||||||
|         from scipy.special import logsumexp |         from scipy.special import logsumexp | ||||||
|         a = torch.randn(5, 4) |         a = torch.randn(5, 4) | ||||||
|         a[0, 0] = float('inf') |         a[0, 0] = inf | ||||||
|         a[1, :] = float('-inf') |         a[1, :] = -inf | ||||||
|         actual = a.logsumexp(1) |         actual = a.logsumexp(1) | ||||||
|         expected = logsumexp(a.numpy(), 1) |         expected = logsumexp(a.numpy(), 1) | ||||||
|         self.assertEqual(expected.shape, actual.shape) |         self.assertEqual(expected.shape, actual.shape) | ||||||
| @ -1540,7 +1541,7 @@ class TestTorch(TestCase): | |||||||
|         self._test_cop(torch.mul, lambda x, y: x * y) |         self._test_cop(torch.mul, lambda x, y: x * y) | ||||||
|  |  | ||||||
|     def test_cpow(self): |     def test_cpow(self): | ||||||
|         self._test_cop(torch.pow, lambda x, y: float('nan') if x < 0 else math.pow(x, y)) |         self._test_cop(torch.pow, lambda x, y: nan if x < 0 else math.pow(x, y)) | ||||||
|  |  | ||||||
|     @unittest.skipIf(not TEST_NUMPY, 'Numpy not found') |     @unittest.skipIf(not TEST_NUMPY, 'Numpy not found') | ||||||
|     def test_einsum(self): |     def test_einsum(self): | ||||||
| @ -2416,7 +2417,7 @@ class TestTorch(TestCase): | |||||||
|         # full reduction |         # full reduction | ||||||
|         x = torch.randn(5, 5) |         x = torch.randn(5, 5) | ||||||
|         xn = x.numpy() |         xn = x.numpy() | ||||||
|         for p in [1, 2, 3, 4, float('inf')]: |         for p in [1, 2, 3, 4, inf]: | ||||||
|             res = x.renorm(p, 1, 1) |             res = x.renorm(p, 1, 1) | ||||||
|             expected = x / x.norm(p, 0, keepdim=True).clamp(min=1) |             expected = x / x.norm(p, 0, keepdim=True).clamp(min=1) | ||||||
|             self.assertEqual(res.numpy(), expected.numpy(), "renorm failed for {}-norm".format(p)) |             self.assertEqual(res.numpy(), expected.numpy(), "renorm failed for {}-norm".format(p)) | ||||||
| @ -2532,9 +2533,9 @@ class TestTorch(TestCase): | |||||||
|     def test_multinomial_invalid_probs(self): |     def test_multinomial_invalid_probs(self): | ||||||
|         test_method = TestTorch._test_multinomial_invalid_probs |         test_method = TestTorch._test_multinomial_invalid_probs | ||||||
|         self._spawn_method(test_method, torch.Tensor([0, -1])) |         self._spawn_method(test_method, torch.Tensor([0, -1])) | ||||||
|         self._spawn_method(test_method, torch.Tensor([0, float('inf')])) |         self._spawn_method(test_method, torch.Tensor([0, inf])) | ||||||
|         self._spawn_method(test_method, torch.Tensor([0, float('-inf')])) |         self._spawn_method(test_method, torch.Tensor([0, -inf])) | ||||||
|         self._spawn_method(test_method, torch.Tensor([0, float('nan')])) |         self._spawn_method(test_method, torch.Tensor([0, nan])) | ||||||
|  |  | ||||||
|     @suppress_warnings |     @suppress_warnings | ||||||
|     def test_range(self): |     def test_range(self): | ||||||
| @ -4672,15 +4673,15 @@ class TestTorch(TestCase): | |||||||
|         self.assertEqual(x.nelement(), all.long().sum()) |         self.assertEqual(x.nelement(), all.long().sum()) | ||||||
|  |  | ||||||
|     def test_isfinite(self): |     def test_isfinite(self): | ||||||
|         x = torch.Tensor([1, float('inf'), 2, float('-inf'), float('nan'), -10]) |         x = torch.Tensor([1, inf, 2, -inf, nan, -10]) | ||||||
|         self.assertEqual(torch.isfinite(x), torch.ByteTensor([1, 0, 1, 0, 0, 1])) |         self.assertEqual(torch.isfinite(x), torch.ByteTensor([1, 0, 1, 0, 0, 1])) | ||||||
|  |  | ||||||
|     def test_isinf(self): |     def test_isinf(self): | ||||||
|         x = torch.Tensor([1, float('inf'), 2, float('-inf'), float('nan')]) |         x = torch.Tensor([1, inf, 2, -inf, nan]) | ||||||
|         self.assertEqual(torch.isinf(x), torch.ByteTensor([0, 1, 0, 1, 0])) |         self.assertEqual(torch.isinf(x), torch.ByteTensor([0, 1, 0, 1, 0])) | ||||||
|  |  | ||||||
|     def test_isnan(self): |     def test_isnan(self): | ||||||
|         x = torch.Tensor([1, float('nan'), 2]) |         x = torch.Tensor([1, nan, 2]) | ||||||
|         self.assertEqual(torch.isnan(x), torch.ByteTensor([0, 1, 0])) |         self.assertEqual(torch.isnan(x), torch.ByteTensor([0, 1, 0])) | ||||||
|  |  | ||||||
|     def test_RNGState(self): |     def test_RNGState(self): | ||||||
| @ -7418,7 +7419,7 @@ class TestTorch(TestCase): | |||||||
|         self.assertExpected(str(x), subname='negint') |         self.assertExpected(str(x), subname='negint') | ||||||
|  |  | ||||||
|         # test inf and nan |         # test inf and nan | ||||||
|         x = torch.tensor([4, float('inf'), 1.5, float('-inf'), 0, float('nan'), 1]) |         x = torch.tensor([4, inf, 1.5, -inf, 0, nan, 1]) | ||||||
|         self.assertEqual(x.__repr__(), str(x)) |         self.assertEqual(x.__repr__(), str(x)) | ||||||
|         self.assertExpected(str(x), subname='nonfinite') |         self.assertExpected(str(x), subname='nonfinite') | ||||||
|  |  | ||||||
|  | |||||||
| @ -413,6 +413,7 @@ class TestFFI(TestCase): | |||||||
|     @unittest.skipIf(not HAS_CFFI or not HAS_CUDA, "ffi tests require cffi package") |     @unittest.skipIf(not HAS_CFFI or not HAS_CUDA, "ffi tests require cffi package") | ||||||
|     @unittest.skipIf(IS_WINDOWS, "ffi doesn't currently work on Windows") |     @unittest.skipIf(IS_WINDOWS, "ffi doesn't currently work on Windows") | ||||||
|     def test_gpu(self): |     def test_gpu(self): | ||||||
|  |         from torch.utils.cpp_extension import CUDA_HOME | ||||||
|         create_extension( |         create_extension( | ||||||
|             name='gpulib', |             name='gpulib', | ||||||
|             headers=[test_dir + '/ffi/src/cuda/cudalib.h'], |             headers=[test_dir + '/ffi/src/cuda/cudalib.h'], | ||||||
| @ -421,6 +422,7 @@ class TestFFI(TestCase): | |||||||
|             ], |             ], | ||||||
|             with_cuda=True, |             with_cuda=True, | ||||||
|             verbose=False, |             verbose=False, | ||||||
|  |             include_dirs=[os.path.join(CUDA_HOME, 'include')], | ||||||
|         ).build() |         ).build() | ||||||
|         import gpulib |         import gpulib | ||||||
|         tensor = torch.ones(2, 2).float() |         tensor = torch.ones(2, 2).float() | ||||||
|  | |||||||
| @ -41,6 +41,9 @@ while [[ $# -gt 0 ]]; do | |||||||
|       --full-caffe2) |       --full-caffe2) | ||||||
|           FULL_CAFFE2=1 |           FULL_CAFFE2=1 | ||||||
|           ;; |           ;; | ||||||
|  |       --cuda-static-link) | ||||||
|  |           CAFFE2_STATIC_LINK_CUDA=1 | ||||||
|  |           ;; | ||||||
|       *) |       *) | ||||||
|           break |           break | ||||||
|           ;; |           ;; | ||||||
| @ -261,6 +264,7 @@ function build_caffe2() { | |||||||
|       -DBUILD_SHARED_LIBS=ON \ |       -DBUILD_SHARED_LIBS=ON \ | ||||||
|       -DONNX_NAMESPACE=$ONNX_NAMESPACE \ |       -DONNX_NAMESPACE=$ONNX_NAMESPACE \ | ||||||
|       -DUSE_CUDA=$USE_CUDA \ |       -DUSE_CUDA=$USE_CUDA \ | ||||||
|  |       -DCAFFE2_STATIC_LINK_CUDA=$CAFFE2_STATIC_LINK_CUDA \ | ||||||
|       -DUSE_ROCM=$USE_ROCM \ |       -DUSE_ROCM=$USE_ROCM \ | ||||||
|       -DUSE_NNPACK=$USE_NNPACK \ |       -DUSE_NNPACK=$USE_NNPACK \ | ||||||
|       -DCUDNN_INCLUDE_DIR=$CUDNN_INCLUDE_DIR \ |       -DCUDNN_INCLUDE_DIR=$CUDNN_INCLUDE_DIR \ | ||||||
|  | |||||||
| @ -25,6 +25,13 @@ import sys | |||||||
| PY2 = sys.version_info[0] == 2 | PY2 = sys.version_info[0] == 2 | ||||||
| PY3 = sys.version_info[0] == 3 | PY3 = sys.version_info[0] == 3 | ||||||
|  |  | ||||||
|  | if PY2: | ||||||
|  |     inf = float('inf') | ||||||
|  |     nan = float('nan') | ||||||
|  | else: | ||||||
|  |     import math | ||||||
|  |     inf = math.inf | ||||||
|  |     nan = math.nan | ||||||
|  |  | ||||||
| if PY2: | if PY2: | ||||||
|     string_classes = basestring |     string_classes = basestring | ||||||
|  | |||||||
| @ -1743,8 +1743,8 @@ scatter_(dim, index, src) -> Tensor | |||||||
|  |  | ||||||
| Writes all values from the tensor :attr:`src` into :attr:`self` at the indices | Writes all values from the tensor :attr:`src` into :attr:`self` at the indices | ||||||
| specified in the :attr:`index` tensor. For each value in :attr:`src`, its output | specified in the :attr:`index` tensor. For each value in :attr:`src`, its output | ||||||
| index is specified by its index in :attr:`src` for dimension != :attr:`dim` and | index is specified by its index in :attr:`src` for ``dimension != dim`` and by | ||||||
| by the corresponding value in :attr:`index` for dimension = :attr:`dim`. | the corresponding value in :attr:`index` for ``dimension = dim``. | ||||||
|  |  | ||||||
| For a 3-D tensor, :attr:`self` is updated as:: | For a 3-D tensor, :attr:`self` is updated as:: | ||||||
|  |  | ||||||
| @ -1754,14 +1754,14 @@ For a 3-D tensor, :attr:`self` is updated as:: | |||||||
|  |  | ||||||
| This is the reverse operation of the manner described in :meth:`~Tensor.gather`. | This is the reverse operation of the manner described in :meth:`~Tensor.gather`. | ||||||
|  |  | ||||||
| :attr:`self`, :attr:`index` and :attr:`src` should have the same number of | :attr:`self`, :attr:`index` and :attr:`src` (if it is a Tensor) should have the same | ||||||
| dimensions. It is also required that `index.size(d) <= src.size(d)` for all | number of dimensions. It is also required that ``index.size(d) <= src.size(d)`` | ||||||
| dimensions `d`, and that `index.size(d) <= self.size(d)` for all dimensions | for all dimensions ``d``, and that ``index.size(d) <= self.size(d)`` for all | ||||||
| `d != dim`. | dimensions ``d != dim``. | ||||||
|  |  | ||||||
| Moreover, as for :meth:`~Tensor.gather`, the values of :attr:`index` must be | Moreover, as for :meth:`~Tensor.gather`, the values of :attr:`index` must be | ||||||
| between `0` and `(self.size(dim) -1)` inclusive, and all values in a row along | between ``0`` and ``self.size(dim) - 1`` inclusive, and all values in a row | ||||||
| the specified dimension :attr:`dim` must be unique. | along the specified dimension :attr:`dim` must be unique. | ||||||
|  |  | ||||||
| Args: | Args: | ||||||
|     dim (int): the axis along which to index |     dim (int): the axis along which to index | ||||||
| @ -1785,6 +1785,50 @@ Example:: | |||||||
|             [ 0.0000,  0.0000,  0.0000,  1.2300]]) |             [ 0.0000,  0.0000,  0.0000,  1.2300]]) | ||||||
| """) | """) | ||||||
|  |  | ||||||
|  | add_docstr_all('scatter_add_', | ||||||
|  |                r""" | ||||||
|  | scatter_add_(dim, index, other) -> Tensor | ||||||
|  |  | ||||||
|  | Adds all values from the tensor :attr:`other` into :attr:`self` at the indices | ||||||
|  | specified in the :attr:`index` tensor in a similar fashion as | ||||||
|  | :meth:`~torch.Tensor.scatter_`. For each value in :attr:`other`, it is added to | ||||||
|  | an index in :attr:`self` which is specified by its index in :attr:`other` | ||||||
|  | for ``dimension != dim`` and by the corresponding value in :attr:`index` for | ||||||
|  | ``dimension = dim``. | ||||||
|  |  | ||||||
|  | For a 3-D tensor, :attr:`self` is updated as:: | ||||||
|  |  | ||||||
|  |     self[index[i][j][k]][j][k] += other[i][j][k]  # if dim == 0 | ||||||
|  |     self[i][index[i][j][k]][k] += other[i][j][k]  # if dim == 1 | ||||||
|  |     self[i][j][index[i][j][k]] += other[i][j][k]  # if dim == 2 | ||||||
|  |  | ||||||
|  | :attr:`self`, :attr:`index` and :attr:`other` should have the same number of | ||||||
|  | dimensions. It is also required that ``index.size(d) <= other.size(d)`` for all | ||||||
|  | dimensions ``d``, and that ``index.size(d) <= self.size(d)`` for all dimensions | ||||||
|  | ``d != dim``. | ||||||
|  |  | ||||||
|  | Moreover, as for :meth:`~Tensor.gather`, the values of :attr:`index` must be | ||||||
|  | between ``0`` and ``self.size(dim) - 1`` inclusive, and all values in a row along | ||||||
|  | the specified dimension :attr:`dim` must be unique. | ||||||
|  |  | ||||||
|  | Args: | ||||||
|  |     dim (int): the axis along which to index | ||||||
|  |     index (LongTensor): the indices of elements to scatter and add | ||||||
|  |     other (Tensor): the source elements to scatter and add | ||||||
|  |  | ||||||
|  | Example:: | ||||||
|  |  | ||||||
|  |     >>> x = torch.rand(2, 5) | ||||||
|  |     >>> x | ||||||
|  |     tensor([[0.7404, 0.0427, 0.6480, 0.3806, 0.8328], | ||||||
|  |             [0.7953, 0.2009, 0.9154, 0.6782, 0.9620]]) | ||||||
|  |     >>> torch.ones(3, 5).scatter_add_(0, torch.tensor([[0, 1, 2, 0, 0], [2, 0, 0, 1, 2]]), x) | ||||||
|  |     tensor([[1.7404, 1.2009, 1.9154, 1.3806, 1.8328], | ||||||
|  |             [1.0000, 1.0427, 1.0000, 1.6782, 1.0000], | ||||||
|  |             [1.7953, 1.0000, 1.6480, 1.0000, 1.9620]]) | ||||||
|  |  | ||||||
|  | """) | ||||||
|  |  | ||||||
| add_docstr_all('select', | add_docstr_all('select', | ||||||
|                r""" |                r""" | ||||||
| select(dim, index) -> Tensor | select(dim, index) -> Tensor | ||||||
|  | |||||||
| @ -2,6 +2,7 @@ import math | |||||||
| import torch | import torch | ||||||
| from functools import reduce | from functools import reduce | ||||||
| from sys import float_info | from sys import float_info | ||||||
|  | from torch._six import inf, nan | ||||||
|  |  | ||||||
|  |  | ||||||
| class __PrinterOptions(object): | class __PrinterOptions(object): | ||||||
| @ -50,7 +51,7 @@ def set_printoptions( | |||||||
|             PRINT_OPTS.linewidth = 80 |             PRINT_OPTS.linewidth = 80 | ||||||
|         elif profile == "full": |         elif profile == "full": | ||||||
|             PRINT_OPTS.precision = 4 |             PRINT_OPTS.precision = 4 | ||||||
|             PRINT_OPTS.threshold = float('inf') |             PRINT_OPTS.threshold = inf | ||||||
|             PRINT_OPTS.edgeitems = 3 |             PRINT_OPTS.edgeitems = 3 | ||||||
|             PRINT_OPTS.linewidth = 80 |             PRINT_OPTS.linewidth = 80 | ||||||
|  |  | ||||||
| @ -101,8 +102,8 @@ class _Formatter(object): | |||||||
|  |  | ||||||
|             else: |             else: | ||||||
|                 copy_abs = copy.abs() |                 copy_abs = copy.abs() | ||||||
|                 pos_inf_mask = copy_abs.eq(float('inf')) |                 pos_inf_mask = copy_abs.eq(inf) | ||||||
|                 neg_inf_mask = copy_abs.eq(float('-inf')) |                 neg_inf_mask = copy_abs.eq(-inf) | ||||||
|                 nan_mask = copy_abs.ne(copy) |                 nan_mask = copy_abs.ne(copy) | ||||||
|                 invalid_value_mask = pos_inf_mask + neg_inf_mask + nan_mask |                 invalid_value_mask = pos_inf_mask + neg_inf_mask + nan_mask | ||||||
|                 if invalid_value_mask.all(): |                 if invalid_value_mask.all(): | ||||||
|  | |||||||
| @ -1,4 +1,5 @@ | |||||||
| import torch | import torch | ||||||
|  | from torch._six import nan | ||||||
| from torch.distributions import constraints | from torch.distributions import constraints | ||||||
| from torch.distributions.distribution import Distribution | from torch.distributions.distribution import Distribution | ||||||
| from torch.distributions.utils import probs_to_logits, logits_to_probs, lazy_property, broadcast_all | from torch.distributions.utils import probs_to_logits, logits_to_probs, lazy_property, broadcast_all | ||||||
| @ -72,11 +73,11 @@ class Categorical(Distribution): | |||||||
|  |  | ||||||
|     @property |     @property | ||||||
|     def mean(self): |     def mean(self): | ||||||
|         return self.probs.new_tensor(float('nan')).expand(self._extended_shape()) |         return self.probs.new_tensor(nan).expand(self._extended_shape()) | ||||||
|  |  | ||||||
|     @property |     @property | ||||||
|     def variance(self): |     def variance(self): | ||||||
|         return self.probs.new_tensor(float('nan')).expand(self._extended_shape()) |         return self.probs.new_tensor(nan).expand(self._extended_shape()) | ||||||
|  |  | ||||||
|     def sample(self, sample_shape=torch.Size()): |     def sample(self, sample_shape=torch.Size()): | ||||||
|         sample_shape = self._extended_shape(sample_shape) |         sample_shape = self._extended_shape(sample_shape) | ||||||
|  | |||||||
| @ -1,4 +1,5 @@ | |||||||
| import math | import math | ||||||
|  | from torch._six import inf, nan | ||||||
| from numbers import Number | from numbers import Number | ||||||
|  |  | ||||||
| import torch | import torch | ||||||
| @ -37,11 +38,11 @@ class Cauchy(Distribution): | |||||||
|  |  | ||||||
|     @property |     @property | ||||||
|     def mean(self): |     def mean(self): | ||||||
|         return self.loc.new_tensor(float('nan')).expand(self._extended_shape()) |         return self.loc.new_tensor(nan).expand(self._extended_shape()) | ||||||
|  |  | ||||||
|     @property |     @property | ||||||
|     def variance(self): |     def variance(self): | ||||||
|         return self.loc.new_tensor(float('inf')).expand(self._extended_shape()) |         return self.loc.new_tensor(inf).expand(self._extended_shape()) | ||||||
|  |  | ||||||
|     def rsample(self, sample_shape=torch.Size()): |     def rsample(self, sample_shape=torch.Size()): | ||||||
|         shape = self._extended_shape(sample_shape) |         shape = self._extended_shape(sample_shape) | ||||||
|  | |||||||
| @ -1,6 +1,7 @@ | |||||||
| from numbers import Number | from numbers import Number | ||||||
| import torch | import torch | ||||||
| import math | import math | ||||||
|  | from torch._six import nan | ||||||
| from torch.distributions import constraints | from torch.distributions import constraints | ||||||
| from torch.distributions.distribution import Distribution | from torch.distributions.distribution import Distribution | ||||||
| from torch.distributions.gamma import Gamma | from torch.distributions.gamma import Gamma | ||||||
| @ -39,13 +40,13 @@ class FisherSnedecor(Distribution): | |||||||
|     @property |     @property | ||||||
|     def mean(self): |     def mean(self): | ||||||
|         df2 = self.df2.clone() |         df2 = self.df2.clone() | ||||||
|         df2[df2 <= 2] = float('nan') |         df2[df2 <= 2] = nan | ||||||
|         return df2 / (df2 - 2) |         return df2 / (df2 - 2) | ||||||
|  |  | ||||||
|     @property |     @property | ||||||
|     def variance(self): |     def variance(self): | ||||||
|         df2 = self.df2.clone() |         df2 = self.df2.clone() | ||||||
|         df2[df2 <= 4] = float('nan') |         df2[df2 <= 4] = nan | ||||||
|         return 2 * df2.pow(2) * (self.df1 + df2 - 2) / (self.df1 * (df2 - 2).pow(2) * (df2 - 4)) |         return 2 * df2.pow(2) * (self.df1 + df2 - 2) / (self.df1 * (df2 - 2).pow(2) * (df2 - 4)) | ||||||
|  |  | ||||||
|     def rsample(self, sample_shape=torch.Size(())): |     def rsample(self, sample_shape=torch.Size(())): | ||||||
|  | |||||||
| @ -1,5 +1,6 @@ | |||||||
| import math | import math | ||||||
|  |  | ||||||
|  | from torch._six import inf | ||||||
| from torch.distributions import constraints | from torch.distributions import constraints | ||||||
| from torch.distributions.transforms import AbsTransform | from torch.distributions.transforms import AbsTransform | ||||||
| from torch.distributions.cauchy import Cauchy | from torch.distributions.cauchy import Cauchy | ||||||
| @ -44,7 +45,7 @@ class HalfCauchy(TransformedDistribution): | |||||||
|  |  | ||||||
|     def log_prob(self, value): |     def log_prob(self, value): | ||||||
|         log_prob = self.base_dist.log_prob(value) + math.log(2) |         log_prob = self.base_dist.log_prob(value) + math.log(2) | ||||||
|         log_prob[value.expand(log_prob.shape) < 0] = -float('inf') |         log_prob[value.expand(log_prob.shape) < 0] = -inf | ||||||
|         return log_prob |         return log_prob | ||||||
|  |  | ||||||
|     def cdf(self, value): |     def cdf(self, value): | ||||||
|  | |||||||
| @ -1,5 +1,6 @@ | |||||||
| import math | import math | ||||||
|  |  | ||||||
|  | from torch._six import inf | ||||||
| from torch.distributions import constraints | from torch.distributions import constraints | ||||||
| from torch.distributions.transforms import AbsTransform | from torch.distributions.transforms import AbsTransform | ||||||
| from torch.distributions.normal import Normal | from torch.distributions.normal import Normal | ||||||
| @ -44,7 +45,7 @@ class HalfNormal(TransformedDistribution): | |||||||
|  |  | ||||||
|     def log_prob(self, value): |     def log_prob(self, value): | ||||||
|         log_prob = self.base_dist.log_prob(value) + math.log(2) |         log_prob = self.base_dist.log_prob(value) + math.log(2) | ||||||
|         log_prob[value.expand(log_prob.shape) < 0] = -float('inf') |         log_prob[value.expand(log_prob.shape) < 0] = -inf | ||||||
|         return log_prob |         return log_prob | ||||||
|  |  | ||||||
|     def cdf(self, value): |     def cdf(self, value): | ||||||
|  | |||||||
| @ -3,6 +3,7 @@ import warnings | |||||||
| from functools import total_ordering | from functools import total_ordering | ||||||
|  |  | ||||||
| import torch | import torch | ||||||
|  | from torch._six import inf | ||||||
|  |  | ||||||
| from .bernoulli import Bernoulli | from .bernoulli import Bernoulli | ||||||
| from .beta import Beta | from .beta import Beta | ||||||
| @ -113,7 +114,7 @@ def _infinite_like(tensor): | |||||||
|     """ |     """ | ||||||
|     Helper function for obtaining infinite KL Divergence throughout |     Helper function for obtaining infinite KL Divergence throughout | ||||||
|     """ |     """ | ||||||
|     return tensor.new_tensor(float('inf')).expand_as(tensor) |     return tensor.new_tensor(inf).expand_as(tensor) | ||||||
|  |  | ||||||
|  |  | ||||||
| def _x_log_x(tensor): | def _x_log_x(tensor): | ||||||
| @ -173,10 +174,10 @@ _euler_gamma = 0.57721566490153286060 | |||||||
| @register_kl(Bernoulli, Bernoulli) | @register_kl(Bernoulli, Bernoulli) | ||||||
| def _kl_bernoulli_bernoulli(p, q): | def _kl_bernoulli_bernoulli(p, q): | ||||||
|     t1 = p.probs * (p.probs / q.probs).log() |     t1 = p.probs * (p.probs / q.probs).log() | ||||||
|     t1[q.probs == 0] = float('inf') |     t1[q.probs == 0] = inf | ||||||
|     t1[p.probs == 0] = 0 |     t1[p.probs == 0] = 0 | ||||||
|     t2 = (1 - p.probs) * ((1 - p.probs) / (1 - q.probs)).log() |     t2 = (1 - p.probs) * ((1 - p.probs) / (1 - q.probs)).log() | ||||||
|     t2[q.probs == 1] = float('inf') |     t2[q.probs == 1] = inf | ||||||
|     t2[p.probs == 1] = 0 |     t2[p.probs == 1] = 0 | ||||||
|     return t1 + t2 |     return t1 + t2 | ||||||
|  |  | ||||||
| @ -208,7 +209,7 @@ def _kl_binomial_binomial(p, q): | |||||||
| @register_kl(Categorical, Categorical) | @register_kl(Categorical, Categorical) | ||||||
| def _kl_categorical_categorical(p, q): | def _kl_categorical_categorical(p, q): | ||||||
|     t = p.probs * (p.logits - q.logits) |     t = p.probs * (p.logits - q.logits) | ||||||
|     t[q.probs == 0] = float('inf') |     t[q.probs == 0] = inf | ||||||
|     t[p.probs == 0] = 0 |     t[p.probs == 0] = 0 | ||||||
|     return t.sum(-1) |     return t.sum(-1) | ||||||
|  |  | ||||||
| @ -322,7 +323,7 @@ def _kl_pareto_pareto(p, q): | |||||||
|     t1 = q.alpha * scale_ratio.log() |     t1 = q.alpha * scale_ratio.log() | ||||||
|     t2 = -alpha_ratio.log() |     t2 = -alpha_ratio.log() | ||||||
|     result = t1 + t2 + alpha_ratio - 1 |     result = t1 + t2 + alpha_ratio - 1 | ||||||
|     result[p.support.lower_bound < q.support.lower_bound] = float('inf') |     result[p.support.lower_bound < q.support.lower_bound] = inf | ||||||
|     return result |     return result | ||||||
|  |  | ||||||
|  |  | ||||||
| @ -346,7 +347,7 @@ def _kl_transformed_transformed(p, q): | |||||||
| @register_kl(Uniform, Uniform) | @register_kl(Uniform, Uniform) | ||||||
| def _kl_uniform_uniform(p, q): | def _kl_uniform_uniform(p, q): | ||||||
|     result = ((q.high - q.low) / (p.high - p.low)).log() |     result = ((q.high - q.low) / (p.high - p.low)).log() | ||||||
|     result[(q.low > p.low) | (q.high < p.high)] = float('inf') |     result[(q.low > p.low) | (q.high < p.high)] = inf | ||||||
|     return result |     return result | ||||||
|  |  | ||||||
|  |  | ||||||
| @ -392,7 +393,7 @@ def _kl_beta_normal(p, q): | |||||||
| @register_kl(Beta, Uniform) | @register_kl(Beta, Uniform) | ||||||
| def _kl_beta_uniform(p, q): | def _kl_beta_uniform(p, q): | ||||||
|     result = -p.entropy() + (q.high - q.low).log() |     result = -p.entropy() + (q.high - q.low).log() | ||||||
|     result[(q.low > p.support.lower_bound) | (q.high < p.support.upper_bound)] = float('inf') |     result[(q.low > p.support.lower_bound) | (q.high < p.support.upper_bound)] = inf | ||||||
|     return result |     return result | ||||||
|  |  | ||||||
|  |  | ||||||
| @ -543,7 +544,7 @@ def _kl_pareto_exponential(p, q): | |||||||
|     t2 = p.alpha.reciprocal() |     t2 = p.alpha.reciprocal() | ||||||
|     t3 = p.alpha * scale_rate_prod / (p.alpha - 1) |     t3 = p.alpha * scale_rate_prod / (p.alpha - 1) | ||||||
|     result = t1 - t2 + t3 - 1 |     result = t1 - t2 + t3 - 1 | ||||||
|     result[p.alpha <= 1] = float('inf') |     result[p.alpha <= 1] = inf | ||||||
|     return result |     return result | ||||||
|  |  | ||||||
|  |  | ||||||
| @ -555,7 +556,7 @@ def _kl_pareto_gamma(p, q): | |||||||
|     t3 = (1 - q.concentration) * common_term |     t3 = (1 - q.concentration) * common_term | ||||||
|     t4 = q.rate * p.alpha * p.scale / (p.alpha - 1) |     t4 = q.rate * p.alpha * p.scale / (p.alpha - 1) | ||||||
|     result = t1 + t2 + t3 + t4 - 1 |     result = t1 + t2 + t3 + t4 - 1 | ||||||
|     result[p.alpha <= 1] = float('inf') |     result[p.alpha <= 1] = inf | ||||||
|     return result |     return result | ||||||
|  |  | ||||||
| # TODO: Add Pareto-Laplace KL Divergence | # TODO: Add Pareto-Laplace KL Divergence | ||||||
| @ -570,7 +571,7 @@ def _kl_pareto_normal(p, q): | |||||||
|     t3 = p.alpha * common_term.pow(2) / (p.alpha - 2) |     t3 = p.alpha * common_term.pow(2) / (p.alpha - 2) | ||||||
|     t4 = (p.alpha * common_term - q.loc).pow(2) |     t4 = (p.alpha * common_term - q.loc).pow(2) | ||||||
|     result = t1 - t2 + (t3 + t4) / var_normal - 1 |     result = t1 - t2 + (t3 + t4) / var_normal - 1 | ||||||
|     result[p.alpha <= 2] = float('inf') |     result[p.alpha <= 2] = inf | ||||||
|     return result |     return result | ||||||
|  |  | ||||||
|  |  | ||||||
| @ -588,14 +589,14 @@ def _kl_uniform_beta(p, q): | |||||||
|     t3 = (q.concentration0 - 1) * (_x_log_x((1 - p.high)) - _x_log_x((1 - p.low)) + common_term) / common_term |     t3 = (q.concentration0 - 1) * (_x_log_x((1 - p.high)) - _x_log_x((1 - p.low)) + common_term) / common_term | ||||||
|     t4 = q.concentration1.lgamma() + q.concentration0.lgamma() - (q.concentration1 + q.concentration0).lgamma() |     t4 = q.concentration1.lgamma() + q.concentration0.lgamma() - (q.concentration1 + q.concentration0).lgamma() | ||||||
|     result = t3 + t4 - t1 - t2 |     result = t3 + t4 - t1 - t2 | ||||||
|     result[(p.high > q.support.upper_bound) | (p.low < q.support.lower_bound)] = float('inf') |     result[(p.high > q.support.upper_bound) | (p.low < q.support.lower_bound)] = inf | ||||||
|     return result |     return result | ||||||
|  |  | ||||||
|  |  | ||||||
| @register_kl(Uniform, Exponential) | @register_kl(Uniform, Exponential) | ||||||
| def _kl_uniform_exponetial(p, q): | def _kl_uniform_exponetial(p, q): | ||||||
|     result = q.rate * (p.high + p.low) / 2 - ((p.high - p.low) * q.rate).log() |     result = q.rate * (p.high + p.low) / 2 - ((p.high - p.low) * q.rate).log() | ||||||
|     result[p.low < q.support.lower_bound] = float('inf') |     result[p.low < q.support.lower_bound] = inf | ||||||
|     return result |     return result | ||||||
|  |  | ||||||
|  |  | ||||||
| @ -607,7 +608,7 @@ def _kl_uniform_gamma(p, q): | |||||||
|     t3 = (1 - q.concentration) * (_x_log_x(p.high) - _x_log_x(p.low) - common_term) / common_term |     t3 = (1 - q.concentration) * (_x_log_x(p.high) - _x_log_x(p.low) - common_term) / common_term | ||||||
|     t4 = q.rate * (p.high + p.low) / 2 |     t4 = q.rate * (p.high + p.low) / 2 | ||||||
|     result = -t1 + t2 + t3 + t4 |     result = -t1 + t2 + t3 + t4 | ||||||
|     result[p.low < q.support.lower_bound] = float('inf') |     result[p.low < q.support.lower_bound] = inf | ||||||
|     return result |     return result | ||||||
|  |  | ||||||
|  |  | ||||||
| @ -638,5 +639,5 @@ def _kl_uniform_pareto(p, q): | |||||||
|     t1 = (q.alpha * q.scale.pow(q.alpha) * (support_uniform)).log() |     t1 = (q.alpha * q.scale.pow(q.alpha) * (support_uniform)).log() | ||||||
|     t2 = (_x_log_x(p.high) - _x_log_x(p.low) - support_uniform) / support_uniform |     t2 = (_x_log_x(p.high) - _x_log_x(p.low) - support_uniform) / support_uniform | ||||||
|     result = t2 * (q.alpha + 1) - t1 |     result = t2 * (q.alpha + 1) - t1 | ||||||
|     result[p.low < q.support.lower_bound] = float('inf') |     result[p.low < q.support.lower_bound] = inf | ||||||
|     return result |     return result | ||||||
|  | |||||||
| @ -1,4 +1,5 @@ | |||||||
| import torch | import torch | ||||||
|  | from torch._six import inf | ||||||
| from torch.distributions.distribution import Distribution | from torch.distributions.distribution import Distribution | ||||||
| from torch.distributions import Categorical | from torch.distributions import Categorical | ||||||
| from numbers import Number | from numbers import Number | ||||||
| @ -93,6 +94,6 @@ class Multinomial(Distribution): | |||||||
|         logits, value = broadcast_all(self.logits.clone(), value) |         logits, value = broadcast_all(self.logits.clone(), value) | ||||||
|         log_factorial_n = torch.lgamma(value.sum(-1) + 1) |         log_factorial_n = torch.lgamma(value.sum(-1) + 1) | ||||||
|         log_factorial_xs = torch.lgamma(value + 1).sum(-1) |         log_factorial_xs = torch.lgamma(value + 1).sum(-1) | ||||||
|         logits[(value == 0) & (logits == -float('inf'))] = 0 |         logits[(value == 0) & (logits == -inf)] = 0 | ||||||
|         log_powers = (logits * value).sum(-1) |         log_powers = (logits * value).sum(-1) | ||||||
|         return log_factorial_n - log_factorial_xs + log_powers |         return log_factorial_n - log_factorial_xs + log_powers | ||||||
|  | |||||||
| @ -1,5 +1,6 @@ | |||||||
| from numbers import Number | from numbers import Number | ||||||
| import torch | import torch | ||||||
|  | from torch._six import inf, nan | ||||||
| import math | import math | ||||||
| from torch.distributions import constraints | from torch.distributions import constraints | ||||||
| from torch.distributions.distribution import Distribution | from torch.distributions.distribution import Distribution | ||||||
| @ -27,15 +28,15 @@ class StudentT(Distribution): | |||||||
|     @property |     @property | ||||||
|     def mean(self): |     def mean(self): | ||||||
|         m = self.loc.clone() |         m = self.loc.clone() | ||||||
|         m[self.df <= 1] = float('nan') |         m[self.df <= 1] = nan | ||||||
|         return m |         return m | ||||||
|  |  | ||||||
|     @property |     @property | ||||||
|     def variance(self): |     def variance(self): | ||||||
|         m = self.df.clone() |         m = self.df.clone() | ||||||
|         m[self.df > 2] = self.scale[self.df > 2].pow(2) * self.df[self.df > 2] / (self.df[self.df > 2] - 2) |         m[self.df > 2] = self.scale[self.df > 2].pow(2) * self.df[self.df > 2] / (self.df[self.df > 2] - 2) | ||||||
|         m[(self.df <= 2) & (self.df > 1)] = float('inf') |         m[(self.df <= 2) & (self.df > 1)] = inf | ||||||
|         m[self.df <= 1] = float('nan') |         m[self.df <= 1] = nan | ||||||
|         return m |         return m | ||||||
|  |  | ||||||
|     def __init__(self, df, loc=0., scale=1., validate_args=None): |     def __init__(self, df, loc=0., scale=1., validate_args=None): | ||||||
|  | |||||||
| @ -1,5 +1,6 @@ | |||||||
| import torch | import torch | ||||||
| import torch.nn.functional as F | import torch.nn.functional as F | ||||||
|  | from torch._six import inf | ||||||
| from operator import mul | from operator import mul | ||||||
| from functools import reduce | from functools import reduce | ||||||
| import math | import math | ||||||
| @ -155,7 +156,7 @@ def isfinite(tensor): | |||||||
|     """ |     """ | ||||||
|     if not isinstance(tensor, torch.Tensor): |     if not isinstance(tensor, torch.Tensor): | ||||||
|         raise ValueError("The argument is not a tensor", str(tensor)) |         raise ValueError("The argument is not a tensor", str(tensor)) | ||||||
|     return (tensor == tensor) & (tensor.abs() != float('inf')) |     return (tensor == tensor) & (tensor.abs() != inf) | ||||||
|  |  | ||||||
|  |  | ||||||
| def isinf(tensor): | def isinf(tensor): | ||||||
| @ -174,7 +175,7 @@ def isinf(tensor): | |||||||
|     """ |     """ | ||||||
|     if not isinstance(tensor, torch.Tensor): |     if not isinstance(tensor, torch.Tensor): | ||||||
|         raise ValueError("The argument is not a tensor", str(tensor)) |         raise ValueError("The argument is not a tensor", str(tensor)) | ||||||
|     return tensor.abs() == float('inf') |     return tensor.abs() == inf | ||||||
|  |  | ||||||
|  |  | ||||||
| def stft(input, n_fft, hop_length=None, win_length=None, window=None, | def stft(input, n_fft, hop_length=None, win_length=None, window=None, | ||||||
|  | |||||||
| @ -1,4 +1,5 @@ | |||||||
| import torch | import torch | ||||||
|  | from torch._six import inf | ||||||
| from .Module import Module | from .Module import Module | ||||||
| from .utils import clear | from .utils import clear | ||||||
|  |  | ||||||
| @ -34,7 +35,7 @@ class Normalize(Module): | |||||||
|         self._output.resize_as_(input) |         self._output.resize_as_(input) | ||||||
|  |  | ||||||
|         # specialization for the infinity norm |         # specialization for the infinity norm | ||||||
|         if self.p == float('inf'): |         if self.p == inf: | ||||||
|             if not self._indices: |             if not self._indices: | ||||||
|                 self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \ |                 self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \ | ||||||
|                     else torch.LongTensor() |                     else torch.LongTensor() | ||||||
| @ -72,7 +73,7 @@ class Normalize(Module): | |||||||
|             self.cross = input.new() |             self.cross = input.new() | ||||||
|         # compute diagonal term with gradOutput |         # compute diagonal term with gradOutput | ||||||
|         self._gradInput.resize_(n, d) |         self._gradInput.resize_(n, d) | ||||||
|         if self.p == float('inf'): |         if self.p == inf: | ||||||
|                 # specialization for the inf case |                 # specialization for the inf case | ||||||
|             torch.mul(self.norm.view(n, 1, 1).expand(n, d, 1), gradOutput, out=self._gradInput) |             torch.mul(self.norm.view(n, 1, 1).expand(n, d, 1), gradOutput, out=self._gradInput) | ||||||
|             self.buffer.resize_as_(input).zero_() |             self.buffer.resize_as_(input).zero_() | ||||||
| @ -113,7 +114,7 @@ class Normalize(Module): | |||||||
|         self._gradInput.add_(-1, self.buffer) |         self._gradInput.add_(-1, self.buffer) | ||||||
|  |  | ||||||
|         # reuse cross buffer for normalization |         # reuse cross buffer for normalization | ||||||
|         if self.p == float('inf'): |         if self.p == inf: | ||||||
|             torch.mul(self.norm, self.norm, out=self.cross) |             torch.mul(self.norm, self.norm, out=self.cross) | ||||||
|         else: |         else: | ||||||
|             torch.mul(self.normp, self.norm, out=self.cross) |             torch.mul(self.normp, self.norm, out=self.cross) | ||||||
|  | |||||||
| @ -1,10 +1,11 @@ | |||||||
| import math | import math | ||||||
|  |  | ||||||
| INFINITY = float('inf') | INFINITY = float('inf') | ||||||
|  | NAN = float('nan') | ||||||
|  |  | ||||||
|  |  | ||||||
| def sqrt_nothrow(x): | def sqrt_nothrow(x): | ||||||
|     return math.sqrt(x) if x >= 0 else float('nan') |     return math.sqrt(x) if x >= 0 else NAN | ||||||
|  |  | ||||||
|  |  | ||||||
| def cg(opfunc, x, config, state=None): | def cg(opfunc, x, config, state=None): | ||||||
| @ -145,7 +146,7 @@ def cg(opfunc, x, config, state=None): | |||||||
|             A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3) |             A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3) | ||||||
|             B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2) |             B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2) | ||||||
|             _denom = (B + sqrt_nothrow(B * B - A * d2 * z3 * z3)) |             _denom = (B + sqrt_nothrow(B * B - A * d2 * z3 * z3)) | ||||||
|             z2 = -d2 * z3 * z3 / _denom if _denom != 0 else float('nan') |             z2 = -d2 * z3 * z3 / _denom if _denom != 0 else NAN | ||||||
|  |  | ||||||
|             if z2 != z2 or z2 == INFINITY or z2 == -INFINITY or z2 < 0: |             if z2 != z2 or z2 == INFINITY or z2 == -INFINITY or z2 < 0: | ||||||
|                 if limit < -0.5: |                 if limit < -0.5: | ||||||
|  | |||||||
| @ -523,7 +523,7 @@ class BCEWithLogitsLoss(_Loss): | |||||||
|     :math:`p_n > 1` increases the recall, :math:`p_n < 1` increases the precision. |     :math:`p_n > 1` increases the recall, :math:`p_n < 1` increases the precision. | ||||||
|  |  | ||||||
|     For example, if a dataset contains 100 positive and 300 negative examples of a single class, |     For example, if a dataset contains 100 positive and 300 negative examples of a single class, | ||||||
|     then `pos_weight` for the class should be equal to math:`\frac{300}{100}=3`. |     then `pos_weight` for the class should be equal to :math:`\frac{300}{100}=3`. | ||||||
|     The loss would act as if the dataset contains :math:`3\times 100=300` positive examples. |     The loss would act as if the dataset contains :math:`3\times 100=300` positive examples. | ||||||
|  |  | ||||||
|     Args: |     Args: | ||||||
|  | |||||||
| @ -691,7 +691,7 @@ class _LPPoolNd(Module): | |||||||
|         self.ceil_mode = ceil_mode |         self.ceil_mode = ceil_mode | ||||||
|  |  | ||||||
|     def extra_repr(self): |     def extra_repr(self): | ||||||
|         return 'norm_type={norm_type}, kernel_size{kernel_size}, stride={stride}, ' \ |         return 'norm_type={norm_type}, kernel_size={kernel_size}, stride={stride}, ' \ | ||||||
|             'ceil_mode={ceil_mode}'.format(**self.__dict__) |             'ceil_mode={ceil_mode}'.format(**self.__dict__) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | |||||||
| @ -1,5 +1,6 @@ | |||||||
| import warnings | import warnings | ||||||
| import torch | import torch | ||||||
|  | from torch._six import inf | ||||||
|  |  | ||||||
|  |  | ||||||
| def clip_grad_norm_(parameters, max_norm, norm_type=2): | def clip_grad_norm_(parameters, max_norm, norm_type=2): | ||||||
| @ -23,7 +24,7 @@ def clip_grad_norm_(parameters, max_norm, norm_type=2): | |||||||
|     parameters = list(filter(lambda p: p.grad is not None, parameters)) |     parameters = list(filter(lambda p: p.grad is not None, parameters)) | ||||||
|     max_norm = float(max_norm) |     max_norm = float(max_norm) | ||||||
|     norm_type = float(norm_type) |     norm_type = float(norm_type) | ||||||
|     if norm_type == float('inf'): |     if norm_type == inf: | ||||||
|         total_norm = max(p.grad.data.abs().max() for p in parameters) |         total_norm = max(p.grad.data.abs().max() for p in parameters) | ||||||
|     else: |     else: | ||||||
|         total_norm = 0 |         total_norm = 0 | ||||||
|  | |||||||
| @ -1,4 +1,6 @@ | |||||||
| import math | import math | ||||||
|  | import torch | ||||||
|  | from torch._six import inf | ||||||
| from bisect import bisect_right | from bisect import bisect_right | ||||||
| from functools import partial | from functools import partial | ||||||
| from .optimizer import Optimizer | from .optimizer import Optimizer | ||||||
| @ -367,9 +369,9 @@ class ReduceLROnPlateau(object): | |||||||
|             raise ValueError('threshold mode ' + threshold_mode + ' is unknown!') |             raise ValueError('threshold mode ' + threshold_mode + ' is unknown!') | ||||||
|  |  | ||||||
|         if mode == 'min': |         if mode == 'min': | ||||||
|             self.mode_worse = float('inf') |             self.mode_worse = inf | ||||||
|         else:  # mode == 'max': |         else:  # mode == 'max': | ||||||
|             self.mode_worse = (-float('inf')) |             self.mode_worse = -inf | ||||||
|  |  | ||||||
|         self.is_better = partial(self._cmp, mode, threshold_mode, threshold) |         self.is_better = partial(self._cmp, mode, threshold_mode, threshold) | ||||||
|  |  | ||||||
|  | |||||||
| @ -65,6 +65,10 @@ CUDA_HOME = _find_cuda_home() | |||||||
| BUILT_FROM_SOURCE_VERSION_PATTERN = re.compile(r'\d+\.\d+\.\d+\w+\+\w+') | BUILT_FROM_SOURCE_VERSION_PATTERN = re.compile(r'\d+\.\d+\.\d+\w+\+\w+') | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def is_binary_build(): | ||||||
|  |     return not BUILT_FROM_SOURCE_VERSION_PATTERN.match(torch.version.__version__) | ||||||
|  |  | ||||||
|  |  | ||||||
| def check_compiler_abi_compatibility(compiler): | def check_compiler_abi_compatibility(compiler): | ||||||
|     ''' |     ''' | ||||||
|     Verifies that the given compiler is ABI-compatible with PyTorch. |     Verifies that the given compiler is ABI-compatible with PyTorch. | ||||||
| @ -77,7 +81,7 @@ def check_compiler_abi_compatibility(compiler): | |||||||
|         False if the compiler is (likely) ABI-incompatible with PyTorch, |         False if the compiler is (likely) ABI-incompatible with PyTorch, | ||||||
|         else True. |         else True. | ||||||
|     ''' |     ''' | ||||||
|     if BUILT_FROM_SOURCE_VERSION_PATTERN.match(torch.version.__version__): |     if not is_binary_build(): | ||||||
|         return True |         return True | ||||||
|     try: |     try: | ||||||
|         check_cmd = '{}' if sys.platform == 'win32' else '{} --version' |         check_cmd = '{}' if sys.platform == 'win32' else '{} --version' | ||||||
| @ -134,6 +138,7 @@ class BuildExtension(build_ext): | |||||||
|         self._check_abi() |         self._check_abi() | ||||||
|         for extension in self.extensions: |         for extension in self.extensions: | ||||||
|             self._define_torch_extension_name(extension) |             self._define_torch_extension_name(extension) | ||||||
|  |             self._add_gnu_abi_flag_if_binary(extension) | ||||||
|  |  | ||||||
|         # Register .cu and .cuh as valid source extensions. |         # Register .cu and .cuh as valid source extensions. | ||||||
|         self.compiler.src_extensions += ['.cu', '.cuh'] |         self.compiler.src_extensions += ['.cu', '.cuh'] | ||||||
| @ -266,6 +271,21 @@ class BuildExtension(build_ext): | |||||||
|         else: |         else: | ||||||
|             extension.extra_compile_args.append(define) |             extension.extra_compile_args.append(define) | ||||||
|  |  | ||||||
|  |     def _add_gnu_abi_flag_if_binary(self, extension): | ||||||
|  |         # If the version string looks like a binary build, | ||||||
|  |         # we know that PyTorch was compiled with gcc 4.9.2. | ||||||
|  |         # if the extension is compiled with gcc >= 5.1, | ||||||
|  |         # then we have to define _GLIBCXX_USE_CXX11_ABI=0 | ||||||
|  |         # so that the std::string in the API is resolved to | ||||||
|  |         # non-C++11 symbols | ||||||
|  |         define = '-D_GLIBCXX_USE_CXX11_ABI=0' | ||||||
|  |         if is_binary_build(): | ||||||
|  |             if isinstance(extension.extra_compile_args, dict): | ||||||
|  |                 for args in extension.extra_compile_args.values(): | ||||||
|  |                     args.append(define) | ||||||
|  |             else: | ||||||
|  |                 extension.extra_compile_args.append(define) | ||||||
|  |  | ||||||
|  |  | ||||||
| def CppExtension(name, sources, *args, **kwargs): | def CppExtension(name, sources, *args, **kwargs): | ||||||
|     ''' |     ''' | ||||||
| @ -785,6 +805,9 @@ def _write_ninja_file(path, | |||||||
|     common_cflags = ['-DTORCH_EXTENSION_NAME={}'.format(name)] |     common_cflags = ['-DTORCH_EXTENSION_NAME={}'.format(name)] | ||||||
|     common_cflags += ['-I{}'.format(include) for include in includes] |     common_cflags += ['-I{}'.format(include) for include in includes] | ||||||
|  |  | ||||||
|  |     if is_binary_build(): | ||||||
|  |         common_cflags += ['-D_GLIBCXX_USE_CXX11_ABI=0'] | ||||||
|  |  | ||||||
|     cflags = common_cflags + ['-fPIC', '-std=c++11'] + extra_cflags |     cflags = common_cflags + ['-fPIC', '-std=c++11'] + extra_cflags | ||||||
|     if sys.platform == 'win32': |     if sys.platform == 'win32': | ||||||
|         from distutils.spawn import _nt_quote_args |         from distutils.spawn import _nt_quote_args | ||||||
|  | |||||||
		Reference in New Issue
	
	Block a user
	