mirror of
				https://github.com/pytorch/pytorch.git
				synced 2025-10-27 00:54:52 +08:00 
			
		
		
		
	Compare commits
	
		
			17 Commits
		
	
	
		
			per_channe
			...
			v0.4.1
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| a24163a95e | |||
| f08f222db3 | |||
| 8f916179f8 | |||
| 7b7e6dbfa7 | |||
| 84b8c1c357 | |||
| b595c3e9ca | |||
| 6ecc275272 | |||
| f34528a723 | |||
| 2edf053549 | |||
| 76c16a5a64 | |||
| f6fac92692 | |||
| bb60c97805 | |||
| 886a367247 | |||
| 416c8ef1d1 | |||
| 2fbbe42a30 | |||
| f07e550b08 | |||
| 3684cc4e52 | 
| @ -151,10 +151,6 @@ endif() | ||||
| # ---[ CMake scripts + modules | ||||
| list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules) | ||||
|  | ||||
| if (MSVC AND ${BUILD_SHARED_LIBS}) | ||||
|   set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON) | ||||
| endif() | ||||
|  | ||||
| # ---[ CMake build directories | ||||
| set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) | ||||
| set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib) | ||||
|  | ||||
| @ -13,7 +13,7 @@ else() | ||||
|   cmake_dependent_option( | ||||
|       USE_CUDNN "Use cuDNN" ON | ||||
|       "USE_CUDA" OFF) | ||||
|   option(ATEN_NO_TEST "Do not build ATen test binaries" OFF) | ||||
|   option(ATEN_NO_TEST "Do not build ATen test binaries" ON) | ||||
|  | ||||
|   # Flag for shared dependencies | ||||
|   set(BUILD_ATEN ON) | ||||
|  | ||||
| @ -1,4 +1,5 @@ | ||||
| #include <ATen/optional.h> | ||||
| #include <ATen/Backtrace.h> | ||||
|  | ||||
| #include <functional> | ||||
| #include <memory> | ||||
|  | ||||
| @ -4,9 +4,11 @@ | ||||
| #include <string> | ||||
| #include <typeinfo> | ||||
|  | ||||
| #include <ATen/ATenGeneral.h> | ||||
|  | ||||
| namespace at { | ||||
| /// Utility to demangle a C++ symbol name. | ||||
| std::string demangle(const char* name); | ||||
| AT_API std::string demangle(const char* name); | ||||
|  | ||||
| /// Returns the printable name of the type. | ||||
| template <typename T> | ||||
| @ -19,7 +21,7 @@ inline const char* demangle_type() { | ||||
| #endif // __GXX_RTTI | ||||
| } | ||||
|  | ||||
| std::string get_backtrace( | ||||
| AT_API std::string get_backtrace( | ||||
|     size_t frames_to_skip = 0, | ||||
|     size_t maximum_number_of_frames = 64, | ||||
|     bool skip_python_frames = true); | ||||
|  | ||||
| @ -250,6 +250,7 @@ IF(USE_CUDA AND NOT USE_ROCM) | ||||
|   ENDIF(USE_MAGMA) | ||||
|   IF ($ENV{ATEN_STATIC_CUDA}) | ||||
|     list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a") | ||||
|     list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcudart_static.a") | ||||
|   ENDIF($ENV{ATEN_STATIC_CUDA}) | ||||
| ENDIF() | ||||
|  | ||||
| @ -405,11 +406,11 @@ ENDFOREACH() | ||||
| INSTALL(FILES ${CMAKE_BINARY_DIR}/aten/src/ATen/Declarations.yaml | ||||
|   DESTINATION ${AT_INSTALL_SHARE_DIR}/ATen) | ||||
|  | ||||
| if(ATEN_NO_TEST) | ||||
|   message("disable test because ATEN_NO_TEST is set") | ||||
| else() | ||||
|   add_subdirectory(test) | ||||
| endif() | ||||
| # if(ATEN_NO_TEST) | ||||
| #   message("disable test because ATEN_NO_TEST is set") | ||||
| # else() | ||||
| #   add_subdirectory(test) | ||||
| # endif() | ||||
|  | ||||
| if (NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO) | ||||
|   foreach(test_src ${ATen_CPU_TEST_SRCS}) | ||||
|  | ||||
| @ -3,6 +3,8 @@ | ||||
| #include <cstdint> | ||||
| #include <utility> | ||||
|  | ||||
| #include <ATen/ATenGeneral.h> | ||||
|  | ||||
| /* | ||||
| * A CUDA stream interface with no CUDA build dependency. | ||||
| *  | ||||
| @ -25,27 +27,27 @@ namespace detail { | ||||
|  | ||||
| // Pointer-based API (for internal use) | ||||
| // Note: ATen/Context is preferred to work with streams safely | ||||
| CUDAStreamInternals* CUDAStream_getDefaultStreamOnDevice(int64_t device); | ||||
| CUDAStreamInternals* CUDAStream_getDefaultStream(); | ||||
| AT_API CUDAStreamInternals* CUDAStream_getDefaultStreamOnDevice(int64_t device); | ||||
| AT_API CUDAStreamInternals* CUDAStream_getDefaultStream(); | ||||
|  | ||||
| CUDAStreamInternals* CUDAStream_createAndRetainWithOptions(int32_t flags, int32_t priority); | ||||
| AT_API CUDAStreamInternals* CUDAStream_createAndRetainWithOptions(int32_t flags, int32_t priority); | ||||
|  | ||||
| CUDAStreamInternals* CUDAStream_getAndRetainCurrentStreamOnDevice(int64_t device); | ||||
| CUDAStreamInternals* CUDAStream_getAndRetainCurrentStream(); | ||||
| AT_API CUDAStreamInternals* CUDAStream_getAndRetainCurrentStreamOnDevice(int64_t device); | ||||
| AT_API CUDAStreamInternals* CUDAStream_getAndRetainCurrentStream(); | ||||
|  | ||||
| // Note: these Unsafe gets should NEVER be used and are only here for legacy | ||||
| // purposes. Once those uses are gone they should be removed. | ||||
| CUDAStreamInternals* CUDAStream_getCurrentStreamOnDeviceUnsafe(int64_t device); | ||||
| CUDAStreamInternals* CUDAStream_getCurrentStreamUnsafe(); | ||||
| AT_API CUDAStreamInternals* CUDAStream_getCurrentStreamOnDeviceUnsafe(int64_t device); | ||||
| AT_API CUDAStreamInternals* CUDAStream_getCurrentStreamUnsafe(); | ||||
|  | ||||
| void CUDAStream_setStreamOnDevice(int64_t device, CUDAStreamInternals* internals); | ||||
| void CUDAStream_setStream(CUDAStreamInternals* internals); | ||||
| AT_API void CUDAStream_setStreamOnDevice(int64_t device, CUDAStreamInternals* internals); | ||||
| AT_API void CUDAStream_setStream(CUDAStreamInternals* internals); | ||||
|  | ||||
| cudaStream_t CUDAStream_stream(CUDAStreamInternals*); | ||||
| int64_t CUDAStream_device(CUDAStreamInternals*); | ||||
| AT_API cudaStream_t CUDAStream_stream(CUDAStreamInternals*); | ||||
| AT_API int64_t CUDAStream_device(CUDAStreamInternals*); | ||||
|  | ||||
| bool CUDAStream_retain(CUDAStreamInternals*); | ||||
| void CUDAStream_free(CUDAStreamInternals*&); | ||||
| AT_API bool CUDAStream_retain(CUDAStreamInternals*); | ||||
| AT_API void CUDAStream_free(CUDAStreamInternals*&); | ||||
|  | ||||
| } // namespace detail | ||||
|  | ||||
| @ -64,10 +66,10 @@ struct CUDAStream { | ||||
|   ~CUDAStream() { detail::CUDAStream_free(internals_); } | ||||
|  | ||||
|   // Copy constructor | ||||
|   CUDAStream(const CUDAStream& other); | ||||
|   AT_API CUDAStream(const CUDAStream& other); | ||||
|  | ||||
|   // Move constructor | ||||
|   CUDAStream(CUDAStream&& other);   | ||||
|   AT_API CUDAStream(CUDAStream&& other); | ||||
|  | ||||
|   // Assignment operator | ||||
|   CUDAStream& operator=(CUDAStream other) { | ||||
|  | ||||
| @ -111,8 +111,8 @@ struct Device { | ||||
| }; | ||||
| } // namespace at | ||||
|  | ||||
| std::ostream& operator<<(std::ostream& stream, at::Device::Type type); | ||||
| std::ostream& operator<<(std::ostream& stream, const at::Device& device); | ||||
| AT_API std::ostream& operator<<(std::ostream& stream, at::Device::Type type); | ||||
| AT_API std::ostream& operator<<(std::ostream& stream, const at::Device& device); | ||||
|  | ||||
| namespace std { | ||||
|   template<> struct hash<at::Device> | ||||
|  | ||||
| @ -43,7 +43,7 @@ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__)        \ | ||||
|       AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__)       \ | ||||
|       default:                                                                \ | ||||
|         AT_ERROR("%s not implemented for '%s'", (NAME), the_type.toString()); \ | ||||
|         AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \ | ||||
|     }                                                                         \ | ||||
|   }() | ||||
|  | ||||
|  | ||||
| @ -35,8 +35,8 @@ namespace at { | ||||
|  | ||||
| namespace detail { | ||||
|  | ||||
| float halfbits2float(unsigned short bits); | ||||
| unsigned short float2halfbits(float value); | ||||
| AT_API float halfbits2float(unsigned short bits); | ||||
| AT_API unsigned short float2halfbits(float value); | ||||
|  | ||||
| } | ||||
|  | ||||
|  | ||||
| @ -33,6 +33,8 @@ | ||||
| #include <type_traits> | ||||
| #include <utility> | ||||
|  | ||||
| #include <ATen/ATenGeneral.h> | ||||
|  | ||||
| #if __GNUG__ && __GNUC__ < 5 | ||||
| #define AT_IS_TRIVIALLY_COPYABLE(T) __has_trivial_copy(T) | ||||
| #else | ||||
| @ -57,7 +59,7 @@ static inline uint64_t NextPowerOf2(uint64_t A) { | ||||
| } | ||||
|  | ||||
| /// This is all the non-templated stuff common to all SmallVectors. | ||||
| class SmallVectorBase { | ||||
| class AT_API SmallVectorBase { | ||||
| protected: | ||||
|   void *BeginX, *EndX, *CapacityX; | ||||
|  | ||||
|  | ||||
| @ -5,7 +5,7 @@ | ||||
| #include "ATen/Error.h" | ||||
|  | ||||
| namespace at { | ||||
| struct SparseTensorImpl : public TensorImpl { | ||||
| struct AT_API SparseTensorImpl : public TensorImpl { | ||||
|   // Stored in COO format, indices + values. | ||||
|  | ||||
|   // Ideal INVARIANTS: | ||||
|  | ||||
| @ -19,7 +19,7 @@ namespace at { | ||||
| /// `torch::TensorOptions` subclass of this `TensorOptions`, which changes | ||||
| /// `type()` to return a variable type instead of a tensor type, such that | ||||
| /// variables are created inside factory methods, instead of tensors. | ||||
| struct TensorOptions { | ||||
| struct AT_API TensorOptions { | ||||
|   TensorOptions() : TensorOptions(/*use_thread_local_default_options=*/true) {} | ||||
|  | ||||
|   /// Constructs the `TensorOptions` with defaults taken from the thread local | ||||
|  | ||||
| @ -143,7 +143,7 @@ static inline ${return_type} ${api_name}(${formals}) { | ||||
| """) | ||||
| # add a native declaration for a native function | ||||
| NATIVE_DECLARATION = CodeTemplate("""\ | ||||
| ${return_type} ${native_type_method_dispatch}(${formals_with_defaults}); | ||||
| AT_API ${return_type} ${native_type_method_dispatch}(${formals_with_defaults}); | ||||
| """) | ||||
|  | ||||
| # special method definition for factory functions in Functions.h | ||||
|  | ||||
| @ -35,11 +35,14 @@ | ||||
| #ifdef _WIN32 | ||||
| # if defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS) | ||||
| #  define TH_API TH_EXTERNC __declspec(dllexport) | ||||
| #  define TH_CPP_API extern __declspec(dllexport) | ||||
| # else | ||||
| #  define TH_API TH_EXTERNC __declspec(dllimport) | ||||
| #  define TH_CPP_API extern __declspec(dllimport) | ||||
| # endif | ||||
| #else | ||||
| # define TH_API TH_EXTERNC | ||||
| # define TH_CPP_API extern | ||||
| #endif | ||||
|  | ||||
| #ifdef _WIN32 | ||||
|  | ||||
| @ -69,18 +69,18 @@ TH_API THStorage* THStorage_newWithSize(at::ScalarType scalar_type, ptrdiff_t si | ||||
| TH_API THStorage* THStorage_newWithAllocator(at::ScalarType scalar_type, ptrdiff_t size, | ||||
|                                              at::Allocator *allocator); | ||||
|  | ||||
| ptrdiff_t THStorage_size(const THStorage *self); | ||||
| size_t THStorage_elementSize(); | ||||
| THStorage* THStorage_newWithMapping(at::ScalarType scalar_type, const char *filename, ptrdiff_t size, int flags); | ||||
| void THStorage_setFlag(THStorage *storage, const char flag); | ||||
| void THStorage_clearFlag(THStorage *storage, const char flag); | ||||
| void THStorage_retain(THStorage *storage); | ||||
| THStorage* THStorage_newWithDataAndAllocator(at::ScalarType scalar_type, | ||||
|                                              at::DataPtr&& data, ptrdiff_t size, | ||||
|                                              at::Allocator* allocator); | ||||
| void THStorage_resize(THStorage *storage, ptrdiff_t size); | ||||
| void THStorage_swap(THStorage *storage1, THStorage *storage2); | ||||
| TH_API ptrdiff_t THStorage_size(const THStorage *self); | ||||
| TH_API size_t THStorage_elementSize(); | ||||
| TH_API THStorage* THStorage_newWithMapping(at::ScalarType scalar_type, const char *filename, ptrdiff_t size, int flags); | ||||
| TH_API void THStorage_setFlag(THStorage *storage, const char flag); | ||||
| TH_API void THStorage_clearFlag(THStorage *storage, const char flag); | ||||
| TH_API void THStorage_retain(THStorage *storage); | ||||
| TH_API THStorage* THStorage_newWithDataAndAllocator(at::ScalarType scalar_type, | ||||
|                                                     at::DataPtr&& data, ptrdiff_t size, | ||||
|                                                     at::Allocator* allocator); | ||||
| TH_API void THStorage_resize(THStorage *storage, ptrdiff_t size); | ||||
| TH_API void THStorage_swap(THStorage *storage1, THStorage *storage2); | ||||
|  | ||||
| void THStorage_weakRetain(THStorage *weak_storage); | ||||
| void THStorage_weakFree(THStorage *weak_storage); | ||||
| THStorage* THStorage_weakLock(THStorage *weak_storage); | ||||
| TH_API void THStorage_weakRetain(THStorage *weak_storage); | ||||
| TH_API void THStorage_weakFree(THStorage *weak_storage); | ||||
| TH_API THStorage* THStorage_weakLock(THStorage *weak_storage); | ||||
|  | ||||
| @ -83,5 +83,5 @@ struct THTensor | ||||
| #include "THGenerateAllTypes.h" | ||||
|  | ||||
| TH_API void THTensor_free(THTensor *self); | ||||
| at::optional<std::vector<int64_t>> THTensor_compute_stride(at::IntList oldshape, at::IntList oldstride, | ||||
|                                                            at::IntList newshape); | ||||
| TH_CPP_API at::optional<std::vector<int64_t>> THTensor_compute_stride(at::IntList oldshape, at::IntList oldstride, | ||||
|                                                                       at::IntList newshape); | ||||
|  | ||||
| @ -6,9 +6,9 @@ static inline void THNN_(Col2Im_shapeCheck)( | ||||
|                          THCState *state, | ||||
|                          THCTensor *input, | ||||
|                          THCTensor *gradOutput, | ||||
|                          int outputHeight, int outputWidth, | ||||
|                          int kH, int kW, int dH, int dW, | ||||
|                          int padH, int padW, int sH, int sW) { | ||||
|                          int64_t outputHeight, int64_t outputWidth, | ||||
|                          int64_t kH, int64_t kW, int64_t dH, int64_t dW, | ||||
|                          int64_t padH, int64_t padW, int64_t sH, int64_t sW) { | ||||
|  | ||||
|   THArgCheck(kW > 0 && kH > 0, 6, | ||||
|              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); | ||||
| @ -17,7 +17,7 @@ static inline void THNN_(Col2Im_shapeCheck)( | ||||
|   THArgCheck(dW > 0 && dH > 0, 8, | ||||
|              "dilation should be greater than zero, but got dH: %d dW: %d", dH, dW); | ||||
|  | ||||
|   int ndim = THCTensor_(nDimension)(state, input); | ||||
|   int64_t ndim = THCTensor_(nDimension)(state, input); | ||||
|   THCUNN_argCheck(state, !input->is_empty() && (ndim == 2 || ndim == 3), 2, input, | ||||
|                   "Expected non-empty 2D or 3D input tensor, but got input of shape %s"); | ||||
|  | ||||
| @ -54,11 +54,11 @@ void THNN_(Col2Im_updateOutput)( | ||||
|            THCState *state, | ||||
|            THCTensor *input, | ||||
|            THCTensor *output, | ||||
|            int outputHeight, int outputWidth, | ||||
|            int kH, int kW, | ||||
|            int dH, int dW, | ||||
|            int padH, int padW, | ||||
|            int sH, int sW) { | ||||
|            int64_t outputHeight, int64_t outputWidth, | ||||
|            int64_t kH, int64_t kW, | ||||
|            int64_t dH, int64_t dW, | ||||
|            int64_t padH, int64_t padW, | ||||
|            int64_t sH, int64_t sW) { | ||||
|  | ||||
|   THCUNN_assertSameGPU(state, 2, input, output); | ||||
|  | ||||
| @ -84,10 +84,10 @@ void THNN_(Col2Im_updateOutput)( | ||||
|   THCTensor *input_n = THCTensor_(new)(state); | ||||
|   THCTensor *output_n = THCTensor_(new)(state); | ||||
|  | ||||
|   int height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||
|   int width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||
|   int64_t height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||
|   int64_t width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||
|  | ||||
|   for (int elt = 0; elt < batchSize; elt++) { | ||||
|   for (int64_t elt = 0; elt < batchSize; elt++) { | ||||
|     THCTensor_(select)(state, input_n, input, 0, elt); | ||||
|     THCTensor_(select)(state, output_n, output, 0, elt); | ||||
|  | ||||
| @ -116,10 +116,10 @@ void THNN_(Col2Im_updateGradInput)( | ||||
|            THCState *state, | ||||
|            THCTensor *gradOutput, | ||||
|            THCTensor *gradInput, | ||||
|            int kH, int kW, | ||||
|            int dH, int dW, | ||||
|            int padH, int padW, | ||||
|            int sH, int sW) { | ||||
|            int64_t kH, int64_t kW, | ||||
|            int64_t dH, int64_t dW, | ||||
|            int64_t padH, int64_t padW, | ||||
|            int64_t sH, int64_t sW) { | ||||
|  | ||||
|   THNN_(Im2Col_updateOutput)(state, gradOutput, gradInput, | ||||
|                              kH, kW, dH, dW, padH, padW, sH, sW); | ||||
|  | ||||
| @ -6,8 +6,8 @@ static inline void THNN_(Im2Col_shapeCheck)( | ||||
|                          THCState *state, | ||||
|                          THCTensor *input, | ||||
|                          THCTensor *gradOutput, | ||||
|                          int kH, int kW, int dH, int dW, | ||||
|                          int padH, int padW, int sH, int sW) { | ||||
|                          int64_t kH, int64_t kW, int64_t dH, int64_t dW, | ||||
|                          int64_t padH, int64_t padW, int64_t sH, int64_t sW) { | ||||
|  | ||||
|   THArgCheck(kW > 0 && kH > 0, 4, | ||||
|              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); | ||||
| @ -18,7 +18,7 @@ static inline void THNN_(Im2Col_shapeCheck)( | ||||
|   THArgCheck(sW > 0 && sH > 0, 10, | ||||
|              "stride should be greater than zero, but got sH: %d sW: %d", sH, sW); | ||||
|  | ||||
|   int ndim = THCTensor_(nDimension)(state, input); | ||||
|   int64_t ndim = THCTensor_(nDimension)(state, input); | ||||
|   THCUNN_argCheck(state, !input->is_empty() && (ndim == 3 || ndim == 4), 2, input, | ||||
|                 "Expected non-empty 3D or 4D input tensor, but got input of shape %s"); | ||||
|  | ||||
| @ -26,11 +26,11 @@ static inline void THNN_(Im2Col_shapeCheck)( | ||||
|   if (ndim == 3) { | ||||
|     dim_batch = -1; | ||||
|   } | ||||
|   int nInputPlane  = THCTensor_(size)(state, input, dim_batch + 1); | ||||
|   int inputHeight  = THCTensor_(size)(state, input, dim_batch + 2); | ||||
|   int inputWidth   = THCTensor_(size)(state, input, dim_batch + 3); | ||||
|   int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||
|   int outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||
|   int64_t nInputPlane  = THCTensor_(size)(state, input, dim_batch + 1); | ||||
|   int64_t inputHeight  = THCTensor_(size)(state, input, dim_batch + 2); | ||||
|   int64_t inputWidth   = THCTensor_(size)(state, input, dim_batch + 3); | ||||
|   int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||
|   int64_t outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||
|  | ||||
|   if (outputHeight < 1 || outputWidth < 1) { | ||||
|     THError("Given input with spatial size (%d, %d), kernel_size=(%d, %d), " | ||||
| @ -46,10 +46,10 @@ void THNN_(Im2Col_updateOutput)( | ||||
|            THCState *state, | ||||
|            THCTensor *input, | ||||
|            THCTensor *output, | ||||
|            int kH, int kW, | ||||
|            int dH, int dW, | ||||
|            int padH, int padW, | ||||
|            int sH, int sW) { | ||||
|            int64_t kH, int64_t kW, | ||||
|            int64_t dH, int64_t dW, | ||||
|            int64_t padH, int64_t padW, | ||||
|            int64_t sH, int64_t sW) { | ||||
|  | ||||
|   THCUNN_assertSameGPU(state, 2, input, output); | ||||
|  | ||||
| @ -62,15 +62,15 @@ void THNN_(Im2Col_updateOutput)( | ||||
|     THCTensor_(resize4d)(state, input, 1, input->size[0], input->size[1], input->size[2]); | ||||
|   } | ||||
|  | ||||
|   int batchSize    = THCTensor_(size)(state, input, 0); | ||||
|   int nInputPlane  = THCTensor_(size)(state, input, 1); | ||||
|   int inputHeight  = THCTensor_(size)(state, input, 2); | ||||
|   int inputWidth   = THCTensor_(size)(state, input, 3); | ||||
|   int64_t batchSize    = THCTensor_(size)(state, input, 0); | ||||
|   int64_t nInputPlane  = THCTensor_(size)(state, input, 1); | ||||
|   int64_t inputHeight  = THCTensor_(size)(state, input, 2); | ||||
|   int64_t inputWidth   = THCTensor_(size)(state, input, 3); | ||||
|  | ||||
|   int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||
|   int outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||
|   int nOutputPlane = nInputPlane * kW * kH; | ||||
|   int outputLength = outputHeight * outputWidth; | ||||
|   int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||
|   int64_t outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||
|   int64_t nOutputPlane = nInputPlane * kW * kH; | ||||
|   int64_t outputLength = outputHeight * outputWidth; | ||||
|  | ||||
|   THCTensor_(resize3d)(state, output, batchSize, nOutputPlane, outputLength); | ||||
|   THCTensor_(zero)(state, output); | ||||
| @ -78,7 +78,7 @@ void THNN_(Im2Col_updateOutput)( | ||||
|   THCTensor *input_n = THCTensor_(new)(state); | ||||
|   THCTensor *output_n = THCTensor_(new)(state); | ||||
|  | ||||
|   for (int elt = 0; elt < batchSize; elt++) { | ||||
|   for (int64_t elt = 0; elt < batchSize; elt++) { | ||||
|     THCTensor_(select)(state, input_n, input, 0, elt); | ||||
|     THCTensor_(select)(state, output_n, output, 0, elt); | ||||
|  | ||||
| @ -104,11 +104,11 @@ void THNN_(Im2Col_updateGradInput)( | ||||
|            THCState *state, | ||||
|            THCTensor *gradOutput, | ||||
|            THCTensor *gradInput, | ||||
|            int inputHeight, int inputWidth, | ||||
|            int kH, int kW, | ||||
|            int dH, int dW, | ||||
|            int padH, int padW, | ||||
|            int sH, int sW) { | ||||
|            int64_t inputHeight, int64_t inputWidth, | ||||
|            int64_t kH, int64_t kW, | ||||
|            int64_t dH, int64_t dW, | ||||
|            int64_t padH, int64_t padW, | ||||
|            int64_t sH, int64_t sW) { | ||||
|  | ||||
|   THNN_(Col2Im_updateOutput)(state, gradOutput, gradInput, | ||||
|                              inputHeight, inputWidth, | ||||
|  | ||||
| @ -183,39 +183,39 @@ THC_API void THNN_(Im2Col_updateOutput)( | ||||
|                   THCState *state, | ||||
|                   THCTensor *input, | ||||
|                   THCTensor *output, | ||||
|                   int kH, int kW, | ||||
|                   int dH, int dW, | ||||
|                   int padH, int padW, | ||||
|                   int sH, int sW); | ||||
|                   int64_t kH, int64_t kW, | ||||
|                   int64_t dH, int64_t dW, | ||||
|                   int64_t padH, int64_t padW, | ||||
|                   int64_t sH, int64_t sW); | ||||
|  | ||||
| THC_API void THNN_(Im2Col_updateGradInput)( | ||||
|                   THCState *state, | ||||
|                   THCTensor *gradOutput, | ||||
|                   THCTensor *gradInput, | ||||
|                   int inputHeight, int inputWidth, | ||||
|                   int kH, int kW, | ||||
|                   int dH, int dW, | ||||
|                   int padH, int padW, | ||||
|                   int sH, int sW); | ||||
|                   int64_t inputHeight, int64_t inputWidth, | ||||
|                   int64_t kH, int64_t kW, | ||||
|                   int64_t dH, int64_t dW, | ||||
|                   int64_t padH, int64_t padW, | ||||
|                   int64_t sH, int64_t sW); | ||||
|  | ||||
| THC_API void THNN_(Col2Im_updateOutput)( | ||||
|                   THCState *state, | ||||
|                   THCTensor *input, | ||||
|                   THCTensor *output, | ||||
|                   int outputHeight, int outputWidth, | ||||
|                   int kH, int kW, | ||||
|                   int dH, int dW, | ||||
|                   int padH, int padW, | ||||
|                   int sH, int sW); | ||||
|                   int64_t outputHeight, int64_t outputWidth, | ||||
|                   int64_t kH, int64_t kW, | ||||
|                   int64_t dH, int64_t dW, | ||||
|                   int64_t padH, int64_t padW, | ||||
|                   int64_t sH, int64_t sW); | ||||
|  | ||||
|  THC_API void THNN_(Col2Im_updateGradInput)( | ||||
|                   THCState *state, | ||||
|                   THCTensor *gradOutput, | ||||
|                   THCTensor *gradInput, | ||||
|                   int kH, int kW, | ||||
|                   int dH, int dW, | ||||
|                   int padH, int padW, | ||||
|                   int sH, int sW); | ||||
|                   int64_t kH, int64_t kW, | ||||
|                   int64_t dH, int64_t dW, | ||||
|                   int64_t padH, int64_t padW, | ||||
|                   int64_t sH, int64_t sW); | ||||
|  | ||||
| THC_API void THNN_(LeakyReLU_updateOutput)( | ||||
|                   THCState *state, | ||||
|  | ||||
| @ -8,28 +8,28 @@ | ||||
| // (borrowed from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu) | ||||
| template <typename Dtype> | ||||
| __launch_bounds__(CUDA_NUM_THREADS) | ||||
| __global__ void im2col_kernel(const int n, const Dtype* data_im, | ||||
|                               const int height, const int width, | ||||
|                               const int ksize_h, const int ksize_w, | ||||
|                               const int pad_h, const int pad_w, | ||||
|                               const int stride_h, const int stride_w, | ||||
|                               const int dilation_h, const int dilation_w, | ||||
|                               const int height_col, const int width_col, | ||||
| __global__ void im2col_kernel(const int64_t n, const Dtype* data_im, | ||||
|                               const int64_t height, const int64_t width, | ||||
|                               const int64_t ksize_h, const int64_t ksize_w, | ||||
|                               const int64_t pad_h, const int64_t pad_w, | ||||
|                               const int64_t stride_h, const int64_t stride_w, | ||||
|                               const int64_t dilation_h, const int64_t dilation_w, | ||||
|                               const int64_t height_col, const int64_t width_col, | ||||
|     Dtype* data_col) { | ||||
|   CUDA_KERNEL_LOOP(index, n) { | ||||
|     int w_out = index % width_col; | ||||
|     int64_t w_out = index % width_col; | ||||
|     index /= width_col; | ||||
|     int h_out = index % height_col; | ||||
|     int channel_in = index / height_col; | ||||
|     int channel_out = channel_in * ksize_h * ksize_w; | ||||
|     int h_in = h_out * stride_h - pad_h; | ||||
|     int w_in = w_out * stride_w - pad_w; | ||||
|     int64_t h_out = index % height_col; | ||||
|     int64_t channel_in = index / height_col; | ||||
|     int64_t channel_out = channel_in * ksize_h * ksize_w; | ||||
|     int64_t h_in = h_out * stride_h - pad_h; | ||||
|     int64_t w_in = w_out * stride_w - pad_w; | ||||
|     data_col += (channel_out * height_col + h_out) * width_col + w_out; | ||||
|     data_im += (channel_in * height + h_in) * width + w_in; | ||||
|     for (int i = 0; i < ksize_h; ++i) { | ||||
|       for (int j = 0; j < ksize_w; ++j) { | ||||
|         int h = h_in + i * dilation_h; | ||||
|         int w = w_in + j * dilation_w; | ||||
|     for (int64_t i = 0; i < ksize_h; ++i) { | ||||
|       for (int64_t j = 0; j < ksize_w; ++j) { | ||||
|         int64_t h = h_in + i * dilation_h; | ||||
|         int64_t w = w_in + j * dilation_w; | ||||
|         *data_col = (h >= 0 && w >= 0 && h < height && w < width) ? | ||||
|           data_im[i * dilation_h * width + j * dilation_w] : ScalarConvert<int, Dtype>::to(0); | ||||
|         data_col += height_col * width_col; | ||||
| @ -39,15 +39,15 @@ __global__ void im2col_kernel(const int n, const Dtype* data_im, | ||||
| } | ||||
|  | ||||
| template <typename Dtype> | ||||
| void im2col(cudaStream_t stream, const Dtype* data_im, const int channels, | ||||
|             const int height, const int width, | ||||
|             const int height_col, const int width_col, | ||||
|             const int ksize_h, const int ksize_w, const int pad_h, | ||||
|             const int pad_w, const int stride_h, const int stride_w, | ||||
|             const int dilation_h, const int dilation_w, Dtype* data_col) { | ||||
| void im2col(cudaStream_t stream, const Dtype* data_im, const int64_t channels, | ||||
|             const int64_t height, const int64_t width, | ||||
|             const int64_t height_col, const int64_t width_col, | ||||
|             const int64_t ksize_h, const int64_t ksize_w, const int64_t pad_h, | ||||
|             const int64_t pad_w, const int64_t stride_h, const int64_t stride_w, | ||||
|             const int64_t dilation_h, const int64_t dilation_w, Dtype* data_col) { | ||||
|   // We are going to launch channels * height_col * width_col kernels, each | ||||
|   // kernel responsible for copying a single-channel grid. | ||||
|   int num_kernels = channels * height_col * width_col; | ||||
|   int64_t num_kernels = channels * height_col * width_col; | ||||
|   // Launch | ||||
|   im2col_kernel <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>> ( | ||||
|       num_kernels, data_im, height, width, ksize_h, ksize_w, | ||||
| @ -60,37 +60,37 @@ void im2col(cudaStream_t stream, const Dtype* data_im, const int channels, | ||||
|  | ||||
| template <typename Dtype, typename Acctype> | ||||
| __launch_bounds__(CUDA_NUM_THREADS) | ||||
| __global__ void col2im_kernel(const int n, const Dtype* data_col, | ||||
|                                   const int height, const int width, const int channels, | ||||
|                                   const int kernel_h, const int kernel_w, | ||||
|                                   const int pad_h, const int pad_w, | ||||
|                                   const int stride_h, const int stride_w, | ||||
|                                   const int dilation_h, const int dilation_w, | ||||
|                                   const int height_col, const int width_col, | ||||
| __global__ void col2im_kernel(const int64_t n, const Dtype* data_col, | ||||
|                                   const int64_t height, const int64_t width, const int64_t channels, | ||||
|                                   const int64_t kernel_h, const int64_t kernel_w, | ||||
|                                   const int64_t pad_h, const int64_t pad_w, | ||||
|                                   const int64_t stride_h, const int64_t stride_w, | ||||
|                                   const int64_t dilation_h, const int64_t dilation_w, | ||||
|                                   const int64_t height_col, const int64_t width_col, | ||||
|                                   Dtype* data_im) { | ||||
|   CUDA_KERNEL_LOOP(index, n) { | ||||
|     Acctype val = Acctype(0); | ||||
|     const int w_im = index % width + pad_w; | ||||
|     const int h_im = (index / width) % height + pad_h; | ||||
|     const int c_im = index / (width * height); | ||||
|     int kernel_extent_w = (kernel_w - 1) * dilation_w + 1; | ||||
|     int kernel_extent_h = (kernel_h - 1) * dilation_h + 1; | ||||
|     const int64_t w_im = index % width + pad_w; | ||||
|     const int64_t h_im = (index / width) % height + pad_h; | ||||
|     const int64_t c_im = index / (width * height); | ||||
|     int64_t kernel_extent_w = (kernel_w - 1) * dilation_w + 1; | ||||
|     int64_t kernel_extent_h = (kernel_h - 1) * dilation_h + 1; | ||||
|     // compute the start and end of the output | ||||
|     const int w_col_start = | ||||
|     const int64_t w_col_start = | ||||
|       (w_im < kernel_extent_w) ? 0 : (w_im - kernel_extent_w) / stride_w + 1; | ||||
|     const int w_col_end = min(w_im / stride_w + 1, width_col); | ||||
|     const int h_col_start = | ||||
|     const int64_t w_col_end = min(w_im / stride_w + 1, width_col); | ||||
|     const int64_t h_col_start = | ||||
|       (h_im < kernel_extent_h) ? 0 : (h_im - kernel_extent_h) / stride_h + 1; | ||||
|     const int h_col_end = min(h_im / stride_h + 1, height_col); | ||||
|     const int64_t h_col_end = min(h_im / stride_h + 1, height_col); | ||||
|     // TODO: use LCM of stride and dilation to avoid unnecessary loops | ||||
|     for (int h_col = h_col_start; h_col < h_col_end; h_col += 1) { | ||||
|       for (int w_col = w_col_start; w_col < w_col_end; w_col += 1) { | ||||
|         int h_k = (h_im - h_col * stride_h); | ||||
|         int w_k = (w_im - w_col * stride_w); | ||||
|     for (int64_t h_col = h_col_start; h_col < h_col_end; h_col += 1) { | ||||
|       for (int64_t w_col = w_col_start; w_col < w_col_end; w_col += 1) { | ||||
|         int64_t h_k = (h_im - h_col * stride_h); | ||||
|         int64_t w_k = (w_im - w_col * stride_w); | ||||
|         if (h_k % dilation_h == 0 && w_k % dilation_w == 0) { | ||||
|           h_k /= dilation_h; | ||||
|           w_k /= dilation_w; | ||||
|           int data_col_index = (((c_im * kernel_h + h_k) * kernel_w + w_k) * | ||||
|           int64_t data_col_index = (((c_im * kernel_h + h_k) * kernel_w + w_k) * | ||||
|                                 height_col + h_col) * width_col + w_col; | ||||
|           val += data_col[data_col_index]; | ||||
|         } | ||||
| @ -101,21 +101,21 @@ __global__ void col2im_kernel(const int n, const Dtype* data_col, | ||||
| } | ||||
|  | ||||
| template <typename Dtype, typename Acctype> | ||||
| void col2im(cudaStream_t stream, const Dtype* data_col, const int channels, | ||||
|             const int height, const int width, | ||||
|             const int output_height, const int output_width, | ||||
|             const int patch_h, const int patch_w, const int pad_h, | ||||
|             const int pad_w, const int stride_h, const int stride_w, | ||||
|             const int dilation_h, const int dilation_w, Dtype* data_im); | ||||
| void col2im(cudaStream_t stream, const Dtype* data_col, const int64_t channels, | ||||
|             const int64_t height, const int64_t width, | ||||
|             const int64_t output_height, const int64_t output_width, | ||||
|             const int64_t patch_h, const int64_t patch_w, const int64_t pad_h, | ||||
|             const int64_t pad_w, const int64_t stride_h, const int64_t stride_w, | ||||
|             const int64_t dilation_h, const int64_t dilation_w, Dtype* data_im); | ||||
|  | ||||
| template <typename Dtype, typename Acctype> | ||||
| void col2im(cudaStream_t stream, const Dtype* data_col, const int channels, | ||||
|             const int height, const int width, | ||||
|             const int output_height, const int output_width, | ||||
|             const int patch_h, const int patch_w, const int pad_h, | ||||
|             const int pad_w, const int stride_h, const int stride_w, | ||||
|             const int dilation_h, const int dilation_w, Dtype* data_im) { | ||||
|   int num_kernels = channels * height * width; | ||||
| void col2im(cudaStream_t stream, const Dtype* data_col, const int64_t channels, | ||||
|             const int64_t height, const int64_t width, | ||||
|             const int64_t output_height, const int64_t output_width, | ||||
|             const int64_t patch_h, const int64_t patch_w, const int64_t pad_h, | ||||
|             const int64_t pad_w, const int64_t stride_h, const int64_t stride_w, | ||||
|             const int64_t dilation_h, const int64_t dilation_w, Dtype* data_im) { | ||||
|   int64_t num_kernels = channels * height * width; | ||||
|   // To avoid involving atomic operations, we will launch one kernel per | ||||
|   // bottom dimension, and then in the kernel add up the top dimensions. | ||||
|   col2im_kernel<Dtype, Acctype> <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>> ( | ||||
|  | ||||
| @ -54,25 +54,25 @@ | ||||
| // | ||||
| // ALSO do vol2col | ||||
|  | ||||
| static void THNN_(im2col)(const real* data_im, const int channels, | ||||
|       const int height, const int width, | ||||
|       const int output_height, const int output_width, | ||||
|       const int kernel_h, const int kernel_w, | ||||
|       const int pad_h, const int pad_w, | ||||
|       const int stride_h, const int stride_w, | ||||
|       const int dilation_h, const int dilation_w, | ||||
| static void THNN_(im2col)(const real* data_im, const int64_t channels, | ||||
|       const int64_t height, const int64_t width, | ||||
|       const int64_t output_height, const int64_t output_width, | ||||
|       const int64_t kernel_h, const int64_t kernel_w, | ||||
|       const int64_t pad_h, const int64_t pad_w, | ||||
|       const int64_t stride_h, const int64_t stride_w, | ||||
|       const int64_t dilation_h, const int64_t dilation_w, | ||||
|       real* data_col) { | ||||
|   const int height_col = output_height; | ||||
|   const int width_col = output_width; | ||||
|   const int channels_col = channels * kernel_h * kernel_w; | ||||
|   for (int c_col = 0; c_col < channels_col; ++c_col) { | ||||
|     int w_offset = c_col % kernel_w; | ||||
|     int h_offset = (c_col / kernel_w) % kernel_h; | ||||
|     int c_im = c_col / kernel_h / kernel_w; | ||||
|     for (int h_col = 0; h_col < height_col; ++h_col) { | ||||
|       int h_im = h_col * stride_h - pad_h + h_offset * dilation_h; | ||||
|       for (int w_col = 0; w_col < width_col; ++w_col) { | ||||
|         int w_im = w_col * stride_w - pad_w + w_offset * dilation_w; | ||||
|   const int64_t height_col = output_height; | ||||
|   const int64_t width_col = output_width; | ||||
|   const int64_t channels_col = channels * kernel_h * kernel_w; | ||||
|   for (int64_t c_col = 0; c_col < channels_col; ++c_col) { | ||||
|     int64_t w_offset = c_col % kernel_w; | ||||
|     int64_t h_offset = (c_col / kernel_w) % kernel_h; | ||||
|     int64_t c_im = c_col / kernel_h / kernel_w; | ||||
|     for (int64_t h_col = 0; h_col < height_col; ++h_col) { | ||||
|       int64_t h_im = h_col * stride_h - pad_h + h_offset * dilation_h; | ||||
|       for (int64_t w_col = 0; w_col < width_col; ++w_col) { | ||||
|         int64_t w_im = w_col * stride_w - pad_w + w_offset * dilation_w; | ||||
|         data_col[(c_col * height_col + h_col) * width_col + w_col] = | ||||
|           (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ? | ||||
|           data_im[(c_im * height + h_im) * width + w_im] : 0; | ||||
| @ -81,26 +81,26 @@ static void THNN_(im2col)(const real* data_im, const int channels, | ||||
|   } | ||||
| } | ||||
|  | ||||
| static void THNN_(col2im)(const real* data_col, const int channels, | ||||
|       const int height, const int width, | ||||
|       const int output_height, const int output_width, | ||||
|       const int kernel_h, const int kernel_w, | ||||
|       const int pad_h, const int pad_w, | ||||
|       const int stride_h, const int stride_w, | ||||
|       const int dilation_h, const int dilation_w, | ||||
| static void THNN_(col2im)(const real* data_col, const int64_t channels, | ||||
|       const int64_t height, const int64_t width, | ||||
|       const int64_t output_height, const int64_t output_width, | ||||
|       const int64_t kernel_h, const int64_t kernel_w, | ||||
|       const int64_t pad_h, const int64_t pad_w, | ||||
|       const int64_t stride_h, const int64_t stride_w, | ||||
|       const int64_t dilation_h, const int64_t dilation_w, | ||||
|       real* data_im) { | ||||
|   memset(data_im, 0, sizeof(real) * height * width * channels); | ||||
|   const int height_col = output_height; | ||||
|   const int width_col = output_width; | ||||
|   const int channels_col = channels * kernel_h * kernel_w; | ||||
|   for (int c_col = 0; c_col < channels_col; ++c_col) { | ||||
|     int w_offset = c_col % kernel_w; | ||||
|     int h_offset = (c_col / kernel_w) % kernel_h; | ||||
|     int c_im = c_col / kernel_h / kernel_w; | ||||
|     for (int h_col = 0; h_col < height_col; ++h_col) { | ||||
|       int h_im = h_col * stride_h - pad_h + h_offset * dilation_h; | ||||
|       for (int w_col = 0; w_col < width_col; ++w_col) { | ||||
|         int w_im = w_col * stride_w - pad_w + w_offset * dilation_w; | ||||
|   const int64_t height_col = output_height; | ||||
|   const int64_t width_col = output_width; | ||||
|   const int64_t channels_col = channels * kernel_h * kernel_w; | ||||
|   for (int64_t c_col = 0; c_col < channels_col; ++c_col) { | ||||
|     int64_t w_offset = c_col % kernel_w; | ||||
|     int64_t h_offset = (c_col / kernel_w) % kernel_h; | ||||
|     int64_t c_im = c_col / kernel_h / kernel_w; | ||||
|     for (int64_t h_col = 0; h_col < height_col; ++h_col) { | ||||
|       int64_t h_im = h_col * stride_h - pad_h + h_offset * dilation_h; | ||||
|       for (int64_t w_col = 0; w_col < width_col; ++w_col) { | ||||
|         int64_t w_im = w_col * stride_w - pad_w + w_offset * dilation_w; | ||||
|         if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width) | ||||
|           data_im[(c_im * height + h_im) * width + w_im] += | ||||
|             data_col[(c_col * height_col + h_col) * width_col + w_col]; | ||||
| @ -113,9 +113,9 @@ static inline void THNN_(Col2Im_shapeCheck)( | ||||
|                          THNNState *state, | ||||
|                          THTensor *input, | ||||
|                          THTensor *gradOutput, | ||||
|                          int outputHeight, int outputWidth, | ||||
|                          int kH, int kW, int dH, int dW, | ||||
|                          int padH, int padW, int sH, int sW) { | ||||
|                          int64_t outputHeight, int64_t outputWidth, | ||||
|                          int64_t kH, int64_t kW, int64_t dH, int64_t dW, | ||||
|                          int64_t padH, int64_t padW, int64_t sH, int64_t sW) { | ||||
|  | ||||
|   THArgCheck(kW > 0 && kH > 0, 6, | ||||
|              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); | ||||
| @ -124,11 +124,11 @@ static inline void THNN_(Col2Im_shapeCheck)( | ||||
|   THArgCheck(dW > 0 && dH > 0, 8, | ||||
|              "dilation should be greater than zero, but got dH: %d dW: %d", dH, dW); | ||||
|  | ||||
|   int ndim = THTensor_(nDimension)(input); | ||||
|   int64_t ndim = THTensor_(nDimension)(input); | ||||
|   THNN_ARGCHECK(!input->is_empty() && (ndim == 2 || ndim == 3), 2, input, | ||||
|                 "Expected non-empty 2D or 3D input tensor, but got input of shape %s"); | ||||
|  | ||||
|   int batch_dim = (ndim == 3) ? 0 : -1; | ||||
|   int64_t batch_dim = (ndim == 3) ? 0 : -1; | ||||
|   int64_t nInputPlane  = input->size[batch_dim + 1]; | ||||
|  | ||||
|   if (nInputPlane % (kW * kH) != 0) { | ||||
| @ -161,11 +161,11 @@ void THNN_(Col2Im_updateOutput)( | ||||
|            THNNState *state, | ||||
|            THTensor *input, | ||||
|            THTensor *output, | ||||
|            int outputHeight, int outputWidth, | ||||
|            int kH, int kW, | ||||
|            int dH, int dW, | ||||
|            int padH, int padW, | ||||
|            int sH, int sW) { | ||||
|            int64_t outputHeight, int64_t outputWidth, | ||||
|            int64_t kH, int64_t kW, | ||||
|            int64_t dH, int64_t dW, | ||||
|            int64_t padH, int64_t padW, | ||||
|            int64_t sH, int64_t sW) { | ||||
|  | ||||
|   THNN_(Col2Im_shapeCheck)(state, input, NULL, outputHeight, outputWidth, | ||||
|                            kH, kW, dH, dW, padH, padW, sH, sW); | ||||
| @ -189,10 +189,10 @@ void THNN_(Col2Im_updateOutput)( | ||||
|   THTensor *input_n = THTensor_(new)(); | ||||
|   THTensor *output_n = THTensor_(new)(); | ||||
|  | ||||
|   int height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||
|   int width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||
|   int64_t height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||
|   int64_t width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||
|  | ||||
|   for (int elt = 0; elt < batchSize; elt++) { | ||||
|   for (int64_t elt = 0; elt < batchSize; elt++) { | ||||
|     THTensor_(select)(input_n, input, 0, elt); | ||||
|     THTensor_(select)(output_n, output, 0, elt); | ||||
|  | ||||
| @ -220,10 +220,10 @@ void THNN_(Col2Im_updateGradInput)( | ||||
|            THNNState *state, | ||||
|            THTensor *gradOutput, | ||||
|            THTensor *gradInput, | ||||
|            int kH, int kW, | ||||
|            int dH, int dW, | ||||
|            int padH, int padW, | ||||
|            int sH, int sW) { | ||||
|            int64_t kH, int64_t kW, | ||||
|            int64_t dH, int64_t dW, | ||||
|            int64_t padH, int64_t padW, | ||||
|            int64_t sH, int64_t sW) { | ||||
|  | ||||
|   THNN_(Im2Col_updateOutput)(state, gradOutput, gradInput, | ||||
|                              kH, kW, dH, dW, padH, padW, sH, sW); | ||||
|  | ||||
| @ -6,8 +6,8 @@ static inline void THNN_(Im2Col_shapeCheck)( | ||||
|                          THNNState *state, | ||||
|                          THTensor *input, | ||||
|                          THTensor *gradOutput, | ||||
|                          int kH, int kW, int dH, int dW, | ||||
|                          int padH, int padW, int sH, int sW) { | ||||
|                          int64_t kH, int64_t kW, int64_t dH, int64_t dW, | ||||
|                          int64_t padH, int64_t padW, int64_t sH, int64_t sW) { | ||||
|  | ||||
|   THArgCheck(kW > 0 && kH > 0, 4, | ||||
|              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW); | ||||
| @ -16,21 +16,21 @@ static inline void THNN_(Im2Col_shapeCheck)( | ||||
|   THArgCheck(sW > 0 && sH > 0, 10, | ||||
|              "stride should be greater than zero, but got sH: %d sW: %d", sH, sW); | ||||
|  | ||||
|   int ndim = THTensor_(nDimension)(input); | ||||
|   int64_t ndim = THTensor_(nDimension)(input); | ||||
|   THNN_ARGCHECK(!input->is_empty() && (ndim == 3 || ndim == 4), 2, input, | ||||
|                 "Expected non-empty 3D or 4D input tensor, but got input of shape %s"); | ||||
|  | ||||
|   int dim_batch = 0; | ||||
|   int64_t dim_batch = 0; | ||||
|   if (ndim == 3) { | ||||
|     dim_batch = -1; | ||||
|   } | ||||
|   int nInputPlane  = THTensor_(size)(input, dim_batch + 1); | ||||
|   int inputHeight  = THTensor_(size)(input, dim_batch + 2); | ||||
|   int inputWidth   = THTensor_(size)(input, dim_batch + 3); | ||||
|   int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||
|   int outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||
|   int nOutputPlane = nInputPlane * kW * kH; | ||||
|   int outputLength = outputHeight * outputWidth; | ||||
|   int64_t nInputPlane  = THTensor_(size)(input, dim_batch + 1); | ||||
|   int64_t inputHeight  = THTensor_(size)(input, dim_batch + 2); | ||||
|   int64_t inputWidth   = THTensor_(size)(input, dim_batch + 3); | ||||
|   int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||
|   int64_t outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||
|   int64_t nOutputPlane = nInputPlane * kW * kH; | ||||
|   int64_t outputLength = outputHeight * outputWidth; | ||||
|  | ||||
|   if (outputHeight < 1 || outputWidth < 1) { | ||||
|     THError("Given input with spatial size (%d, %d), kernel_size=(%d, %d), " | ||||
| @ -46,10 +46,10 @@ void THNN_(Im2Col_updateOutput)( | ||||
|            THNNState *state, | ||||
|            THTensor *input, | ||||
|            THTensor *output, | ||||
|            int kH, int kW, | ||||
|            int dH, int dW, | ||||
|            int padH, int padW, | ||||
|            int sH, int sW) { | ||||
|            int64_t kH, int64_t kW, | ||||
|            int64_t dH, int64_t dW, | ||||
|            int64_t padH, int64_t padW, | ||||
|            int64_t sH, int64_t sW) { | ||||
|  | ||||
|   THNN_(Im2Col_shapeCheck)(state, input, NULL, kH, kW, dH, dW, padH, padW, sH, sW); | ||||
|  | ||||
| @ -60,15 +60,15 @@ void THNN_(Im2Col_updateOutput)( | ||||
|     THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]); | ||||
|   } | ||||
|  | ||||
|   int batchSize    = THTensor_(size)(input, 0); | ||||
|   int nInputPlane  = THTensor_(size)(input, 1); | ||||
|   int inputHeight  = THTensor_(size)(input, 2); | ||||
|   int inputWidth   = THTensor_(size)(input, 3); | ||||
|   int64_t batchSize    = THTensor_(size)(input, 0); | ||||
|   int64_t nInputPlane  = THTensor_(size)(input, 1); | ||||
|   int64_t inputHeight  = THTensor_(size)(input, 2); | ||||
|   int64_t inputWidth   = THTensor_(size)(input, 3); | ||||
|  | ||||
|   int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||
|   int outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||
|   int nOutputPlane = nInputPlane * kW * kH; | ||||
|   int outputLength = outputHeight * outputWidth; | ||||
|   int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1; | ||||
|   int64_t outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1; | ||||
|   int64_t nOutputPlane = nInputPlane * kW * kH; | ||||
|   int64_t outputLength = outputHeight * outputWidth; | ||||
|  | ||||
|   THTensor_(resize3d)(output, batchSize, nOutputPlane, outputLength); | ||||
|   THTensor_(zero)(output); | ||||
| @ -76,7 +76,7 @@ void THNN_(Im2Col_updateOutput)( | ||||
|   THTensor *input_n = THTensor_(new)(); | ||||
|   THTensor *output_n = THTensor_(new)(); | ||||
|  | ||||
|   for (int elt = 0; elt < batchSize; elt++) { | ||||
|   for (int64_t elt = 0; elt < batchSize; elt++) { | ||||
|     THTensor_(select)(input_n, input, 0, elt); | ||||
|     THTensor_(select)(output_n, output, 0, elt); | ||||
|  | ||||
| @ -102,11 +102,11 @@ void THNN_(Im2Col_updateGradInput)( | ||||
|            THNNState *state, | ||||
|            THTensor *gradOutput, | ||||
|            THTensor *gradInput, | ||||
|            int inputHeight, int inputWidth, | ||||
|            int kH, int kW, | ||||
|            int dH, int dW, | ||||
|            int padH, int padW, | ||||
|            int sH, int sW) { | ||||
|            int64_t inputHeight, int64_t inputWidth, | ||||
|            int64_t kH, int64_t kW, | ||||
|            int64_t dH, int64_t dW, | ||||
|            int64_t padH, int64_t padW, | ||||
|            int64_t sH, int64_t sW) { | ||||
|  | ||||
|  | ||||
|   THNN_(Col2Im_updateOutput)(state, gradOutput, gradInput, | ||||
|  | ||||
| @ -220,8 +220,8 @@ void THNN_(SpatialDilatedConvolution_updateGradInput)( | ||||
|      dilationH, dilationW, 0); | ||||
|  | ||||
|   // Params | ||||
|   int nInputPlane = weight->size[1]; | ||||
|   int nOutputPlane = weight->size[0]; | ||||
|   int64_t nInputPlane = weight->size[1]; | ||||
|   int64_t nOutputPlane = weight->size[0]; | ||||
|  | ||||
|   input = THTensor_(newContiguous)(input); | ||||
|   weight = THTensor_(newContiguous)(weight); | ||||
|  | ||||
| @ -221,8 +221,8 @@ void THNN_(SpatialFullDilatedConvolution_updateGradInput)( | ||||
|     (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW, | ||||
|      dilationH, dilationW, adjH, adjW, 0); | ||||
|  | ||||
|   int nInputPlane = THTensor_(size)(weight,0); | ||||
|   int nOutputPlane = THTensor_(size)(weight,1); | ||||
|   int64_t nInputPlane = THTensor_(size)(weight,0); | ||||
|   int64_t nOutputPlane = THTensor_(size)(weight,1); | ||||
|  | ||||
|   input = THTensor_(newContiguous)(input); | ||||
|   gradOutput = THTensor_(newContiguous)(gradOutput); | ||||
| @ -328,7 +328,7 @@ void THNN_(SpatialFullDilatedConvolution_accGradParameters)( | ||||
|     (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW, | ||||
|      dilationH, dilationW, adjH, adjW, 1); | ||||
|  | ||||
|   int nOutputPlane; | ||||
|   int64_t nOutputPlane; | ||||
|   if (gradWeight) { | ||||
|     nOutputPlane = THTensor_(size)(gradWeight, 1); | ||||
|   } else if (gradBias) { | ||||
|  | ||||
| @ -147,39 +147,39 @@ TH_API void THNN_(Im2Col_updateOutput)( | ||||
|           THNNState *state, | ||||
|           THTensor *input, | ||||
|           THTensor *output, | ||||
|           int kH, int kW, | ||||
|           int dH, int dW, | ||||
|           int padH, int padW, | ||||
|           int sH, int sW); | ||||
|           int64_t kH, int64_t kW, | ||||
|           int64_t dH, int64_t dW, | ||||
|           int64_t padH, int64_t padW, | ||||
|           int64_t sH, int64_t sW); | ||||
|  | ||||
| TH_API void THNN_(Im2Col_updateGradInput)( | ||||
|           THNNState *state, | ||||
|           THTensor *gradOutput, | ||||
|           THTensor *gradInput, | ||||
|           int inputHeight, int inputWidth, | ||||
|           int kH, int kW, | ||||
|           int dH, int dW, | ||||
|           int padH, int padW, | ||||
|           int sH, int sW); | ||||
|           int64_t inputHeight, int64_t inputWidth, | ||||
|           int64_t kH, int64_t kW, | ||||
|           int64_t dH, int64_t dW, | ||||
|           int64_t padH, int64_t padW, | ||||
|           int64_t sH, int64_t sW); | ||||
|  | ||||
| TH_API void THNN_(Col2Im_updateOutput)( | ||||
|           THNNState *state, | ||||
|           THTensor *input, | ||||
|           THTensor *output, | ||||
|           int outputHeight, int outputWidth, | ||||
|           int kH, int kW, | ||||
|           int dH, int dW, | ||||
|           int padH, int padW, | ||||
|           int sH, int sW); | ||||
|           int64_t outputHeight, int64_t outputWidth, | ||||
|           int64_t kH, int64_t kW, | ||||
|           int64_t dH, int64_t dW, | ||||
|           int64_t padH, int64_t padW, | ||||
|           int64_t sH, int64_t sW); | ||||
|  | ||||
| TH_API void THNN_(Col2Im_updateGradInput)( | ||||
|           THNNState *state, | ||||
|           THTensor *gradOutput, | ||||
|           THTensor *gradInput, | ||||
|           int kH, int kW, | ||||
|           int dH, int dW, | ||||
|           int padH, int padW, | ||||
|           int sH, int sW); | ||||
|           int64_t kH, int64_t kW, | ||||
|           int64_t dH, int64_t dW, | ||||
|           int64_t padH, int64_t padW, | ||||
|           int64_t sH, int64_t sW); | ||||
|  | ||||
| TH_API void THNN_(L1Cost_updateOutput)( | ||||
|           THNNState *state,            // library's state | ||||
|  | ||||
| @ -224,11 +224,7 @@ if(USE_CUDA) | ||||
|   # it. We will then manually add the cudart library as interface libs. | ||||
|   set(__tmp ${CUDA_LIBRARIES}) | ||||
|   set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES}) | ||||
|   if(CAFFE2_STATIC_LINK_CUDA) | ||||
|     torch_cuda_based_add_library(caffe2_gpu STATIC ${Caffe2_GPU_SRCS}) | ||||
|   else() | ||||
|     torch_cuda_based_add_library(caffe2_gpu ${Caffe2_GPU_SRCS}) | ||||
|   endif() | ||||
|   torch_cuda_based_add_library(caffe2_gpu ${Caffe2_GPU_SRCS}) | ||||
|   set(CUDA_LIBRARIES ${__tmp}) | ||||
|   target_link_libraries(caffe2_gpu INTERFACE caffe2::cudart) | ||||
|  | ||||
|  | ||||
| @ -1,4 +1,5 @@ | ||||
| #include "caffe2/utils/proto_wrap.h" | ||||
| #include "caffe2/core/common.h" | ||||
|  | ||||
| #include <google/protobuf/stubs/common.h> | ||||
| #include <google/protobuf/generated_message_util.h> | ||||
| @ -8,7 +9,7 @@ namespace caffe { | ||||
| // Caffe wrapper functions for protobuf's GetEmptyStringAlreadyInited() function | ||||
| // used to avoid duplicated global variable in the case when protobuf | ||||
| // is built with hidden visibility. | ||||
| const ::std::string& GetEmptyStringAlreadyInited() { | ||||
| CAFFE2_API const ::std::string& GetEmptyStringAlreadyInited() { | ||||
|   return ::google::protobuf::internal::GetEmptyStringAlreadyInited(); | ||||
| } | ||||
|  | ||||
| @ -19,7 +20,7 @@ namespace ONNX_NAMESPACE { | ||||
| // ONNX wrapper functions for protobuf's GetEmptyStringAlreadyInited() function | ||||
| // used to avoid duplicated global variable in the case when protobuf | ||||
| // is built with hidden visibility. | ||||
| const ::std::string& GetEmptyStringAlreadyInited() { | ||||
| CAFFE2_API const ::std::string& GetEmptyStringAlreadyInited() { | ||||
|   return ::google::protobuf::internal::GetEmptyStringAlreadyInited(); | ||||
| } | ||||
|  | ||||
| @ -30,7 +31,7 @@ namespace caffe2 { | ||||
| // Caffe2 wrapper functions for protobuf's GetEmptyStringAlreadyInited() function | ||||
| // used to avoid duplicated global variable in the case when protobuf | ||||
| // is built with hidden visibility. | ||||
| const ::std::string& GetEmptyStringAlreadyInited() { | ||||
| CAFFE2_API const ::std::string& GetEmptyStringAlreadyInited() { | ||||
|   return ::google::protobuf::internal::GetEmptyStringAlreadyInited(); | ||||
| } | ||||
|  | ||||
|  | ||||
| @ -1102,6 +1102,11 @@ Linear functions | ||||
|  | ||||
| .. autofunction:: linear | ||||
|  | ||||
| :hidden:`bilinear` | ||||
| ~~~~~~~~~~~~~~~~ | ||||
|  | ||||
| .. autofunction:: bilinear | ||||
|  | ||||
| Dropout functions | ||||
| ----------------- | ||||
|  | ||||
|  | ||||
| @ -337,6 +337,7 @@ view of a storage and defines numeric operations on it. | ||||
|    .. automethod:: rsqrt | ||||
|    .. automethod:: rsqrt_ | ||||
|    .. automethod:: scatter_ | ||||
|    .. automethod:: scatter_add_ | ||||
|    .. automethod:: select | ||||
|    .. automethod:: set_ | ||||
|    .. automethod:: share_memory_ | ||||
|  | ||||
| @ -251,6 +251,7 @@ Spectral Ops | ||||
|  | ||||
| Other Operations | ||||
| ~~~~~~~~~~~~~~~~~~~~~~ | ||||
| .. autofunction:: bincount | ||||
| .. autofunction:: cross | ||||
| .. autofunction:: diag | ||||
| .. autofunction:: diagflat | ||||
| @ -258,6 +259,7 @@ Other Operations | ||||
| .. autofunction:: einsum | ||||
| .. autofunction:: flip | ||||
| .. autofunction:: histc | ||||
| .. autofunction:: meshgrid | ||||
| .. autofunction:: renorm | ||||
| .. autofunction:: trace | ||||
| .. autofunction:: tril | ||||
|  | ||||
							
								
								
									
										4
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								setup.py
									
									
									
									
									
								
							| @ -152,6 +152,8 @@ IS_LINUX = (platform.system() == 'Linux') | ||||
| FULL_CAFFE2 = check_env_flag('FULL_CAFFE2') | ||||
| BUILD_PYTORCH = check_env_flag('BUILD_PYTORCH') | ||||
|  | ||||
| USE_CUDA_STATIC_LINK = check_env_flag('USE_CUDA_STATIC_LINK') | ||||
|  | ||||
| NUM_JOBS = multiprocessing.cpu_count() | ||||
| max_jobs = os.getenv("MAX_JOBS") | ||||
| if max_jobs is not None: | ||||
| @ -318,6 +320,8 @@ def build_libs(libs): | ||||
|     if USE_CUDA: | ||||
|         my_env["CUDA_BIN_PATH"] = CUDA_HOME | ||||
|         build_libs_cmd += ['--use-cuda'] | ||||
|     if USE_CUDA_STATIC_LINK: | ||||
|         build_libs_cmd += ['--cuda-static-link'] | ||||
|     if USE_ROCM: | ||||
|         build_libs_cmd += ['--use-rocm'] | ||||
|     if USE_NNPACK: | ||||
|  | ||||
| @ -28,7 +28,7 @@ import errno | ||||
| import torch | ||||
| import torch.cuda | ||||
| from torch._utils_internal import get_writable_path | ||||
| from torch._six import string_classes | ||||
| from torch._six import string_classes, inf | ||||
| import torch.backends.cudnn | ||||
| import torch.backends.mkl | ||||
|  | ||||
| @ -353,7 +353,7 @@ class TestCase(unittest.TestCase): | ||||
|         elif isinstance(x, bool) and isinstance(y, bool): | ||||
|             super(TestCase, self).assertEqual(x, y, message) | ||||
|         elif isinstance(x, Number) and isinstance(y, Number): | ||||
|             if abs(x) == float('inf') or abs(y) == float('inf'): | ||||
|             if abs(x) == inf or abs(y) == inf: | ||||
|                 if allow_inf: | ||||
|                     super(TestCase, self).assertEqual(x, y, message) | ||||
|                 else: | ||||
|  | ||||
| @ -10,6 +10,7 @@ from collections import OrderedDict | ||||
| from itertools import product | ||||
| from operator import mul, itemgetter | ||||
| from functools import reduce, wraps | ||||
| from torch._six import inf, nan | ||||
| from torch.autograd.gradcheck import gradgradcheck, gradcheck | ||||
| from torch.autograd.function import once_differentiable | ||||
| from torch.autograd.profiler import profile | ||||
| @ -1524,12 +1525,12 @@ class TestAutograd(TestCase): | ||||
|         pyscalar = -12345.1 | ||||
|         f[0] = pyscalar | ||||
|         self.assertEqual(float(f), pyscalar) | ||||
|         f[0] = float('nan') | ||||
|         f[0] = nan | ||||
|         self.assertTrue(math.isnan(float(f))) | ||||
|         f[0] = float('inf') | ||||
|         self.assertEqual(float(f), float('inf'), allow_inf=True) | ||||
|         f[0] = float('-inf') | ||||
|         self.assertEqual(float(f), float('-inf'), allow_inf=True) | ||||
|         f[0] = inf | ||||
|         self.assertEqual(float(f), inf, allow_inf=True) | ||||
|         f[0] = -inf | ||||
|         self.assertEqual(float(f), -inf, allow_inf=True) | ||||
|  | ||||
|         # integral -> floating point | ||||
|         # check we can convert something that loses precision | ||||
| @ -1539,11 +1540,11 @@ class TestAutograd(TestCase): | ||||
|         self.assertEqual(float(l), float(pyscalar)) | ||||
|  | ||||
|         # floating point -> integral | ||||
|         f[0] = float('nan') | ||||
|         f[0] = nan | ||||
|         self.assertRaises(ValueError, lambda: integral_conv(f[0])) | ||||
|         f[0] = float('inf') | ||||
|         f[0] = inf | ||||
|         self.assertRaises(OverflowError, lambda: integral_conv(f[0])) | ||||
|         f[0] = float('-inf') | ||||
|         f[0] = -inf | ||||
|         self.assertRaises(OverflowError, lambda: integral_conv(f[0])) | ||||
|         f[0] = sys.float_info.max | ||||
|         self.assertEqual(integral_conv(f), sys.float_info.max) | ||||
| @ -1558,9 +1559,9 @@ class TestAutograd(TestCase): | ||||
|         test_nonzero(l, -2, True) | ||||
|         test_nonzero(f, 0.0, False) | ||||
|         test_nonzero(f, sys.float_info.min, True) | ||||
|         test_nonzero(f, float('nan'), bool(float('nan'))) | ||||
|         test_nonzero(f, float('inf'), bool(float('inf'))) | ||||
|         test_nonzero(f, float('-inf'), bool(float('-inf'))) | ||||
|         test_nonzero(f, nan, bool(nan)) | ||||
|         test_nonzero(f, inf, bool(inf)) | ||||
|         test_nonzero(f, -inf, bool(-inf)) | ||||
|  | ||||
|     def test_pyscalar_conversions(self): | ||||
|         self._test_pyscalar_conversions(lambda x: x, lambda x: int(x)) | ||||
| @ -2825,7 +2826,7 @@ method_tests = [ | ||||
|     ('std', (S,), (0, True, True), 'keepdim_dim_1d', [0]), | ||||
|     ('renorm', (S, S, S), (2, 1, 0.5), 'dim', [1]), | ||||
|     ('renorm', (S, S, S), (1, 2, 3), 'norm_1'), | ||||
|     ('renorm', (S, S, S), (float('inf'), 2, 0.5), 'norm_inf'), | ||||
|     ('renorm', (S, S, S), (inf, 2, 0.5), 'norm_inf'), | ||||
|     ('repeat', (S,), (2,), 'single_number'), | ||||
|     ('repeat', (), (2, 3), 'scalar'), | ||||
|     ('repeat', (2, 2), (3, 2)), | ||||
| @ -2917,7 +2918,7 @@ method_tests = [ | ||||
|     ('norm', (S, S), (0.5,), '0_5'), | ||||
|     ('norm', (S, S), (1,), '1'), | ||||
|     ('norm', (S, S), (3,), '3'), | ||||
|     ('norm', (S, S), (float('inf'),), 'inf'), | ||||
|     ('norm', (S, S), (inf,), 'inf'), | ||||
|     ('norm', (S, S), (-1,), 'neg_1'), | ||||
|     ('norm', (S, S), (-0.5,), 'neg_0_5'), | ||||
|     ('norm', (S, S), (-1.5,), 'neg_1_5'), | ||||
|  | ||||
| @ -1,3 +1,4 @@ | ||||
| import os | ||||
| import unittest | ||||
| import sys | ||||
|  | ||||
| @ -15,7 +16,10 @@ import common | ||||
|  | ||||
| from torch.utils.cpp_extension import CUDA_HOME | ||||
| TEST_CUDA = torch.cuda.is_available() and CUDA_HOME is not None | ||||
| TEST_CUDNN = TEST_CUDA and torch.backends.cudnn.is_available() | ||||
| TEST_CUDNN = False | ||||
| if TEST_CUDA: | ||||
|     CUDNN_HEADER_EXISTS = os.path.isfile(os.path.join(CUDA_HOME, 'include/cudnn.h')) | ||||
|     TEST_CUDNN = TEST_CUDA and CUDNN_HEADER_EXISTS and torch.backends.cudnn.is_available() | ||||
|  | ||||
|  | ||||
| class TestCppExtension(common.TestCase): | ||||
|  | ||||
| @ -12,6 +12,7 @@ import torch | ||||
| import torch.cuda | ||||
| import torch.cuda.comm as comm | ||||
| from torch import multiprocessing as mp | ||||
| from torch._six import inf, nan | ||||
|  | ||||
| from test_torch import TestTorch | ||||
| from common import TestCase, get_gpu_type, to_gpu, freeze_rng_state, run_tests, \ | ||||
| @ -782,7 +783,7 @@ class TestCuda(TestCase): | ||||
|             if not end0: | ||||
|                 gen1_max_times = torch.LongTensor(1).random_(0, 3)[0] | ||||
|             else: | ||||
|                 gen1_max_times = float('inf') | ||||
|                 gen1_max_times = inf | ||||
|             t = 0 | ||||
|             while t < gen1_max_times and not end1: | ||||
|                 end1 = advance(gen1, end1) | ||||
| @ -901,7 +902,7 @@ class TestCuda(TestCase): | ||||
|                  (lambda x: x.max(0)[0], 'max_dim')] | ||||
|         for f, name in tests: | ||||
|             a = torch.arange(25.0).view(5, 5) | ||||
|             a[2, 2] = float('nan') | ||||
|             a[2, 2] = nan | ||||
|             actual = f(a.cuda()).cpu() | ||||
|             expected = f(a).cpu() | ||||
|             self.assertEqual(torch.isnan(actual), torch.isnan(expected), 'nans for {}'.format(name)) | ||||
| @ -1503,9 +1504,9 @@ class TestCuda(TestCase): | ||||
|     def test_multinomial_invalid_probs_cuda(self): | ||||
|         test_method = TestCuda._test_multinomial_invalid_probs_cuda | ||||
|         self._spawn_method(test_method, torch.Tensor([0, -1])) | ||||
|         self._spawn_method(test_method, torch.Tensor([0, float('inf')])) | ||||
|         self._spawn_method(test_method, torch.Tensor([0, float('-inf')])) | ||||
|         self._spawn_method(test_method, torch.Tensor([0, float('nan')])) | ||||
|         self._spawn_method(test_method, torch.Tensor([0, inf])) | ||||
|         self._spawn_method(test_method, torch.Tensor([0, -inf])) | ||||
|         self._spawn_method(test_method, torch.Tensor([0, nan])) | ||||
|  | ||||
|     def test_broadcast(self): | ||||
|         TestTorch._test_broadcast(self, lambda t: t.cuda()) | ||||
| @ -1686,7 +1687,6 @@ class TestCuda(TestCase): | ||||
|         cpu_tensor = torch.tensor([-0.999999994, -1.999999994, -2.0000000111, | ||||
|                                   -100.99999994, -1931.99999994, 0.000000111, | ||||
|                                   -0.000000111, 0, -1, -2, -931]) | ||||
|         nan = float('nan') | ||||
|         expected_errors = torch.tensor([0, 0, 0, 0, 0, 0, 0, nan, nan, nan, nan]) | ||||
|         gpu_tensor = cpu_tensor.cuda() | ||||
|         cpu_out = cpu_tensor.digamma() | ||||
|  | ||||
| @ -30,6 +30,7 @@ from itertools import product | ||||
| from random import shuffle | ||||
|  | ||||
| import torch | ||||
| from torch._six import inf | ||||
| from common import TestCase, run_tests, set_rng_seed, TEST_WITH_UBSAN | ||||
| from common_cuda import TEST_CUDA | ||||
| from torch.autograd import grad, gradcheck | ||||
| @ -782,7 +783,7 @@ class TestDistributions(TestCase): | ||||
|         s = 0.3 | ||||
|         self.assertEqual(Geometric(p).sample((8,)).size(), (8, 3)) | ||||
|         self.assertEqual(Geometric(1).sample(), 0) | ||||
|         self.assertEqual(Geometric(1).log_prob(torch.tensor(1.)), -float('inf'), allow_inf=True) | ||||
|         self.assertEqual(Geometric(1).log_prob(torch.tensor(1.)), -inf, allow_inf=True) | ||||
|         self.assertEqual(Geometric(1).log_prob(torch.tensor(0.)), 0) | ||||
|         self.assertFalse(Geometric(p).sample().requires_grad) | ||||
|         self.assertEqual(Geometric(r).sample((8,)).size(), (8,)) | ||||
| @ -1162,8 +1163,8 @@ class TestDistributions(TestCase): | ||||
|         uniform = Uniform(low_1d, high_1d) | ||||
|         above_high = torch.tensor([4.0]) | ||||
|         below_low = torch.tensor([-1.0]) | ||||
|         self.assertEqual(uniform.log_prob(above_high).item(), -float('inf'), allow_inf=True) | ||||
|         self.assertEqual(uniform.log_prob(below_low).item(), -float('inf'), allow_inf=True) | ||||
|         self.assertEqual(uniform.log_prob(above_high).item(), -inf, allow_inf=True) | ||||
|         self.assertEqual(uniform.log_prob(below_low).item(), -inf, allow_inf=True) | ||||
|  | ||||
|         # check cdf computation when value outside range | ||||
|         self.assertEqual(uniform.cdf(below_low).item(), 0) | ||||
| @ -1190,7 +1191,7 @@ class TestDistributions(TestCase): | ||||
|         loc_1d = torch.zeros(1, requires_grad=True) | ||||
|         scale_1d = torch.ones(1, requires_grad=True) | ||||
|         self.assertTrue(is_all_nan(Cauchy(loc_1d, scale_1d).mean)) | ||||
|         self.assertEqual(Cauchy(loc_1d, scale_1d).variance, float('inf'), allow_inf=True) | ||||
|         self.assertEqual(Cauchy(loc_1d, scale_1d).variance, inf, allow_inf=True) | ||||
|         self.assertEqual(Cauchy(loc, scale).sample().size(), (5, 5)) | ||||
|         self.assertEqual(Cauchy(loc, scale).sample((7,)).size(), (7, 5, 5)) | ||||
|         self.assertEqual(Cauchy(loc_1d, scale_1d).sample().size(), (1,)) | ||||
| @ -1216,7 +1217,7 @@ class TestDistributions(TestCase): | ||||
|         scale = torch.ones(5, 5, requires_grad=True) | ||||
|         scale_1d = torch.ones(1, requires_grad=True) | ||||
|         self.assertTrue(is_all_nan(HalfCauchy(scale_1d).mean)) | ||||
|         self.assertEqual(HalfCauchy(scale_1d).variance, float('inf'), allow_inf=True) | ||||
|         self.assertEqual(HalfCauchy(scale_1d).variance, inf, allow_inf=True) | ||||
|         self.assertEqual(HalfCauchy(scale).sample().size(), (5, 5)) | ||||
|         self.assertEqual(HalfCauchy(scale).sample((7,)).size(), (7, 5, 5)) | ||||
|         self.assertEqual(HalfCauchy(scale_1d).sample().size(), (1,)) | ||||
| @ -1714,8 +1715,8 @@ class TestDistributions(TestCase): | ||||
|         alpha = torch.tensor(torch.randn(2, 3).abs(), requires_grad=True) | ||||
|         scale_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True) | ||||
|         alpha_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True) | ||||
|         self.assertEqual(Pareto(scale_1d, 0.5).mean, float('inf'), allow_inf=True) | ||||
|         self.assertEqual(Pareto(scale_1d, 0.5).variance, float('inf'), allow_inf=True) | ||||
|         self.assertEqual(Pareto(scale_1d, 0.5).mean, inf, allow_inf=True) | ||||
|         self.assertEqual(Pareto(scale_1d, 0.5).variance, inf, allow_inf=True) | ||||
|         self.assertEqual(Pareto(scale, alpha).sample().size(), (2, 3)) | ||||
|         self.assertEqual(Pareto(scale, alpha).sample((5,)).size(), (5, 2, 3)) | ||||
|         self.assertEqual(Pareto(scale_1d, alpha_1d).sample((1,)).size(), (1, 1)) | ||||
| @ -1832,7 +1833,7 @@ class TestDistributions(TestCase): | ||||
|         df_1d = torch.tensor(torch.exp(torch.randn(1)), requires_grad=True) | ||||
|         self.assertTrue(is_all_nan(StudentT(1).mean)) | ||||
|         self.assertTrue(is_all_nan(StudentT(1).variance)) | ||||
|         self.assertEqual(StudentT(2).variance, float('inf'), allow_inf=True) | ||||
|         self.assertEqual(StudentT(2).variance, inf, allow_inf=True) | ||||
|         self.assertEqual(StudentT(df).sample().size(), (2, 3)) | ||||
|         self.assertEqual(StudentT(df).sample((5,)).size(), (5, 2, 3)) | ||||
|         self.assertEqual(StudentT(df_1d).sample((1,)).size(), (1, 1)) | ||||
| @ -2962,7 +2963,7 @@ class TestKL(TestCase): | ||||
|  | ||||
|     def test_kl_infinite(self): | ||||
|         for p, q in self.infinite_examples: | ||||
|             self.assertTrue((kl_divergence(p, q) == float('inf')).all(), | ||||
|             self.assertTrue((kl_divergence(p, q) == inf).all(), | ||||
|                             'Incorrect KL({}, {})'.format(type(p).__name__, type(q).__name__)) | ||||
|  | ||||
|     def test_kl_edgecases(self): | ||||
| @ -2996,7 +2997,7 @@ class TestKL(TestCase): | ||||
|                     continue | ||||
|                 x = dist.sample(sample_shape=(60000,)) | ||||
|                 expected = -dist.log_prob(x).mean(0) | ||||
|                 ignore = (expected == float('inf')) | ||||
|                 ignore = (expected == inf) | ||||
|                 expected[ignore] = actual[ignore] | ||||
|                 self.assertEqual(actual, expected, prec=0.2, message='\n'.join([ | ||||
|                     '{} example {}/{}, incorrect .entropy().'.format(Dist.__name__, i + 1, len(params)), | ||||
| @ -3157,12 +3158,12 @@ class TestNumericalStability(TestCase): | ||||
|  | ||||
|     def test_categorical_log_prob_with_logits(self): | ||||
|         for dtype in ([torch.float, torch.double]): | ||||
|             p = torch.tensor([-float('inf'), 0], dtype=dtype, requires_grad=True) | ||||
|             p = torch.tensor([-inf, 0], dtype=dtype, requires_grad=True) | ||||
|             categorical = OneHotCategorical(logits=p) | ||||
|             log_pdf_prob_1 = categorical.log_prob(torch.tensor([0, 1], dtype=dtype)) | ||||
|             self.assertEqual(log_pdf_prob_1.item(), 0) | ||||
|             log_pdf_prob_0 = categorical.log_prob(torch.tensor([1, 0], dtype=dtype)) | ||||
|             self.assertEqual(log_pdf_prob_0.item(), -float('inf'), allow_inf=True) | ||||
|             self.assertEqual(log_pdf_prob_0.item(), -inf, allow_inf=True) | ||||
|  | ||||
|     def test_multinomial_log_prob(self): | ||||
|         for dtype in ([torch.float, torch.double]): | ||||
| @ -3174,12 +3175,12 @@ class TestNumericalStability(TestCase): | ||||
|  | ||||
|     def test_multinomial_log_prob_with_logits(self): | ||||
|         for dtype in ([torch.float, torch.double]): | ||||
|             p = torch.tensor([-float('inf'), 0], dtype=dtype, requires_grad=True) | ||||
|             p = torch.tensor([-inf, 0], dtype=dtype, requires_grad=True) | ||||
|             multinomial = Multinomial(10, logits=p) | ||||
|             log_pdf_prob_1 = multinomial.log_prob(torch.tensor([0, 10], dtype=dtype)) | ||||
|             self.assertEqual(log_pdf_prob_1.item(), 0) | ||||
|             log_pdf_prob_0 = multinomial.log_prob(torch.tensor([10, 0], dtype=dtype)) | ||||
|             self.assertEqual(log_pdf_prob_0.item(), -float('inf'), allow_inf=True) | ||||
|             self.assertEqual(log_pdf_prob_0.item(), -inf, allow_inf=True) | ||||
|  | ||||
|  | ||||
| class TestLazyLogitsInitialization(TestCase): | ||||
|  | ||||
| @ -15,6 +15,7 @@ import hashlib | ||||
| import os | ||||
|  | ||||
| import torch | ||||
| from torch._six import inf, nan | ||||
| import torch.backends.cudnn as cudnn | ||||
| import torch.nn as nn | ||||
| import torch.nn.functional as F | ||||
| @ -1465,7 +1466,7 @@ class TestNN(NNTestCase): | ||||
|  | ||||
|         def compute_norm(norm_type): | ||||
|             norm_type = float(norm_type) | ||||
|             if norm_type != float('inf'): | ||||
|             if norm_type != inf: | ||||
|                 total_norm = 0 | ||||
|                 for p in l.parameters(): | ||||
|                     total_norm += p.grad.data.abs().pow(norm_type).sum() | ||||
| @ -1560,8 +1561,6 @@ class TestNN(NNTestCase): | ||||
|     # We don't want to make propagating NaN a hard requirement on ops, but for | ||||
|     # these easy ones, we should make them do so. | ||||
|     def _test_nonlinearity_propagate_nan(self, device): | ||||
|         nan = float('nan') | ||||
|  | ||||
|         def test(nonlinearity, *args, **kwargs): | ||||
|             x = torch.tensor([nan], device=device) | ||||
|             fn = getattr(F, nonlinearity) | ||||
| @ -2547,7 +2546,7 @@ class TestNN(NNTestCase): | ||||
|             for num_dim in [1, 2, 3]: | ||||
|                 fn_name = '{}max_pool{}d'.format(adaptive, num_dim) | ||||
|                 fn = getattr(F, fn_name) | ||||
|                 x = torch.full([1, 1] + num_dim * [3], float('nan')) | ||||
|                 x = torch.full([1, 1] + num_dim * [3], nan) | ||||
|                 res = fn(x, 1 if adaptive else 3) | ||||
|                 self.assertTrue(math.isnan(res.item())) | ||||
|  | ||||
|  | ||||
| @ -3,6 +3,7 @@ import unittest | ||||
| import functools | ||||
| from copy import deepcopy | ||||
| import torch | ||||
| from torch._six import inf | ||||
| import torch.optim as optim | ||||
| import torch.legacy.optim as old_optim | ||||
| import torch.nn.functional as F | ||||
| @ -478,8 +479,8 @@ class TestOptim(TestCase): | ||||
|     @unittest.skipIf(TEST_WITH_UBSAN, "division-by-zero error with UBSAN") | ||||
|     def test_lbfgs_return_type(self): | ||||
|         params = [torch.randn(10, 5), torch.randn(10)] | ||||
|         opt1 = optim.LBFGS(params, 0.01, tolerance_grad=float('inf')) | ||||
|         opt2 = optim.LBFGS(params, 0.01, tolerance_grad=-float('inf')) | ||||
|         opt1 = optim.LBFGS(params, 0.01, tolerance_grad=inf) | ||||
|         opt2 = optim.LBFGS(params, 0.01, tolerance_grad=-inf) | ||||
|  | ||||
|         def closure(): | ||||
|             return torch.Tensor([10]) | ||||
|  | ||||
| @ -16,6 +16,7 @@ import gzip | ||||
| from torch._utils_internal import get_file_path, get_file_path_2 | ||||
| from torch.utils.dlpack import from_dlpack, to_dlpack | ||||
| from torch._utils import _rebuild_tensor | ||||
| from torch._six import inf, nan | ||||
| from itertools import product, combinations | ||||
| from functools import reduce | ||||
| from torch import multiprocessing as mp | ||||
| @ -241,17 +242,17 @@ class TestTorch(TestCase): | ||||
|         self.assertTrue(torch.allclose(x, y, rtol=0.01, atol=0.0)) | ||||
|         self.assertFalse(torch.allclose(x, y)) | ||||
|         self.assertTrue(torch.allclose(torch.tensor([0.0]), torch.tensor([1e-8]))) | ||||
|         x = torch.tensor([2.0, 3.0, float('nan')]) | ||||
|         y = torch.tensor([2.01, 3.01, float('nan')]) | ||||
|         x = torch.tensor([2.0, 3.0, nan]) | ||||
|         y = torch.tensor([2.01, 3.01, nan]) | ||||
|         self.assertFalse(torch.allclose(x, y, rtol=1e-2)) | ||||
|         self.assertTrue(torch.allclose(x, y, rtol=1e-2, equal_nan=True)) | ||||
|         self.assertFalse(torch.allclose(x, y, rtol=1e-3, equal_nan=True)) | ||||
|         inf = torch.tensor([float('inf')]) | ||||
|         self.assertTrue(torch.allclose(inf, inf)) | ||||
|         self.assertTrue(torch.allclose(-inf, -inf)) | ||||
|         self.assertFalse(torch.allclose(inf, -inf)) | ||||
|         self.assertFalse(torch.allclose(inf, torch.tensor([1e20]))) | ||||
|         self.assertFalse(torch.allclose(-inf, torch.tensor([-1e20]))) | ||||
|         inf_t = torch.tensor([inf]) | ||||
|         self.assertTrue(torch.allclose(inf_t, inf_t)) | ||||
|         self.assertTrue(torch.allclose(-inf_t, -inf_t)) | ||||
|         self.assertFalse(torch.allclose(inf_t, -inf_t)) | ||||
|         self.assertFalse(torch.allclose(inf_t, torch.tensor([1e20]))) | ||||
|         self.assertFalse(torch.allclose(-inf_t, torch.tensor([-1e20]))) | ||||
|  | ||||
|     def test_linear_algebra_scalar_raises(self): | ||||
|         m = torch.randn(5, 5) | ||||
| @ -359,13 +360,13 @@ class TestTorch(TestCase): | ||||
|             try: | ||||
|                 return math.sinh(x) | ||||
|             except OverflowError: | ||||
|                 return float('inf') if x > 0 else float('-inf') | ||||
|                 return inf if x > 0 else -inf | ||||
|         self._test_math(torch.sinh, sinh) | ||||
|  | ||||
|     def test_lgamma(self): | ||||
|         def lgamma(x): | ||||
|             if x <= 0 and x == int(x): | ||||
|                 return float('inf') | ||||
|                 return inf | ||||
|             return math.lgamma(x) | ||||
|         self._test_math(torch.lgamma, lgamma) | ||||
|  | ||||
| @ -392,14 +393,14 @@ class TestTorch(TestCase): | ||||
|         # scipy 1.1.0 changed when it returns +/-inf vs. NaN | ||||
|         def torch_digamma_without_inf(inp): | ||||
|             res = torch.digamma(inp) | ||||
|             res[(res == float('-inf')) | (res == float('inf'))] = float('nan') | ||||
|             res[(res == -inf) | (res == inf)] = nan | ||||
|             return res | ||||
|  | ||||
|         def scipy_digamma_without_inf(inp): | ||||
|             res = digamma(inp) | ||||
|             if np.isscalar(res): | ||||
|                 return res if np.isfinite(res) else float('nan') | ||||
|             res[np.isinf(res)] = float('nan') | ||||
|                 return res if np.isfinite(res) else nan | ||||
|             res[np.isinf(res)] = nan | ||||
|             return res | ||||
|  | ||||
|         self._test_math(torch_digamma_without_inf, scipy_digamma_without_inf, self._digamma_input()) | ||||
| @ -413,7 +414,7 @@ class TestTorch(TestCase): | ||||
|                             self._digamma_input(test_poles=False)) | ||||
|  | ||||
|     def test_asin(self): | ||||
|         self._test_math(torch.asin, lambda x: math.asin(x) if abs(x) <= 1 else float('nan')) | ||||
|         self._test_math(torch.asin, lambda x: math.asin(x) if abs(x) <= 1 else nan) | ||||
|  | ||||
|     def test_cos(self): | ||||
|         self._test_math_by_name('cos') | ||||
| @ -425,11 +426,11 @@ class TestTorch(TestCase): | ||||
|             except OverflowError: | ||||
|                 # Return inf on overflow. | ||||
|                 # See http://en.cppreference.com/w/cpp/numeric/math/cosh | ||||
|                 return float('inf') | ||||
|                 return inf | ||||
|         self._test_math(torch.cosh, cosh) | ||||
|  | ||||
|     def test_acos(self): | ||||
|         self._test_math(torch.acos, lambda x: math.acos(x) if abs(x) <= 1 else float('nan')) | ||||
|         self._test_math(torch.acos, lambda x: math.acos(x) if abs(x) <= 1 else nan) | ||||
|  | ||||
|     def test_tan(self): | ||||
|         self._test_math_by_name('tan') | ||||
| @ -443,36 +444,36 @@ class TestTorch(TestCase): | ||||
|     def test_log(self): | ||||
|         def log(x): | ||||
|             if x == 0: | ||||
|                 return float('-inf') | ||||
|                 return -inf | ||||
|             elif x < 0: | ||||
|                 return float('nan') | ||||
|                 return nan | ||||
|             return math.log(x) | ||||
|         self._test_math(torch.log, log) | ||||
|  | ||||
|     def test_log10(self): | ||||
|         def log10(x): | ||||
|             if x == 0: | ||||
|                 return float('-inf') | ||||
|                 return -inf | ||||
|             elif x < 0: | ||||
|                 return float('nan') | ||||
|                 return nan | ||||
|             return math.log10(x) | ||||
|         self._test_math(torch.log10, log10) | ||||
|  | ||||
|     def test_log1p(self): | ||||
|         def log1p(x): | ||||
|             if x == -1: | ||||
|                 return float('-inf') | ||||
|                 return -inf | ||||
|             elif x < -1: | ||||
|                 return float('nan') | ||||
|                 return nan | ||||
|             return math.log1p(x) | ||||
|         self._test_math(torch.log1p, log1p) | ||||
|  | ||||
|     def test_log2(self): | ||||
|         def log2(x): | ||||
|             if x == 0: | ||||
|                 return float('-inf') | ||||
|                 return -inf | ||||
|             elif x < 0: | ||||
|                 return float('nan') | ||||
|                 return nan | ||||
|             try: | ||||
|                 return math.log2(x) | ||||
|             except AttributeError: | ||||
| @ -480,7 +481,7 @@ class TestTorch(TestCase): | ||||
|         self._test_math(torch.log2, log2) | ||||
|  | ||||
|     def test_sqrt(self): | ||||
|         self._test_math(torch.sqrt, lambda x: math.sqrt(x) if x >= 0 else float('nan')) | ||||
|         self._test_math(torch.sqrt, lambda x: math.sqrt(x) if x >= 0 else nan) | ||||
|  | ||||
|     def test_erf(self): | ||||
|         self._test_math_by_name('erf') | ||||
| @ -493,9 +494,9 @@ class TestTorch(TestCase): | ||||
|             inputValues = torch.randn(4, 4, out=tensor()).clamp(-2., 2.) | ||||
|             self.assertEqual(tensor(inputValues).erf().erfinv(), tensor(inputValues)) | ||||
|             # test inf | ||||
|             self.assertTrue(torch.equal(tensor([-1, 1]).erfinv(), tensor([float('-inf'), float('inf')]))) | ||||
|             self.assertTrue(torch.equal(tensor([-1, 1]).erfinv(), tensor([-inf, inf]))) | ||||
|             # test nan | ||||
|             self.assertEqual(tensor([-2, 2]).erfinv(), tensor([float('nan'), float('nan')])) | ||||
|             self.assertEqual(tensor([-2, 2]).erfinv(), tensor([nan, nan])) | ||||
|  | ||||
|         checkType(torch.FloatTensor) | ||||
|         checkType(torch.DoubleTensor) | ||||
| @ -505,7 +506,7 @@ class TestTorch(TestCase): | ||||
|             try: | ||||
|                 return math.exp(x) | ||||
|             except OverflowError: | ||||
|                 return float('inf') | ||||
|                 return inf | ||||
|         self._test_math(torch.exp, exp) | ||||
|  | ||||
|     def test_expm1(self): | ||||
| @ -513,7 +514,7 @@ class TestTorch(TestCase): | ||||
|             try: | ||||
|                 return math.expm1(x) | ||||
|             except OverflowError: | ||||
|                 return float('inf') | ||||
|                 return inf | ||||
|         self._test_math(torch.expm1, expm1) | ||||
|  | ||||
|     def test_floor(self): | ||||
| @ -525,9 +526,9 @@ class TestTorch(TestCase): | ||||
|     def test_rsqrt(self): | ||||
|         def rsqrt(x): | ||||
|             if x == 0: | ||||
|                 return float('inf') | ||||
|                 return inf | ||||
|             elif x < 0: | ||||
|                 return float('nan') | ||||
|                 return nan | ||||
|             return 1.0 / math.sqrt(x) | ||||
|  | ||||
|         self._test_math(torch.rsqrt, rsqrt) | ||||
| @ -615,7 +616,7 @@ class TestTorch(TestCase): | ||||
|         # NaNs | ||||
|         for index in (0, 4, 99): | ||||
|             m1 = torch.randn(100) | ||||
|             m1[index] = float('nan') | ||||
|             m1[index] = nan | ||||
|             res1val, res1ind = torch.max(m1, 0) | ||||
|             self.assertTrue(math.isnan(res1val)) | ||||
|             self.assertEqual(res1ind, index) | ||||
| @ -633,14 +634,14 @@ class TestTorch(TestCase): | ||||
|         # full reduction | ||||
|         x = torch.randn(5, device=device) | ||||
|         xn = x.cpu().numpy() | ||||
|         for p in [0, 1, 2, 3, 4, float('inf')]: | ||||
|         for p in [0, 1, 2, 3, 4, inf]: | ||||
|             res = x.norm(p).item() | ||||
|             expected = np.linalg.norm(xn, p) | ||||
|             self.assertEqual(res, expected, "full reduction failed for {}-norm".format(p)) | ||||
|         # one dimension | ||||
|         x = torch.randn(5, 5, device=device) | ||||
|         xn = x.cpu().numpy() | ||||
|         for p in [0, 1, 2, 3, 4, float('inf')]: | ||||
|         for p in [0, 1, 2, 3, 4, inf]: | ||||
|             res = x.norm(p, 1).cpu().numpy() | ||||
|             expected = np.linalg.norm(xn, p, 1) | ||||
|             self.assertEqual(res.shape, expected.shape) | ||||
| @ -808,10 +809,10 @@ class TestTorch(TestCase): | ||||
|             ('prod', lambda *args, **kwargs: torch.prod(*args, **kwargs), 1), | ||||
|             ('sum', lambda *args, **kwargs: torch.sum(*args, **kwargs), 0), | ||||
|             ('norm', lambda *args, **kwargs: torch.norm(*args, p=2, **kwargs), 0), | ||||
|             ('mean', lambda *args, **kwargs: torch.mean(*args, **kwargs), float('nan')), | ||||
|             ('var', lambda *args, **kwargs: torch.var(*args, **kwargs), float('nan')), | ||||
|             ('std', lambda *args, **kwargs: torch.std(*args, **kwargs), float('nan')), | ||||
|             ('logsumexp', lambda *args, **kwargs: torch.logsumexp(*args, **kwargs), float('-inf')), | ||||
|             ('mean', lambda *args, **kwargs: torch.mean(*args, **kwargs), nan), | ||||
|             ('var', lambda *args, **kwargs: torch.var(*args, **kwargs), nan), | ||||
|             ('std', lambda *args, **kwargs: torch.std(*args, **kwargs), nan), | ||||
|             ('logsumexp', lambda *args, **kwargs: torch.logsumexp(*args, **kwargs), -inf), | ||||
|         ] | ||||
|  | ||||
|         devices = ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda'] | ||||
| @ -878,8 +879,8 @@ class TestTorch(TestCase): | ||||
|     def test_logsumexp(self): | ||||
|         from scipy.special import logsumexp | ||||
|         a = torch.randn(5, 4) | ||||
|         a[0, 0] = float('inf') | ||||
|         a[1, :] = float('-inf') | ||||
|         a[0, 0] = inf | ||||
|         a[1, :] = -inf | ||||
|         actual = a.logsumexp(1) | ||||
|         expected = logsumexp(a.numpy(), 1) | ||||
|         self.assertEqual(expected.shape, actual.shape) | ||||
| @ -1540,7 +1541,7 @@ class TestTorch(TestCase): | ||||
|         self._test_cop(torch.mul, lambda x, y: x * y) | ||||
|  | ||||
|     def test_cpow(self): | ||||
|         self._test_cop(torch.pow, lambda x, y: float('nan') if x < 0 else math.pow(x, y)) | ||||
|         self._test_cop(torch.pow, lambda x, y: nan if x < 0 else math.pow(x, y)) | ||||
|  | ||||
|     @unittest.skipIf(not TEST_NUMPY, 'Numpy not found') | ||||
|     def test_einsum(self): | ||||
| @ -2416,7 +2417,7 @@ class TestTorch(TestCase): | ||||
|         # full reduction | ||||
|         x = torch.randn(5, 5) | ||||
|         xn = x.numpy() | ||||
|         for p in [1, 2, 3, 4, float('inf')]: | ||||
|         for p in [1, 2, 3, 4, inf]: | ||||
|             res = x.renorm(p, 1, 1) | ||||
|             expected = x / x.norm(p, 0, keepdim=True).clamp(min=1) | ||||
|             self.assertEqual(res.numpy(), expected.numpy(), "renorm failed for {}-norm".format(p)) | ||||
| @ -2532,9 +2533,9 @@ class TestTorch(TestCase): | ||||
|     def test_multinomial_invalid_probs(self): | ||||
|         test_method = TestTorch._test_multinomial_invalid_probs | ||||
|         self._spawn_method(test_method, torch.Tensor([0, -1])) | ||||
|         self._spawn_method(test_method, torch.Tensor([0, float('inf')])) | ||||
|         self._spawn_method(test_method, torch.Tensor([0, float('-inf')])) | ||||
|         self._spawn_method(test_method, torch.Tensor([0, float('nan')])) | ||||
|         self._spawn_method(test_method, torch.Tensor([0, inf])) | ||||
|         self._spawn_method(test_method, torch.Tensor([0, -inf])) | ||||
|         self._spawn_method(test_method, torch.Tensor([0, nan])) | ||||
|  | ||||
|     @suppress_warnings | ||||
|     def test_range(self): | ||||
| @ -4672,15 +4673,15 @@ class TestTorch(TestCase): | ||||
|         self.assertEqual(x.nelement(), all.long().sum()) | ||||
|  | ||||
|     def test_isfinite(self): | ||||
|         x = torch.Tensor([1, float('inf'), 2, float('-inf'), float('nan'), -10]) | ||||
|         x = torch.Tensor([1, inf, 2, -inf, nan, -10]) | ||||
|         self.assertEqual(torch.isfinite(x), torch.ByteTensor([1, 0, 1, 0, 0, 1])) | ||||
|  | ||||
|     def test_isinf(self): | ||||
|         x = torch.Tensor([1, float('inf'), 2, float('-inf'), float('nan')]) | ||||
|         x = torch.Tensor([1, inf, 2, -inf, nan]) | ||||
|         self.assertEqual(torch.isinf(x), torch.ByteTensor([0, 1, 0, 1, 0])) | ||||
|  | ||||
|     def test_isnan(self): | ||||
|         x = torch.Tensor([1, float('nan'), 2]) | ||||
|         x = torch.Tensor([1, nan, 2]) | ||||
|         self.assertEqual(torch.isnan(x), torch.ByteTensor([0, 1, 0])) | ||||
|  | ||||
|     def test_RNGState(self): | ||||
| @ -7418,7 +7419,7 @@ class TestTorch(TestCase): | ||||
|         self.assertExpected(str(x), subname='negint') | ||||
|  | ||||
|         # test inf and nan | ||||
|         x = torch.tensor([4, float('inf'), 1.5, float('-inf'), 0, float('nan'), 1]) | ||||
|         x = torch.tensor([4, inf, 1.5, -inf, 0, nan, 1]) | ||||
|         self.assertEqual(x.__repr__(), str(x)) | ||||
|         self.assertExpected(str(x), subname='nonfinite') | ||||
|  | ||||
|  | ||||
| @ -413,6 +413,7 @@ class TestFFI(TestCase): | ||||
|     @unittest.skipIf(not HAS_CFFI or not HAS_CUDA, "ffi tests require cffi package") | ||||
|     @unittest.skipIf(IS_WINDOWS, "ffi doesn't currently work on Windows") | ||||
|     def test_gpu(self): | ||||
|         from torch.utils.cpp_extension import CUDA_HOME | ||||
|         create_extension( | ||||
|             name='gpulib', | ||||
|             headers=[test_dir + '/ffi/src/cuda/cudalib.h'], | ||||
| @ -421,6 +422,7 @@ class TestFFI(TestCase): | ||||
|             ], | ||||
|             with_cuda=True, | ||||
|             verbose=False, | ||||
|             include_dirs=[os.path.join(CUDA_HOME, 'include')], | ||||
|         ).build() | ||||
|         import gpulib | ||||
|         tensor = torch.ones(2, 2).float() | ||||
|  | ||||
| @ -41,6 +41,9 @@ while [[ $# -gt 0 ]]; do | ||||
|       --full-caffe2) | ||||
|           FULL_CAFFE2=1 | ||||
|           ;; | ||||
|       --cuda-static-link) | ||||
|           CAFFE2_STATIC_LINK_CUDA=1 | ||||
|           ;; | ||||
|       *) | ||||
|           break | ||||
|           ;; | ||||
| @ -261,6 +264,7 @@ function build_caffe2() { | ||||
|       -DBUILD_SHARED_LIBS=ON \ | ||||
|       -DONNX_NAMESPACE=$ONNX_NAMESPACE \ | ||||
|       -DUSE_CUDA=$USE_CUDA \ | ||||
|       -DCAFFE2_STATIC_LINK_CUDA=$CAFFE2_STATIC_LINK_CUDA \ | ||||
|       -DUSE_ROCM=$USE_ROCM \ | ||||
|       -DUSE_NNPACK=$USE_NNPACK \ | ||||
|       -DCUDNN_INCLUDE_DIR=$CUDNN_INCLUDE_DIR \ | ||||
|  | ||||
| @ -25,6 +25,13 @@ import sys | ||||
| PY2 = sys.version_info[0] == 2 | ||||
| PY3 = sys.version_info[0] == 3 | ||||
|  | ||||
| if PY2: | ||||
|     inf = float('inf') | ||||
|     nan = float('nan') | ||||
| else: | ||||
|     import math | ||||
|     inf = math.inf | ||||
|     nan = math.nan | ||||
|  | ||||
| if PY2: | ||||
|     string_classes = basestring | ||||
|  | ||||
| @ -1743,8 +1743,8 @@ scatter_(dim, index, src) -> Tensor | ||||
|  | ||||
| Writes all values from the tensor :attr:`src` into :attr:`self` at the indices | ||||
| specified in the :attr:`index` tensor. For each value in :attr:`src`, its output | ||||
| index is specified by its index in :attr:`src` for dimension != :attr:`dim` and | ||||
| by the corresponding value in :attr:`index` for dimension = :attr:`dim`. | ||||
| index is specified by its index in :attr:`src` for ``dimension != dim`` and by | ||||
| the corresponding value in :attr:`index` for ``dimension = dim``. | ||||
|  | ||||
| For a 3-D tensor, :attr:`self` is updated as:: | ||||
|  | ||||
| @ -1754,14 +1754,14 @@ For a 3-D tensor, :attr:`self` is updated as:: | ||||
|  | ||||
| This is the reverse operation of the manner described in :meth:`~Tensor.gather`. | ||||
|  | ||||
| :attr:`self`, :attr:`index` and :attr:`src` should have same number of | ||||
| dimensions. It is also required that `index.size(d) <= src.size(d)` for all | ||||
| dimensions `d`, and that `index.size(d) <= self.size(d)` for all dimensions | ||||
| `d != dim`. | ||||
| :attr:`self`, :attr:`index` and :attr:`src` (if it is a Tensor) should have same | ||||
| number of dimensions. It is also required that ``index.size(d) <= src.size(d)`` | ||||
| for all dimensions ``d``, and that ``index.size(d) <= self.size(d)`` for all | ||||
| dimensions ``d != dim``. | ||||
|  | ||||
| Moreover, as for :meth:`~Tensor.gather`, the values of :attr:`index` must be | ||||
| between `0` and `(self.size(dim) -1)` inclusive, and all values in a row along | ||||
| the specified dimension :attr:`dim` must be unique. | ||||
| between ``0`` and ``self.size(dim) - 1`` inclusive, and all values in a row | ||||
| along the specified dimension :attr:`dim` must be unique. | ||||
|  | ||||
| Args: | ||||
|     dim (int): the axis along which to index | ||||
| @ -1785,6 +1785,50 @@ Example:: | ||||
|             [ 0.0000,  0.0000,  0.0000,  1.2300]]) | ||||
| """) | ||||
|  | ||||
| add_docstr_all('scatter_add_', | ||||
|                r""" | ||||
| scatter_add_(dim, index, other) -> Tensor | ||||
|  | ||||
| Adds all values from the tensor :attr:`other` into :attr:`self` at the indices | ||||
| specified in the :attr:`index` tensor in a similar fashion as | ||||
| :meth:`~torch.Tensor.scatter_`. For each value in :attr:`other`, it is added to | ||||
| an index in :attr:`self` which is specified by its index in :attr:`other` | ||||
| for ``dimension != dim`` and by the corresponding value in :attr:`index` for | ||||
| ``dimension = dim``. | ||||
|  | ||||
| For a 3-D tensor, :attr:`self` is updated as:: | ||||
|  | ||||
|     self[index[i][j][k]][j][k] += other[i][j][k]  # if dim == 0 | ||||
|     self[i][index[i][j][k]][k] += other[i][j][k]  # if dim == 1 | ||||
|     self[i][j][index[i][j][k]] += other[i][j][k]  # if dim == 2 | ||||
|  | ||||
| :attr:`self`, :attr:`index` and :attr:`other` should have same number of | ||||
| dimensions. It is also required that ``index.size(d) <= other.size(d)`` for all | ||||
| dimensions ``d``, and that ``index.size(d) <= self.size(d)`` for all dimensions | ||||
| ``d != dim``. | ||||
|  | ||||
| Moreover, as for :meth:`~Tensor.gather`, the values of :attr:`index` must be | ||||
| between ``0`` and ``self.size(dim) - 1`` inclusive, and all values in a row along | ||||
| the specified dimension :attr:`dim` must be unique. | ||||
|  | ||||
| Args: | ||||
|     dim (int): the axis along which to index | ||||
|     index (LongTensor): the indices of elements to scatter and add | ||||
|     other (Tensor): the source elements to scatter and add | ||||
|  | ||||
| Example:: | ||||
|  | ||||
|     >>> x = torch.rand(2, 5) | ||||
|     >>> x | ||||
|     tensor([[0.7404, 0.0427, 0.6480, 0.3806, 0.8328], | ||||
|             [0.7953, 0.2009, 0.9154, 0.6782, 0.9620]]) | ||||
|     >>> torch.ones(3, 5).scatter_add_(0, torch.tensor([[0, 1, 2, 0, 0], [2, 0, 0, 1, 2]]), x) | ||||
|     tensor([[1.7404, 1.2009, 1.9154, 1.3806, 1.8328], | ||||
|             [1.0000, 1.0427, 1.0000, 1.6782, 1.0000], | ||||
|             [1.7953, 1.0000, 1.6480, 1.0000, 1.9620]]) | ||||
|  | ||||
| """) | ||||
|  | ||||
| add_docstr_all('select', | ||||
|                r""" | ||||
| select(dim, index) -> Tensor | ||||
|  | ||||
| @ -2,6 +2,7 @@ import math | ||||
| import torch | ||||
| from functools import reduce | ||||
| from sys import float_info | ||||
| from torch._six import inf, nan | ||||
|  | ||||
|  | ||||
| class __PrinterOptions(object): | ||||
| @ -50,7 +51,7 @@ def set_printoptions( | ||||
|             PRINT_OPTS.linewidth = 80 | ||||
|         elif profile == "full": | ||||
|             PRINT_OPTS.precision = 4 | ||||
|             PRINT_OPTS.threshold = float('inf') | ||||
|             PRINT_OPTS.threshold = inf | ||||
|             PRINT_OPTS.edgeitems = 3 | ||||
|             PRINT_OPTS.linewidth = 80 | ||||
|  | ||||
| @ -101,8 +102,8 @@ class _Formatter(object): | ||||
|  | ||||
|             else: | ||||
|                 copy_abs = copy.abs() | ||||
|                 pos_inf_mask = copy_abs.eq(float('inf')) | ||||
|                 neg_inf_mask = copy_abs.eq(float('-inf')) | ||||
|                 pos_inf_mask = copy_abs.eq(inf) | ||||
|                 neg_inf_mask = copy_abs.eq(-inf) | ||||
|                 nan_mask = copy_abs.ne(copy) | ||||
|                 invalid_value_mask = pos_inf_mask + neg_inf_mask + nan_mask | ||||
|                 if invalid_value_mask.all(): | ||||
|  | ||||
| @ -1,4 +1,5 @@ | ||||
| import torch | ||||
| from torch._six import nan | ||||
| from torch.distributions import constraints | ||||
| from torch.distributions.distribution import Distribution | ||||
| from torch.distributions.utils import probs_to_logits, logits_to_probs, lazy_property, broadcast_all | ||||
| @ -72,11 +73,11 @@ class Categorical(Distribution): | ||||
|  | ||||
|     @property | ||||
|     def mean(self): | ||||
|         return self.probs.new_tensor(float('nan')).expand(self._extended_shape()) | ||||
|         return self.probs.new_tensor(nan).expand(self._extended_shape()) | ||||
|  | ||||
|     @property | ||||
|     def variance(self): | ||||
|         return self.probs.new_tensor(float('nan')).expand(self._extended_shape()) | ||||
|         return self.probs.new_tensor(nan).expand(self._extended_shape()) | ||||
|  | ||||
|     def sample(self, sample_shape=torch.Size()): | ||||
|         sample_shape = self._extended_shape(sample_shape) | ||||
|  | ||||
| @ -1,4 +1,5 @@ | ||||
| import math | ||||
| from torch._six import inf, nan | ||||
| from numbers import Number | ||||
|  | ||||
| import torch | ||||
| @ -37,11 +38,11 @@ class Cauchy(Distribution): | ||||
|  | ||||
|     @property | ||||
|     def mean(self): | ||||
|         return self.loc.new_tensor(float('nan')).expand(self._extended_shape()) | ||||
|         return self.loc.new_tensor(nan).expand(self._extended_shape()) | ||||
|  | ||||
|     @property | ||||
|     def variance(self): | ||||
|         return self.loc.new_tensor(float('inf')).expand(self._extended_shape()) | ||||
|         return self.loc.new_tensor(inf).expand(self._extended_shape()) | ||||
|  | ||||
|     def rsample(self, sample_shape=torch.Size()): | ||||
|         shape = self._extended_shape(sample_shape) | ||||
|  | ||||
| @ -1,6 +1,7 @@ | ||||
| from numbers import Number | ||||
| import torch | ||||
| import math | ||||
| from torch._six import nan | ||||
| from torch.distributions import constraints | ||||
| from torch.distributions.distribution import Distribution | ||||
| from torch.distributions.gamma import Gamma | ||||
| @ -39,13 +40,13 @@ class FisherSnedecor(Distribution): | ||||
|     @property | ||||
|     def mean(self): | ||||
|         df2 = self.df2.clone() | ||||
|         df2[df2 <= 2] = float('nan') | ||||
|         df2[df2 <= 2] = nan | ||||
|         return df2 / (df2 - 2) | ||||
|  | ||||
|     @property | ||||
|     def variance(self): | ||||
|         df2 = self.df2.clone() | ||||
|         df2[df2 <= 4] = float('nan') | ||||
|         df2[df2 <= 4] = nan | ||||
|         return 2 * df2.pow(2) * (self.df1 + df2 - 2) / (self.df1 * (df2 - 2).pow(2) * (df2 - 4)) | ||||
|  | ||||
|     def rsample(self, sample_shape=torch.Size(())): | ||||
|  | ||||
| @ -1,5 +1,6 @@ | ||||
| import math | ||||
|  | ||||
| from torch._six import inf | ||||
| from torch.distributions import constraints | ||||
| from torch.distributions.transforms import AbsTransform | ||||
| from torch.distributions.cauchy import Cauchy | ||||
| @ -44,7 +45,7 @@ class HalfCauchy(TransformedDistribution): | ||||
|  | ||||
|     def log_prob(self, value): | ||||
|         log_prob = self.base_dist.log_prob(value) + math.log(2) | ||||
|         log_prob[value.expand(log_prob.shape) < 0] = -float('inf') | ||||
|         log_prob[value.expand(log_prob.shape) < 0] = -inf | ||||
|         return log_prob | ||||
|  | ||||
|     def cdf(self, value): | ||||
|  | ||||
| @ -1,5 +1,6 @@ | ||||
| import math | ||||
|  | ||||
| from torch._six import inf | ||||
| from torch.distributions import constraints | ||||
| from torch.distributions.transforms import AbsTransform | ||||
| from torch.distributions.normal import Normal | ||||
| @ -44,7 +45,7 @@ class HalfNormal(TransformedDistribution): | ||||
|  | ||||
|     def log_prob(self, value): | ||||
|         log_prob = self.base_dist.log_prob(value) + math.log(2) | ||||
|         log_prob[value.expand(log_prob.shape) < 0] = -float('inf') | ||||
|         log_prob[value.expand(log_prob.shape) < 0] = -inf | ||||
|         return log_prob | ||||
|  | ||||
|     def cdf(self, value): | ||||
|  | ||||
| @ -3,6 +3,7 @@ import warnings | ||||
| from functools import total_ordering | ||||
|  | ||||
| import torch | ||||
| from torch._six import inf | ||||
|  | ||||
| from .bernoulli import Bernoulli | ||||
| from .beta import Beta | ||||
| @ -113,7 +114,7 @@ def _infinite_like(tensor): | ||||
|     """ | ||||
|     Helper function for obtaining infinite KL Divergence throughout | ||||
|     """ | ||||
|     return tensor.new_tensor(float('inf')).expand_as(tensor) | ||||
|     return tensor.new_tensor(inf).expand_as(tensor) | ||||
|  | ||||
|  | ||||
| def _x_log_x(tensor): | ||||
| @ -173,10 +174,10 @@ _euler_gamma = 0.57721566490153286060 | ||||
| @register_kl(Bernoulli, Bernoulli) | ||||
| def _kl_bernoulli_bernoulli(p, q): | ||||
|     t1 = p.probs * (p.probs / q.probs).log() | ||||
|     t1[q.probs == 0] = float('inf') | ||||
|     t1[q.probs == 0] = inf | ||||
|     t1[p.probs == 0] = 0 | ||||
|     t2 = (1 - p.probs) * ((1 - p.probs) / (1 - q.probs)).log() | ||||
|     t2[q.probs == 1] = float('inf') | ||||
|     t2[q.probs == 1] = inf | ||||
|     t2[p.probs == 1] = 0 | ||||
|     return t1 + t2 | ||||
|  | ||||
| @ -208,7 +209,7 @@ def _kl_binomial_binomial(p, q): | ||||
| @register_kl(Categorical, Categorical) | ||||
| def _kl_categorical_categorical(p, q): | ||||
|     t = p.probs * (p.logits - q.logits) | ||||
|     t[q.probs == 0] = float('inf') | ||||
|     t[q.probs == 0] = inf | ||||
|     t[p.probs == 0] = 0 | ||||
|     return t.sum(-1) | ||||
|  | ||||
| @ -322,7 +323,7 @@ def _kl_pareto_pareto(p, q): | ||||
|     t1 = q.alpha * scale_ratio.log() | ||||
|     t2 = -alpha_ratio.log() | ||||
|     result = t1 + t2 + alpha_ratio - 1 | ||||
|     result[p.support.lower_bound < q.support.lower_bound] = float('inf') | ||||
|     result[p.support.lower_bound < q.support.lower_bound] = inf | ||||
|     return result | ||||
|  | ||||
|  | ||||
| @ -346,7 +347,7 @@ def _kl_transformed_transformed(p, q): | ||||
| @register_kl(Uniform, Uniform) | ||||
| def _kl_uniform_uniform(p, q): | ||||
|     result = ((q.high - q.low) / (p.high - p.low)).log() | ||||
|     result[(q.low > p.low) | (q.high < p.high)] = float('inf') | ||||
|     result[(q.low > p.low) | (q.high < p.high)] = inf | ||||
|     return result | ||||
|  | ||||
|  | ||||
| @ -392,7 +393,7 @@ def _kl_beta_normal(p, q): | ||||
| @register_kl(Beta, Uniform) | ||||
| def _kl_beta_uniform(p, q): | ||||
|     result = -p.entropy() + (q.high - q.low).log() | ||||
|     result[(q.low > p.support.lower_bound) | (q.high < p.support.upper_bound)] = float('inf') | ||||
|     result[(q.low > p.support.lower_bound) | (q.high < p.support.upper_bound)] = inf | ||||
|     return result | ||||
|  | ||||
|  | ||||
| @ -543,7 +544,7 @@ def _kl_pareto_exponential(p, q): | ||||
|     t2 = p.alpha.reciprocal() | ||||
|     t3 = p.alpha * scale_rate_prod / (p.alpha - 1) | ||||
|     result = t1 - t2 + t3 - 1 | ||||
|     result[p.alpha <= 1] = float('inf') | ||||
|     result[p.alpha <= 1] = inf | ||||
|     return result | ||||
|  | ||||
|  | ||||
| @ -555,7 +556,7 @@ def _kl_pareto_gamma(p, q): | ||||
|     t3 = (1 - q.concentration) * common_term | ||||
|     t4 = q.rate * p.alpha * p.scale / (p.alpha - 1) | ||||
|     result = t1 + t2 + t3 + t4 - 1 | ||||
|     result[p.alpha <= 1] = float('inf') | ||||
|     result[p.alpha <= 1] = inf | ||||
|     return result | ||||
|  | ||||
| # TODO: Add Pareto-Laplace KL Divergence | ||||
| @ -570,7 +571,7 @@ def _kl_pareto_normal(p, q): | ||||
|     t3 = p.alpha * common_term.pow(2) / (p.alpha - 2) | ||||
|     t4 = (p.alpha * common_term - q.loc).pow(2) | ||||
|     result = t1 - t2 + (t3 + t4) / var_normal - 1 | ||||
|     result[p.alpha <= 2] = float('inf') | ||||
|     result[p.alpha <= 2] = inf | ||||
|     return result | ||||
|  | ||||
|  | ||||
| @ -588,14 +589,14 @@ def _kl_uniform_beta(p, q): | ||||
|     t3 = (q.concentration0 - 1) * (_x_log_x((1 - p.high)) - _x_log_x((1 - p.low)) + common_term) / common_term | ||||
|     t4 = q.concentration1.lgamma() + q.concentration0.lgamma() - (q.concentration1 + q.concentration0).lgamma() | ||||
|     result = t3 + t4 - t1 - t2 | ||||
|     result[(p.high > q.support.upper_bound) | (p.low < q.support.lower_bound)] = float('inf') | ||||
|     result[(p.high > q.support.upper_bound) | (p.low < q.support.lower_bound)] = inf | ||||
|     return result | ||||
|  | ||||
|  | ||||
| @register_kl(Uniform, Exponential) | ||||
| def _kl_uniform_exponetial(p, q): | ||||
|     result = q.rate * (p.high + p.low) / 2 - ((p.high - p.low) * q.rate).log() | ||||
|     result[p.low < q.support.lower_bound] = float('inf') | ||||
|     result[p.low < q.support.lower_bound] = inf | ||||
|     return result | ||||
|  | ||||
|  | ||||
| @ -607,7 +608,7 @@ def _kl_uniform_gamma(p, q): | ||||
|     t3 = (1 - q.concentration) * (_x_log_x(p.high) - _x_log_x(p.low) - common_term) / common_term | ||||
|     t4 = q.rate * (p.high + p.low) / 2 | ||||
|     result = -t1 + t2 + t3 + t4 | ||||
|     result[p.low < q.support.lower_bound] = float('inf') | ||||
|     result[p.low < q.support.lower_bound] = inf | ||||
|     return result | ||||
|  | ||||
|  | ||||
| @ -638,5 +639,5 @@ def _kl_uniform_pareto(p, q): | ||||
|     t1 = (q.alpha * q.scale.pow(q.alpha) * (support_uniform)).log() | ||||
|     t2 = (_x_log_x(p.high) - _x_log_x(p.low) - support_uniform) / support_uniform | ||||
|     result = t2 * (q.alpha + 1) - t1 | ||||
|     result[p.low < q.support.lower_bound] = float('inf') | ||||
|     result[p.low < q.support.lower_bound] = inf | ||||
|     return result | ||||
|  | ||||
| @ -1,4 +1,5 @@ | ||||
| import torch | ||||
| from torch._six import inf | ||||
| from torch.distributions.distribution import Distribution | ||||
| from torch.distributions import Categorical | ||||
| from numbers import Number | ||||
| @ -93,6 +94,6 @@ class Multinomial(Distribution): | ||||
|         logits, value = broadcast_all(self.logits.clone(), value) | ||||
|         log_factorial_n = torch.lgamma(value.sum(-1) + 1) | ||||
|         log_factorial_xs = torch.lgamma(value + 1).sum(-1) | ||||
|         logits[(value == 0) & (logits == -float('inf'))] = 0 | ||||
|         logits[(value == 0) & (logits == -inf)] = 0 | ||||
|         log_powers = (logits * value).sum(-1) | ||||
|         return log_factorial_n - log_factorial_xs + log_powers | ||||
|  | ||||
| @ -1,5 +1,6 @@ | ||||
| from numbers import Number | ||||
| import torch | ||||
| from torch._six import inf, nan | ||||
| import math | ||||
| from torch.distributions import constraints | ||||
| from torch.distributions.distribution import Distribution | ||||
| @ -27,15 +28,15 @@ class StudentT(Distribution): | ||||
|     @property | ||||
|     def mean(self): | ||||
|         m = self.loc.clone() | ||||
|         m[self.df <= 1] = float('nan') | ||||
|         m[self.df <= 1] = nan | ||||
|         return m | ||||
|  | ||||
|     @property | ||||
|     def variance(self): | ||||
|         m = self.df.clone() | ||||
|         m[self.df > 2] = self.scale[self.df > 2].pow(2) * self.df[self.df > 2] / (self.df[self.df > 2] - 2) | ||||
|         m[(self.df <= 2) & (self.df > 1)] = float('inf') | ||||
|         m[self.df <= 1] = float('nan') | ||||
|         m[(self.df <= 2) & (self.df > 1)] = inf | ||||
|         m[self.df <= 1] = nan | ||||
|         return m | ||||
|  | ||||
|     def __init__(self, df, loc=0., scale=1., validate_args=None): | ||||
|  | ||||
| @ -1,5 +1,6 @@ | ||||
| import torch | ||||
| import torch.nn.functional as F | ||||
| from torch._six import inf | ||||
| from operator import mul | ||||
| from functools import reduce | ||||
| import math | ||||
| @ -155,7 +156,7 @@ def isfinite(tensor): | ||||
|     """ | ||||
|     if not isinstance(tensor, torch.Tensor): | ||||
|         raise ValueError("The argument is not a tensor", str(tensor)) | ||||
|     return (tensor == tensor) & (tensor.abs() != float('inf')) | ||||
|     return (tensor == tensor) & (tensor.abs() != inf) | ||||
|  | ||||
|  | ||||
| def isinf(tensor): | ||||
| @ -174,7 +175,7 @@ def isinf(tensor): | ||||
|     """ | ||||
|     if not isinstance(tensor, torch.Tensor): | ||||
|         raise ValueError("The argument is not a tensor", str(tensor)) | ||||
|     return tensor.abs() == float('inf') | ||||
|     return tensor.abs() == inf | ||||
|  | ||||
|  | ||||
| def stft(input, n_fft, hop_length=None, win_length=None, window=None, | ||||
|  | ||||
| @ -1,4 +1,5 @@ | ||||
| import torch | ||||
| from torch._six import inf | ||||
| from .Module import Module | ||||
| from .utils import clear | ||||
|  | ||||
| @ -34,7 +35,7 @@ class Normalize(Module): | ||||
|         self._output.resize_as_(input) | ||||
|  | ||||
|         # specialization for the infinity norm | ||||
|         if self.p == float('inf'): | ||||
|         if self.p == inf: | ||||
|             if not self._indices: | ||||
|                 self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \ | ||||
|                     else torch.LongTensor() | ||||
| @ -72,7 +73,7 @@ class Normalize(Module): | ||||
|             self.cross = input.new() | ||||
|         # compute diagonal term with gradOutput | ||||
|         self._gradInput.resize_(n, d) | ||||
|         if self.p == float('inf'): | ||||
|         if self.p == inf: | ||||
|                 # specialization for the inf case | ||||
|             torch.mul(self.norm.view(n, 1, 1).expand(n, d, 1), gradOutput, out=self._gradInput) | ||||
|             self.buffer.resize_as_(input).zero_() | ||||
| @ -113,7 +114,7 @@ class Normalize(Module): | ||||
|         self._gradInput.add_(-1, self.buffer) | ||||
|  | ||||
|         # reuse cross buffer for normalization | ||||
|         if self.p == float('inf'): | ||||
|         if self.p == inf: | ||||
|             torch.mul(self.norm, self.norm, out=self.cross) | ||||
|         else: | ||||
|             torch.mul(self.normp, self.norm, out=self.cross) | ||||
|  | ||||
| @ -1,10 +1,11 @@ | ||||
| import math | ||||
|  | ||||
| INFINITY = float('inf') | ||||
| NAN = float('nan') | ||||
|  | ||||
|  | ||||
| def sqrt_nothrow(x): | ||||
|     return math.sqrt(x) if x >= 0 else float('nan') | ||||
|     return math.sqrt(x) if x >= 0 else NAN | ||||
|  | ||||
|  | ||||
| def cg(opfunc, x, config, state=None): | ||||
| @ -145,7 +146,7 @@ def cg(opfunc, x, config, state=None): | ||||
|             A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3) | ||||
|             B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2) | ||||
|             _denom = (B + sqrt_nothrow(B * B - A * d2 * z3 * z3)) | ||||
|             z2 = -d2 * z3 * z3 / _denom if _denom != 0 else float('nan') | ||||
|             z2 = -d2 * z3 * z3 / _denom if _denom != 0 else NAN | ||||
|  | ||||
|             if z2 != z2 or z2 == INFINITY or z2 == -INFINITY or z2 < 0: | ||||
|                 if limit < -0.5: | ||||
|  | ||||
| @ -523,7 +523,7 @@ class BCEWithLogitsLoss(_Loss): | ||||
|     :math:`p_n > 1` increases the recall, :math:`p_n < 1` increases the precision. | ||||
|  | ||||
|     For example, if a dataset contains 100 positive and 300 negative examples of a single class, | ||||
|     then `pos_weight` for the class should be equal to math:`\frac{300}{100}=3`. | ||||
|     then `pos_weight` for the class should be equal to :math:`\frac{300}{100}=3`. | ||||
|     The loss would act as if the dataset contains :math:`3\times 100=300` positive examples. | ||||
|  | ||||
|     Args: | ||||
|  | ||||
| @ -691,7 +691,7 @@ class _LPPoolNd(Module): | ||||
|         self.ceil_mode = ceil_mode | ||||
|  | ||||
|     def extra_repr(self): | ||||
|         return 'norm_type={norm_type}, kernel_size{kernel_size}, stride={stride}, ' \ | ||||
|         return 'norm_type={norm_type}, kernel_size={kernel_size}, stride={stride}, ' \ | ||||
|             'ceil_mode={ceil_mode}'.format(**self.__dict__) | ||||
|  | ||||
|  | ||||
|  | ||||
| @ -1,5 +1,6 @@ | ||||
| import warnings | ||||
| import torch | ||||
| from torch._six import inf | ||||
|  | ||||
|  | ||||
| def clip_grad_norm_(parameters, max_norm, norm_type=2): | ||||
| @ -23,7 +24,7 @@ def clip_grad_norm_(parameters, max_norm, norm_type=2): | ||||
|     parameters = list(filter(lambda p: p.grad is not None, parameters)) | ||||
|     max_norm = float(max_norm) | ||||
|     norm_type = float(norm_type) | ||||
|     if norm_type == float('inf'): | ||||
|     if norm_type == inf: | ||||
|         total_norm = max(p.grad.data.abs().max() for p in parameters) | ||||
|     else: | ||||
|         total_norm = 0 | ||||
|  | ||||
| @ -1,4 +1,6 @@ | ||||
| import math | ||||
| import torch | ||||
| from torch._six import inf | ||||
| from bisect import bisect_right | ||||
| from functools import partial | ||||
| from .optimizer import Optimizer | ||||
| @ -367,9 +369,9 @@ class ReduceLROnPlateau(object): | ||||
|             raise ValueError('threshold mode ' + threshold_mode + ' is unknown!') | ||||
|  | ||||
|         if mode == 'min': | ||||
|             self.mode_worse = float('inf') | ||||
|             self.mode_worse = inf | ||||
|         else:  # mode == 'max': | ||||
|             self.mode_worse = (-float('inf')) | ||||
|             self.mode_worse = -inf | ||||
|  | ||||
|         self.is_better = partial(self._cmp, mode, threshold_mode, threshold) | ||||
|  | ||||
|  | ||||
| @ -65,6 +65,10 @@ CUDA_HOME = _find_cuda_home() | ||||
| BUILT_FROM_SOURCE_VERSION_PATTERN = re.compile(r'\d+\.\d+\.\d+\w+\+\w+') | ||||
|  | ||||
|  | ||||
| def is_binary_build(): | ||||
|     return not BUILT_FROM_SOURCE_VERSION_PATTERN.match(torch.version.__version__) | ||||
|  | ||||
|  | ||||
| def check_compiler_abi_compatibility(compiler): | ||||
|     ''' | ||||
|     Verifies that the given compiler is ABI-compatible with PyTorch. | ||||
| @ -77,7 +81,7 @@ def check_compiler_abi_compatibility(compiler): | ||||
|         False if the compiler is (likely) ABI-incompatible with PyTorch, | ||||
|         else True. | ||||
|     ''' | ||||
|     if BUILT_FROM_SOURCE_VERSION_PATTERN.match(torch.version.__version__): | ||||
|     if not is_binary_build(): | ||||
|         return True | ||||
|     try: | ||||
|         check_cmd = '{}' if sys.platform == 'win32' else '{} --version' | ||||
| @ -134,6 +138,7 @@ class BuildExtension(build_ext): | ||||
|         self._check_abi() | ||||
|         for extension in self.extensions: | ||||
|             self._define_torch_extension_name(extension) | ||||
|             self._add_gnu_abi_flag_if_binary(extension) | ||||
|  | ||||
|         # Register .cu and .cuh as valid source extensions. | ||||
|         self.compiler.src_extensions += ['.cu', '.cuh'] | ||||
| @ -266,6 +271,21 @@ class BuildExtension(build_ext): | ||||
|         else: | ||||
|             extension.extra_compile_args.append(define) | ||||
|  | ||||
|     def _add_gnu_abi_flag_if_binary(self, extension): | ||||
|         # If the version string looks like a binary build, | ||||
|         # we know that PyTorch was compiled with gcc 4.9.2. | ||||
|         # If the extension is compiled with gcc >= 5.1, | ||||
|         # then we have to define _GLIBCXX_USE_CXX11_ABI=0 | ||||
|         # so that the std::string in the API is resolved to | ||||
|         # non-C++11 symbols | ||||
|         define = '-D_GLIBCXX_USE_CXX11_ABI=0' | ||||
|         if is_binary_build(): | ||||
|             if isinstance(extension.extra_compile_args, dict): | ||||
|                 for args in extension.extra_compile_args.values(): | ||||
|                     args.append(define) | ||||
|             else: | ||||
|                 extension.extra_compile_args.append(define) | ||||
|  | ||||
|  | ||||
| def CppExtension(name, sources, *args, **kwargs): | ||||
|     ''' | ||||
| @ -785,6 +805,9 @@ def _write_ninja_file(path, | ||||
|     common_cflags = ['-DTORCH_EXTENSION_NAME={}'.format(name)] | ||||
|     common_cflags += ['-I{}'.format(include) for include in includes] | ||||
|  | ||||
|     if is_binary_build(): | ||||
|         common_cflags += ['-D_GLIBCXX_USE_CXX11_ABI=0'] | ||||
|  | ||||
|     cflags = common_cflags + ['-fPIC', '-std=c++11'] + extra_cflags | ||||
|     if sys.platform == 'win32': | ||||
|         from distutils.spawn import _nt_quote_args | ||||
|  | ||||
		Reference in New Issue
	
	Block a user
	