mirror of
				https://github.com/pytorch/pytorch.git
				synced 2025-11-04 16:04:58 +08:00 
			
		
		
		
	Compare commits
	
		
			17 Commits
		
	
	
		
			mlazos/use
			...
			v0.4.1
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| a24163a95e | |||
| f08f222db3 | |||
| 8f916179f8 | |||
| 7b7e6dbfa7 | |||
| 84b8c1c357 | |||
| b595c3e9ca | |||
| 6ecc275272 | |||
| f34528a723 | |||
| 2edf053549 | |||
| 76c16a5a64 | |||
| f6fac92692 | |||
| bb60c97805 | |||
| 886a367247 | |||
| 416c8ef1d1 | |||
| 2fbbe42a30 | |||
| f07e550b08 | |||
| 3684cc4e52 | 
@ -151,10 +151,6 @@ endif()
 | 
			
		||||
# ---[ CMake scripts + modules
 | 
			
		||||
list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
 | 
			
		||||
 | 
			
		||||
if (MSVC AND ${BUILD_SHARED_LIBS})
 | 
			
		||||
  set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
 | 
			
		||||
endif()
 | 
			
		||||
 | 
			
		||||
# ---[ CMake build directories
 | 
			
		||||
set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 | 
			
		||||
set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 | 
			
		||||
 | 
			
		||||
@ -13,7 +13,7 @@ else()
 | 
			
		||||
  cmake_dependent_option(
 | 
			
		||||
      USE_CUDNN "Use cuDNN" ON
 | 
			
		||||
      "USE_CUDA" OFF)
 | 
			
		||||
  option(ATEN_NO_TEST "Do not build ATen test binaries" OFF)
 | 
			
		||||
  option(ATEN_NO_TEST "Do not build ATen test binaries" ON)
 | 
			
		||||
 | 
			
		||||
  # Flag for shared dependencies
 | 
			
		||||
  set(BUILD_ATEN ON)
 | 
			
		||||
 | 
			
		||||
@ -1,4 +1,5 @@
 | 
			
		||||
#include <ATen/optional.h>
 | 
			
		||||
#include <ATen/Backtrace.h>
 | 
			
		||||
 | 
			
		||||
#include <functional>
 | 
			
		||||
#include <memory>
 | 
			
		||||
 | 
			
		||||
@ -4,9 +4,11 @@
 | 
			
		||||
#include <string>
 | 
			
		||||
#include <typeinfo>
 | 
			
		||||
 | 
			
		||||
#include <ATen/ATenGeneral.h>
 | 
			
		||||
 | 
			
		||||
namespace at {
 | 
			
		||||
/// Utility to demangle a C++ symbol name.
 | 
			
		||||
std::string demangle(const char* name);
 | 
			
		||||
AT_API std::string demangle(const char* name);
 | 
			
		||||
 | 
			
		||||
/// Returns the printable name of the type.
 | 
			
		||||
template <typename T>
 | 
			
		||||
@ -19,7 +21,7 @@ inline const char* demangle_type() {
 | 
			
		||||
#endif // __GXX_RTTI
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
std::string get_backtrace(
 | 
			
		||||
AT_API std::string get_backtrace(
 | 
			
		||||
    size_t frames_to_skip = 0,
 | 
			
		||||
    size_t maximum_number_of_frames = 64,
 | 
			
		||||
    bool skip_python_frames = true);
 | 
			
		||||
 | 
			
		||||
@ -250,6 +250,7 @@ IF(USE_CUDA AND NOT USE_ROCM)
 | 
			
		||||
  ENDIF(USE_MAGMA)
 | 
			
		||||
  IF ($ENV{ATEN_STATIC_CUDA})
 | 
			
		||||
    list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a")
 | 
			
		||||
    list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcudart_static.a")
 | 
			
		||||
  ENDIF($ENV{ATEN_STATIC_CUDA})
 | 
			
		||||
ENDIF()
 | 
			
		||||
 | 
			
		||||
@ -405,11 +406,11 @@ ENDFOREACH()
 | 
			
		||||
INSTALL(FILES ${CMAKE_BINARY_DIR}/aten/src/ATen/Declarations.yaml
 | 
			
		||||
  DESTINATION ${AT_INSTALL_SHARE_DIR}/ATen)
 | 
			
		||||
 | 
			
		||||
if(ATEN_NO_TEST)
 | 
			
		||||
  message("disable test because ATEN_NO_TEST is set")
 | 
			
		||||
else()
 | 
			
		||||
  add_subdirectory(test)
 | 
			
		||||
endif()
 | 
			
		||||
# if(ATEN_NO_TEST)
 | 
			
		||||
#   message("disable test because ATEN_NO_TEST is set")
 | 
			
		||||
# else()
 | 
			
		||||
#   add_subdirectory(test)
 | 
			
		||||
# endif()
 | 
			
		||||
 | 
			
		||||
if (NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
 | 
			
		||||
  foreach(test_src ${ATen_CPU_TEST_SRCS})
 | 
			
		||||
 | 
			
		||||
@ -3,6 +3,8 @@
 | 
			
		||||
#include <cstdint>
 | 
			
		||||
#include <utility>
 | 
			
		||||
 | 
			
		||||
#include <ATen/ATenGeneral.h>
 | 
			
		||||
 | 
			
		||||
/*
 | 
			
		||||
* A CUDA stream interface with no CUDA build dependency.
 | 
			
		||||
* 
 | 
			
		||||
@ -25,27 +27,27 @@ namespace detail {
 | 
			
		||||
 | 
			
		||||
// Pointer-based API (for internal use)
 | 
			
		||||
// Note: ATen/Context is preferred to work with streams safely
 | 
			
		||||
CUDAStreamInternals* CUDAStream_getDefaultStreamOnDevice(int64_t device);
 | 
			
		||||
CUDAStreamInternals* CUDAStream_getDefaultStream();
 | 
			
		||||
AT_API CUDAStreamInternals* CUDAStream_getDefaultStreamOnDevice(int64_t device);
 | 
			
		||||
AT_API CUDAStreamInternals* CUDAStream_getDefaultStream();
 | 
			
		||||
 | 
			
		||||
CUDAStreamInternals* CUDAStream_createAndRetainWithOptions(int32_t flags, int32_t priority);
 | 
			
		||||
AT_API CUDAStreamInternals* CUDAStream_createAndRetainWithOptions(int32_t flags, int32_t priority);
 | 
			
		||||
 | 
			
		||||
CUDAStreamInternals* CUDAStream_getAndRetainCurrentStreamOnDevice(int64_t device);
 | 
			
		||||
CUDAStreamInternals* CUDAStream_getAndRetainCurrentStream();
 | 
			
		||||
AT_API CUDAStreamInternals* CUDAStream_getAndRetainCurrentStreamOnDevice(int64_t device);
 | 
			
		||||
AT_API CUDAStreamInternals* CUDAStream_getAndRetainCurrentStream();
 | 
			
		||||
 | 
			
		||||
// Note: these Unsafe gets should NEVER be used and are only here for legacy
 | 
			
		||||
// purposes. Once those uses are gone they should be removed.
 | 
			
		||||
CUDAStreamInternals* CUDAStream_getCurrentStreamOnDeviceUnsafe(int64_t device);
 | 
			
		||||
CUDAStreamInternals* CUDAStream_getCurrentStreamUnsafe();
 | 
			
		||||
AT_API CUDAStreamInternals* CUDAStream_getCurrentStreamOnDeviceUnsafe(int64_t device);
 | 
			
		||||
AT_API CUDAStreamInternals* CUDAStream_getCurrentStreamUnsafe();
 | 
			
		||||
 | 
			
		||||
void CUDAStream_setStreamOnDevice(int64_t device, CUDAStreamInternals* internals);
 | 
			
		||||
void CUDAStream_setStream(CUDAStreamInternals* internals);
 | 
			
		||||
AT_API void CUDAStream_setStreamOnDevice(int64_t device, CUDAStreamInternals* internals);
 | 
			
		||||
AT_API void CUDAStream_setStream(CUDAStreamInternals* internals);
 | 
			
		||||
 | 
			
		||||
cudaStream_t CUDAStream_stream(CUDAStreamInternals*);
 | 
			
		||||
int64_t CUDAStream_device(CUDAStreamInternals*);
 | 
			
		||||
AT_API cudaStream_t CUDAStream_stream(CUDAStreamInternals*);
 | 
			
		||||
AT_API int64_t CUDAStream_device(CUDAStreamInternals*);
 | 
			
		||||
 | 
			
		||||
bool CUDAStream_retain(CUDAStreamInternals*);
 | 
			
		||||
void CUDAStream_free(CUDAStreamInternals*&);
 | 
			
		||||
AT_API bool CUDAStream_retain(CUDAStreamInternals*);
 | 
			
		||||
AT_API void CUDAStream_free(CUDAStreamInternals*&);
 | 
			
		||||
 | 
			
		||||
} // namespace detail
 | 
			
		||||
 | 
			
		||||
@ -64,10 +66,10 @@ struct CUDAStream {
 | 
			
		||||
  ~CUDAStream() { detail::CUDAStream_free(internals_); }
 | 
			
		||||
 | 
			
		||||
  // Copy constructor
 | 
			
		||||
  CUDAStream(const CUDAStream& other);
 | 
			
		||||
  AT_API CUDAStream(const CUDAStream& other);
 | 
			
		||||
 | 
			
		||||
  // Move constructor
 | 
			
		||||
  CUDAStream(CUDAStream&& other);  
 | 
			
		||||
  AT_API CUDAStream(CUDAStream&& other);
 | 
			
		||||
 | 
			
		||||
  // Assignment operator
 | 
			
		||||
  CUDAStream& operator=(CUDAStream other) {
 | 
			
		||||
 | 
			
		||||
@ -111,8 +111,8 @@ struct Device {
 | 
			
		||||
};
 | 
			
		||||
} // namespace at
 | 
			
		||||
 | 
			
		||||
std::ostream& operator<<(std::ostream& stream, at::Device::Type type);
 | 
			
		||||
std::ostream& operator<<(std::ostream& stream, const at::Device& device);
 | 
			
		||||
AT_API std::ostream& operator<<(std::ostream& stream, at::Device::Type type);
 | 
			
		||||
AT_API std::ostream& operator<<(std::ostream& stream, const at::Device& device);
 | 
			
		||||
 | 
			
		||||
namespace std {
 | 
			
		||||
  template<> struct hash<at::Device>
 | 
			
		||||
 | 
			
		||||
@ -43,7 +43,7 @@
 | 
			
		||||
      AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__)        \
 | 
			
		||||
      AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__)       \
 | 
			
		||||
      default:                                                                \
 | 
			
		||||
        AT_ERROR("%s not implemented for '%s'", (NAME), the_type.toString()); \
 | 
			
		||||
        AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
 | 
			
		||||
    }                                                                         \
 | 
			
		||||
  }()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -35,8 +35,8 @@ namespace at {
 | 
			
		||||
 | 
			
		||||
namespace detail {
 | 
			
		||||
 | 
			
		||||
float halfbits2float(unsigned short bits);
 | 
			
		||||
unsigned short float2halfbits(float value);
 | 
			
		||||
AT_API float halfbits2float(unsigned short bits);
 | 
			
		||||
AT_API unsigned short float2halfbits(float value);
 | 
			
		||||
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -33,6 +33,8 @@
 | 
			
		||||
#include <type_traits>
 | 
			
		||||
#include <utility>
 | 
			
		||||
 | 
			
		||||
#include <ATen/ATenGeneral.h>
 | 
			
		||||
 | 
			
		||||
#if __GNUG__ && __GNUC__ < 5
 | 
			
		||||
#define AT_IS_TRIVIALLY_COPYABLE(T) __has_trivial_copy(T)
 | 
			
		||||
#else
 | 
			
		||||
@ -57,7 +59,7 @@ static inline uint64_t NextPowerOf2(uint64_t A) {
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/// This is all the non-templated stuff common to all SmallVectors.
 | 
			
		||||
class SmallVectorBase {
 | 
			
		||||
class AT_API SmallVectorBase {
 | 
			
		||||
protected:
 | 
			
		||||
  void *BeginX, *EndX, *CapacityX;
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -5,7 +5,7 @@
 | 
			
		||||
#include "ATen/Error.h"
 | 
			
		||||
 | 
			
		||||
namespace at {
 | 
			
		||||
struct SparseTensorImpl : public TensorImpl {
 | 
			
		||||
struct AT_API SparseTensorImpl : public TensorImpl {
 | 
			
		||||
  // Stored in COO format, indices + values.
 | 
			
		||||
 | 
			
		||||
  // Ideal INVARIANTS:
 | 
			
		||||
 | 
			
		||||
@ -19,7 +19,7 @@ namespace at {
 | 
			
		||||
/// `torch::TensorOptions` subclass of this `TensorOptions`, which changes
 | 
			
		||||
/// `type()` to return a variable type instead of a tensor type, such that
 | 
			
		||||
/// variables are created inside factory methods, instead of tensors.
 | 
			
		||||
struct TensorOptions {
 | 
			
		||||
struct AT_API TensorOptions {
 | 
			
		||||
  TensorOptions() : TensorOptions(/*use_thread_local_default_options=*/true) {}
 | 
			
		||||
 | 
			
		||||
  /// Constructs the `TensorOptions` with defaults taken from the thread local
 | 
			
		||||
 | 
			
		||||
@ -143,7 +143,7 @@ static inline ${return_type} ${api_name}(${formals}) {
 | 
			
		||||
""")
 | 
			
		||||
# add a native declaration for a native function
 | 
			
		||||
NATIVE_DECLARATION = CodeTemplate("""\
 | 
			
		||||
${return_type} ${native_type_method_dispatch}(${formals_with_defaults});
 | 
			
		||||
AT_API ${return_type} ${native_type_method_dispatch}(${formals_with_defaults});
 | 
			
		||||
""")
 | 
			
		||||
 | 
			
		||||
# special method definition for factory functions in Functions.h
 | 
			
		||||
 | 
			
		||||
@ -35,11 +35,14 @@
 | 
			
		||||
#ifdef _WIN32
 | 
			
		||||
# if defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)
 | 
			
		||||
#  define TH_API TH_EXTERNC __declspec(dllexport)
 | 
			
		||||
#  define TH_CPP_API extern __declspec(dllexport)
 | 
			
		||||
# else
 | 
			
		||||
#  define TH_API TH_EXTERNC __declspec(dllimport)
 | 
			
		||||
#  define TH_CPP_API extern __declspec(dllimport)
 | 
			
		||||
# endif
 | 
			
		||||
#else
 | 
			
		||||
# define TH_API TH_EXTERNC
 | 
			
		||||
# define TH_CPP_API extern
 | 
			
		||||
#endif
 | 
			
		||||
 | 
			
		||||
#ifdef _WIN32
 | 
			
		||||
 | 
			
		||||
@ -69,18 +69,18 @@ TH_API THStorage* THStorage_newWithSize(at::ScalarType scalar_type, ptrdiff_t si
 | 
			
		||||
TH_API THStorage* THStorage_newWithAllocator(at::ScalarType scalar_type, ptrdiff_t size,
 | 
			
		||||
                                             at::Allocator *allocator);
 | 
			
		||||
 | 
			
		||||
ptrdiff_t THStorage_size(const THStorage *self);
 | 
			
		||||
size_t THStorage_elementSize();
 | 
			
		||||
THStorage* THStorage_newWithMapping(at::ScalarType scalar_type, const char *filename, ptrdiff_t size, int flags);
 | 
			
		||||
void THStorage_setFlag(THStorage *storage, const char flag);
 | 
			
		||||
void THStorage_clearFlag(THStorage *storage, const char flag);
 | 
			
		||||
void THStorage_retain(THStorage *storage);
 | 
			
		||||
THStorage* THStorage_newWithDataAndAllocator(at::ScalarType scalar_type,
 | 
			
		||||
                                             at::DataPtr&& data, ptrdiff_t size,
 | 
			
		||||
                                             at::Allocator* allocator);
 | 
			
		||||
void THStorage_resize(THStorage *storage, ptrdiff_t size);
 | 
			
		||||
void THStorage_swap(THStorage *storage1, THStorage *storage2);
 | 
			
		||||
TH_API ptrdiff_t THStorage_size(const THStorage *self);
 | 
			
		||||
TH_API size_t THStorage_elementSize();
 | 
			
		||||
TH_API THStorage* THStorage_newWithMapping(at::ScalarType scalar_type, const char *filename, ptrdiff_t size, int flags);
 | 
			
		||||
TH_API void THStorage_setFlag(THStorage *storage, const char flag);
 | 
			
		||||
TH_API void THStorage_clearFlag(THStorage *storage, const char flag);
 | 
			
		||||
TH_API void THStorage_retain(THStorage *storage);
 | 
			
		||||
TH_API THStorage* THStorage_newWithDataAndAllocator(at::ScalarType scalar_type,
 | 
			
		||||
                                                    at::DataPtr&& data, ptrdiff_t size,
 | 
			
		||||
                                                    at::Allocator* allocator);
 | 
			
		||||
TH_API void THStorage_resize(THStorage *storage, ptrdiff_t size);
 | 
			
		||||
TH_API void THStorage_swap(THStorage *storage1, THStorage *storage2);
 | 
			
		||||
 | 
			
		||||
void THStorage_weakRetain(THStorage *weak_storage);
 | 
			
		||||
void THStorage_weakFree(THStorage *weak_storage);
 | 
			
		||||
THStorage* THStorage_weakLock(THStorage *weak_storage);
 | 
			
		||||
TH_API void THStorage_weakRetain(THStorage *weak_storage);
 | 
			
		||||
TH_API void THStorage_weakFree(THStorage *weak_storage);
 | 
			
		||||
TH_API THStorage* THStorage_weakLock(THStorage *weak_storage);
 | 
			
		||||
 | 
			
		||||
@ -83,5 +83,5 @@ struct THTensor
 | 
			
		||||
#include "THGenerateAllTypes.h"
 | 
			
		||||
 | 
			
		||||
TH_API void THTensor_free(THTensor *self);
 | 
			
		||||
at::optional<std::vector<int64_t>> THTensor_compute_stride(at::IntList oldshape, at::IntList oldstride,
 | 
			
		||||
                                                           at::IntList newshape);
 | 
			
		||||
TH_CPP_API at::optional<std::vector<int64_t>> THTensor_compute_stride(at::IntList oldshape, at::IntList oldstride,
 | 
			
		||||
                                                                      at::IntList newshape);
 | 
			
		||||
 | 
			
		||||
@ -6,9 +6,9 @@ static inline void THNN_(Col2Im_shapeCheck)(
 | 
			
		||||
                         THCState *state,
 | 
			
		||||
                         THCTensor *input,
 | 
			
		||||
                         THCTensor *gradOutput,
 | 
			
		||||
                         int outputHeight, int outputWidth,
 | 
			
		||||
                         int kH, int kW, int dH, int dW,
 | 
			
		||||
                         int padH, int padW, int sH, int sW) {
 | 
			
		||||
                         int64_t outputHeight, int64_t outputWidth,
 | 
			
		||||
                         int64_t kH, int64_t kW, int64_t dH, int64_t dW,
 | 
			
		||||
                         int64_t padH, int64_t padW, int64_t sH, int64_t sW) {
 | 
			
		||||
 | 
			
		||||
  THArgCheck(kW > 0 && kH > 0, 6,
 | 
			
		||||
             "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
 | 
			
		||||
@ -17,7 +17,7 @@ static inline void THNN_(Col2Im_shapeCheck)(
 | 
			
		||||
  THArgCheck(dW > 0 && dH > 0, 8,
 | 
			
		||||
             "dilation should be greater than zero, but got dH: %d dW: %d", dH, dW);
 | 
			
		||||
 | 
			
		||||
  int ndim = THCTensor_(nDimension)(state, input);
 | 
			
		||||
  int64_t ndim = THCTensor_(nDimension)(state, input);
 | 
			
		||||
  THCUNN_argCheck(state, !input->is_empty() && (ndim == 2 || ndim == 3), 2, input,
 | 
			
		||||
                  "Expected non-empty 2D or 3D input tensor, but got input of shape %s");
 | 
			
		||||
 | 
			
		||||
@ -54,11 +54,11 @@ void THNN_(Col2Im_updateOutput)(
 | 
			
		||||
           THCState *state,
 | 
			
		||||
           THCTensor *input,
 | 
			
		||||
           THCTensor *output,
 | 
			
		||||
           int outputHeight, int outputWidth,
 | 
			
		||||
           int kH, int kW,
 | 
			
		||||
           int dH, int dW,
 | 
			
		||||
           int padH, int padW,
 | 
			
		||||
           int sH, int sW) {
 | 
			
		||||
           int64_t outputHeight, int64_t outputWidth,
 | 
			
		||||
           int64_t kH, int64_t kW,
 | 
			
		||||
           int64_t dH, int64_t dW,
 | 
			
		||||
           int64_t padH, int64_t padW,
 | 
			
		||||
           int64_t sH, int64_t sW) {
 | 
			
		||||
 | 
			
		||||
  THCUNN_assertSameGPU(state, 2, input, output);
 | 
			
		||||
 | 
			
		||||
@ -84,10 +84,10 @@ void THNN_(Col2Im_updateOutput)(
 | 
			
		||||
  THCTensor *input_n = THCTensor_(new)(state);
 | 
			
		||||
  THCTensor *output_n = THCTensor_(new)(state);
 | 
			
		||||
 | 
			
		||||
  int height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
 | 
			
		||||
  int width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
 | 
			
		||||
  int64_t height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
 | 
			
		||||
  int64_t width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
 | 
			
		||||
 | 
			
		||||
  for (int elt = 0; elt < batchSize; elt++) {
 | 
			
		||||
  for (int64_t elt = 0; elt < batchSize; elt++) {
 | 
			
		||||
    THCTensor_(select)(state, input_n, input, 0, elt);
 | 
			
		||||
    THCTensor_(select)(state, output_n, output, 0, elt);
 | 
			
		||||
 | 
			
		||||
@ -116,10 +116,10 @@ void THNN_(Col2Im_updateGradInput)(
 | 
			
		||||
           THCState *state,
 | 
			
		||||
           THCTensor *gradOutput,
 | 
			
		||||
           THCTensor *gradInput,
 | 
			
		||||
           int kH, int kW,
 | 
			
		||||
           int dH, int dW,
 | 
			
		||||
           int padH, int padW,
 | 
			
		||||
           int sH, int sW) {
 | 
			
		||||
           int64_t kH, int64_t kW,
 | 
			
		||||
           int64_t dH, int64_t dW,
 | 
			
		||||
           int64_t padH, int64_t padW,
 | 
			
		||||
           int64_t sH, int64_t sW) {
 | 
			
		||||
 | 
			
		||||
  THNN_(Im2Col_updateOutput)(state, gradOutput, gradInput,
 | 
			
		||||
                             kH, kW, dH, dW, padH, padW, sH, sW);
 | 
			
		||||
 | 
			
		||||
@ -6,8 +6,8 @@ static inline void THNN_(Im2Col_shapeCheck)(
 | 
			
		||||
                         THCState *state,
 | 
			
		||||
                         THCTensor *input,
 | 
			
		||||
                         THCTensor *gradOutput,
 | 
			
		||||
                         int kH, int kW, int dH, int dW,
 | 
			
		||||
                         int padH, int padW, int sH, int sW) {
 | 
			
		||||
                         int64_t kH, int64_t kW, int64_t dH, int64_t dW,
 | 
			
		||||
                         int64_t padH, int64_t padW, int64_t sH, int64_t sW) {
 | 
			
		||||
 | 
			
		||||
  THArgCheck(kW > 0 && kH > 0, 4,
 | 
			
		||||
             "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
 | 
			
		||||
@ -18,7 +18,7 @@ static inline void THNN_(Im2Col_shapeCheck)(
 | 
			
		||||
  THArgCheck(sW > 0 && sH > 0, 10,
 | 
			
		||||
             "stride should be greater than zero, but got sH: %d sW: %d", sH, sW);
 | 
			
		||||
 | 
			
		||||
  int ndim = THCTensor_(nDimension)(state, input);
 | 
			
		||||
  int64_t ndim = THCTensor_(nDimension)(state, input);
 | 
			
		||||
  THCUNN_argCheck(state, !input->is_empty() && (ndim == 3 || ndim == 4), 2, input,
 | 
			
		||||
                "Expected non-empty 3D or 4D input tensor, but got input of shape %s");
 | 
			
		||||
 | 
			
		||||
@ -26,11 +26,11 @@ static inline void THNN_(Im2Col_shapeCheck)(
 | 
			
		||||
  if (ndim == 3) {
 | 
			
		||||
    dim_batch = -1;
 | 
			
		||||
  }
 | 
			
		||||
  int nInputPlane  = THCTensor_(size)(state, input, dim_batch + 1);
 | 
			
		||||
  int inputHeight  = THCTensor_(size)(state, input, dim_batch + 2);
 | 
			
		||||
  int inputWidth   = THCTensor_(size)(state, input, dim_batch + 3);
 | 
			
		||||
  int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
 | 
			
		||||
  int outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
 | 
			
		||||
  int64_t nInputPlane  = THCTensor_(size)(state, input, dim_batch + 1);
 | 
			
		||||
  int64_t inputHeight  = THCTensor_(size)(state, input, dim_batch + 2);
 | 
			
		||||
  int64_t inputWidth   = THCTensor_(size)(state, input, dim_batch + 3);
 | 
			
		||||
  int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
 | 
			
		||||
  int64_t outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
 | 
			
		||||
 | 
			
		||||
  if (outputHeight < 1 || outputWidth < 1) {
 | 
			
		||||
    THError("Given input with spatial size (%d, %d), kernel_size=(%d, %d), "
 | 
			
		||||
@ -46,10 +46,10 @@ void THNN_(Im2Col_updateOutput)(
 | 
			
		||||
           THCState *state,
 | 
			
		||||
           THCTensor *input,
 | 
			
		||||
           THCTensor *output,
 | 
			
		||||
           int kH, int kW,
 | 
			
		||||
           int dH, int dW,
 | 
			
		||||
           int padH, int padW,
 | 
			
		||||
           int sH, int sW) {
 | 
			
		||||
           int64_t kH, int64_t kW,
 | 
			
		||||
           int64_t dH, int64_t dW,
 | 
			
		||||
           int64_t padH, int64_t padW,
 | 
			
		||||
           int64_t sH, int64_t sW) {
 | 
			
		||||
 | 
			
		||||
  THCUNN_assertSameGPU(state, 2, input, output);
 | 
			
		||||
 | 
			
		||||
@ -62,15 +62,15 @@ void THNN_(Im2Col_updateOutput)(
 | 
			
		||||
    THCTensor_(resize4d)(state, input, 1, input->size[0], input->size[1], input->size[2]);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  int batchSize    = THCTensor_(size)(state, input, 0);
 | 
			
		||||
  int nInputPlane  = THCTensor_(size)(state, input, 1);
 | 
			
		||||
  int inputHeight  = THCTensor_(size)(state, input, 2);
 | 
			
		||||
  int inputWidth   = THCTensor_(size)(state, input, 3);
 | 
			
		||||
  int64_t batchSize    = THCTensor_(size)(state, input, 0);
 | 
			
		||||
  int64_t nInputPlane  = THCTensor_(size)(state, input, 1);
 | 
			
		||||
  int64_t inputHeight  = THCTensor_(size)(state, input, 2);
 | 
			
		||||
  int64_t inputWidth   = THCTensor_(size)(state, input, 3);
 | 
			
		||||
 | 
			
		||||
  int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
 | 
			
		||||
  int outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
 | 
			
		||||
  int nOutputPlane = nInputPlane * kW * kH;
 | 
			
		||||
  int outputLength = outputHeight * outputWidth;
 | 
			
		||||
  int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
 | 
			
		||||
  int64_t outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
 | 
			
		||||
  int64_t nOutputPlane = nInputPlane * kW * kH;
 | 
			
		||||
  int64_t outputLength = outputHeight * outputWidth;
 | 
			
		||||
 | 
			
		||||
  THCTensor_(resize3d)(state, output, batchSize, nOutputPlane, outputLength);
 | 
			
		||||
  THCTensor_(zero)(state, output);
 | 
			
		||||
@ -78,7 +78,7 @@ void THNN_(Im2Col_updateOutput)(
 | 
			
		||||
  THCTensor *input_n = THCTensor_(new)(state);
 | 
			
		||||
  THCTensor *output_n = THCTensor_(new)(state);
 | 
			
		||||
 | 
			
		||||
  for (int elt = 0; elt < batchSize; elt++) {
 | 
			
		||||
  for (int64_t elt = 0; elt < batchSize; elt++) {
 | 
			
		||||
    THCTensor_(select)(state, input_n, input, 0, elt);
 | 
			
		||||
    THCTensor_(select)(state, output_n, output, 0, elt);
 | 
			
		||||
 | 
			
		||||
@ -104,11 +104,11 @@ void THNN_(Im2Col_updateGradInput)(
 | 
			
		||||
           THCState *state,
 | 
			
		||||
           THCTensor *gradOutput,
 | 
			
		||||
           THCTensor *gradInput,
 | 
			
		||||
           int inputHeight, int inputWidth,
 | 
			
		||||
           int kH, int kW,
 | 
			
		||||
           int dH, int dW,
 | 
			
		||||
           int padH, int padW,
 | 
			
		||||
           int sH, int sW) {
 | 
			
		||||
           int64_t inputHeight, int64_t inputWidth,
 | 
			
		||||
           int64_t kH, int64_t kW,
 | 
			
		||||
           int64_t dH, int64_t dW,
 | 
			
		||||
           int64_t padH, int64_t padW,
 | 
			
		||||
           int64_t sH, int64_t sW) {
 | 
			
		||||
 | 
			
		||||
  THNN_(Col2Im_updateOutput)(state, gradOutput, gradInput,
 | 
			
		||||
                             inputHeight, inputWidth,
 | 
			
		||||
 | 
			
		||||
@ -183,39 +183,39 @@ THC_API void THNN_(Im2Col_updateOutput)(
 | 
			
		||||
                  THCState *state,
 | 
			
		||||
                  THCTensor *input,
 | 
			
		||||
                  THCTensor *output,
 | 
			
		||||
                  int kH, int kW,
 | 
			
		||||
                  int dH, int dW,
 | 
			
		||||
                  int padH, int padW,
 | 
			
		||||
                  int sH, int sW);
 | 
			
		||||
                  int64_t kH, int64_t kW,
 | 
			
		||||
                  int64_t dH, int64_t dW,
 | 
			
		||||
                  int64_t padH, int64_t padW,
 | 
			
		||||
                  int64_t sH, int64_t sW);
 | 
			
		||||
 | 
			
		||||
THC_API void THNN_(Im2Col_updateGradInput)(
 | 
			
		||||
                  THCState *state,
 | 
			
		||||
                  THCTensor *gradOutput,
 | 
			
		||||
                  THCTensor *gradInput,
 | 
			
		||||
                  int inputHeight, int inputWidth,
 | 
			
		||||
                  int kH, int kW,
 | 
			
		||||
                  int dH, int dW,
 | 
			
		||||
                  int padH, int padW,
 | 
			
		||||
                  int sH, int sW);
 | 
			
		||||
                  int64_t inputHeight, int64_t inputWidth,
 | 
			
		||||
                  int64_t kH, int64_t kW,
 | 
			
		||||
                  int64_t dH, int64_t dW,
 | 
			
		||||
                  int64_t padH, int64_t padW,
 | 
			
		||||
                  int64_t sH, int64_t sW);
 | 
			
		||||
 | 
			
		||||
THC_API void THNN_(Col2Im_updateOutput)(
 | 
			
		||||
                  THCState *state,
 | 
			
		||||
                  THCTensor *input,
 | 
			
		||||
                  THCTensor *output,
 | 
			
		||||
                  int outputHeight, int outputWidth,
 | 
			
		||||
                  int kH, int kW,
 | 
			
		||||
                  int dH, int dW,
 | 
			
		||||
                  int padH, int padW,
 | 
			
		||||
                  int sH, int sW);
 | 
			
		||||
                  int64_t outputHeight, int64_t outputWidth,
 | 
			
		||||
                  int64_t kH, int64_t kW,
 | 
			
		||||
                  int64_t dH, int64_t dW,
 | 
			
		||||
                  int64_t padH, int64_t padW,
 | 
			
		||||
                  int64_t sH, int64_t sW);
 | 
			
		||||
 | 
			
		||||
 THC_API void THNN_(Col2Im_updateGradInput)(
 | 
			
		||||
                  THCState *state,
 | 
			
		||||
                  THCTensor *gradOutput,
 | 
			
		||||
                  THCTensor *gradInput,
 | 
			
		||||
                  int kH, int kW,
 | 
			
		||||
                  int dH, int dW,
 | 
			
		||||
                  int padH, int padW,
 | 
			
		||||
                  int sH, int sW);
 | 
			
		||||
                  int64_t kH, int64_t kW,
 | 
			
		||||
                  int64_t dH, int64_t dW,
 | 
			
		||||
                  int64_t padH, int64_t padW,
 | 
			
		||||
                  int64_t sH, int64_t sW);
 | 
			
		||||
 | 
			
		||||
THC_API void THNN_(LeakyReLU_updateOutput)(
 | 
			
		||||
                  THCState *state,
 | 
			
		||||
 | 
			
		||||
@ -8,28 +8,28 @@
 | 
			
		||||
// (borrowed from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu)
 | 
			
		||||
template <typename Dtype>
 | 
			
		||||
__launch_bounds__(CUDA_NUM_THREADS)
 | 
			
		||||
__global__ void im2col_kernel(const int n, const Dtype* data_im,
 | 
			
		||||
                              const int height, const int width,
 | 
			
		||||
                              const int ksize_h, const int ksize_w,
 | 
			
		||||
                              const int pad_h, const int pad_w,
 | 
			
		||||
                              const int stride_h, const int stride_w,
 | 
			
		||||
                              const int dilation_h, const int dilation_w,
 | 
			
		||||
                              const int height_col, const int width_col,
 | 
			
		||||
__global__ void im2col_kernel(const int64_t n, const Dtype* data_im,
 | 
			
		||||
                              const int64_t height, const int64_t width,
 | 
			
		||||
                              const int64_t ksize_h, const int64_t ksize_w,
 | 
			
		||||
                              const int64_t pad_h, const int64_t pad_w,
 | 
			
		||||
                              const int64_t stride_h, const int64_t stride_w,
 | 
			
		||||
                              const int64_t dilation_h, const int64_t dilation_w,
 | 
			
		||||
                              const int64_t height_col, const int64_t width_col,
 | 
			
		||||
    Dtype* data_col) {
 | 
			
		||||
  CUDA_KERNEL_LOOP(index, n) {
 | 
			
		||||
    int w_out = index % width_col;
 | 
			
		||||
    int64_t w_out = index % width_col;
 | 
			
		||||
    index /= width_col;
 | 
			
		||||
    int h_out = index % height_col;
 | 
			
		||||
    int channel_in = index / height_col;
 | 
			
		||||
    int channel_out = channel_in * ksize_h * ksize_w;
 | 
			
		||||
    int h_in = h_out * stride_h - pad_h;
 | 
			
		||||
    int w_in = w_out * stride_w - pad_w;
 | 
			
		||||
    int64_t h_out = index % height_col;
 | 
			
		||||
    int64_t channel_in = index / height_col;
 | 
			
		||||
    int64_t channel_out = channel_in * ksize_h * ksize_w;
 | 
			
		||||
    int64_t h_in = h_out * stride_h - pad_h;
 | 
			
		||||
    int64_t w_in = w_out * stride_w - pad_w;
 | 
			
		||||
    data_col += (channel_out * height_col + h_out) * width_col + w_out;
 | 
			
		||||
    data_im += (channel_in * height + h_in) * width + w_in;
 | 
			
		||||
    for (int i = 0; i < ksize_h; ++i) {
 | 
			
		||||
      for (int j = 0; j < ksize_w; ++j) {
 | 
			
		||||
        int h = h_in + i * dilation_h;
 | 
			
		||||
        int w = w_in + j * dilation_w;
 | 
			
		||||
    for (int64_t i = 0; i < ksize_h; ++i) {
 | 
			
		||||
      for (int64_t j = 0; j < ksize_w; ++j) {
 | 
			
		||||
        int64_t h = h_in + i * dilation_h;
 | 
			
		||||
        int64_t w = w_in + j * dilation_w;
 | 
			
		||||
        *data_col = (h >= 0 && w >= 0 && h < height && w < width) ?
 | 
			
		||||
          data_im[i * dilation_h * width + j * dilation_w] : ScalarConvert<int, Dtype>::to(0);
 | 
			
		||||
        data_col += height_col * width_col;
 | 
			
		||||
@ -39,15 +39,15 @@ __global__ void im2col_kernel(const int n, const Dtype* data_im,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <typename Dtype>
 | 
			
		||||
void im2col(cudaStream_t stream, const Dtype* data_im, const int channels,
 | 
			
		||||
            const int height, const int width,
 | 
			
		||||
            const int height_col, const int width_col,
 | 
			
		||||
            const int ksize_h, const int ksize_w, const int pad_h,
 | 
			
		||||
            const int pad_w, const int stride_h, const int stride_w,
 | 
			
		||||
            const int dilation_h, const int dilation_w, Dtype* data_col) {
 | 
			
		||||
void im2col(cudaStream_t stream, const Dtype* data_im, const int64_t channels,
 | 
			
		||||
            const int64_t height, const int64_t width,
 | 
			
		||||
            const int64_t height_col, const int64_t width_col,
 | 
			
		||||
            const int64_t ksize_h, const int64_t ksize_w, const int64_t pad_h,
 | 
			
		||||
            const int64_t pad_w, const int64_t stride_h, const int64_t stride_w,
 | 
			
		||||
            const int64_t dilation_h, const int64_t dilation_w, Dtype* data_col) {
 | 
			
		||||
  // We are going to launch channels * height_col * width_col kernels, each
 | 
			
		||||
  // kernel responsible for copying a single-channel grid.
 | 
			
		||||
  int num_kernels = channels * height_col * width_col;
 | 
			
		||||
  int64_t num_kernels = channels * height_col * width_col;
 | 
			
		||||
  // Launch
 | 
			
		||||
  im2col_kernel <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>> (
 | 
			
		||||
      num_kernels, data_im, height, width, ksize_h, ksize_w,
 | 
			
		||||
@ -60,37 +60,37 @@ void im2col(cudaStream_t stream, const Dtype* data_im, const int channels,
 | 
			
		||||
 | 
			
		||||
template <typename Dtype, typename Acctype>
 | 
			
		||||
__launch_bounds__(CUDA_NUM_THREADS)
 | 
			
		||||
__global__ void col2im_kernel(const int n, const Dtype* data_col,
 | 
			
		||||
                                  const int height, const int width, const int channels,
 | 
			
		||||
                                  const int kernel_h, const int kernel_w,
 | 
			
		||||
                                  const int pad_h, const int pad_w,
 | 
			
		||||
                                  const int stride_h, const int stride_w,
 | 
			
		||||
                                  const int dilation_h, const int dilation_w,
 | 
			
		||||
                                  const int height_col, const int width_col,
 | 
			
		||||
__global__ void col2im_kernel(const int64_t n, const Dtype* data_col,
 | 
			
		||||
                                  const int64_t height, const int64_t width, const int64_t channels,
 | 
			
		||||
                                  const int64_t kernel_h, const int64_t kernel_w,
 | 
			
		||||
                                  const int64_t pad_h, const int64_t pad_w,
 | 
			
		||||
                                  const int64_t stride_h, const int64_t stride_w,
 | 
			
		||||
                                  const int64_t dilation_h, const int64_t dilation_w,
 | 
			
		||||
                                  const int64_t height_col, const int64_t width_col,
 | 
			
		||||
                                  Dtype* data_im) {
 | 
			
		||||
  CUDA_KERNEL_LOOP(index, n) {
 | 
			
		||||
    Acctype val = Acctype(0);
 | 
			
		||||
    const int w_im = index % width + pad_w;
 | 
			
		||||
    const int h_im = (index / width) % height + pad_h;
 | 
			
		||||
    const int c_im = index / (width * height);
 | 
			
		||||
    int kernel_extent_w = (kernel_w - 1) * dilation_w + 1;
 | 
			
		||||
    int kernel_extent_h = (kernel_h - 1) * dilation_h + 1;
 | 
			
		||||
    const int64_t w_im = index % width + pad_w;
 | 
			
		||||
    const int64_t h_im = (index / width) % height + pad_h;
 | 
			
		||||
    const int64_t c_im = index / (width * height);
 | 
			
		||||
    int64_t kernel_extent_w = (kernel_w - 1) * dilation_w + 1;
 | 
			
		||||
    int64_t kernel_extent_h = (kernel_h - 1) * dilation_h + 1;
 | 
			
		||||
    // compute the start and end of the output
 | 
			
		||||
    const int w_col_start =
 | 
			
		||||
    const int64_t w_col_start =
 | 
			
		||||
      (w_im < kernel_extent_w) ? 0 : (w_im - kernel_extent_w) / stride_w + 1;
 | 
			
		||||
    const int w_col_end = min(w_im / stride_w + 1, width_col);
 | 
			
		||||
    const int h_col_start =
 | 
			
		||||
    const int64_t w_col_end = min(w_im / stride_w + 1, width_col);
 | 
			
		||||
    const int64_t h_col_start =
 | 
			
		||||
      (h_im < kernel_extent_h) ? 0 : (h_im - kernel_extent_h) / stride_h + 1;
 | 
			
		||||
    const int h_col_end = min(h_im / stride_h + 1, height_col);
 | 
			
		||||
    const int64_t h_col_end = min(h_im / stride_h + 1, height_col);
 | 
			
		||||
    // TODO: use LCM of stride and dilation to avoid unnecessary loops
 | 
			
		||||
    for (int h_col = h_col_start; h_col < h_col_end; h_col += 1) {
 | 
			
		||||
      for (int w_col = w_col_start; w_col < w_col_end; w_col += 1) {
 | 
			
		||||
        int h_k = (h_im - h_col * stride_h);
 | 
			
		||||
        int w_k = (w_im - w_col * stride_w);
 | 
			
		||||
    for (int64_t h_col = h_col_start; h_col < h_col_end; h_col += 1) {
 | 
			
		||||
      for (int64_t w_col = w_col_start; w_col < w_col_end; w_col += 1) {
 | 
			
		||||
        int64_t h_k = (h_im - h_col * stride_h);
 | 
			
		||||
        int64_t w_k = (w_im - w_col * stride_w);
 | 
			
		||||
        if (h_k % dilation_h == 0 && w_k % dilation_w == 0) {
 | 
			
		||||
          h_k /= dilation_h;
 | 
			
		||||
          w_k /= dilation_w;
 | 
			
		||||
          int data_col_index = (((c_im * kernel_h + h_k) * kernel_w + w_k) *
 | 
			
		||||
          int64_t data_col_index = (((c_im * kernel_h + h_k) * kernel_w + w_k) *
 | 
			
		||||
                                height_col + h_col) * width_col + w_col;
 | 
			
		||||
          val += data_col[data_col_index];
 | 
			
		||||
        }
 | 
			
		||||
@ -101,21 +101,21 @@ __global__ void col2im_kernel(const int n, const Dtype* data_col,
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
template <typename Dtype, typename Acctype>
 | 
			
		||||
void col2im(cudaStream_t stream, const Dtype* data_col, const int channels,
 | 
			
		||||
            const int height, const int width,
 | 
			
		||||
            const int output_height, const int output_width,
 | 
			
		||||
            const int patch_h, const int patch_w, const int pad_h,
 | 
			
		||||
            const int pad_w, const int stride_h, const int stride_w,
 | 
			
		||||
            const int dilation_h, const int dilation_w, Dtype* data_im);
 | 
			
		||||
void col2im(cudaStream_t stream, const Dtype* data_col, const int64_t channels,
 | 
			
		||||
            const int64_t height, const int64_t width,
 | 
			
		||||
            const int64_t output_height, const int64_t output_width,
 | 
			
		||||
            const int64_t patch_h, const int64_t patch_w, const int64_t pad_h,
 | 
			
		||||
            const int64_t pad_w, const int64_t stride_h, const int64_t stride_w,
 | 
			
		||||
            const int64_t dilation_h, const int64_t dilation_w, Dtype* data_im);
 | 
			
		||||
 | 
			
		||||
template <typename Dtype, typename Acctype>
 | 
			
		||||
void col2im(cudaStream_t stream, const Dtype* data_col, const int channels,
 | 
			
		||||
            const int height, const int width,
 | 
			
		||||
            const int output_height, const int output_width,
 | 
			
		||||
            const int patch_h, const int patch_w, const int pad_h,
 | 
			
		||||
            const int pad_w, const int stride_h, const int stride_w,
 | 
			
		||||
            const int dilation_h, const int dilation_w, Dtype* data_im) {
 | 
			
		||||
  int num_kernels = channels * height * width;
 | 
			
		||||
void col2im(cudaStream_t stream, const Dtype* data_col, const int64_t channels,
 | 
			
		||||
            const int64_t height, const int64_t width,
 | 
			
		||||
            const int64_t output_height, const int64_t output_width,
 | 
			
		||||
            const int64_t patch_h, const int64_t patch_w, const int64_t pad_h,
 | 
			
		||||
            const int64_t pad_w, const int64_t stride_h, const int64_t stride_w,
 | 
			
		||||
            const int64_t dilation_h, const int64_t dilation_w, Dtype* data_im) {
 | 
			
		||||
  int64_t num_kernels = channels * height * width;
 | 
			
		||||
  // To avoid involving atomic operations, we will launch one kernel per
 | 
			
		||||
  // bottom dimension, and then in the kernel add up the top dimensions.
 | 
			
		||||
  col2im_kernel<Dtype, Acctype> <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>> (
 | 
			
		||||
 | 
			
		||||
@ -54,25 +54,25 @@
 | 
			
		||||
//
 | 
			
		||||
// ALSO do vol2col
 | 
			
		||||
 | 
			
		||||
static void THNN_(im2col)(const real* data_im, const int channels,
 | 
			
		||||
      const int height, const int width,
 | 
			
		||||
      const int output_height, const int output_width,
 | 
			
		||||
      const int kernel_h, const int kernel_w,
 | 
			
		||||
      const int pad_h, const int pad_w,
 | 
			
		||||
      const int stride_h, const int stride_w,
 | 
			
		||||
      const int dilation_h, const int dilation_w,
 | 
			
		||||
static void THNN_(im2col)(const real* data_im, const int64_t channels,
 | 
			
		||||
      const int64_t height, const int64_t width,
 | 
			
		||||
      const int64_t output_height, const int64_t output_width,
 | 
			
		||||
      const int64_t kernel_h, const int64_t kernel_w,
 | 
			
		||||
      const int64_t pad_h, const int64_t pad_w,
 | 
			
		||||
      const int64_t stride_h, const int64_t stride_w,
 | 
			
		||||
      const int64_t dilation_h, const int64_t dilation_w,
 | 
			
		||||
      real* data_col) {
 | 
			
		||||
  const int height_col = output_height;
 | 
			
		||||
  const int width_col = output_width;
 | 
			
		||||
  const int channels_col = channels * kernel_h * kernel_w;
 | 
			
		||||
  for (int c_col = 0; c_col < channels_col; ++c_col) {
 | 
			
		||||
    int w_offset = c_col % kernel_w;
 | 
			
		||||
    int h_offset = (c_col / kernel_w) % kernel_h;
 | 
			
		||||
    int c_im = c_col / kernel_h / kernel_w;
 | 
			
		||||
    for (int h_col = 0; h_col < height_col; ++h_col) {
 | 
			
		||||
      int h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
 | 
			
		||||
      for (int w_col = 0; w_col < width_col; ++w_col) {
 | 
			
		||||
        int w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
 | 
			
		||||
  const int64_t height_col = output_height;
 | 
			
		||||
  const int64_t width_col = output_width;
 | 
			
		||||
  const int64_t channels_col = channels * kernel_h * kernel_w;
 | 
			
		||||
  for (int64_t c_col = 0; c_col < channels_col; ++c_col) {
 | 
			
		||||
    int64_t w_offset = c_col % kernel_w;
 | 
			
		||||
    int64_t h_offset = (c_col / kernel_w) % kernel_h;
 | 
			
		||||
    int64_t c_im = c_col / kernel_h / kernel_w;
 | 
			
		||||
    for (int64_t h_col = 0; h_col < height_col; ++h_col) {
 | 
			
		||||
      int64_t h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
 | 
			
		||||
      for (int64_t w_col = 0; w_col < width_col; ++w_col) {
 | 
			
		||||
        int64_t w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
 | 
			
		||||
        data_col[(c_col * height_col + h_col) * width_col + w_col] =
 | 
			
		||||
          (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
 | 
			
		||||
          data_im[(c_im * height + h_im) * width + w_im] : 0;
 | 
			
		||||
@ -81,26 +81,26 @@ static void THNN_(im2col)(const real* data_im, const int channels,
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
static void THNN_(col2im)(const real* data_col, const int channels,
 | 
			
		||||
      const int height, const int width,
 | 
			
		||||
      const int output_height, const int output_width,
 | 
			
		||||
      const int kernel_h, const int kernel_w,
 | 
			
		||||
      const int pad_h, const int pad_w,
 | 
			
		||||
      const int stride_h, const int stride_w,
 | 
			
		||||
      const int dilation_h, const int dilation_w,
 | 
			
		||||
static void THNN_(col2im)(const real* data_col, const int64_t channels,
 | 
			
		||||
      const int64_t height, const int64_t width,
 | 
			
		||||
      const int64_t output_height, const int64_t output_width,
 | 
			
		||||
      const int64_t kernel_h, const int64_t kernel_w,
 | 
			
		||||
      const int64_t pad_h, const int64_t pad_w,
 | 
			
		||||
      const int64_t stride_h, const int64_t stride_w,
 | 
			
		||||
      const int64_t dilation_h, const int64_t dilation_w,
 | 
			
		||||
      real* data_im) {
 | 
			
		||||
  memset(data_im, 0, sizeof(real) * height * width * channels);
 | 
			
		||||
  const int height_col = output_height;
 | 
			
		||||
  const int width_col = output_width;
 | 
			
		||||
  const int channels_col = channels * kernel_h * kernel_w;
 | 
			
		||||
  for (int c_col = 0; c_col < channels_col; ++c_col) {
 | 
			
		||||
    int w_offset = c_col % kernel_w;
 | 
			
		||||
    int h_offset = (c_col / kernel_w) % kernel_h;
 | 
			
		||||
    int c_im = c_col / kernel_h / kernel_w;
 | 
			
		||||
    for (int h_col = 0; h_col < height_col; ++h_col) {
 | 
			
		||||
      int h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
 | 
			
		||||
      for (int w_col = 0; w_col < width_col; ++w_col) {
 | 
			
		||||
        int w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
 | 
			
		||||
  const int64_t height_col = output_height;
 | 
			
		||||
  const int64_t width_col = output_width;
 | 
			
		||||
  const int64_t channels_col = channels * kernel_h * kernel_w;
 | 
			
		||||
  for (int64_t c_col = 0; c_col < channels_col; ++c_col) {
 | 
			
		||||
    int64_t w_offset = c_col % kernel_w;
 | 
			
		||||
    int64_t h_offset = (c_col / kernel_w) % kernel_h;
 | 
			
		||||
    int64_t c_im = c_col / kernel_h / kernel_w;
 | 
			
		||||
    for (int64_t h_col = 0; h_col < height_col; ++h_col) {
 | 
			
		||||
      int64_t h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
 | 
			
		||||
      for (int64_t w_col = 0; w_col < width_col; ++w_col) {
 | 
			
		||||
        int64_t w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
 | 
			
		||||
        if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width)
 | 
			
		||||
          data_im[(c_im * height + h_im) * width + w_im] +=
 | 
			
		||||
            data_col[(c_col * height_col + h_col) * width_col + w_col];
 | 
			
		||||
@ -113,9 +113,9 @@ static inline void THNN_(Col2Im_shapeCheck)(
 | 
			
		||||
                         THNNState *state,
 | 
			
		||||
                         THTensor *input,
 | 
			
		||||
                         THTensor *gradOutput,
 | 
			
		||||
                         int outputHeight, int outputWidth,
 | 
			
		||||
                         int kH, int kW, int dH, int dW,
 | 
			
		||||
                         int padH, int padW, int sH, int sW) {
 | 
			
		||||
                         int64_t outputHeight, int64_t outputWidth,
 | 
			
		||||
                         int64_t kH, int64_t kW, int64_t dH, int64_t dW,
 | 
			
		||||
                         int64_t padH, int64_t padW, int64_t sH, int64_t sW) {
 | 
			
		||||
 | 
			
		||||
  THArgCheck(kW > 0 && kH > 0, 6,
 | 
			
		||||
             "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
 | 
			
		||||
@ -124,11 +124,11 @@ static inline void THNN_(Col2Im_shapeCheck)(
 | 
			
		||||
  THArgCheck(dW > 0 && dH > 0, 8,
 | 
			
		||||
             "dilation should be greater than zero, but got dH: %d dW: %d", dH, dW);
 | 
			
		||||
 | 
			
		||||
  int ndim = THTensor_(nDimension)(input);
 | 
			
		||||
  int64_t ndim = THTensor_(nDimension)(input);
 | 
			
		||||
  THNN_ARGCHECK(!input->is_empty() && (ndim == 2 || ndim == 3), 2, input,
 | 
			
		||||
                "Expected non-empty 2D or 3D input tensor, but got input of shape %s");
 | 
			
		||||
 | 
			
		||||
  int batch_dim = (ndim == 3) ? 0 : -1;
 | 
			
		||||
  int64_t batch_dim = (ndim == 3) ? 0 : -1;
 | 
			
		||||
  int64_t nInputPlane  = input->size[batch_dim + 1];
 | 
			
		||||
 | 
			
		||||
  if (nInputPlane % (kW * kH) != 0) {
 | 
			
		||||
@ -161,11 +161,11 @@ void THNN_(Col2Im_updateOutput)(
 | 
			
		||||
           THNNState *state,
 | 
			
		||||
           THTensor *input,
 | 
			
		||||
           THTensor *output,
 | 
			
		||||
           int outputHeight, int outputWidth,
 | 
			
		||||
           int kH, int kW,
 | 
			
		||||
           int dH, int dW,
 | 
			
		||||
           int padH, int padW,
 | 
			
		||||
           int sH, int sW) {
 | 
			
		||||
           int64_t outputHeight, int64_t outputWidth,
 | 
			
		||||
           int64_t kH, int64_t kW,
 | 
			
		||||
           int64_t dH, int64_t dW,
 | 
			
		||||
           int64_t padH, int64_t padW,
 | 
			
		||||
           int64_t sH, int64_t sW) {
 | 
			
		||||
 | 
			
		||||
  THNN_(Col2Im_shapeCheck)(state, input, NULL, outputHeight, outputWidth,
 | 
			
		||||
                           kH, kW, dH, dW, padH, padW, sH, sW);
 | 
			
		||||
@ -189,10 +189,10 @@ void THNN_(Col2Im_updateOutput)(
 | 
			
		||||
  THTensor *input_n = THTensor_(new)();
 | 
			
		||||
  THTensor *output_n = THTensor_(new)();
 | 
			
		||||
 | 
			
		||||
  int height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
 | 
			
		||||
  int width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
 | 
			
		||||
  int64_t height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
 | 
			
		||||
  int64_t width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
 | 
			
		||||
 | 
			
		||||
  for (int elt = 0; elt < batchSize; elt++) {
 | 
			
		||||
  for (int64_t elt = 0; elt < batchSize; elt++) {
 | 
			
		||||
    THTensor_(select)(input_n, input, 0, elt);
 | 
			
		||||
    THTensor_(select)(output_n, output, 0, elt);
 | 
			
		||||
 | 
			
		||||
@ -220,10 +220,10 @@ void THNN_(Col2Im_updateGradInput)(
 | 
			
		||||
           THNNState *state,
 | 
			
		||||
           THTensor *gradOutput,
 | 
			
		||||
           THTensor *gradInput,
 | 
			
		||||
           int kH, int kW,
 | 
			
		||||
           int dH, int dW,
 | 
			
		||||
           int padH, int padW,
 | 
			
		||||
           int sH, int sW) {
 | 
			
		||||
           int64_t kH, int64_t kW,
 | 
			
		||||
           int64_t dH, int64_t dW,
 | 
			
		||||
           int64_t padH, int64_t padW,
 | 
			
		||||
           int64_t sH, int64_t sW) {
 | 
			
		||||
 | 
			
		||||
  THNN_(Im2Col_updateOutput)(state, gradOutput, gradInput,
 | 
			
		||||
                             kH, kW, dH, dW, padH, padW, sH, sW);
 | 
			
		||||
 | 
			
		||||
@ -6,8 +6,8 @@ static inline void THNN_(Im2Col_shapeCheck)(
 | 
			
		||||
                         THNNState *state,
 | 
			
		||||
                         THTensor *input,
 | 
			
		||||
                         THTensor *gradOutput,
 | 
			
		||||
                         int kH, int kW, int dH, int dW,
 | 
			
		||||
                         int padH, int padW, int sH, int sW) {
 | 
			
		||||
                         int64_t kH, int64_t kW, int64_t dH, int64_t dW,
 | 
			
		||||
                         int64_t padH, int64_t padW, int64_t sH, int64_t sW) {
 | 
			
		||||
 | 
			
		||||
  THArgCheck(kW > 0 && kH > 0, 4,
 | 
			
		||||
             "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
 | 
			
		||||
@ -16,21 +16,21 @@ static inline void THNN_(Im2Col_shapeCheck)(
 | 
			
		||||
  THArgCheck(sW > 0 && sH > 0, 10,
 | 
			
		||||
             "stride should be greater than zero, but got sH: %d sW: %d", sH, sW);
 | 
			
		||||
 | 
			
		||||
  int ndim = THTensor_(nDimension)(input);
 | 
			
		||||
  int64_t ndim = THTensor_(nDimension)(input);
 | 
			
		||||
  THNN_ARGCHECK(!input->is_empty() && (ndim == 3 || ndim == 4), 2, input,
 | 
			
		||||
                "Expected non-empty 3D or 4D input tensor, but got input of shape %s");
 | 
			
		||||
 | 
			
		||||
  int dim_batch = 0;
 | 
			
		||||
  int64_t dim_batch = 0;
 | 
			
		||||
  if (ndim == 3) {
 | 
			
		||||
    dim_batch = -1;
 | 
			
		||||
  }
 | 
			
		||||
  int nInputPlane  = THTensor_(size)(input, dim_batch + 1);
 | 
			
		||||
  int inputHeight  = THTensor_(size)(input, dim_batch + 2);
 | 
			
		||||
  int inputWidth   = THTensor_(size)(input, dim_batch + 3);
 | 
			
		||||
  int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
 | 
			
		||||
  int outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
 | 
			
		||||
  int nOutputPlane = nInputPlane * kW * kH;
 | 
			
		||||
  int outputLength = outputHeight * outputWidth;
 | 
			
		||||
  int64_t nInputPlane  = THTensor_(size)(input, dim_batch + 1);
 | 
			
		||||
  int64_t inputHeight  = THTensor_(size)(input, dim_batch + 2);
 | 
			
		||||
  int64_t inputWidth   = THTensor_(size)(input, dim_batch + 3);
 | 
			
		||||
  int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
 | 
			
		||||
  int64_t outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
 | 
			
		||||
  int64_t nOutputPlane = nInputPlane * kW * kH;
 | 
			
		||||
  int64_t outputLength = outputHeight * outputWidth;
 | 
			
		||||
 | 
			
		||||
  if (outputHeight < 1 || outputWidth < 1) {
 | 
			
		||||
    THError("Given input with spatial size (%d, %d), kernel_size=(%d, %d), "
 | 
			
		||||
@ -46,10 +46,10 @@ void THNN_(Im2Col_updateOutput)(
 | 
			
		||||
           THNNState *state,
 | 
			
		||||
           THTensor *input,
 | 
			
		||||
           THTensor *output,
 | 
			
		||||
           int kH, int kW,
 | 
			
		||||
           int dH, int dW,
 | 
			
		||||
           int padH, int padW,
 | 
			
		||||
           int sH, int sW) {
 | 
			
		||||
           int64_t kH, int64_t kW,
 | 
			
		||||
           int64_t dH, int64_t dW,
 | 
			
		||||
           int64_t padH, int64_t padW,
 | 
			
		||||
           int64_t sH, int64_t sW) {
 | 
			
		||||
 | 
			
		||||
  THNN_(Im2Col_shapeCheck)(state, input, NULL, kH, kW, dH, dW, padH, padW, sH, sW);
 | 
			
		||||
 | 
			
		||||
@ -60,15 +60,15 @@ void THNN_(Im2Col_updateOutput)(
 | 
			
		||||
    THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
 | 
			
		||||
  }
 | 
			
		||||
 | 
			
		||||
  int batchSize    = THTensor_(size)(input, 0);
 | 
			
		||||
  int nInputPlane  = THTensor_(size)(input, 1);
 | 
			
		||||
  int inputHeight  = THTensor_(size)(input, 2);
 | 
			
		||||
  int inputWidth   = THTensor_(size)(input, 3);
 | 
			
		||||
  int64_t batchSize    = THTensor_(size)(input, 0);
 | 
			
		||||
  int64_t nInputPlane  = THTensor_(size)(input, 1);
 | 
			
		||||
  int64_t inputHeight  = THTensor_(size)(input, 2);
 | 
			
		||||
  int64_t inputWidth   = THTensor_(size)(input, 3);
 | 
			
		||||
 | 
			
		||||
  int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
 | 
			
		||||
  int outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
 | 
			
		||||
  int nOutputPlane = nInputPlane * kW * kH;
 | 
			
		||||
  int outputLength = outputHeight * outputWidth;
 | 
			
		||||
  int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
 | 
			
		||||
  int64_t outputWidth  = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
 | 
			
		||||
  int64_t nOutputPlane = nInputPlane * kW * kH;
 | 
			
		||||
  int64_t outputLength = outputHeight * outputWidth;
 | 
			
		||||
 | 
			
		||||
  THTensor_(resize3d)(output, batchSize, nOutputPlane, outputLength);
 | 
			
		||||
  THTensor_(zero)(output);
 | 
			
		||||
@ -76,7 +76,7 @@ void THNN_(Im2Col_updateOutput)(
 | 
			
		||||
  THTensor *input_n = THTensor_(new)();
 | 
			
		||||
  THTensor *output_n = THTensor_(new)();
 | 
			
		||||
 | 
			
		||||
  for (int elt = 0; elt < batchSize; elt++) {
 | 
			
		||||
  for (int64_t elt = 0; elt < batchSize; elt++) {
 | 
			
		||||
    THTensor_(select)(input_n, input, 0, elt);
 | 
			
		||||
    THTensor_(select)(output_n, output, 0, elt);
 | 
			
		||||
 | 
			
		||||
@ -102,11 +102,11 @@ void THNN_(Im2Col_updateGradInput)(
 | 
			
		||||
           THNNState *state,
 | 
			
		||||
           THTensor *gradOutput,
 | 
			
		||||
           THTensor *gradInput,
 | 
			
		||||
           int inputHeight, int inputWidth,
 | 
			
		||||
           int kH, int kW,
 | 
			
		||||
           int dH, int dW,
 | 
			
		||||
           int padH, int padW,
 | 
			
		||||
           int sH, int sW) {
 | 
			
		||||
           int64_t inputHeight, int64_t inputWidth,
 | 
			
		||||
           int64_t kH, int64_t kW,
 | 
			
		||||
           int64_t dH, int64_t dW,
 | 
			
		||||
           int64_t padH, int64_t padW,
 | 
			
		||||
           int64_t sH, int64_t sW) {
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  THNN_(Col2Im_updateOutput)(state, gradOutput, gradInput,
 | 
			
		||||
 | 
			
		||||
@ -220,8 +220,8 @@ void THNN_(SpatialDilatedConvolution_updateGradInput)(
 | 
			
		||||
     dilationH, dilationW, 0);
 | 
			
		||||
 | 
			
		||||
  // Params
 | 
			
		||||
  int nInputPlane = weight->size[1];
 | 
			
		||||
  int nOutputPlane = weight->size[0];
 | 
			
		||||
  int64_t nInputPlane = weight->size[1];
 | 
			
		||||
  int64_t nOutputPlane = weight->size[0];
 | 
			
		||||
 | 
			
		||||
  input = THTensor_(newContiguous)(input);
 | 
			
		||||
  weight = THTensor_(newContiguous)(weight);
 | 
			
		||||
 | 
			
		||||
@ -221,8 +221,8 @@ void THNN_(SpatialFullDilatedConvolution_updateGradInput)(
 | 
			
		||||
    (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW,
 | 
			
		||||
     dilationH, dilationW, adjH, adjW, 0);
 | 
			
		||||
 | 
			
		||||
  int nInputPlane = THTensor_(size)(weight,0);
 | 
			
		||||
  int nOutputPlane = THTensor_(size)(weight,1);
 | 
			
		||||
  int64_t nInputPlane = THTensor_(size)(weight,0);
 | 
			
		||||
  int64_t nOutputPlane = THTensor_(size)(weight,1);
 | 
			
		||||
 | 
			
		||||
  input = THTensor_(newContiguous)(input);
 | 
			
		||||
  gradOutput = THTensor_(newContiguous)(gradOutput);
 | 
			
		||||
@ -328,7 +328,7 @@ void THNN_(SpatialFullDilatedConvolution_accGradParameters)(
 | 
			
		||||
    (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW,
 | 
			
		||||
     dilationH, dilationW, adjH, adjW, 1);
 | 
			
		||||
 | 
			
		||||
  int nOutputPlane;
 | 
			
		||||
  int64_t nOutputPlane;
 | 
			
		||||
  if (gradWeight) {
 | 
			
		||||
    nOutputPlane = THTensor_(size)(gradWeight, 1);
 | 
			
		||||
  } else if (gradBias) {
 | 
			
		||||
 | 
			
		||||
@ -147,39 +147,39 @@ TH_API void THNN_(Im2Col_updateOutput)(
 | 
			
		||||
          THNNState *state,
 | 
			
		||||
          THTensor *input,
 | 
			
		||||
          THTensor *output,
 | 
			
		||||
          int kH, int kW,
 | 
			
		||||
          int dH, int dW,
 | 
			
		||||
          int padH, int padW,
 | 
			
		||||
          int sH, int sW);
 | 
			
		||||
          int64_t kH, int64_t kW,
 | 
			
		||||
          int64_t dH, int64_t dW,
 | 
			
		||||
          int64_t padH, int64_t padW,
 | 
			
		||||
          int64_t sH, int64_t sW);
 | 
			
		||||
 | 
			
		||||
TH_API void THNN_(Im2Col_updateGradInput)(
 | 
			
		||||
          THNNState *state,
 | 
			
		||||
          THTensor *gradOutput,
 | 
			
		||||
          THTensor *gradInput,
 | 
			
		||||
          int inputHeight, int inputWidth,
 | 
			
		||||
          int kH, int kW,
 | 
			
		||||
          int dH, int dW,
 | 
			
		||||
          int padH, int padW,
 | 
			
		||||
          int sH, int sW);
 | 
			
		||||
          int64_t inputHeight, int64_t inputWidth,
 | 
			
		||||
          int64_t kH, int64_t kW,
 | 
			
		||||
          int64_t dH, int64_t dW,
 | 
			
		||||
          int64_t padH, int64_t padW,
 | 
			
		||||
          int64_t sH, int64_t sW);
 | 
			
		||||
 | 
			
		||||
TH_API void THNN_(Col2Im_updateOutput)(
 | 
			
		||||
          THNNState *state,
 | 
			
		||||
          THTensor *input,
 | 
			
		||||
          THTensor *output,
 | 
			
		||||
          int outputHeight, int outputWidth,
 | 
			
		||||
          int kH, int kW,
 | 
			
		||||
          int dH, int dW,
 | 
			
		||||
          int padH, int padW,
 | 
			
		||||
          int sH, int sW);
 | 
			
		||||
          int64_t outputHeight, int64_t outputWidth,
 | 
			
		||||
          int64_t kH, int64_t kW,
 | 
			
		||||
          int64_t dH, int64_t dW,
 | 
			
		||||
          int64_t padH, int64_t padW,
 | 
			
		||||
          int64_t sH, int64_t sW);
 | 
			
		||||
 | 
			
		||||
TH_API void THNN_(Col2Im_updateGradInput)(
 | 
			
		||||
          THNNState *state,
 | 
			
		||||
          THTensor *gradOutput,
 | 
			
		||||
          THTensor *gradInput,
 | 
			
		||||
          int kH, int kW,
 | 
			
		||||
          int dH, int dW,
 | 
			
		||||
          int padH, int padW,
 | 
			
		||||
          int sH, int sW);
 | 
			
		||||
          int64_t kH, int64_t kW,
 | 
			
		||||
          int64_t dH, int64_t dW,
 | 
			
		||||
          int64_t padH, int64_t padW,
 | 
			
		||||
          int64_t sH, int64_t sW);
 | 
			
		||||
 | 
			
		||||
TH_API void THNN_(L1Cost_updateOutput)(
 | 
			
		||||
          THNNState *state,            // library's state
 | 
			
		||||
 | 
			
		||||
@ -224,11 +224,7 @@ if(USE_CUDA)
 | 
			
		||||
  # it. We will then manually add the cudart library as interface libs.
 | 
			
		||||
  set(__tmp ${CUDA_LIBRARIES})
 | 
			
		||||
  set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES})
 | 
			
		||||
  if(CAFFE2_STATIC_LINK_CUDA)
 | 
			
		||||
    torch_cuda_based_add_library(caffe2_gpu STATIC ${Caffe2_GPU_SRCS})
 | 
			
		||||
  else()
 | 
			
		||||
    torch_cuda_based_add_library(caffe2_gpu ${Caffe2_GPU_SRCS})
 | 
			
		||||
  endif()
 | 
			
		||||
  torch_cuda_based_add_library(caffe2_gpu ${Caffe2_GPU_SRCS})
 | 
			
		||||
  set(CUDA_LIBRARIES ${__tmp})
 | 
			
		||||
  target_link_libraries(caffe2_gpu INTERFACE caffe2::cudart)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1,4 +1,5 @@
 | 
			
		||||
#include "caffe2/utils/proto_wrap.h"
 | 
			
		||||
#include "caffe2/core/common.h"
 | 
			
		||||
 | 
			
		||||
#include <google/protobuf/stubs/common.h>
 | 
			
		||||
#include <google/protobuf/generated_message_util.h>
 | 
			
		||||
@ -8,7 +9,7 @@ namespace caffe {
 | 
			
		||||
// Caffe wrapper functions for protobuf's GetEmptyStringAlreadyInited() function
 | 
			
		||||
// used to avoid duplicated global variable in the case when protobuf
 | 
			
		||||
// is built with hidden visibility.
 | 
			
		||||
const ::std::string& GetEmptyStringAlreadyInited() {
 | 
			
		||||
CAFFE2_API const ::std::string& GetEmptyStringAlreadyInited() {
 | 
			
		||||
  return ::google::protobuf::internal::GetEmptyStringAlreadyInited();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -19,7 +20,7 @@ namespace ONNX_NAMESPACE {
 | 
			
		||||
// ONNX wrapper functions for protobuf's GetEmptyStringAlreadyInited() function
 | 
			
		||||
// used to avoid duplicated global variable in the case when protobuf
 | 
			
		||||
// is built with hidden visibility.
 | 
			
		||||
const ::std::string& GetEmptyStringAlreadyInited() {
 | 
			
		||||
CAFFE2_API const ::std::string& GetEmptyStringAlreadyInited() {
 | 
			
		||||
  return ::google::protobuf::internal::GetEmptyStringAlreadyInited();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
@ -30,7 +31,7 @@ namespace caffe2 {
 | 
			
		||||
// Caffe2 wrapper functions for protobuf's GetEmptyStringAlreadyInited() function
 | 
			
		||||
// used to avoid duplicated global variable in the case when protobuf
 | 
			
		||||
// is built with hidden visibility.
 | 
			
		||||
const ::std::string& GetEmptyStringAlreadyInited() {
 | 
			
		||||
CAFFE2_API const ::std::string& GetEmptyStringAlreadyInited() {
 | 
			
		||||
  return ::google::protobuf::internal::GetEmptyStringAlreadyInited();
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1102,6 +1102,11 @@ Linear functions
 | 
			
		||||
 | 
			
		||||
.. autofunction:: linear
 | 
			
		||||
 | 
			
		||||
:hidden:`bilinear`
 | 
			
		||||
~~~~~~~~~~~~~~~~
 | 
			
		||||
 | 
			
		||||
.. autofunction:: bilinear
 | 
			
		||||
 | 
			
		||||
Dropout functions
 | 
			
		||||
-----------------
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -337,6 +337,7 @@ view of a storage and defines numeric operations on it.
 | 
			
		||||
   .. automethod:: rsqrt
 | 
			
		||||
   .. automethod:: rsqrt_
 | 
			
		||||
   .. automethod:: scatter_
 | 
			
		||||
   .. automethod:: scatter_add_
 | 
			
		||||
   .. automethod:: select
 | 
			
		||||
   .. automethod:: set_
 | 
			
		||||
   .. automethod:: share_memory_
 | 
			
		||||
 | 
			
		||||
@ -251,6 +251,7 @@ Spectral Ops
 | 
			
		||||
 | 
			
		||||
Other Operations
 | 
			
		||||
~~~~~~~~~~~~~~~~~~~~~~
 | 
			
		||||
.. autofunction:: bincount
 | 
			
		||||
.. autofunction:: cross
 | 
			
		||||
.. autofunction:: diag
 | 
			
		||||
.. autofunction:: diagflat
 | 
			
		||||
@ -258,6 +259,7 @@ Other Operations
 | 
			
		||||
.. autofunction:: einsum
 | 
			
		||||
.. autofunction:: flip
 | 
			
		||||
.. autofunction:: histc
 | 
			
		||||
.. autofunction:: meshgrid
 | 
			
		||||
.. autofunction:: renorm
 | 
			
		||||
.. autofunction:: trace
 | 
			
		||||
.. autofunction:: tril
 | 
			
		||||
 | 
			
		||||
							
								
								
									
										4
									
								
								setup.py
									
									
									
									
									
								
							
							
						
						
									
										4
									
								
								setup.py
									
									
									
									
									
								
							@ -152,6 +152,8 @@ IS_LINUX = (platform.system() == 'Linux')
 | 
			
		||||
FULL_CAFFE2 = check_env_flag('FULL_CAFFE2')
 | 
			
		||||
BUILD_PYTORCH = check_env_flag('BUILD_PYTORCH')
 | 
			
		||||
 | 
			
		||||
USE_CUDA_STATIC_LINK = check_env_flag('USE_CUDA_STATIC_LINK')
 | 
			
		||||
 | 
			
		||||
NUM_JOBS = multiprocessing.cpu_count()
 | 
			
		||||
max_jobs = os.getenv("MAX_JOBS")
 | 
			
		||||
if max_jobs is not None:
 | 
			
		||||
@ -318,6 +320,8 @@ def build_libs(libs):
 | 
			
		||||
    if USE_CUDA:
 | 
			
		||||
        my_env["CUDA_BIN_PATH"] = CUDA_HOME
 | 
			
		||||
        build_libs_cmd += ['--use-cuda']
 | 
			
		||||
    if USE_CUDA_STATIC_LINK:
 | 
			
		||||
        build_libs_cmd += ['--cuda-static-link']
 | 
			
		||||
    if USE_ROCM:
 | 
			
		||||
        build_libs_cmd += ['--use-rocm']
 | 
			
		||||
    if USE_NNPACK:
 | 
			
		||||
 | 
			
		||||
@ -28,7 +28,7 @@ import errno
 | 
			
		||||
import torch
 | 
			
		||||
import torch.cuda
 | 
			
		||||
from torch._utils_internal import get_writable_path
 | 
			
		||||
from torch._six import string_classes
 | 
			
		||||
from torch._six import string_classes, inf
 | 
			
		||||
import torch.backends.cudnn
 | 
			
		||||
import torch.backends.mkl
 | 
			
		||||
 | 
			
		||||
@ -353,7 +353,7 @@ class TestCase(unittest.TestCase):
 | 
			
		||||
        elif isinstance(x, bool) and isinstance(y, bool):
 | 
			
		||||
            super(TestCase, self).assertEqual(x, y, message)
 | 
			
		||||
        elif isinstance(x, Number) and isinstance(y, Number):
 | 
			
		||||
            if abs(x) == float('inf') or abs(y) == float('inf'):
 | 
			
		||||
            if abs(x) == inf or abs(y) == inf:
 | 
			
		||||
                if allow_inf:
 | 
			
		||||
                    super(TestCase, self).assertEqual(x, y, message)
 | 
			
		||||
                else:
 | 
			
		||||
 | 
			
		||||
@ -10,6 +10,7 @@ from collections import OrderedDict
 | 
			
		||||
from itertools import product
 | 
			
		||||
from operator import mul, itemgetter
 | 
			
		||||
from functools import reduce, wraps
 | 
			
		||||
from torch._six import inf, nan
 | 
			
		||||
from torch.autograd.gradcheck import gradgradcheck, gradcheck
 | 
			
		||||
from torch.autograd.function import once_differentiable
 | 
			
		||||
from torch.autograd.profiler import profile
 | 
			
		||||
@ -1524,12 +1525,12 @@ class TestAutograd(TestCase):
 | 
			
		||||
        pyscalar = -12345.1
 | 
			
		||||
        f[0] = pyscalar
 | 
			
		||||
        self.assertEqual(float(f), pyscalar)
 | 
			
		||||
        f[0] = float('nan')
 | 
			
		||||
        f[0] = nan
 | 
			
		||||
        self.assertTrue(math.isnan(float(f)))
 | 
			
		||||
        f[0] = float('inf')
 | 
			
		||||
        self.assertEqual(float(f), float('inf'), allow_inf=True)
 | 
			
		||||
        f[0] = float('-inf')
 | 
			
		||||
        self.assertEqual(float(f), float('-inf'), allow_inf=True)
 | 
			
		||||
        f[0] = inf
 | 
			
		||||
        self.assertEqual(float(f), inf, allow_inf=True)
 | 
			
		||||
        f[0] = -inf
 | 
			
		||||
        self.assertEqual(float(f), -inf, allow_inf=True)
 | 
			
		||||
 | 
			
		||||
        # integral -> floating point
 | 
			
		||||
        # check we can convert something that loses precision
 | 
			
		||||
@ -1539,11 +1540,11 @@ class TestAutograd(TestCase):
 | 
			
		||||
        self.assertEqual(float(l), float(pyscalar))
 | 
			
		||||
 | 
			
		||||
        # floating point -> integral
 | 
			
		||||
        f[0] = float('nan')
 | 
			
		||||
        f[0] = nan
 | 
			
		||||
        self.assertRaises(ValueError, lambda: integral_conv(f[0]))
 | 
			
		||||
        f[0] = float('inf')
 | 
			
		||||
        f[0] = inf
 | 
			
		||||
        self.assertRaises(OverflowError, lambda: integral_conv(f[0]))
 | 
			
		||||
        f[0] = float('-inf')
 | 
			
		||||
        f[0] = -inf
 | 
			
		||||
        self.assertRaises(OverflowError, lambda: integral_conv(f[0]))
 | 
			
		||||
        f[0] = sys.float_info.max
 | 
			
		||||
        self.assertEqual(integral_conv(f), sys.float_info.max)
 | 
			
		||||
@ -1558,9 +1559,9 @@ class TestAutograd(TestCase):
 | 
			
		||||
        test_nonzero(l, -2, True)
 | 
			
		||||
        test_nonzero(f, 0.0, False)
 | 
			
		||||
        test_nonzero(f, sys.float_info.min, True)
 | 
			
		||||
        test_nonzero(f, float('nan'), bool(float('nan')))
 | 
			
		||||
        test_nonzero(f, float('inf'), bool(float('inf')))
 | 
			
		||||
        test_nonzero(f, float('-inf'), bool(float('-inf')))
 | 
			
		||||
        test_nonzero(f, nan, bool(nan))
 | 
			
		||||
        test_nonzero(f, inf, bool(inf))
 | 
			
		||||
        test_nonzero(f, -inf, bool(-inf))
 | 
			
		||||
 | 
			
		||||
    def test_pyscalar_conversions(self):
 | 
			
		||||
        self._test_pyscalar_conversions(lambda x: x, lambda x: int(x))
 | 
			
		||||
@ -2825,7 +2826,7 @@ method_tests = [
 | 
			
		||||
    ('std', (S,), (0, True, True), 'keepdim_dim_1d', [0]),
 | 
			
		||||
    ('renorm', (S, S, S), (2, 1, 0.5), 'dim', [1]),
 | 
			
		||||
    ('renorm', (S, S, S), (1, 2, 3), 'norm_1'),
 | 
			
		||||
    ('renorm', (S, S, S), (float('inf'), 2, 0.5), 'norm_inf'),
 | 
			
		||||
    ('renorm', (S, S, S), (inf, 2, 0.5), 'norm_inf'),
 | 
			
		||||
    ('repeat', (S,), (2,), 'single_number'),
 | 
			
		||||
    ('repeat', (), (2, 3), 'scalar'),
 | 
			
		||||
    ('repeat', (2, 2), (3, 2)),
 | 
			
		||||
@ -2917,7 +2918,7 @@ method_tests = [
 | 
			
		||||
    ('norm', (S, S), (0.5,), '0_5'),
 | 
			
		||||
    ('norm', (S, S), (1,), '1'),
 | 
			
		||||
    ('norm', (S, S), (3,), '3'),
 | 
			
		||||
    ('norm', (S, S), (float('inf'),), 'inf'),
 | 
			
		||||
    ('norm', (S, S), (inf,), 'inf'),
 | 
			
		||||
    ('norm', (S, S), (-1,), 'neg_1'),
 | 
			
		||||
    ('norm', (S, S), (-0.5,), 'neg_0_5'),
 | 
			
		||||
    ('norm', (S, S), (-1.5,), 'neg_1_5'),
 | 
			
		||||
 | 
			
		||||
@ -1,3 +1,4 @@
 | 
			
		||||
import os
 | 
			
		||||
import unittest
 | 
			
		||||
import sys
 | 
			
		||||
 | 
			
		||||
@ -15,7 +16,10 @@ import common
 | 
			
		||||
 | 
			
		||||
from torch.utils.cpp_extension import CUDA_HOME
 | 
			
		||||
TEST_CUDA = torch.cuda.is_available() and CUDA_HOME is not None
 | 
			
		||||
TEST_CUDNN = TEST_CUDA and torch.backends.cudnn.is_available()
 | 
			
		||||
TEST_CUDNN = False
 | 
			
		||||
if TEST_CUDA:
 | 
			
		||||
    CUDNN_HEADER_EXISTS = os.path.isfile(os.path.join(CUDA_HOME, 'include/cudnn.h'))
 | 
			
		||||
    TEST_CUDNN = TEST_CUDA and CUDNN_HEADER_EXISTS and torch.backends.cudnn.is_available()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestCppExtension(common.TestCase):
 | 
			
		||||
 | 
			
		||||
@ -12,6 +12,7 @@ import torch
 | 
			
		||||
import torch.cuda
 | 
			
		||||
import torch.cuda.comm as comm
 | 
			
		||||
from torch import multiprocessing as mp
 | 
			
		||||
from torch._six import inf, nan
 | 
			
		||||
 | 
			
		||||
from test_torch import TestTorch
 | 
			
		||||
from common import TestCase, get_gpu_type, to_gpu, freeze_rng_state, run_tests, \
 | 
			
		||||
@ -782,7 +783,7 @@ class TestCuda(TestCase):
 | 
			
		||||
            if not end0:
 | 
			
		||||
                gen1_max_times = torch.LongTensor(1).random_(0, 3)[0]
 | 
			
		||||
            else:
 | 
			
		||||
                gen1_max_times = float('inf')
 | 
			
		||||
                gen1_max_times = inf
 | 
			
		||||
            t = 0
 | 
			
		||||
            while t < gen1_max_times and not end1:
 | 
			
		||||
                end1 = advance(gen1, end1)
 | 
			
		||||
@ -901,7 +902,7 @@ class TestCuda(TestCase):
 | 
			
		||||
                 (lambda x: x.max(0)[0], 'max_dim')]
 | 
			
		||||
        for f, name in tests:
 | 
			
		||||
            a = torch.arange(25.0).view(5, 5)
 | 
			
		||||
            a[2, 2] = float('nan')
 | 
			
		||||
            a[2, 2] = nan
 | 
			
		||||
            actual = f(a.cuda()).cpu()
 | 
			
		||||
            expected = f(a).cpu()
 | 
			
		||||
            self.assertEqual(torch.isnan(actual), torch.isnan(expected), 'nans for {}'.format(name))
 | 
			
		||||
@ -1503,9 +1504,9 @@ class TestCuda(TestCase):
 | 
			
		||||
    def test_multinomial_invalid_probs_cuda(self):
 | 
			
		||||
        test_method = TestCuda._test_multinomial_invalid_probs_cuda
 | 
			
		||||
        self._spawn_method(test_method, torch.Tensor([0, -1]))
 | 
			
		||||
        self._spawn_method(test_method, torch.Tensor([0, float('inf')]))
 | 
			
		||||
        self._spawn_method(test_method, torch.Tensor([0, float('-inf')]))
 | 
			
		||||
        self._spawn_method(test_method, torch.Tensor([0, float('nan')]))
 | 
			
		||||
        self._spawn_method(test_method, torch.Tensor([0, inf]))
 | 
			
		||||
        self._spawn_method(test_method, torch.Tensor([0, -inf]))
 | 
			
		||||
        self._spawn_method(test_method, torch.Tensor([0, nan]))
 | 
			
		||||
 | 
			
		||||
    def test_broadcast(self):
 | 
			
		||||
        TestTorch._test_broadcast(self, lambda t: t.cuda())
 | 
			
		||||
@ -1686,7 +1687,6 @@ class TestCuda(TestCase):
 | 
			
		||||
        cpu_tensor = torch.tensor([-0.999999994, -1.999999994, -2.0000000111,
 | 
			
		||||
                                  -100.99999994, -1931.99999994, 0.000000111,
 | 
			
		||||
                                  -0.000000111, 0, -1, -2, -931])
 | 
			
		||||
        nan = float('nan')
 | 
			
		||||
        expected_errors = torch.tensor([0, 0, 0, 0, 0, 0, 0, nan, nan, nan, nan])
 | 
			
		||||
        gpu_tensor = cpu_tensor.cuda()
 | 
			
		||||
        cpu_out = cpu_tensor.digamma()
 | 
			
		||||
 | 
			
		||||
@ -30,6 +30,7 @@ from itertools import product
 | 
			
		||||
from random import shuffle
 | 
			
		||||
 | 
			
		||||
import torch
 | 
			
		||||
from torch._six import inf
 | 
			
		||||
from common import TestCase, run_tests, set_rng_seed, TEST_WITH_UBSAN
 | 
			
		||||
from common_cuda import TEST_CUDA
 | 
			
		||||
from torch.autograd import grad, gradcheck
 | 
			
		||||
@ -782,7 +783,7 @@ class TestDistributions(TestCase):
 | 
			
		||||
        s = 0.3
 | 
			
		||||
        self.assertEqual(Geometric(p).sample((8,)).size(), (8, 3))
 | 
			
		||||
        self.assertEqual(Geometric(1).sample(), 0)
 | 
			
		||||
        self.assertEqual(Geometric(1).log_prob(torch.tensor(1.)), -float('inf'), allow_inf=True)
 | 
			
		||||
        self.assertEqual(Geometric(1).log_prob(torch.tensor(1.)), -inf, allow_inf=True)
 | 
			
		||||
        self.assertEqual(Geometric(1).log_prob(torch.tensor(0.)), 0)
 | 
			
		||||
        self.assertFalse(Geometric(p).sample().requires_grad)
 | 
			
		||||
        self.assertEqual(Geometric(r).sample((8,)).size(), (8,))
 | 
			
		||||
@ -1162,8 +1163,8 @@ class TestDistributions(TestCase):
 | 
			
		||||
        uniform = Uniform(low_1d, high_1d)
 | 
			
		||||
        above_high = torch.tensor([4.0])
 | 
			
		||||
        below_low = torch.tensor([-1.0])
 | 
			
		||||
        self.assertEqual(uniform.log_prob(above_high).item(), -float('inf'), allow_inf=True)
 | 
			
		||||
        self.assertEqual(uniform.log_prob(below_low).item(), -float('inf'), allow_inf=True)
 | 
			
		||||
        self.assertEqual(uniform.log_prob(above_high).item(), -inf, allow_inf=True)
 | 
			
		||||
        self.assertEqual(uniform.log_prob(below_low).item(), -inf, allow_inf=True)
 | 
			
		||||
 | 
			
		||||
        # check cdf computation when value outside range
 | 
			
		||||
        self.assertEqual(uniform.cdf(below_low).item(), 0)
 | 
			
		||||
@ -1190,7 +1191,7 @@ class TestDistributions(TestCase):
 | 
			
		||||
        loc_1d = torch.zeros(1, requires_grad=True)
 | 
			
		||||
        scale_1d = torch.ones(1, requires_grad=True)
 | 
			
		||||
        self.assertTrue(is_all_nan(Cauchy(loc_1d, scale_1d).mean))
 | 
			
		||||
        self.assertEqual(Cauchy(loc_1d, scale_1d).variance, float('inf'), allow_inf=True)
 | 
			
		||||
        self.assertEqual(Cauchy(loc_1d, scale_1d).variance, inf, allow_inf=True)
 | 
			
		||||
        self.assertEqual(Cauchy(loc, scale).sample().size(), (5, 5))
 | 
			
		||||
        self.assertEqual(Cauchy(loc, scale).sample((7,)).size(), (7, 5, 5))
 | 
			
		||||
        self.assertEqual(Cauchy(loc_1d, scale_1d).sample().size(), (1,))
 | 
			
		||||
@ -1216,7 +1217,7 @@ class TestDistributions(TestCase):
 | 
			
		||||
        scale = torch.ones(5, 5, requires_grad=True)
 | 
			
		||||
        scale_1d = torch.ones(1, requires_grad=True)
 | 
			
		||||
        self.assertTrue(is_all_nan(HalfCauchy(scale_1d).mean))
 | 
			
		||||
        self.assertEqual(HalfCauchy(scale_1d).variance, float('inf'), allow_inf=True)
 | 
			
		||||
        self.assertEqual(HalfCauchy(scale_1d).variance, inf, allow_inf=True)
 | 
			
		||||
        self.assertEqual(HalfCauchy(scale).sample().size(), (5, 5))
 | 
			
		||||
        self.assertEqual(HalfCauchy(scale).sample((7,)).size(), (7, 5, 5))
 | 
			
		||||
        self.assertEqual(HalfCauchy(scale_1d).sample().size(), (1,))
 | 
			
		||||
@ -1714,8 +1715,8 @@ class TestDistributions(TestCase):
 | 
			
		||||
        alpha = torch.tensor(torch.randn(2, 3).abs(), requires_grad=True)
 | 
			
		||||
        scale_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True)
 | 
			
		||||
        alpha_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True)
 | 
			
		||||
        self.assertEqual(Pareto(scale_1d, 0.5).mean, float('inf'), allow_inf=True)
 | 
			
		||||
        self.assertEqual(Pareto(scale_1d, 0.5).variance, float('inf'), allow_inf=True)
 | 
			
		||||
        self.assertEqual(Pareto(scale_1d, 0.5).mean, inf, allow_inf=True)
 | 
			
		||||
        self.assertEqual(Pareto(scale_1d, 0.5).variance, inf, allow_inf=True)
 | 
			
		||||
        self.assertEqual(Pareto(scale, alpha).sample().size(), (2, 3))
 | 
			
		||||
        self.assertEqual(Pareto(scale, alpha).sample((5,)).size(), (5, 2, 3))
 | 
			
		||||
        self.assertEqual(Pareto(scale_1d, alpha_1d).sample((1,)).size(), (1, 1))
 | 
			
		||||
@ -1832,7 +1833,7 @@ class TestDistributions(TestCase):
 | 
			
		||||
        df_1d = torch.tensor(torch.exp(torch.randn(1)), requires_grad=True)
 | 
			
		||||
        self.assertTrue(is_all_nan(StudentT(1).mean))
 | 
			
		||||
        self.assertTrue(is_all_nan(StudentT(1).variance))
 | 
			
		||||
        self.assertEqual(StudentT(2).variance, float('inf'), allow_inf=True)
 | 
			
		||||
        self.assertEqual(StudentT(2).variance, inf, allow_inf=True)
 | 
			
		||||
        self.assertEqual(StudentT(df).sample().size(), (2, 3))
 | 
			
		||||
        self.assertEqual(StudentT(df).sample((5,)).size(), (5, 2, 3))
 | 
			
		||||
        self.assertEqual(StudentT(df_1d).sample((1,)).size(), (1, 1))
 | 
			
		||||
@ -2962,7 +2963,7 @@ class TestKL(TestCase):
 | 
			
		||||
 | 
			
		||||
    def test_kl_infinite(self):
 | 
			
		||||
        for p, q in self.infinite_examples:
 | 
			
		||||
            self.assertTrue((kl_divergence(p, q) == float('inf')).all(),
 | 
			
		||||
            self.assertTrue((kl_divergence(p, q) == inf).all(),
 | 
			
		||||
                            'Incorrect KL({}, {})'.format(type(p).__name__, type(q).__name__))
 | 
			
		||||
 | 
			
		||||
    def test_kl_edgecases(self):
 | 
			
		||||
@ -2996,7 +2997,7 @@ class TestKL(TestCase):
 | 
			
		||||
                    continue
 | 
			
		||||
                x = dist.sample(sample_shape=(60000,))
 | 
			
		||||
                expected = -dist.log_prob(x).mean(0)
 | 
			
		||||
                ignore = (expected == float('inf'))
 | 
			
		||||
                ignore = (expected == inf)
 | 
			
		||||
                expected[ignore] = actual[ignore]
 | 
			
		||||
                self.assertEqual(actual, expected, prec=0.2, message='\n'.join([
 | 
			
		||||
                    '{} example {}/{}, incorrect .entropy().'.format(Dist.__name__, i + 1, len(params)),
 | 
			
		||||
@ -3157,12 +3158,12 @@ class TestNumericalStability(TestCase):
 | 
			
		||||
 | 
			
		||||
    def test_categorical_log_prob_with_logits(self):
 | 
			
		||||
        for dtype in ([torch.float, torch.double]):
 | 
			
		||||
            p = torch.tensor([-float('inf'), 0], dtype=dtype, requires_grad=True)
 | 
			
		||||
            p = torch.tensor([-inf, 0], dtype=dtype, requires_grad=True)
 | 
			
		||||
            categorical = OneHotCategorical(logits=p)
 | 
			
		||||
            log_pdf_prob_1 = categorical.log_prob(torch.tensor([0, 1], dtype=dtype))
 | 
			
		||||
            self.assertEqual(log_pdf_prob_1.item(), 0)
 | 
			
		||||
            log_pdf_prob_0 = categorical.log_prob(torch.tensor([1, 0], dtype=dtype))
 | 
			
		||||
            self.assertEqual(log_pdf_prob_0.item(), -float('inf'), allow_inf=True)
 | 
			
		||||
            self.assertEqual(log_pdf_prob_0.item(), -inf, allow_inf=True)
 | 
			
		||||
 | 
			
		||||
    def test_multinomial_log_prob(self):
 | 
			
		||||
        for dtype in ([torch.float, torch.double]):
 | 
			
		||||
@ -3174,12 +3175,12 @@ class TestNumericalStability(TestCase):
 | 
			
		||||
 | 
			
		||||
    def test_multinomial_log_prob_with_logits(self):
 | 
			
		||||
        for dtype in ([torch.float, torch.double]):
 | 
			
		||||
            p = torch.tensor([-float('inf'), 0], dtype=dtype, requires_grad=True)
 | 
			
		||||
            p = torch.tensor([-inf, 0], dtype=dtype, requires_grad=True)
 | 
			
		||||
            multinomial = Multinomial(10, logits=p)
 | 
			
		||||
            log_pdf_prob_1 = multinomial.log_prob(torch.tensor([0, 10], dtype=dtype))
 | 
			
		||||
            self.assertEqual(log_pdf_prob_1.item(), 0)
 | 
			
		||||
            log_pdf_prob_0 = multinomial.log_prob(torch.tensor([10, 0], dtype=dtype))
 | 
			
		||||
            self.assertEqual(log_pdf_prob_0.item(), -float('inf'), allow_inf=True)
 | 
			
		||||
            self.assertEqual(log_pdf_prob_0.item(), -inf, allow_inf=True)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TestLazyLogitsInitialization(TestCase):
 | 
			
		||||
 | 
			
		||||
@ -15,6 +15,7 @@ import hashlib
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
import torch
 | 
			
		||||
from torch._six import inf, nan
 | 
			
		||||
import torch.backends.cudnn as cudnn
 | 
			
		||||
import torch.nn as nn
 | 
			
		||||
import torch.nn.functional as F
 | 
			
		||||
@ -1465,7 +1466,7 @@ class TestNN(NNTestCase):
 | 
			
		||||
 | 
			
		||||
        def compute_norm(norm_type):
 | 
			
		||||
            norm_type = float(norm_type)
 | 
			
		||||
            if norm_type != float('inf'):
 | 
			
		||||
            if norm_type != inf:
 | 
			
		||||
                total_norm = 0
 | 
			
		||||
                for p in l.parameters():
 | 
			
		||||
                    total_norm += p.grad.data.abs().pow(norm_type).sum()
 | 
			
		||||
@ -1560,8 +1561,6 @@ class TestNN(NNTestCase):
 | 
			
		||||
    # We don't want to make propagating NaN a hard requirement on ops, but for
 | 
			
		||||
    # these easy ones, we should make them do so.
 | 
			
		||||
    def _test_nonlinearity_propagate_nan(self, device):
 | 
			
		||||
        nan = float('nan')
 | 
			
		||||
 | 
			
		||||
        def test(nonlinearity, *args, **kwargs):
 | 
			
		||||
            x = torch.tensor([nan], device=device)
 | 
			
		||||
            fn = getattr(F, nonlinearity)
 | 
			
		||||
@ -2547,7 +2546,7 @@ class TestNN(NNTestCase):
 | 
			
		||||
            for num_dim in [1, 2, 3]:
 | 
			
		||||
                fn_name = '{}max_pool{}d'.format(adaptive, num_dim)
 | 
			
		||||
                fn = getattr(F, fn_name)
 | 
			
		||||
                x = torch.full([1, 1] + num_dim * [3], float('nan'))
 | 
			
		||||
                x = torch.full([1, 1] + num_dim * [3], nan)
 | 
			
		||||
                res = fn(x, 1 if adaptive else 3)
 | 
			
		||||
                self.assertTrue(math.isnan(res.item()))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -3,6 +3,7 @@ import unittest
 | 
			
		||||
import functools
 | 
			
		||||
from copy import deepcopy
 | 
			
		||||
import torch
 | 
			
		||||
from torch._six import inf
 | 
			
		||||
import torch.optim as optim
 | 
			
		||||
import torch.legacy.optim as old_optim
 | 
			
		||||
import torch.nn.functional as F
 | 
			
		||||
@ -478,8 +479,8 @@ class TestOptim(TestCase):
 | 
			
		||||
    @unittest.skipIf(TEST_WITH_UBSAN, "division-by-zero error with UBSAN")
 | 
			
		||||
    def test_lbfgs_return_type(self):
 | 
			
		||||
        params = [torch.randn(10, 5), torch.randn(10)]
 | 
			
		||||
        opt1 = optim.LBFGS(params, 0.01, tolerance_grad=float('inf'))
 | 
			
		||||
        opt2 = optim.LBFGS(params, 0.01, tolerance_grad=-float('inf'))
 | 
			
		||||
        opt1 = optim.LBFGS(params, 0.01, tolerance_grad=inf)
 | 
			
		||||
        opt2 = optim.LBFGS(params, 0.01, tolerance_grad=-inf)
 | 
			
		||||
 | 
			
		||||
        def closure():
 | 
			
		||||
            return torch.Tensor([10])
 | 
			
		||||
 | 
			
		||||
@ -16,6 +16,7 @@ import gzip
 | 
			
		||||
from torch._utils_internal import get_file_path, get_file_path_2
 | 
			
		||||
from torch.utils.dlpack import from_dlpack, to_dlpack
 | 
			
		||||
from torch._utils import _rebuild_tensor
 | 
			
		||||
from torch._six import inf, nan
 | 
			
		||||
from itertools import product, combinations
 | 
			
		||||
from functools import reduce
 | 
			
		||||
from torch import multiprocessing as mp
 | 
			
		||||
@ -241,17 +242,17 @@ class TestTorch(TestCase):
 | 
			
		||||
        self.assertTrue(torch.allclose(x, y, rtol=0.01, atol=0.0))
 | 
			
		||||
        self.assertFalse(torch.allclose(x, y))
 | 
			
		||||
        self.assertTrue(torch.allclose(torch.tensor([0.0]), torch.tensor([1e-8])))
 | 
			
		||||
        x = torch.tensor([2.0, 3.0, float('nan')])
 | 
			
		||||
        y = torch.tensor([2.01, 3.01, float('nan')])
 | 
			
		||||
        x = torch.tensor([2.0, 3.0, nan])
 | 
			
		||||
        y = torch.tensor([2.01, 3.01, nan])
 | 
			
		||||
        self.assertFalse(torch.allclose(x, y, rtol=1e-2))
 | 
			
		||||
        self.assertTrue(torch.allclose(x, y, rtol=1e-2, equal_nan=True))
 | 
			
		||||
        self.assertFalse(torch.allclose(x, y, rtol=1e-3, equal_nan=True))
 | 
			
		||||
        inf = torch.tensor([float('inf')])
 | 
			
		||||
        self.assertTrue(torch.allclose(inf, inf))
 | 
			
		||||
        self.assertTrue(torch.allclose(-inf, -inf))
 | 
			
		||||
        self.assertFalse(torch.allclose(inf, -inf))
 | 
			
		||||
        self.assertFalse(torch.allclose(inf, torch.tensor([1e20])))
 | 
			
		||||
        self.assertFalse(torch.allclose(-inf, torch.tensor([-1e20])))
 | 
			
		||||
        inf_t = torch.tensor([inf])
 | 
			
		||||
        self.assertTrue(torch.allclose(inf_t, inf_t))
 | 
			
		||||
        self.assertTrue(torch.allclose(-inf_t, -inf_t))
 | 
			
		||||
        self.assertFalse(torch.allclose(inf_t, -inf_t))
 | 
			
		||||
        self.assertFalse(torch.allclose(inf_t, torch.tensor([1e20])))
 | 
			
		||||
        self.assertFalse(torch.allclose(-inf_t, torch.tensor([-1e20])))
 | 
			
		||||
 | 
			
		||||
    def test_linear_algebra_scalar_raises(self):
 | 
			
		||||
        m = torch.randn(5, 5)
 | 
			
		||||
@ -359,13 +360,13 @@ class TestTorch(TestCase):
 | 
			
		||||
            try:
 | 
			
		||||
                return math.sinh(x)
 | 
			
		||||
            except OverflowError:
 | 
			
		||||
                return float('inf') if x > 0 else float('-inf')
 | 
			
		||||
                return inf if x > 0 else -inf
 | 
			
		||||
        self._test_math(torch.sinh, sinh)
 | 
			
		||||
 | 
			
		||||
    def test_lgamma(self):
 | 
			
		||||
        def lgamma(x):
 | 
			
		||||
            if x <= 0 and x == int(x):
 | 
			
		||||
                return float('inf')
 | 
			
		||||
                return inf
 | 
			
		||||
            return math.lgamma(x)
 | 
			
		||||
        self._test_math(torch.lgamma, lgamma)
 | 
			
		||||
 | 
			
		||||
@ -392,14 +393,14 @@ class TestTorch(TestCase):
 | 
			
		||||
        # scipy 1.1.0 changed when it returns +/-inf vs. NaN
 | 
			
		||||
        def torch_digamma_without_inf(inp):
 | 
			
		||||
            res = torch.digamma(inp)
 | 
			
		||||
            res[(res == float('-inf')) | (res == float('inf'))] = float('nan')
 | 
			
		||||
            res[(res == -inf) | (res == inf)] = nan
 | 
			
		||||
            return res
 | 
			
		||||
 | 
			
		||||
        def scipy_digamma_without_inf(inp):
 | 
			
		||||
            res = digamma(inp)
 | 
			
		||||
            if np.isscalar(res):
 | 
			
		||||
                return res if np.isfinite(res) else float('nan')
 | 
			
		||||
            res[np.isinf(res)] = float('nan')
 | 
			
		||||
                return res if np.isfinite(res) else nan
 | 
			
		||||
            res[np.isinf(res)] = nan
 | 
			
		||||
            return res
 | 
			
		||||
 | 
			
		||||
        self._test_math(torch_digamma_without_inf, scipy_digamma_without_inf, self._digamma_input())
 | 
			
		||||
@ -413,7 +414,7 @@ class TestTorch(TestCase):
 | 
			
		||||
                            self._digamma_input(test_poles=False))
 | 
			
		||||
 | 
			
		||||
    def test_asin(self):
 | 
			
		||||
        self._test_math(torch.asin, lambda x: math.asin(x) if abs(x) <= 1 else float('nan'))
 | 
			
		||||
        self._test_math(torch.asin, lambda x: math.asin(x) if abs(x) <= 1 else nan)
 | 
			
		||||
 | 
			
		||||
    def test_cos(self):
 | 
			
		||||
        self._test_math_by_name('cos')
 | 
			
		||||
@ -425,11 +426,11 @@ class TestTorch(TestCase):
 | 
			
		||||
            except OverflowError:
 | 
			
		||||
                # Return inf on overflow.
 | 
			
		||||
                # See http://en.cppreference.com/w/cpp/numeric/math/cosh
 | 
			
		||||
                return float('inf')
 | 
			
		||||
                return inf
 | 
			
		||||
        self._test_math(torch.cosh, cosh)
 | 
			
		||||
 | 
			
		||||
    def test_acos(self):
 | 
			
		||||
        self._test_math(torch.acos, lambda x: math.acos(x) if abs(x) <= 1 else float('nan'))
 | 
			
		||||
        self._test_math(torch.acos, lambda x: math.acos(x) if abs(x) <= 1 else nan)
 | 
			
		||||
 | 
			
		||||
    def test_tan(self):
 | 
			
		||||
        self._test_math_by_name('tan')
 | 
			
		||||
@ -443,36 +444,36 @@ class TestTorch(TestCase):
 | 
			
		||||
    def test_log(self):
 | 
			
		||||
        def log(x):
 | 
			
		||||
            if x == 0:
 | 
			
		||||
                return float('-inf')
 | 
			
		||||
                return -inf
 | 
			
		||||
            elif x < 0:
 | 
			
		||||
                return float('nan')
 | 
			
		||||
                return nan
 | 
			
		||||
            return math.log(x)
 | 
			
		||||
        self._test_math(torch.log, log)
 | 
			
		||||
 | 
			
		||||
    def test_log10(self):
 | 
			
		||||
        def log10(x):
 | 
			
		||||
            if x == 0:
 | 
			
		||||
                return float('-inf')
 | 
			
		||||
                return -inf
 | 
			
		||||
            elif x < 0:
 | 
			
		||||
                return float('nan')
 | 
			
		||||
                return nan
 | 
			
		||||
            return math.log10(x)
 | 
			
		||||
        self._test_math(torch.log10, log10)
 | 
			
		||||
 | 
			
		||||
    def test_log1p(self):
 | 
			
		||||
        def log1p(x):
 | 
			
		||||
            if x == -1:
 | 
			
		||||
                return float('-inf')
 | 
			
		||||
                return -inf
 | 
			
		||||
            elif x < -1:
 | 
			
		||||
                return float('nan')
 | 
			
		||||
                return nan
 | 
			
		||||
            return math.log1p(x)
 | 
			
		||||
        self._test_math(torch.log1p, log1p)
 | 
			
		||||
 | 
			
		||||
    def test_log2(self):
 | 
			
		||||
        def log2(x):
 | 
			
		||||
            if x == 0:
 | 
			
		||||
                return float('-inf')
 | 
			
		||||
                return -inf
 | 
			
		||||
            elif x < 0:
 | 
			
		||||
                return float('nan')
 | 
			
		||||
                return nan
 | 
			
		||||
            try:
 | 
			
		||||
                return math.log2(x)
 | 
			
		||||
            except AttributeError:
 | 
			
		||||
@ -480,7 +481,7 @@ class TestTorch(TestCase):
 | 
			
		||||
        self._test_math(torch.log2, log2)
 | 
			
		||||
 | 
			
		||||
    def test_sqrt(self):
 | 
			
		||||
        self._test_math(torch.sqrt, lambda x: math.sqrt(x) if x >= 0 else float('nan'))
 | 
			
		||||
        self._test_math(torch.sqrt, lambda x: math.sqrt(x) if x >= 0 else nan)
 | 
			
		||||
 | 
			
		||||
    def test_erf(self):
 | 
			
		||||
        self._test_math_by_name('erf')
 | 
			
		||||
@ -493,9 +494,9 @@ class TestTorch(TestCase):
 | 
			
		||||
            inputValues = torch.randn(4, 4, out=tensor()).clamp(-2., 2.)
 | 
			
		||||
            self.assertEqual(tensor(inputValues).erf().erfinv(), tensor(inputValues))
 | 
			
		||||
            # test inf
 | 
			
		||||
            self.assertTrue(torch.equal(tensor([-1, 1]).erfinv(), tensor([float('-inf'), float('inf')])))
 | 
			
		||||
            self.assertTrue(torch.equal(tensor([-1, 1]).erfinv(), tensor([-inf, inf])))
 | 
			
		||||
            # test nan
 | 
			
		||||
            self.assertEqual(tensor([-2, 2]).erfinv(), tensor([float('nan'), float('nan')]))
 | 
			
		||||
            self.assertEqual(tensor([-2, 2]).erfinv(), tensor([nan, nan]))
 | 
			
		||||
 | 
			
		||||
        checkType(torch.FloatTensor)
 | 
			
		||||
        checkType(torch.DoubleTensor)
 | 
			
		||||
@ -505,7 +506,7 @@ class TestTorch(TestCase):
 | 
			
		||||
            try:
 | 
			
		||||
                return math.exp(x)
 | 
			
		||||
            except OverflowError:
 | 
			
		||||
                return float('inf')
 | 
			
		||||
                return inf
 | 
			
		||||
        self._test_math(torch.exp, exp)
 | 
			
		||||
 | 
			
		||||
    def test_expm1(self):
 | 
			
		||||
@ -513,7 +514,7 @@ class TestTorch(TestCase):
 | 
			
		||||
            try:
 | 
			
		||||
                return math.expm1(x)
 | 
			
		||||
            except OverflowError:
 | 
			
		||||
                return float('inf')
 | 
			
		||||
                return inf
 | 
			
		||||
        self._test_math(torch.expm1, expm1)
 | 
			
		||||
 | 
			
		||||
    def test_floor(self):
 | 
			
		||||
@ -525,9 +526,9 @@ class TestTorch(TestCase):
 | 
			
		||||
    def test_rsqrt(self):
 | 
			
		||||
        def rsqrt(x):
 | 
			
		||||
            if x == 0:
 | 
			
		||||
                return float('inf')
 | 
			
		||||
                return inf
 | 
			
		||||
            elif x < 0:
 | 
			
		||||
                return float('nan')
 | 
			
		||||
                return nan
 | 
			
		||||
            return 1.0 / math.sqrt(x)
 | 
			
		||||
 | 
			
		||||
        self._test_math(torch.rsqrt, rsqrt)
 | 
			
		||||
@ -615,7 +616,7 @@ class TestTorch(TestCase):
 | 
			
		||||
        # NaNs
 | 
			
		||||
        for index in (0, 4, 99):
 | 
			
		||||
            m1 = torch.randn(100)
 | 
			
		||||
            m1[index] = float('nan')
 | 
			
		||||
            m1[index] = nan
 | 
			
		||||
            res1val, res1ind = torch.max(m1, 0)
 | 
			
		||||
            self.assertTrue(math.isnan(res1val))
 | 
			
		||||
            self.assertEqual(res1ind, index)
 | 
			
		||||
@ -633,14 +634,14 @@ class TestTorch(TestCase):
 | 
			
		||||
        # full reduction
 | 
			
		||||
        x = torch.randn(5, device=device)
 | 
			
		||||
        xn = x.cpu().numpy()
 | 
			
		||||
        for p in [0, 1, 2, 3, 4, float('inf')]:
 | 
			
		||||
        for p in [0, 1, 2, 3, 4, inf]:
 | 
			
		||||
            res = x.norm(p).item()
 | 
			
		||||
            expected = np.linalg.norm(xn, p)
 | 
			
		||||
            self.assertEqual(res, expected, "full reduction failed for {}-norm".format(p))
 | 
			
		||||
        # one dimension
 | 
			
		||||
        x = torch.randn(5, 5, device=device)
 | 
			
		||||
        xn = x.cpu().numpy()
 | 
			
		||||
        for p in [0, 1, 2, 3, 4, float('inf')]:
 | 
			
		||||
        for p in [0, 1, 2, 3, 4, inf]:
 | 
			
		||||
            res = x.norm(p, 1).cpu().numpy()
 | 
			
		||||
            expected = np.linalg.norm(xn, p, 1)
 | 
			
		||||
            self.assertEqual(res.shape, expected.shape)
 | 
			
		||||
@ -808,10 +809,10 @@ class TestTorch(TestCase):
 | 
			
		||||
            ('prod', lambda *args, **kwargs: torch.prod(*args, **kwargs), 1),
 | 
			
		||||
            ('sum', lambda *args, **kwargs: torch.sum(*args, **kwargs), 0),
 | 
			
		||||
            ('norm', lambda *args, **kwargs: torch.norm(*args, p=2, **kwargs), 0),
 | 
			
		||||
            ('mean', lambda *args, **kwargs: torch.mean(*args, **kwargs), float('nan')),
 | 
			
		||||
            ('var', lambda *args, **kwargs: torch.var(*args, **kwargs), float('nan')),
 | 
			
		||||
            ('std', lambda *args, **kwargs: torch.std(*args, **kwargs), float('nan')),
 | 
			
		||||
            ('logsumexp', lambda *args, **kwargs: torch.logsumexp(*args, **kwargs), float('-inf')),
 | 
			
		||||
            ('mean', lambda *args, **kwargs: torch.mean(*args, **kwargs), nan),
 | 
			
		||||
            ('var', lambda *args, **kwargs: torch.var(*args, **kwargs), nan),
 | 
			
		||||
            ('std', lambda *args, **kwargs: torch.std(*args, **kwargs), nan),
 | 
			
		||||
            ('logsumexp', lambda *args, **kwargs: torch.logsumexp(*args, **kwargs), -inf),
 | 
			
		||||
        ]
 | 
			
		||||
 | 
			
		||||
        devices = ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda']
 | 
			
		||||
@ -878,8 +879,8 @@ class TestTorch(TestCase):
 | 
			
		||||
    def test_logsumexp(self):
 | 
			
		||||
        from scipy.special import logsumexp
 | 
			
		||||
        a = torch.randn(5, 4)
 | 
			
		||||
        a[0, 0] = float('inf')
 | 
			
		||||
        a[1, :] = float('-inf')
 | 
			
		||||
        a[0, 0] = inf
 | 
			
		||||
        a[1, :] = -inf
 | 
			
		||||
        actual = a.logsumexp(1)
 | 
			
		||||
        expected = logsumexp(a.numpy(), 1)
 | 
			
		||||
        self.assertEqual(expected.shape, actual.shape)
 | 
			
		||||
@ -1540,7 +1541,7 @@ class TestTorch(TestCase):
 | 
			
		||||
        self._test_cop(torch.mul, lambda x, y: x * y)
 | 
			
		||||
 | 
			
		||||
    def test_cpow(self):
 | 
			
		||||
        self._test_cop(torch.pow, lambda x, y: float('nan') if x < 0 else math.pow(x, y))
 | 
			
		||||
        self._test_cop(torch.pow, lambda x, y: nan if x < 0 else math.pow(x, y))
 | 
			
		||||
 | 
			
		||||
    @unittest.skipIf(not TEST_NUMPY, 'Numpy not found')
 | 
			
		||||
    def test_einsum(self):
 | 
			
		||||
@ -2416,7 +2417,7 @@ class TestTorch(TestCase):
 | 
			
		||||
        # full reduction
 | 
			
		||||
        x = torch.randn(5, 5)
 | 
			
		||||
        xn = x.numpy()
 | 
			
		||||
        for p in [1, 2, 3, 4, float('inf')]:
 | 
			
		||||
        for p in [1, 2, 3, 4, inf]:
 | 
			
		||||
            res = x.renorm(p, 1, 1)
 | 
			
		||||
            expected = x / x.norm(p, 0, keepdim=True).clamp(min=1)
 | 
			
		||||
            self.assertEqual(res.numpy(), expected.numpy(), "renorm failed for {}-norm".format(p))
 | 
			
		||||
@ -2532,9 +2533,9 @@ class TestTorch(TestCase):
 | 
			
		||||
    def test_multinomial_invalid_probs(self):
 | 
			
		||||
        test_method = TestTorch._test_multinomial_invalid_probs
 | 
			
		||||
        self._spawn_method(test_method, torch.Tensor([0, -1]))
 | 
			
		||||
        self._spawn_method(test_method, torch.Tensor([0, float('inf')]))
 | 
			
		||||
        self._spawn_method(test_method, torch.Tensor([0, float('-inf')]))
 | 
			
		||||
        self._spawn_method(test_method, torch.Tensor([0, float('nan')]))
 | 
			
		||||
        self._spawn_method(test_method, torch.Tensor([0, inf]))
 | 
			
		||||
        self._spawn_method(test_method, torch.Tensor([0, -inf]))
 | 
			
		||||
        self._spawn_method(test_method, torch.Tensor([0, nan]))
 | 
			
		||||
 | 
			
		||||
    @suppress_warnings
 | 
			
		||||
    def test_range(self):
 | 
			
		||||
@ -4672,15 +4673,15 @@ class TestTorch(TestCase):
 | 
			
		||||
        self.assertEqual(x.nelement(), all.long().sum())
 | 
			
		||||
 | 
			
		||||
    def test_isfinite(self):
 | 
			
		||||
        x = torch.Tensor([1, float('inf'), 2, float('-inf'), float('nan'), -10])
 | 
			
		||||
        x = torch.Tensor([1, inf, 2, -inf, nan, -10])
 | 
			
		||||
        self.assertEqual(torch.isfinite(x), torch.ByteTensor([1, 0, 1, 0, 0, 1]))
 | 
			
		||||
 | 
			
		||||
    def test_isinf(self):
 | 
			
		||||
        x = torch.Tensor([1, float('inf'), 2, float('-inf'), float('nan')])
 | 
			
		||||
        x = torch.Tensor([1, inf, 2, -inf, nan])
 | 
			
		||||
        self.assertEqual(torch.isinf(x), torch.ByteTensor([0, 1, 0, 1, 0]))
 | 
			
		||||
 | 
			
		||||
    def test_isnan(self):
 | 
			
		||||
        x = torch.Tensor([1, float('nan'), 2])
 | 
			
		||||
        x = torch.Tensor([1, nan, 2])
 | 
			
		||||
        self.assertEqual(torch.isnan(x), torch.ByteTensor([0, 1, 0]))
 | 
			
		||||
 | 
			
		||||
    def test_RNGState(self):
 | 
			
		||||
@ -7418,7 +7419,7 @@ class TestTorch(TestCase):
 | 
			
		||||
        self.assertExpected(str(x), subname='negint')
 | 
			
		||||
 | 
			
		||||
        # test inf and nan
 | 
			
		||||
        x = torch.tensor([4, float('inf'), 1.5, float('-inf'), 0, float('nan'), 1])
 | 
			
		||||
        x = torch.tensor([4, inf, 1.5, -inf, 0, nan, 1])
 | 
			
		||||
        self.assertEqual(x.__repr__(), str(x))
 | 
			
		||||
        self.assertExpected(str(x), subname='nonfinite')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -413,6 +413,7 @@ class TestFFI(TestCase):
 | 
			
		||||
    @unittest.skipIf(not HAS_CFFI or not HAS_CUDA, "ffi tests require cffi package")
 | 
			
		||||
    @unittest.skipIf(IS_WINDOWS, "ffi doesn't currently work on Windows")
 | 
			
		||||
    def test_gpu(self):
 | 
			
		||||
        from torch.utils.cpp_extension import CUDA_HOME
 | 
			
		||||
        create_extension(
 | 
			
		||||
            name='gpulib',
 | 
			
		||||
            headers=[test_dir + '/ffi/src/cuda/cudalib.h'],
 | 
			
		||||
@ -421,6 +422,7 @@ class TestFFI(TestCase):
 | 
			
		||||
            ],
 | 
			
		||||
            with_cuda=True,
 | 
			
		||||
            verbose=False,
 | 
			
		||||
            include_dirs=[os.path.join(CUDA_HOME, 'include')],
 | 
			
		||||
        ).build()
 | 
			
		||||
        import gpulib
 | 
			
		||||
        tensor = torch.ones(2, 2).float()
 | 
			
		||||
 | 
			
		||||
@ -41,6 +41,9 @@ while [[ $# -gt 0 ]]; do
 | 
			
		||||
      --full-caffe2)
 | 
			
		||||
          FULL_CAFFE2=1
 | 
			
		||||
          ;;
 | 
			
		||||
      --cuda-static-link)
 | 
			
		||||
          CAFFE2_STATIC_LINK_CUDA=1
 | 
			
		||||
          ;;
 | 
			
		||||
      *)
 | 
			
		||||
          break
 | 
			
		||||
          ;;
 | 
			
		||||
@ -261,6 +264,7 @@ function build_caffe2() {
 | 
			
		||||
      -DBUILD_SHARED_LIBS=ON \
 | 
			
		||||
      -DONNX_NAMESPACE=$ONNX_NAMESPACE \
 | 
			
		||||
      -DUSE_CUDA=$USE_CUDA \
 | 
			
		||||
      -DCAFFE2_STATIC_LINK_CUDA=$CAFFE2_STATIC_LINK_CUDA \
 | 
			
		||||
      -DUSE_ROCM=$USE_ROCM \
 | 
			
		||||
      -DUSE_NNPACK=$USE_NNPACK \
 | 
			
		||||
      -DCUDNN_INCLUDE_DIR=$CUDNN_INCLUDE_DIR \
 | 
			
		||||
 | 
			
		||||
@ -25,6 +25,13 @@ import sys
 | 
			
		||||
PY2 = sys.version_info[0] == 2
 | 
			
		||||
PY3 = sys.version_info[0] == 3
 | 
			
		||||
 | 
			
		||||
if PY2:
 | 
			
		||||
    inf = float('inf')
 | 
			
		||||
    nan = float('nan')
 | 
			
		||||
else:
 | 
			
		||||
    import math
 | 
			
		||||
    inf = math.inf
 | 
			
		||||
    nan = math.nan
 | 
			
		||||
 | 
			
		||||
if PY2:
 | 
			
		||||
    string_classes = basestring
 | 
			
		||||
 | 
			
		||||
@ -1743,8 +1743,8 @@ scatter_(dim, index, src) -> Tensor
 | 
			
		||||
 | 
			
		||||
Writes all values from the tensor :attr:`src` into :attr:`self` at the indices
 | 
			
		||||
specified in the :attr:`index` tensor. For each value in :attr:`src`, its output
 | 
			
		||||
index is specified by its index in :attr:`src` for dimension != :attr:`dim` and
 | 
			
		||||
by the corresponding value in :attr:`index` for dimension = :attr:`dim`.
 | 
			
		||||
index is specified by its index in :attr:`src` for ``dimension != dim`` and by
 | 
			
		||||
the corresponding value in :attr:`index` for ``dimension = dim``.
 | 
			
		||||
 | 
			
		||||
For a 3-D tensor, :attr:`self` is updated as::
 | 
			
		||||
 | 
			
		||||
@ -1754,14 +1754,14 @@ For a 3-D tensor, :attr:`self` is updated as::
 | 
			
		||||
 | 
			
		||||
This is the reverse operation of the manner described in :meth:`~Tensor.gather`.
 | 
			
		||||
 | 
			
		||||
:attr:`self`, :attr:`index` and :attr:`src` should have same number of
 | 
			
		||||
dimensions. It is also required that `index.size(d) <= src.size(d)` for all
 | 
			
		||||
dimensions `d`, and that `index.size(d) <= self.size(d)` for all dimensions
 | 
			
		||||
`d != dim`.
 | 
			
		||||
:attr:`self`, :attr:`index` and :attr:`src` (if it is a Tensor) should have same
 | 
			
		||||
number of dimensions. It is also required that ``index.size(d) <= src.size(d)``
 | 
			
		||||
for all dimensions ``d``, and that ``index.size(d) <= self.size(d)`` for all
 | 
			
		||||
dimensions ``d != dim``.
 | 
			
		||||
 | 
			
		||||
Moreover, as for :meth:`~Tensor.gather`, the values of :attr:`index` must be
 | 
			
		||||
between `0` and `(self.size(dim) -1)` inclusive, and all values in a row along
 | 
			
		||||
the specified dimension :attr:`dim` must be unique.
 | 
			
		||||
between ``0`` and ``self.size(dim) - 1`` inclusive, and all values in a row
 | 
			
		||||
along the specified dimension :attr:`dim` must be unique.
 | 
			
		||||
 | 
			
		||||
Args:
 | 
			
		||||
    dim (int): the axis along which to index
 | 
			
		||||
@ -1785,6 +1785,50 @@ Example::
 | 
			
		||||
            [ 0.0000,  0.0000,  0.0000,  1.2300]])
 | 
			
		||||
""")
 | 
			
		||||
 | 
			
		||||
add_docstr_all('scatter_add_',
 | 
			
		||||
               r"""
 | 
			
		||||
scatter_add_(dim, index, other) -> Tensor
 | 
			
		||||
 | 
			
		||||
Adds all values from the tensor :attr:`other` into :attr:`self` at the indices
 | 
			
		||||
specified in the :attr:`index` tensor in a similar fashion as
 | 
			
		||||
:meth:`~torch.Tensor.scatter_`. For each value in :attr:`other`, it is added to
 | 
			
		||||
an index in :attr:`self` which is specified by its index in :attr:`other`
 | 
			
		||||
for ``dimension != dim`` and by the corresponding value in :attr:`index` for
 | 
			
		||||
``dimension = dim``.
 | 
			
		||||
 | 
			
		||||
For a 3-D tensor, :attr:`self` is updated as::
 | 
			
		||||
 | 
			
		||||
    self[index[i][j][k]][j][k] += other[i][j][k]  # if dim == 0
 | 
			
		||||
    self[i][index[i][j][k]][k] += other[i][j][k]  # if dim == 1
 | 
			
		||||
    self[i][j][index[i][j][k]] += other[i][j][k]  # if dim == 2
 | 
			
		||||
 | 
			
		||||
:attr:`self`, :attr:`index` and :attr:`other` should have same number of
 | 
			
		||||
dimensions. It is also required that ``index.size(d) <= other.size(d)`` for all
 | 
			
		||||
dimensions ``d``, and that ``index.size(d) <= self.size(d)`` for all dimensions
 | 
			
		||||
``d != dim``.
 | 
			
		||||
 | 
			
		||||
Moreover, as for :meth:`~Tensor.gather`, the values of :attr:`index` must be
 | 
			
		||||
between ``0`` and ``self.size(dim) - 1`` inclusive, and all values in a row along
 | 
			
		||||
the specified dimension :attr:`dim` must be unique.
 | 
			
		||||
 | 
			
		||||
Args:
 | 
			
		||||
    dim (int): the axis along which to index
 | 
			
		||||
    index (LongTensor): the indices of elements to scatter and add
 | 
			
		||||
    other (Tensor): the source elements to scatter and add
 | 
			
		||||
 | 
			
		||||
Example::
 | 
			
		||||
 | 
			
		||||
    >>> x = torch.rand(2, 5)
 | 
			
		||||
    >>> x
 | 
			
		||||
    tensor([[0.7404, 0.0427, 0.6480, 0.3806, 0.8328],
 | 
			
		||||
            [0.7953, 0.2009, 0.9154, 0.6782, 0.9620]])
 | 
			
		||||
    >>> torch.ones(3, 5).scatter_add_(0, torch.tensor([[0, 1, 2, 0, 0], [2, 0, 0, 1, 2]]), x)
 | 
			
		||||
    tensor([[1.7404, 1.2009, 1.9154, 1.3806, 1.8328],
 | 
			
		||||
            [1.0000, 1.0427, 1.0000, 1.6782, 1.0000],
 | 
			
		||||
            [1.7953, 1.0000, 1.6480, 1.0000, 1.9620]])
 | 
			
		||||
 | 
			
		||||
""")
 | 
			
		||||
 | 
			
		||||
add_docstr_all('select',
 | 
			
		||||
               r"""
 | 
			
		||||
select(dim, index) -> Tensor
 | 
			
		||||
 | 
			
		||||
@ -2,6 +2,7 @@ import math
 | 
			
		||||
import torch
 | 
			
		||||
from functools import reduce
 | 
			
		||||
from sys import float_info
 | 
			
		||||
from torch._six import inf, nan
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class __PrinterOptions(object):
 | 
			
		||||
@ -50,7 +51,7 @@ def set_printoptions(
 | 
			
		||||
            PRINT_OPTS.linewidth = 80
 | 
			
		||||
        elif profile == "full":
 | 
			
		||||
            PRINT_OPTS.precision = 4
 | 
			
		||||
            PRINT_OPTS.threshold = float('inf')
 | 
			
		||||
            PRINT_OPTS.threshold = inf
 | 
			
		||||
            PRINT_OPTS.edgeitems = 3
 | 
			
		||||
            PRINT_OPTS.linewidth = 80
 | 
			
		||||
 | 
			
		||||
@ -101,8 +102,8 @@ class _Formatter(object):
 | 
			
		||||
 | 
			
		||||
            else:
 | 
			
		||||
                copy_abs = copy.abs()
 | 
			
		||||
                pos_inf_mask = copy_abs.eq(float('inf'))
 | 
			
		||||
                neg_inf_mask = copy_abs.eq(float('-inf'))
 | 
			
		||||
                pos_inf_mask = copy_abs.eq(inf)
 | 
			
		||||
                neg_inf_mask = copy_abs.eq(-inf)
 | 
			
		||||
                nan_mask = copy_abs.ne(copy)
 | 
			
		||||
                invalid_value_mask = pos_inf_mask + neg_inf_mask + nan_mask
 | 
			
		||||
                if invalid_value_mask.all():
 | 
			
		||||
 | 
			
		||||
@ -1,4 +1,5 @@
 | 
			
		||||
import torch
 | 
			
		||||
from torch._six import nan
 | 
			
		||||
from torch.distributions import constraints
 | 
			
		||||
from torch.distributions.distribution import Distribution
 | 
			
		||||
from torch.distributions.utils import probs_to_logits, logits_to_probs, lazy_property, broadcast_all
 | 
			
		||||
@ -72,11 +73,11 @@ class Categorical(Distribution):
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def mean(self):
 | 
			
		||||
        return self.probs.new_tensor(float('nan')).expand(self._extended_shape())
 | 
			
		||||
        return self.probs.new_tensor(nan).expand(self._extended_shape())
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def variance(self):
 | 
			
		||||
        return self.probs.new_tensor(float('nan')).expand(self._extended_shape())
 | 
			
		||||
        return self.probs.new_tensor(nan).expand(self._extended_shape())
 | 
			
		||||
 | 
			
		||||
    def sample(self, sample_shape=torch.Size()):
 | 
			
		||||
        sample_shape = self._extended_shape(sample_shape)
 | 
			
		||||
 | 
			
		||||
@ -1,4 +1,5 @@
 | 
			
		||||
import math
 | 
			
		||||
from torch._six import inf, nan
 | 
			
		||||
from numbers import Number
 | 
			
		||||
 | 
			
		||||
import torch
 | 
			
		||||
@ -37,11 +38,11 @@ class Cauchy(Distribution):
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def mean(self):
 | 
			
		||||
        return self.loc.new_tensor(float('nan')).expand(self._extended_shape())
 | 
			
		||||
        return self.loc.new_tensor(nan).expand(self._extended_shape())
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def variance(self):
 | 
			
		||||
        return self.loc.new_tensor(float('inf')).expand(self._extended_shape())
 | 
			
		||||
        return self.loc.new_tensor(inf).expand(self._extended_shape())
 | 
			
		||||
 | 
			
		||||
    def rsample(self, sample_shape=torch.Size()):
 | 
			
		||||
        shape = self._extended_shape(sample_shape)
 | 
			
		||||
 | 
			
		||||
@ -1,6 +1,7 @@
 | 
			
		||||
from numbers import Number
 | 
			
		||||
import torch
 | 
			
		||||
import math
 | 
			
		||||
from torch._six import nan
 | 
			
		||||
from torch.distributions import constraints
 | 
			
		||||
from torch.distributions.distribution import Distribution
 | 
			
		||||
from torch.distributions.gamma import Gamma
 | 
			
		||||
@ -39,13 +40,13 @@ class FisherSnedecor(Distribution):
 | 
			
		||||
    @property
 | 
			
		||||
    def mean(self):
 | 
			
		||||
        df2 = self.df2.clone()
 | 
			
		||||
        df2[df2 <= 2] = float('nan')
 | 
			
		||||
        df2[df2 <= 2] = nan
 | 
			
		||||
        return df2 / (df2 - 2)
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def variance(self):
 | 
			
		||||
        df2 = self.df2.clone()
 | 
			
		||||
        df2[df2 <= 4] = float('nan')
 | 
			
		||||
        df2[df2 <= 4] = nan
 | 
			
		||||
        return 2 * df2.pow(2) * (self.df1 + df2 - 2) / (self.df1 * (df2 - 2).pow(2) * (df2 - 4))
 | 
			
		||||
 | 
			
		||||
    def rsample(self, sample_shape=torch.Size(())):
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
import math
 | 
			
		||||
 | 
			
		||||
from torch._six import inf
 | 
			
		||||
from torch.distributions import constraints
 | 
			
		||||
from torch.distributions.transforms import AbsTransform
 | 
			
		||||
from torch.distributions.cauchy import Cauchy
 | 
			
		||||
@ -44,7 +45,7 @@ class HalfCauchy(TransformedDistribution):
 | 
			
		||||
 | 
			
		||||
    def log_prob(self, value):
 | 
			
		||||
        log_prob = self.base_dist.log_prob(value) + math.log(2)
 | 
			
		||||
        log_prob[value.expand(log_prob.shape) < 0] = -float('inf')
 | 
			
		||||
        log_prob[value.expand(log_prob.shape) < 0] = -inf
 | 
			
		||||
        return log_prob
 | 
			
		||||
 | 
			
		||||
    def cdf(self, value):
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
import math
 | 
			
		||||
 | 
			
		||||
from torch._six import inf
 | 
			
		||||
from torch.distributions import constraints
 | 
			
		||||
from torch.distributions.transforms import AbsTransform
 | 
			
		||||
from torch.distributions.normal import Normal
 | 
			
		||||
@ -44,7 +45,7 @@ class HalfNormal(TransformedDistribution):
 | 
			
		||||
 | 
			
		||||
    def log_prob(self, value):
 | 
			
		||||
        log_prob = self.base_dist.log_prob(value) + math.log(2)
 | 
			
		||||
        log_prob[value.expand(log_prob.shape) < 0] = -float('inf')
 | 
			
		||||
        log_prob[value.expand(log_prob.shape) < 0] = -inf
 | 
			
		||||
        return log_prob
 | 
			
		||||
 | 
			
		||||
    def cdf(self, value):
 | 
			
		||||
 | 
			
		||||
@ -3,6 +3,7 @@ import warnings
 | 
			
		||||
from functools import total_ordering
 | 
			
		||||
 | 
			
		||||
import torch
 | 
			
		||||
from torch._six import inf
 | 
			
		||||
 | 
			
		||||
from .bernoulli import Bernoulli
 | 
			
		||||
from .beta import Beta
 | 
			
		||||
@ -113,7 +114,7 @@ def _infinite_like(tensor):
 | 
			
		||||
    """
 | 
			
		||||
    Helper function for obtaining infinite KL Divergence throughout
 | 
			
		||||
    """
 | 
			
		||||
    return tensor.new_tensor(float('inf')).expand_as(tensor)
 | 
			
		||||
    return tensor.new_tensor(inf).expand_as(tensor)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _x_log_x(tensor):
 | 
			
		||||
@ -173,10 +174,10 @@ _euler_gamma = 0.57721566490153286060
 | 
			
		||||
@register_kl(Bernoulli, Bernoulli)
 | 
			
		||||
def _kl_bernoulli_bernoulli(p, q):
 | 
			
		||||
    t1 = p.probs * (p.probs / q.probs).log()
 | 
			
		||||
    t1[q.probs == 0] = float('inf')
 | 
			
		||||
    t1[q.probs == 0] = inf
 | 
			
		||||
    t1[p.probs == 0] = 0
 | 
			
		||||
    t2 = (1 - p.probs) * ((1 - p.probs) / (1 - q.probs)).log()
 | 
			
		||||
    t2[q.probs == 1] = float('inf')
 | 
			
		||||
    t2[q.probs == 1] = inf
 | 
			
		||||
    t2[p.probs == 1] = 0
 | 
			
		||||
    return t1 + t2
 | 
			
		||||
 | 
			
		||||
@ -208,7 +209,7 @@ def _kl_binomial_binomial(p, q):
 | 
			
		||||
@register_kl(Categorical, Categorical)
 | 
			
		||||
def _kl_categorical_categorical(p, q):
 | 
			
		||||
    t = p.probs * (p.logits - q.logits)
 | 
			
		||||
    t[q.probs == 0] = float('inf')
 | 
			
		||||
    t[q.probs == 0] = inf
 | 
			
		||||
    t[p.probs == 0] = 0
 | 
			
		||||
    return t.sum(-1)
 | 
			
		||||
 | 
			
		||||
@ -322,7 +323,7 @@ def _kl_pareto_pareto(p, q):
 | 
			
		||||
    t1 = q.alpha * scale_ratio.log()
 | 
			
		||||
    t2 = -alpha_ratio.log()
 | 
			
		||||
    result = t1 + t2 + alpha_ratio - 1
 | 
			
		||||
    result[p.support.lower_bound < q.support.lower_bound] = float('inf')
 | 
			
		||||
    result[p.support.lower_bound < q.support.lower_bound] = inf
 | 
			
		||||
    return result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -346,7 +347,7 @@ def _kl_transformed_transformed(p, q):
 | 
			
		||||
@register_kl(Uniform, Uniform)
 | 
			
		||||
def _kl_uniform_uniform(p, q):
 | 
			
		||||
    result = ((q.high - q.low) / (p.high - p.low)).log()
 | 
			
		||||
    result[(q.low > p.low) | (q.high < p.high)] = float('inf')
 | 
			
		||||
    result[(q.low > p.low) | (q.high < p.high)] = inf
 | 
			
		||||
    return result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -392,7 +393,7 @@ def _kl_beta_normal(p, q):
 | 
			
		||||
@register_kl(Beta, Uniform)
 | 
			
		||||
def _kl_beta_uniform(p, q):
 | 
			
		||||
    result = -p.entropy() + (q.high - q.low).log()
 | 
			
		||||
    result[(q.low > p.support.lower_bound) | (q.high < p.support.upper_bound)] = float('inf')
 | 
			
		||||
    result[(q.low > p.support.lower_bound) | (q.high < p.support.upper_bound)] = inf
 | 
			
		||||
    return result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -543,7 +544,7 @@ def _kl_pareto_exponential(p, q):
 | 
			
		||||
    t2 = p.alpha.reciprocal()
 | 
			
		||||
    t3 = p.alpha * scale_rate_prod / (p.alpha - 1)
 | 
			
		||||
    result = t1 - t2 + t3 - 1
 | 
			
		||||
    result[p.alpha <= 1] = float('inf')
 | 
			
		||||
    result[p.alpha <= 1] = inf
 | 
			
		||||
    return result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -555,7 +556,7 @@ def _kl_pareto_gamma(p, q):
 | 
			
		||||
    t3 = (1 - q.concentration) * common_term
 | 
			
		||||
    t4 = q.rate * p.alpha * p.scale / (p.alpha - 1)
 | 
			
		||||
    result = t1 + t2 + t3 + t4 - 1
 | 
			
		||||
    result[p.alpha <= 1] = float('inf')
 | 
			
		||||
    result[p.alpha <= 1] = inf
 | 
			
		||||
    return result
 | 
			
		||||
 | 
			
		||||
# TODO: Add Pareto-Laplace KL Divergence
 | 
			
		||||
@ -570,7 +571,7 @@ def _kl_pareto_normal(p, q):
 | 
			
		||||
    t3 = p.alpha * common_term.pow(2) / (p.alpha - 2)
 | 
			
		||||
    t4 = (p.alpha * common_term - q.loc).pow(2)
 | 
			
		||||
    result = t1 - t2 + (t3 + t4) / var_normal - 1
 | 
			
		||||
    result[p.alpha <= 2] = float('inf')
 | 
			
		||||
    result[p.alpha <= 2] = inf
 | 
			
		||||
    return result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -588,14 +589,14 @@ def _kl_uniform_beta(p, q):
 | 
			
		||||
    t3 = (q.concentration0 - 1) * (_x_log_x((1 - p.high)) - _x_log_x((1 - p.low)) + common_term) / common_term
 | 
			
		||||
    t4 = q.concentration1.lgamma() + q.concentration0.lgamma() - (q.concentration1 + q.concentration0).lgamma()
 | 
			
		||||
    result = t3 + t4 - t1 - t2
 | 
			
		||||
    result[(p.high > q.support.upper_bound) | (p.low < q.support.lower_bound)] = float('inf')
 | 
			
		||||
    result[(p.high > q.support.upper_bound) | (p.low < q.support.lower_bound)] = inf
 | 
			
		||||
    return result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@register_kl(Uniform, Exponential)
 | 
			
		||||
def _kl_uniform_exponetial(p, q):
 | 
			
		||||
    result = q.rate * (p.high + p.low) / 2 - ((p.high - p.low) * q.rate).log()
 | 
			
		||||
    result[p.low < q.support.lower_bound] = float('inf')
 | 
			
		||||
    result[p.low < q.support.lower_bound] = inf
 | 
			
		||||
    return result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -607,7 +608,7 @@ def _kl_uniform_gamma(p, q):
 | 
			
		||||
    t3 = (1 - q.concentration) * (_x_log_x(p.high) - _x_log_x(p.low) - common_term) / common_term
 | 
			
		||||
    t4 = q.rate * (p.high + p.low) / 2
 | 
			
		||||
    result = -t1 + t2 + t3 + t4
 | 
			
		||||
    result[p.low < q.support.lower_bound] = float('inf')
 | 
			
		||||
    result[p.low < q.support.lower_bound] = inf
 | 
			
		||||
    return result
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -638,5 +639,5 @@ def _kl_uniform_pareto(p, q):
 | 
			
		||||
    t1 = (q.alpha * q.scale.pow(q.alpha) * (support_uniform)).log()
 | 
			
		||||
    t2 = (_x_log_x(p.high) - _x_log_x(p.low) - support_uniform) / support_uniform
 | 
			
		||||
    result = t2 * (q.alpha + 1) - t1
 | 
			
		||||
    result[p.low < q.support.lower_bound] = float('inf')
 | 
			
		||||
    result[p.low < q.support.lower_bound] = inf
 | 
			
		||||
    return result
 | 
			
		||||
 | 
			
		||||
@ -1,4 +1,5 @@
 | 
			
		||||
import torch
 | 
			
		||||
from torch._six import inf
 | 
			
		||||
from torch.distributions.distribution import Distribution
 | 
			
		||||
from torch.distributions import Categorical
 | 
			
		||||
from numbers import Number
 | 
			
		||||
@ -93,6 +94,6 @@ class Multinomial(Distribution):
 | 
			
		||||
        logits, value = broadcast_all(self.logits.clone(), value)
 | 
			
		||||
        log_factorial_n = torch.lgamma(value.sum(-1) + 1)
 | 
			
		||||
        log_factorial_xs = torch.lgamma(value + 1).sum(-1)
 | 
			
		||||
        logits[(value == 0) & (logits == -float('inf'))] = 0
 | 
			
		||||
        logits[(value == 0) & (logits == -inf)] = 0
 | 
			
		||||
        log_powers = (logits * value).sum(-1)
 | 
			
		||||
        return log_factorial_n - log_factorial_xs + log_powers
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
from numbers import Number
 | 
			
		||||
import torch
 | 
			
		||||
from torch._six import inf, nan
 | 
			
		||||
import math
 | 
			
		||||
from torch.distributions import constraints
 | 
			
		||||
from torch.distributions.distribution import Distribution
 | 
			
		||||
@ -27,15 +28,15 @@ class StudentT(Distribution):
 | 
			
		||||
    @property
 | 
			
		||||
    def mean(self):
 | 
			
		||||
        m = self.loc.clone()
 | 
			
		||||
        m[self.df <= 1] = float('nan')
 | 
			
		||||
        m[self.df <= 1] = nan
 | 
			
		||||
        return m
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def variance(self):
 | 
			
		||||
        m = self.df.clone()
 | 
			
		||||
        m[self.df > 2] = self.scale[self.df > 2].pow(2) * self.df[self.df > 2] / (self.df[self.df > 2] - 2)
 | 
			
		||||
        m[(self.df <= 2) & (self.df > 1)] = float('inf')
 | 
			
		||||
        m[self.df <= 1] = float('nan')
 | 
			
		||||
        m[(self.df <= 2) & (self.df > 1)] = inf
 | 
			
		||||
        m[self.df <= 1] = nan
 | 
			
		||||
        return m
 | 
			
		||||
 | 
			
		||||
    def __init__(self, df, loc=0., scale=1., validate_args=None):
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
import torch
 | 
			
		||||
import torch.nn.functional as F
 | 
			
		||||
from torch._six import inf
 | 
			
		||||
from operator import mul
 | 
			
		||||
from functools import reduce
 | 
			
		||||
import math
 | 
			
		||||
@ -155,7 +156,7 @@ def isfinite(tensor):
 | 
			
		||||
    """
 | 
			
		||||
    if not isinstance(tensor, torch.Tensor):
 | 
			
		||||
        raise ValueError("The argument is not a tensor", str(tensor))
 | 
			
		||||
    return (tensor == tensor) & (tensor.abs() != float('inf'))
 | 
			
		||||
    return (tensor == tensor) & (tensor.abs() != inf)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def isinf(tensor):
 | 
			
		||||
@ -174,7 +175,7 @@ def isinf(tensor):
 | 
			
		||||
    """
 | 
			
		||||
    if not isinstance(tensor, torch.Tensor):
 | 
			
		||||
        raise ValueError("The argument is not a tensor", str(tensor))
 | 
			
		||||
    return tensor.abs() == float('inf')
 | 
			
		||||
    return tensor.abs() == inf
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def stft(input, n_fft, hop_length=None, win_length=None, window=None,
 | 
			
		||||
 | 
			
		||||
@ -1,4 +1,5 @@
 | 
			
		||||
import torch
 | 
			
		||||
from torch._six import inf
 | 
			
		||||
from .Module import Module
 | 
			
		||||
from .utils import clear
 | 
			
		||||
 | 
			
		||||
@ -34,7 +35,7 @@ class Normalize(Module):
 | 
			
		||||
        self._output.resize_as_(input)
 | 
			
		||||
 | 
			
		||||
        # specialization for the infinity norm
 | 
			
		||||
        if self.p == float('inf'):
 | 
			
		||||
        if self.p == inf:
 | 
			
		||||
            if not self._indices:
 | 
			
		||||
                self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \
 | 
			
		||||
                    else torch.LongTensor()
 | 
			
		||||
@ -72,7 +73,7 @@ class Normalize(Module):
 | 
			
		||||
            self.cross = input.new()
 | 
			
		||||
        # compute diagonal term with gradOutput
 | 
			
		||||
        self._gradInput.resize_(n, d)
 | 
			
		||||
        if self.p == float('inf'):
 | 
			
		||||
        if self.p == inf:
 | 
			
		||||
                # specialization for the inf case
 | 
			
		||||
            torch.mul(self.norm.view(n, 1, 1).expand(n, d, 1), gradOutput, out=self._gradInput)
 | 
			
		||||
            self.buffer.resize_as_(input).zero_()
 | 
			
		||||
@ -113,7 +114,7 @@ class Normalize(Module):
 | 
			
		||||
        self._gradInput.add_(-1, self.buffer)
 | 
			
		||||
 | 
			
		||||
        # reuse cross buffer for normalization
 | 
			
		||||
        if self.p == float('inf'):
 | 
			
		||||
        if self.p == inf:
 | 
			
		||||
            torch.mul(self.norm, self.norm, out=self.cross)
 | 
			
		||||
        else:
 | 
			
		||||
            torch.mul(self.normp, self.norm, out=self.cross)
 | 
			
		||||
 | 
			
		||||
@ -1,10 +1,11 @@
 | 
			
		||||
import math
 | 
			
		||||
 | 
			
		||||
INFINITY = float('inf')
 | 
			
		||||
NAN = float('nan')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def sqrt_nothrow(x):
 | 
			
		||||
    return math.sqrt(x) if x >= 0 else float('nan')
 | 
			
		||||
    return math.sqrt(x) if x >= 0 else NAN
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cg(opfunc, x, config, state=None):
 | 
			
		||||
@ -145,7 +146,7 @@ def cg(opfunc, x, config, state=None):
 | 
			
		||||
            A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3)
 | 
			
		||||
            B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2)
 | 
			
		||||
            _denom = (B + sqrt_nothrow(B * B - A * d2 * z3 * z3))
 | 
			
		||||
            z2 = -d2 * z3 * z3 / _denom if _denom != 0 else float('nan')
 | 
			
		||||
            z2 = -d2 * z3 * z3 / _denom if _denom != 0 else NAN
 | 
			
		||||
 | 
			
		||||
            if z2 != z2 or z2 == INFINITY or z2 == -INFINITY or z2 < 0:
 | 
			
		||||
                if limit < -0.5:
 | 
			
		||||
 | 
			
		||||
@ -523,7 +523,7 @@ class BCEWithLogitsLoss(_Loss):
 | 
			
		||||
    :math:`p_n > 1` increases the recall, :math:`p_n < 1` increases the precision.
 | 
			
		||||
 | 
			
		||||
    For example, if a dataset contains 100 positive and 300 negative examples of a single class,
 | 
			
		||||
    then `pos_weight` for the class should be equal to math:`\frac{300}{100}=3`.
 | 
			
		||||
    then `pos_weight` for the class should be equal to :math:`\frac{300}{100}=3`.
 | 
			
		||||
    The loss would act as if the dataset contains :math:`3\times 100=300` positive examples.
 | 
			
		||||
 | 
			
		||||
    Args:
 | 
			
		||||
 | 
			
		||||
@ -691,7 +691,7 @@ class _LPPoolNd(Module):
 | 
			
		||||
        self.ceil_mode = ceil_mode
 | 
			
		||||
 | 
			
		||||
    def extra_repr(self):
 | 
			
		||||
        return 'norm_type={norm_type}, kernel_size{kernel_size}, stride={stride}, ' \
 | 
			
		||||
        return 'norm_type={norm_type}, kernel_size={kernel_size}, stride={stride}, ' \
 | 
			
		||||
            'ceil_mode={ceil_mode}'.format(**self.__dict__)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -1,5 +1,6 @@
 | 
			
		||||
import warnings
 | 
			
		||||
import torch
 | 
			
		||||
from torch._six import inf
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def clip_grad_norm_(parameters, max_norm, norm_type=2):
 | 
			
		||||
@ -23,7 +24,7 @@ def clip_grad_norm_(parameters, max_norm, norm_type=2):
 | 
			
		||||
    parameters = list(filter(lambda p: p.grad is not None, parameters))
 | 
			
		||||
    max_norm = float(max_norm)
 | 
			
		||||
    norm_type = float(norm_type)
 | 
			
		||||
    if norm_type == float('inf'):
 | 
			
		||||
    if norm_type == inf:
 | 
			
		||||
        total_norm = max(p.grad.data.abs().max() for p in parameters)
 | 
			
		||||
    else:
 | 
			
		||||
        total_norm = 0
 | 
			
		||||
 | 
			
		||||
@ -1,4 +1,6 @@
 | 
			
		||||
import math
 | 
			
		||||
import torch
 | 
			
		||||
from torch._six import inf
 | 
			
		||||
from bisect import bisect_right
 | 
			
		||||
from functools import partial
 | 
			
		||||
from .optimizer import Optimizer
 | 
			
		||||
@ -367,9 +369,9 @@ class ReduceLROnPlateau(object):
 | 
			
		||||
            raise ValueError('threshold mode ' + threshold_mode + ' is unknown!')
 | 
			
		||||
 | 
			
		||||
        if mode == 'min':
 | 
			
		||||
            self.mode_worse = float('inf')
 | 
			
		||||
            self.mode_worse = inf
 | 
			
		||||
        else:  # mode == 'max':
 | 
			
		||||
            self.mode_worse = (-float('inf'))
 | 
			
		||||
            self.mode_worse = -inf
 | 
			
		||||
 | 
			
		||||
        self.is_better = partial(self._cmp, mode, threshold_mode, threshold)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@ -65,6 +65,10 @@ CUDA_HOME = _find_cuda_home()
 | 
			
		||||
BUILT_FROM_SOURCE_VERSION_PATTERN = re.compile(r'\d+\.\d+\.\d+\w+\+\w+')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def is_binary_build():
 | 
			
		||||
    return not BUILT_FROM_SOURCE_VERSION_PATTERN.match(torch.version.__version__)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def check_compiler_abi_compatibility(compiler):
 | 
			
		||||
    '''
 | 
			
		||||
    Verifies that the given compiler is ABI-compatible with PyTorch.
 | 
			
		||||
@ -77,7 +81,7 @@ def check_compiler_abi_compatibility(compiler):
 | 
			
		||||
        False if the compiler is (likely) ABI-incompatible with PyTorch,
 | 
			
		||||
        else True.
 | 
			
		||||
    '''
 | 
			
		||||
    if BUILT_FROM_SOURCE_VERSION_PATTERN.match(torch.version.__version__):
 | 
			
		||||
    if not is_binary_build():
 | 
			
		||||
        return True
 | 
			
		||||
    try:
 | 
			
		||||
        check_cmd = '{}' if sys.platform == 'win32' else '{} --version'
 | 
			
		||||
@ -134,6 +138,7 @@ class BuildExtension(build_ext):
 | 
			
		||||
        self._check_abi()
 | 
			
		||||
        for extension in self.extensions:
 | 
			
		||||
            self._define_torch_extension_name(extension)
 | 
			
		||||
            self._add_gnu_abi_flag_if_binary(extension)
 | 
			
		||||
 | 
			
		||||
        # Register .cu and .cuh as valid source extensions.
 | 
			
		||||
        self.compiler.src_extensions += ['.cu', '.cuh']
 | 
			
		||||
@ -266,6 +271,21 @@ class BuildExtension(build_ext):
 | 
			
		||||
        else:
 | 
			
		||||
            extension.extra_compile_args.append(define)
 | 
			
		||||
 | 
			
		||||
    def _add_gnu_abi_flag_if_binary(self, extension):
 | 
			
		||||
        # If the version string looks like a binary build,
 | 
			
		||||
        # we know that PyTorch was compiled with gcc 4.9.2.
 | 
			
		||||
        # if the extension is compiled with gcc >= 5.1,
 | 
			
		||||
        # then we have to define _GLIBCXX_USE_CXX11_ABI=0
 | 
			
		||||
        # so that the std::string in the API is resolved to
 | 
			
		||||
        # non-C++11 symbols
 | 
			
		||||
        define = '-D_GLIBCXX_USE_CXX11_ABI=0'
 | 
			
		||||
        if is_binary_build():
 | 
			
		||||
            if isinstance(extension.extra_compile_args, dict):
 | 
			
		||||
                for args in extension.extra_compile_args.values():
 | 
			
		||||
                    args.append(define)
 | 
			
		||||
            else:
 | 
			
		||||
                extension.extra_compile_args.append(define)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def CppExtension(name, sources, *args, **kwargs):
 | 
			
		||||
    '''
 | 
			
		||||
@ -785,6 +805,9 @@ def _write_ninja_file(path,
 | 
			
		||||
    common_cflags = ['-DTORCH_EXTENSION_NAME={}'.format(name)]
 | 
			
		||||
    common_cflags += ['-I{}'.format(include) for include in includes]
 | 
			
		||||
 | 
			
		||||
    if is_binary_build():
 | 
			
		||||
        common_cflags += ['-D_GLIBCXX_USE_CXX11_ABI=0']
 | 
			
		||||
 | 
			
		||||
    cflags = common_cflags + ['-fPIC', '-std=c++11'] + extra_cflags
 | 
			
		||||
    if sys.platform == 'win32':
 | 
			
		||||
        from distutils.spawn import _nt_quote_args
 | 
			
		||||
 | 
			
		||||
		Reference in New Issue
	
	Block a user