Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-21 21:49:24 +08:00)
Compare commits: mlazos/bac ... v0.4.1 (17 commits)

Commit SHA1s:
a24163a95e
f08f222db3
8f916179f8
7b7e6dbfa7
84b8c1c357
b595c3e9ca
6ecc275272
f34528a723
2edf053549
76c16a5a64
f6fac92692
bb60c97805
886a367247
416c8ef1d1
2fbbe42a30
f07e550b08
3684cc4e52
@@ -151,10 +151,6 @@ endif()
 # ---[ CMake scripts + modules
 list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)
 
-if (MSVC AND ${BUILD_SHARED_LIBS})
-  set(CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS ON)
-endif()
-
 # ---[ CMake build directories
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
@@ -13,7 +13,7 @@ else()
   cmake_dependent_option(
       USE_CUDNN "Use cuDNN" ON
       "USE_CUDA" OFF)
-option(ATEN_NO_TEST "Do not build ATen test binaries" OFF)
+option(ATEN_NO_TEST "Do not build ATen test binaries" ON)
 
 # Flag for shared dependencies
 set(BUILD_ATEN ON)
@@ -1,4 +1,5 @@
+#include <ATen/optional.h>
 #include <ATen/Backtrace.h>
 
 #include <functional>
 #include <memory>
@@ -4,9 +4,11 @@
 #include <string>
 #include <typeinfo>
 
+#include <ATen/ATenGeneral.h>
+
 namespace at {
 /// Utility to demangle a C++ symbol name.
-std::string demangle(const char* name);
+AT_API std::string demangle(const char* name);
 
 /// Returns the printable name of the type.
 template <typename T>
@@ -19,7 +21,7 @@ inline const char* demangle_type() {
 #endif // __GXX_RTTI
 }
 
-std::string get_backtrace(
+AT_API std::string get_backtrace(
     size_t frames_to_skip = 0,
     size_t maximum_number_of_frames = 64,
     bool skip_python_frames = true);
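Most of the hunks in this compare add an explicit export annotation (AT_API here; TH_API/TH_CPP_API and CAFFE2_API further down) to public declarations, replacing the blanket CMAKE_WINDOWS_EXPORT_ALL_SYMBOLS switch removed in the first hunk. On Windows a symbol crosses a DLL boundary only if it is marked dllexport while the DLL is built and dllimport when a client compiles against it. A minimal sketch of the pattern, with hypothetical macro and symbol names rather than ATen's actual header:

#include <string>

// MYLIB_EXPORTS would be defined by the build system only while compiling
// the library itself, so clients transparently see the dllimport side.
#ifdef _WIN32
#  ifdef MYLIB_EXPORTS
#    define MY_API __declspec(dllexport)
#  else
#    define MY_API __declspec(dllimport)
#  endif
#else
#  define MY_API  // ELF/Mach-O symbols are visible by default
#endif

MY_API std::string demangle_like(const char* name);  // exported from the DLL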
@@ -250,6 +250,7 @@ IF(USE_CUDA AND NOT USE_ROCM)
   ENDIF(USE_MAGMA)
   IF ($ENV{ATEN_STATIC_CUDA})
+    list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libculibos.a")
     list(APPEND ATen_CUDA_DEPENDENCY_LIBS "${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcudart_static.a")
   ENDIF($ENV{ATEN_STATIC_CUDA})
 ENDIF()
 
@@ -405,11 +406,11 @@ ENDFOREACH()
 INSTALL(FILES ${CMAKE_BINARY_DIR}/aten/src/ATen/Declarations.yaml
   DESTINATION ${AT_INSTALL_SHARE_DIR}/ATen)
 
-if(ATEN_NO_TEST)
-  message("disable test because ATEN_NO_TEST is set")
-else()
-  add_subdirectory(test)
-endif()
+# if(ATEN_NO_TEST)
+#   message("disable test because ATEN_NO_TEST is set")
+# else()
+#   add_subdirectory(test)
+# endif()
 
 if (NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
   foreach(test_src ${ATen_CPU_TEST_SRCS})
@@ -3,6 +3,8 @@
 #include <cstdint>
 #include <utility>
 
+#include <ATen/ATenGeneral.h>
+
 /*
 * A CUDA stream interface with no CUDA build dependency.
 *
@@ -25,27 +27,27 @@ namespace detail {
 
 // Pointer-based API (for internal use)
 // Note: ATen/Context is preferred to work with streams safely
-CUDAStreamInternals* CUDAStream_getDefaultStreamOnDevice(int64_t device);
-CUDAStreamInternals* CUDAStream_getDefaultStream();
+AT_API CUDAStreamInternals* CUDAStream_getDefaultStreamOnDevice(int64_t device);
+AT_API CUDAStreamInternals* CUDAStream_getDefaultStream();
 
-CUDAStreamInternals* CUDAStream_createAndRetainWithOptions(int32_t flags, int32_t priority);
+AT_API CUDAStreamInternals* CUDAStream_createAndRetainWithOptions(int32_t flags, int32_t priority);
 
-CUDAStreamInternals* CUDAStream_getAndRetainCurrentStreamOnDevice(int64_t device);
-CUDAStreamInternals* CUDAStream_getAndRetainCurrentStream();
+AT_API CUDAStreamInternals* CUDAStream_getAndRetainCurrentStreamOnDevice(int64_t device);
+AT_API CUDAStreamInternals* CUDAStream_getAndRetainCurrentStream();
 
 // Note: these Unsafe gets should NEVER be used and are only here for legacy
 // purposes. Once those uses are gone they should be removed.
-CUDAStreamInternals* CUDAStream_getCurrentStreamOnDeviceUnsafe(int64_t device);
-CUDAStreamInternals* CUDAStream_getCurrentStreamUnsafe();
+AT_API CUDAStreamInternals* CUDAStream_getCurrentStreamOnDeviceUnsafe(int64_t device);
+AT_API CUDAStreamInternals* CUDAStream_getCurrentStreamUnsafe();
 
-void CUDAStream_setStreamOnDevice(int64_t device, CUDAStreamInternals* internals);
-void CUDAStream_setStream(CUDAStreamInternals* internals);
+AT_API void CUDAStream_setStreamOnDevice(int64_t device, CUDAStreamInternals* internals);
+AT_API void CUDAStream_setStream(CUDAStreamInternals* internals);
 
-cudaStream_t CUDAStream_stream(CUDAStreamInternals*);
-int64_t CUDAStream_device(CUDAStreamInternals*);
+AT_API cudaStream_t CUDAStream_stream(CUDAStreamInternals*);
+AT_API int64_t CUDAStream_device(CUDAStreamInternals*);
 
-bool CUDAStream_retain(CUDAStreamInternals*);
-void CUDAStream_free(CUDAStreamInternals*&);
+AT_API bool CUDAStream_retain(CUDAStreamInternals*);
+AT_API void CUDAStream_free(CUDAStreamInternals*&);
 
 } // namespace detail
 
@@ -64,10 +66,10 @@ struct CUDAStream {
   ~CUDAStream() { detail::CUDAStream_free(internals_); }
 
   // Copy constructor
-  CUDAStream(const CUDAStream& other);
+  AT_API CUDAStream(const CUDAStream& other);
 
   // Move constructor
-  CUDAStream(CUDAStream&& other);
+  AT_API CUDAStream(CUDAStream&& other);
 
   // Assignment operator
   CUDAStream& operator=(CUDAStream other) {
@@ -111,8 +111,8 @@ struct Device {
 };
 } // namespace at
 
-std::ostream& operator<<(std::ostream& stream, at::Device::Type type);
-std::ostream& operator<<(std::ostream& stream, const at::Device& device);
+AT_API std::ostream& operator<<(std::ostream& stream, at::Device::Type type);
+AT_API std::ostream& operator<<(std::ostream& stream, const at::Device& device);
 
 namespace std {
 template<> struct hash<at::Device>
@@ -43,7 +43,7 @@
     AT_PRIVATE_CASE_TYPE(at::ScalarType::Long, int64_t, __VA_ARGS__)   \
     AT_PRIVATE_CASE_TYPE(at::ScalarType::Short, int16_t, __VA_ARGS__)  \
     default:                                                           \
-      AT_ERROR("%s not implemented for '%s'", (NAME), the_type.toString()); \
+      AT_ERROR(#NAME, " not implemented for '", the_type.toString(), "'"); \
   }                                                                    \
 }()
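The AT_ERROR call above switches from printf-style formatting to a variadic list of pieces concatenated into the message. A minimal sketch of how such a concatenating error macro can work (hypothetical names, C++17, not ATen's actual implementation):

#include <sstream>
#include <stdexcept>
#include <string>

// Fold every argument into one string through a stringstream.
template <typename... Args>
std::string str_cat(const Args&... args) {
  std::ostringstream ss;
  (ss << ... << args);  // C++17 fold expression
  return ss.str();
}

#define MY_ERROR(...) throw std::runtime_error(str_cat(__VA_ARGS__))

// Mirrors the new call shape:
//   MY_ERROR(#NAME, " not implemented for '", the_type.toString(), "'");

The stream-based form sidesteps mismatches between % format specifiers and argument types, a hazard that grows once the integer arguments elsewhere in this compare widen to int64_t.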
@@ -35,8 +35,8 @@ namespace at {
 
 namespace detail {
 
-float halfbits2float(unsigned short bits);
-unsigned short float2halfbits(float value);
+AT_API float halfbits2float(unsigned short bits);
+AT_API unsigned short float2halfbits(float value);
 
 }
 
@@ -33,6 +33,8 @@
 #include <type_traits>
 #include <utility>
 
+#include <ATen/ATenGeneral.h>
+
 #if __GNUG__ && __GNUC__ < 5
 #define AT_IS_TRIVIALLY_COPYABLE(T) __has_trivial_copy(T)
 #else
@@ -57,7 +59,7 @@ static inline uint64_t NextPowerOf2(uint64_t A) {
 }
 
 /// This is all the non-templated stuff common to all SmallVectors.
-class SmallVectorBase {
+class AT_API SmallVectorBase {
  protected:
   void *BeginX, *EndX, *CapacityX;
 
@@ -5,7 +5,7 @@
 #include "ATen/Error.h"
 
 namespace at {
-struct SparseTensorImpl : public TensorImpl {
+struct AT_API SparseTensorImpl : public TensorImpl {
   // Stored in COO format, indices + values.
 
   // Ideal INVARIANTS:
@@ -19,7 +19,7 @@ namespace at {
 /// `torch::TensorOptions` subclass of this `TensorOptions`, which changes
 /// `type()` to return a variable type instead of a tensor type, such that
 /// variables are created inside factory methods, instead of tensors.
-struct TensorOptions {
+struct AT_API TensorOptions {
   TensorOptions() : TensorOptions(/*use_thread_local_default_options=*/true) {}
 
   /// Constructs the `TensorOptions` with defaults taken from the thread local
@@ -143,7 +143,7 @@ static inline ${return_type} ${api_name}(${formals}) {
 """)
 # add a native declaration for a native function
 NATIVE_DECLARATION = CodeTemplate("""\
-${return_type} ${native_type_method_dispatch}(${formals_with_defaults});
+AT_API ${return_type} ${native_type_method_dispatch}(${formals_with_defaults});
 """)
 
 # special method definition for factory functions in Functions.h
@@ -35,11 +35,14 @@
 #ifdef _WIN32
 # if defined(ATen_cpu_EXPORTS) || defined(caffe2_EXPORTS)
 #  define TH_API TH_EXTERNC __declspec(dllexport)
+#  define TH_CPP_API extern __declspec(dllexport)
 # else
 #  define TH_API TH_EXTERNC __declspec(dllimport)
+#  define TH_CPP_API extern __declspec(dllimport)
 # endif
 #else
 # define TH_API TH_EXTERNC
+# define TH_CPP_API extern
 #endif
 
 #ifdef _WIN32
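Two macros are needed because TH_API carries TH_EXTERNC (C linkage) and so cannot decorate C++-only declarations; the new TH_CPP_API uses plain extern with the same dllexport/dllimport split, which is what lets a declaration such as THTensor_compute_stride below, returning at::optional<std::vector<int64_t>>, be exported from the DLL.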
@@ -69,18 +69,18 @@ TH_API THStorage* THStorage_newWithSize(at::ScalarType scalar_type, ptrdiff_t si
 TH_API THStorage* THStorage_newWithAllocator(at::ScalarType scalar_type, ptrdiff_t size,
                                              at::Allocator *allocator);
 
-ptrdiff_t THStorage_size(const THStorage *self);
-size_t THStorage_elementSize();
-THStorage* THStorage_newWithMapping(at::ScalarType scalar_type, const char *filename, ptrdiff_t size, int flags);
-void THStorage_setFlag(THStorage *storage, const char flag);
-void THStorage_clearFlag(THStorage *storage, const char flag);
-void THStorage_retain(THStorage *storage);
-THStorage* THStorage_newWithDataAndAllocator(at::ScalarType scalar_type,
-                                             at::DataPtr&& data, ptrdiff_t size,
-                                             at::Allocator* allocator);
-void THStorage_resize(THStorage *storage, ptrdiff_t size);
-void THStorage_swap(THStorage *storage1, THStorage *storage2);
+TH_API ptrdiff_t THStorage_size(const THStorage *self);
+TH_API size_t THStorage_elementSize();
+TH_API THStorage* THStorage_newWithMapping(at::ScalarType scalar_type, const char *filename, ptrdiff_t size, int flags);
+TH_API void THStorage_setFlag(THStorage *storage, const char flag);
+TH_API void THStorage_clearFlag(THStorage *storage, const char flag);
+TH_API void THStorage_retain(THStorage *storage);
+TH_API THStorage* THStorage_newWithDataAndAllocator(at::ScalarType scalar_type,
+                                                    at::DataPtr&& data, ptrdiff_t size,
+                                                    at::Allocator* allocator);
+TH_API void THStorage_resize(THStorage *storage, ptrdiff_t size);
+TH_API void THStorage_swap(THStorage *storage1, THStorage *storage2);
 
-void THStorage_weakRetain(THStorage *weak_storage);
-void THStorage_weakFree(THStorage *weak_storage);
-THStorage* THStorage_weakLock(THStorage *weak_storage);
+TH_API void THStorage_weakRetain(THStorage *weak_storage);
+TH_API void THStorage_weakFree(THStorage *weak_storage);
+TH_API THStorage* THStorage_weakLock(THStorage *weak_storage);
@@ -83,5 +83,5 @@ struct THTensor
 #include "THGenerateAllTypes.h"
 
 TH_API void THTensor_free(THTensor *self);
-at::optional<std::vector<int64_t>> THTensor_compute_stride(at::IntList oldshape, at::IntList oldstride,
-                                                           at::IntList newshape);
+TH_CPP_API at::optional<std::vector<int64_t>> THTensor_compute_stride(at::IntList oldshape, at::IntList oldstride,
                                                                       at::IntList newshape);
@@ -6,9 +6,9 @@ static inline void THNN_(Col2Im_shapeCheck)(
                          THCState *state,
                          THCTensor *input,
                          THCTensor *gradOutput,
-                         int outputHeight, int outputWidth,
-                         int kH, int kW, int dH, int dW,
-                         int padH, int padW, int sH, int sW) {
+                         int64_t outputHeight, int64_t outputWidth,
+                         int64_t kH, int64_t kW, int64_t dH, int64_t dW,
+                         int64_t padH, int64_t padW, int64_t sH, int64_t sW) {
 
   THArgCheck(kW > 0 && kH > 0, 6,
              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
@@ -17,7 +17,7 @@ static inline void THNN_(Col2Im_shapeCheck)(
   THArgCheck(dW > 0 && dH > 0, 8,
              "dilation should be greater than zero, but got dH: %d dW: %d", dH, dW);
 
-  int ndim = THCTensor_(nDimension)(state, input);
+  int64_t ndim = THCTensor_(nDimension)(state, input);
   THCUNN_argCheck(state, !input->is_empty() && (ndim == 2 || ndim == 3), 2, input,
                   "Expected non-empty 2D or 3D input tensor, but got input of shape %s");
 
@@ -54,11 +54,11 @@ void THNN_(Col2Im_updateOutput)(
            THCState *state,
            THCTensor *input,
            THCTensor *output,
-           int outputHeight, int outputWidth,
-           int kH, int kW,
-           int dH, int dW,
-           int padH, int padW,
-           int sH, int sW) {
+           int64_t outputHeight, int64_t outputWidth,
+           int64_t kH, int64_t kW,
+           int64_t dH, int64_t dW,
+           int64_t padH, int64_t padW,
+           int64_t sH, int64_t sW) {
 
   THCUNN_assertSameGPU(state, 2, input, output);
 
@@ -84,10 +84,10 @@ void THNN_(Col2Im_updateOutput)(
   THCTensor *input_n = THCTensor_(new)(state);
   THCTensor *output_n = THCTensor_(new)(state);
 
-  int height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
-  int width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
+  int64_t height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
+  int64_t width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
 
-  for (int elt = 0; elt < batchSize; elt++) {
+  for (int64_t elt = 0; elt < batchSize; elt++) {
     THCTensor_(select)(state, input_n, input, 0, elt);
     THCTensor_(select)(state, output_n, output, 0, elt);
 
@@ -116,10 +116,10 @@ void THNN_(Col2Im_updateGradInput)(
            THCState *state,
            THCTensor *gradOutput,
            THCTensor *gradInput,
-           int kH, int kW,
-           int dH, int dW,
-           int padH, int padW,
-           int sH, int sW) {
+           int64_t kH, int64_t kW,
+           int64_t dH, int64_t dW,
+           int64_t padH, int64_t padW,
+           int64_t sH, int64_t sW) {
 
   THNN_(Im2Col_updateOutput)(state, gradOutput, gradInput,
                              kH, kW, dH, dW, padH, padW, sH, sW);
@@ -6,8 +6,8 @@ static inline void THNN_(Im2Col_shapeCheck)(
                          THCState *state,
                          THCTensor *input,
                          THCTensor *gradOutput,
-                         int kH, int kW, int dH, int dW,
-                         int padH, int padW, int sH, int sW) {
+                         int64_t kH, int64_t kW, int64_t dH, int64_t dW,
+                         int64_t padH, int64_t padW, int64_t sH, int64_t sW) {
 
   THArgCheck(kW > 0 && kH > 0, 4,
              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
@@ -18,7 +18,7 @@ static inline void THNN_(Im2Col_shapeCheck)(
   THArgCheck(sW > 0 && sH > 0, 10,
              "stride should be greater than zero, but got sH: %d sW: %d", sH, sW);
 
-  int ndim = THCTensor_(nDimension)(state, input);
+  int64_t ndim = THCTensor_(nDimension)(state, input);
   THCUNN_argCheck(state, !input->is_empty() && (ndim == 3 || ndim == 4), 2, input,
                   "Expected non-empty 3D or 4D input tensor, but got input of shape %s");
 
@@ -26,11 +26,11 @@ static inline void THNN_(Im2Col_shapeCheck)(
   if (ndim == 3) {
     dim_batch = -1;
   }
-  int nInputPlane = THCTensor_(size)(state, input, dim_batch + 1);
-  int inputHeight = THCTensor_(size)(state, input, dim_batch + 2);
-  int inputWidth = THCTensor_(size)(state, input, dim_batch + 3);
-  int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
-  int outputWidth = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
+  int64_t nInputPlane = THCTensor_(size)(state, input, dim_batch + 1);
+  int64_t inputHeight = THCTensor_(size)(state, input, dim_batch + 2);
+  int64_t inputWidth = THCTensor_(size)(state, input, dim_batch + 3);
+  int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
+  int64_t outputWidth = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
 
   if (outputHeight < 1 || outputWidth < 1) {
     THError("Given input with spatial size (%d, %d), kernel_size=(%d, %d), "
@@ -46,10 +46,10 @@ void THNN_(Im2Col_updateOutput)(
            THCState *state,
            THCTensor *input,
            THCTensor *output,
-           int kH, int kW,
-           int dH, int dW,
-           int padH, int padW,
-           int sH, int sW) {
+           int64_t kH, int64_t kW,
+           int64_t dH, int64_t dW,
+           int64_t padH, int64_t padW,
+           int64_t sH, int64_t sW) {
 
   THCUNN_assertSameGPU(state, 2, input, output);
 
@@ -62,15 +62,15 @@ void THNN_(Im2Col_updateOutput)(
     THCTensor_(resize4d)(state, input, 1, input->size[0], input->size[1], input->size[2]);
   }
 
-  int batchSize = THCTensor_(size)(state, input, 0);
-  int nInputPlane = THCTensor_(size)(state, input, 1);
-  int inputHeight = THCTensor_(size)(state, input, 2);
-  int inputWidth = THCTensor_(size)(state, input, 3);
+  int64_t batchSize = THCTensor_(size)(state, input, 0);
+  int64_t nInputPlane = THCTensor_(size)(state, input, 1);
+  int64_t inputHeight = THCTensor_(size)(state, input, 2);
+  int64_t inputWidth = THCTensor_(size)(state, input, 3);
 
-  int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
-  int outputWidth = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
-  int nOutputPlane = nInputPlane * kW * kH;
-  int outputLength = outputHeight * outputWidth;
+  int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
+  int64_t outputWidth = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
+  int64_t nOutputPlane = nInputPlane * kW * kH;
+  int64_t outputLength = outputHeight * outputWidth;
 
   THCTensor_(resize3d)(state, output, batchSize, nOutputPlane, outputLength);
   THCTensor_(zero)(state, output);
@@ -78,7 +78,7 @@ void THNN_(Im2Col_updateOutput)(
   THCTensor *input_n = THCTensor_(new)(state);
   THCTensor *output_n = THCTensor_(new)(state);
 
-  for (int elt = 0; elt < batchSize; elt++) {
+  for (int64_t elt = 0; elt < batchSize; elt++) {
     THCTensor_(select)(state, input_n, input, 0, elt);
     THCTensor_(select)(state, output_n, output, 0, elt);
 
@@ -104,11 +104,11 @@ void THNN_(Im2Col_updateGradInput)(
            THCState *state,
            THCTensor *gradOutput,
            THCTensor *gradInput,
-           int inputHeight, int inputWidth,
-           int kH, int kW,
-           int dH, int dW,
-           int padH, int padW,
-           int sH, int sW) {
+           int64_t inputHeight, int64_t inputWidth,
+           int64_t kH, int64_t kW,
+           int64_t dH, int64_t dW,
+           int64_t padH, int64_t padW,
+           int64_t sH, int64_t sW) {
 
   THNN_(Col2Im_updateOutput)(state, gradOutput, gradInput,
                              inputHeight, inputWidth,
@@ -183,39 +183,39 @@ THC_API void THNN_(Im2Col_updateOutput)(
                   THCState *state,
                   THCTensor *input,
                   THCTensor *output,
-                  int kH, int kW,
-                  int dH, int dW,
-                  int padH, int padW,
-                  int sH, int sW);
+                  int64_t kH, int64_t kW,
+                  int64_t dH, int64_t dW,
+                  int64_t padH, int64_t padW,
+                  int64_t sH, int64_t sW);
 
 THC_API void THNN_(Im2Col_updateGradInput)(
                   THCState *state,
                   THCTensor *gradOutput,
                   THCTensor *gradInput,
-                  int inputHeight, int inputWidth,
-                  int kH, int kW,
-                  int dH, int dW,
-                  int padH, int padW,
-                  int sH, int sW);
+                  int64_t inputHeight, int64_t inputWidth,
+                  int64_t kH, int64_t kW,
+                  int64_t dH, int64_t dW,
+                  int64_t padH, int64_t padW,
+                  int64_t sH, int64_t sW);
 
 THC_API void THNN_(Col2Im_updateOutput)(
                   THCState *state,
                   THCTensor *input,
                   THCTensor *output,
-                  int outputHeight, int outputWidth,
-                  int kH, int kW,
-                  int dH, int dW,
-                  int padH, int padW,
-                  int sH, int sW);
+                  int64_t outputHeight, int64_t outputWidth,
+                  int64_t kH, int64_t kW,
+                  int64_t dH, int64_t dW,
+                  int64_t padH, int64_t padW,
+                  int64_t sH, int64_t sW);
 
 THC_API void THNN_(Col2Im_updateGradInput)(
                   THCState *state,
                   THCTensor *gradOutput,
                   THCTensor *gradInput,
-                  int kH, int kW,
-                  int dH, int dW,
-                  int padH, int padW,
-                  int sH, int sW);
+                  int64_t kH, int64_t kW,
+                  int64_t dH, int64_t dW,
+                  int64_t padH, int64_t padW,
+                  int64_t sH, int64_t sW);
 
 THC_API void THNN_(LeakyReLU_updateOutput)(
                   THCState *state,
@@ -8,28 +8,28 @@
 // (borrowed from Caffe: https://github.com/BVLC/caffe/blob/master/src/caffe/layers/conv_layer.cu)
 template <typename Dtype>
 __launch_bounds__(CUDA_NUM_THREADS)
-__global__ void im2col_kernel(const int n, const Dtype* data_im,
-                              const int height, const int width,
-                              const int ksize_h, const int ksize_w,
-                              const int pad_h, const int pad_w,
-                              const int stride_h, const int stride_w,
-                              const int dilation_h, const int dilation_w,
-                              const int height_col, const int width_col,
+__global__ void im2col_kernel(const int64_t n, const Dtype* data_im,
+                              const int64_t height, const int64_t width,
+                              const int64_t ksize_h, const int64_t ksize_w,
+                              const int64_t pad_h, const int64_t pad_w,
+                              const int64_t stride_h, const int64_t stride_w,
+                              const int64_t dilation_h, const int64_t dilation_w,
+                              const int64_t height_col, const int64_t width_col,
                               Dtype* data_col) {
   CUDA_KERNEL_LOOP(index, n) {
-    int w_out = index % width_col;
+    int64_t w_out = index % width_col;
     index /= width_col;
-    int h_out = index % height_col;
-    int channel_in = index / height_col;
-    int channel_out = channel_in * ksize_h * ksize_w;
-    int h_in = h_out * stride_h - pad_h;
-    int w_in = w_out * stride_w - pad_w;
+    int64_t h_out = index % height_col;
+    int64_t channel_in = index / height_col;
+    int64_t channel_out = channel_in * ksize_h * ksize_w;
+    int64_t h_in = h_out * stride_h - pad_h;
+    int64_t w_in = w_out * stride_w - pad_w;
    data_col += (channel_out * height_col + h_out) * width_col + w_out;
    data_im += (channel_in * height + h_in) * width + w_in;
-    for (int i = 0; i < ksize_h; ++i) {
-      for (int j = 0; j < ksize_w; ++j) {
-        int h = h_in + i * dilation_h;
-        int w = w_in + j * dilation_w;
+    for (int64_t i = 0; i < ksize_h; ++i) {
+      for (int64_t j = 0; j < ksize_w; ++j) {
+        int64_t h = h_in + i * dilation_h;
+        int64_t w = w_in + j * dilation_w;
         *data_col = (h >= 0 && w >= 0 && h < height && w < width) ?
           data_im[i * dilation_h * width + j * dilation_w] : ScalarConvert<int, Dtype>::to(0);
         data_col += height_col * width_col;
@@ -39,15 +39,15 @@ __global__ void im2col_kernel(const int n, const Dtype* data_im,
 }
 
 template <typename Dtype>
-void im2col(cudaStream_t stream, const Dtype* data_im, const int channels,
-            const int height, const int width,
-            const int height_col, const int width_col,
-            const int ksize_h, const int ksize_w, const int pad_h,
-            const int pad_w, const int stride_h, const int stride_w,
-            const int dilation_h, const int dilation_w, Dtype* data_col) {
+void im2col(cudaStream_t stream, const Dtype* data_im, const int64_t channels,
+            const int64_t height, const int64_t width,
+            const int64_t height_col, const int64_t width_col,
+            const int64_t ksize_h, const int64_t ksize_w, const int64_t pad_h,
+            const int64_t pad_w, const int64_t stride_h, const int64_t stride_w,
+            const int64_t dilation_h, const int64_t dilation_w, Dtype* data_col) {
   // We are going to launch channels * height_col * width_col kernels, each
   // kernel responsible for copying a single-channel grid.
-  int num_kernels = channels * height_col * width_col;
+  int64_t num_kernels = channels * height_col * width_col;
   // Launch
   im2col_kernel <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>> (
       num_kernels, data_im, height, width, ksize_h, ksize_w,
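The int to int64_t widening in this and the neighboring hunks is not cosmetic: num_kernels = channels * height_col * width_col is a product of three extents and can exceed INT_MAX for large inputs, corrupting the launch-size computation. A small sketch of the failure mode, under assumed (hypothetical) sizes:

#include <cstdint>
#include <cstdio>

int main() {
  // e.g. 512 channels over a 2048 x 2048 column buffer:
  int64_t channels = 512, height_col = 2048, width_col = 2048;
  int64_t wide = channels * height_col * width_col;  // 2147483648, fits in int64_t
  int32_t narrow = (int32_t)wide;  // exceeds INT_MAX; typically wraps to -2147483648
  std::printf("64-bit: %lld, truncated 32-bit: %d\n", (long long)wide, narrow);
}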
@@ -60,37 +60,37 @@ void im2col(cudaStream_t stream, const Dtype* data_im, const int channels,
 
 template <typename Dtype, typename Acctype>
 __launch_bounds__(CUDA_NUM_THREADS)
-__global__ void col2im_kernel(const int n, const Dtype* data_col,
-                              const int height, const int width, const int channels,
-                              const int kernel_h, const int kernel_w,
-                              const int pad_h, const int pad_w,
-                              const int stride_h, const int stride_w,
-                              const int dilation_h, const int dilation_w,
-                              const int height_col, const int width_col,
+__global__ void col2im_kernel(const int64_t n, const Dtype* data_col,
+                              const int64_t height, const int64_t width, const int64_t channels,
+                              const int64_t kernel_h, const int64_t kernel_w,
+                              const int64_t pad_h, const int64_t pad_w,
+                              const int64_t stride_h, const int64_t stride_w,
+                              const int64_t dilation_h, const int64_t dilation_w,
+                              const int64_t height_col, const int64_t width_col,
                               Dtype* data_im) {
   CUDA_KERNEL_LOOP(index, n) {
     Acctype val = Acctype(0);
-    const int w_im = index % width + pad_w;
-    const int h_im = (index / width) % height + pad_h;
-    const int c_im = index / (width * height);
-    int kernel_extent_w = (kernel_w - 1) * dilation_w + 1;
-    int kernel_extent_h = (kernel_h - 1) * dilation_h + 1;
+    const int64_t w_im = index % width + pad_w;
+    const int64_t h_im = (index / width) % height + pad_h;
+    const int64_t c_im = index / (width * height);
+    int64_t kernel_extent_w = (kernel_w - 1) * dilation_w + 1;
+    int64_t kernel_extent_h = (kernel_h - 1) * dilation_h + 1;
     // compute the start and end of the output
-    const int w_col_start =
+    const int64_t w_col_start =
         (w_im < kernel_extent_w) ? 0 : (w_im - kernel_extent_w) / stride_w + 1;
-    const int w_col_end = min(w_im / stride_w + 1, width_col);
-    const int h_col_start =
+    const int64_t w_col_end = min(w_im / stride_w + 1, width_col);
+    const int64_t h_col_start =
        (h_im < kernel_extent_h) ? 0 : (h_im - kernel_extent_h) / stride_h + 1;
-    const int h_col_end = min(h_im / stride_h + 1, height_col);
+    const int64_t h_col_end = min(h_im / stride_h + 1, height_col);
     // TODO: use LCM of stride and dilation to avoid unnecessary loops
-    for (int h_col = h_col_start; h_col < h_col_end; h_col += 1) {
-      for (int w_col = w_col_start; w_col < w_col_end; w_col += 1) {
-        int h_k = (h_im - h_col * stride_h);
-        int w_k = (w_im - w_col * stride_w);
+    for (int64_t h_col = h_col_start; h_col < h_col_end; h_col += 1) {
+      for (int64_t w_col = w_col_start; w_col < w_col_end; w_col += 1) {
+        int64_t h_k = (h_im - h_col * stride_h);
+        int64_t w_k = (w_im - w_col * stride_w);
         if (h_k % dilation_h == 0 && w_k % dilation_w == 0) {
           h_k /= dilation_h;
           w_k /= dilation_w;
-          int data_col_index = (((c_im * kernel_h + h_k) * kernel_w + w_k) *
+          int64_t data_col_index = (((c_im * kernel_h + h_k) * kernel_w + w_k) *
                                 height_col + h_col) * width_col + w_col;
           val += data_col[data_col_index];
         }
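The w_col_start/w_col_end bounds in col2im_kernel follow from the fact that a patch at output column w_col covers input columns [w_col * stride_w, w_col * stride_w + kernel_extent_w). A brief check of that derivation, under assumed parameter values:

#include <algorithm>
#include <cassert>
#include <cstdint>

int main() {
  int64_t stride = 2, kernel = 3, dilation = 2;
  int64_t extent = (kernel - 1) * dilation + 1;  // 5
  int64_t w_im = 7, width_col = 100;
  // The bounds exactly as computed in the kernel above:
  int64_t start = (w_im < extent) ? 0 : (w_im - extent) / stride + 1;  // 2
  int64_t end = std::min(w_im / stride + 1, width_col);                // 4
  for (int64_t w_col = 0; w_col < width_col; ++w_col) {
    bool covers = w_col * stride <= w_im && w_im < w_col * stride + extent;
    assert(covers == (w_col >= start && w_col < end));
  }
}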
@@ -101,21 +101,21 @@ __global__ void col2im_kernel(const int n, const Dtype* data_col,
 }
 
 template <typename Dtype, typename Acctype>
-void col2im(cudaStream_t stream, const Dtype* data_col, const int channels,
-            const int height, const int width,
-            const int output_height, const int output_width,
-            const int patch_h, const int patch_w, const int pad_h,
-            const int pad_w, const int stride_h, const int stride_w,
-            const int dilation_h, const int dilation_w, Dtype* data_im);
+void col2im(cudaStream_t stream, const Dtype* data_col, const int64_t channels,
+            const int64_t height, const int64_t width,
+            const int64_t output_height, const int64_t output_width,
+            const int64_t patch_h, const int64_t patch_w, const int64_t pad_h,
+            const int64_t pad_w, const int64_t stride_h, const int64_t stride_w,
+            const int64_t dilation_h, const int64_t dilation_w, Dtype* data_im);
 
 template <typename Dtype, typename Acctype>
-void col2im(cudaStream_t stream, const Dtype* data_col, const int channels,
-            const int height, const int width,
-            const int output_height, const int output_width,
-            const int patch_h, const int patch_w, const int pad_h,
-            const int pad_w, const int stride_h, const int stride_w,
-            const int dilation_h, const int dilation_w, Dtype* data_im) {
-  int num_kernels = channels * height * width;
+void col2im(cudaStream_t stream, const Dtype* data_col, const int64_t channels,
+            const int64_t height, const int64_t width,
+            const int64_t output_height, const int64_t output_width,
+            const int64_t patch_h, const int64_t patch_w, const int64_t pad_h,
+            const int64_t pad_w, const int64_t stride_h, const int64_t stride_w,
+            const int64_t dilation_h, const int64_t dilation_w, Dtype* data_im) {
+  int64_t num_kernels = channels * height * width;
   // To avoid involving atomic operations, we will launch one kernel per
   // bottom dimension, and then in the kernel add up the top dimensions.
   col2im_kernel<Dtype, Acctype> <<<GET_BLOCKS(num_kernels), CUDA_NUM_THREADS, 0, stream>>> (
@@ -54,25 +54,25 @@
 //
 // ALSO do vol2col
 
-static void THNN_(im2col)(const real* data_im, const int channels,
-      const int height, const int width,
-      const int output_height, const int output_width,
-      const int kernel_h, const int kernel_w,
-      const int pad_h, const int pad_w,
-      const int stride_h, const int stride_w,
-      const int dilation_h, const int dilation_w,
+static void THNN_(im2col)(const real* data_im, const int64_t channels,
+      const int64_t height, const int64_t width,
+      const int64_t output_height, const int64_t output_width,
+      const int64_t kernel_h, const int64_t kernel_w,
+      const int64_t pad_h, const int64_t pad_w,
+      const int64_t stride_h, const int64_t stride_w,
+      const int64_t dilation_h, const int64_t dilation_w,
       real* data_col) {
-  const int height_col = output_height;
-  const int width_col = output_width;
-  const int channels_col = channels * kernel_h * kernel_w;
-  for (int c_col = 0; c_col < channels_col; ++c_col) {
-    int w_offset = c_col % kernel_w;
-    int h_offset = (c_col / kernel_w) % kernel_h;
-    int c_im = c_col / kernel_h / kernel_w;
-    for (int h_col = 0; h_col < height_col; ++h_col) {
-      int h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
-      for (int w_col = 0; w_col < width_col; ++w_col) {
-        int w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
+  const int64_t height_col = output_height;
+  const int64_t width_col = output_width;
+  const int64_t channels_col = channels * kernel_h * kernel_w;
+  for (int64_t c_col = 0; c_col < channels_col; ++c_col) {
+    int64_t w_offset = c_col % kernel_w;
+    int64_t h_offset = (c_col / kernel_w) % kernel_h;
+    int64_t c_im = c_col / kernel_h / kernel_w;
+    for (int64_t h_col = 0; h_col < height_col; ++h_col) {
+      int64_t h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
+      for (int64_t w_col = 0; w_col < width_col; ++w_col) {
+        int64_t w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
         data_col[(c_col * height_col + h_col) * width_col + w_col] =
           (h_im >= 0 && w_im >= 0 && h_im < height && w_im < width) ?
           data_im[(c_im * height + h_im) * width + w_im] : 0;
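The CPU im2col above lays each kernel-sized patch out as one column: the buffer has channels * kernel_h * kernel_w rows and height_col * width_col columns, with the output extents given by the same formula the shape checks use. A quick sanity sketch (hypothetical helper, not part of THNN):

#include <cassert>
#include <cstdint>

// Output extent for one spatial dimension, matching the formulas above.
int64_t conv_out_size(int64_t in, int64_t pad, int64_t kernel,
                      int64_t dilation, int64_t stride) {
  return (in + 2 * pad - (dilation * (kernel - 1) + 1)) / stride + 1;
}

int main() {
  // 3-channel 5x5 input, 3x3 kernel, stride 1, no padding, no dilation:
  int64_t h_col = conv_out_size(5, 0, 3, 1, 1);  // 3
  int64_t w_col = conv_out_size(5, 0, 3, 1, 1);  // 3
  int64_t rows = 3 * 3 * 3;                      // channels * kH * kW = 27
  int64_t cols = h_col * w_col;                  // 9
  assert(rows == 27 && cols == 9);
}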
@@ -81,26 +81,26 @@ static void THNN_(im2col)(const real* data_im, const int channels,
   }
 }
 
-static void THNN_(col2im)(const real* data_col, const int channels,
-      const int height, const int width,
-      const int output_height, const int output_width,
-      const int kernel_h, const int kernel_w,
-      const int pad_h, const int pad_w,
-      const int stride_h, const int stride_w,
-      const int dilation_h, const int dilation_w,
+static void THNN_(col2im)(const real* data_col, const int64_t channels,
+      const int64_t height, const int64_t width,
+      const int64_t output_height, const int64_t output_width,
+      const int64_t kernel_h, const int64_t kernel_w,
+      const int64_t pad_h, const int64_t pad_w,
+      const int64_t stride_h, const int64_t stride_w,
+      const int64_t dilation_h, const int64_t dilation_w,
       real* data_im) {
   memset(data_im, 0, sizeof(real) * height * width * channels);
-  const int height_col = output_height;
-  const int width_col = output_width;
-  const int channels_col = channels * kernel_h * kernel_w;
-  for (int c_col = 0; c_col < channels_col; ++c_col) {
-    int w_offset = c_col % kernel_w;
-    int h_offset = (c_col / kernel_w) % kernel_h;
-    int c_im = c_col / kernel_h / kernel_w;
-    for (int h_col = 0; h_col < height_col; ++h_col) {
-      int h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
-      for (int w_col = 0; w_col < width_col; ++w_col) {
-        int w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
+  const int64_t height_col = output_height;
+  const int64_t width_col = output_width;
+  const int64_t channels_col = channels * kernel_h * kernel_w;
+  for (int64_t c_col = 0; c_col < channels_col; ++c_col) {
+    int64_t w_offset = c_col % kernel_w;
+    int64_t h_offset = (c_col / kernel_w) % kernel_h;
+    int64_t c_im = c_col / kernel_h / kernel_w;
+    for (int64_t h_col = 0; h_col < height_col; ++h_col) {
+      int64_t h_im = h_col * stride_h - pad_h + h_offset * dilation_h;
+      for (int64_t w_col = 0; w_col < width_col; ++w_col) {
+        int64_t w_im = w_col * stride_w - pad_w + w_offset * dilation_w;
         if (h_im >= 0 && h_im < height && w_im >= 0 && w_im < width)
           data_im[(c_im * height + h_im) * width + w_im] +=
             data_col[(c_col * height_col + h_col) * width_col + w_col];
@@ -113,9 +113,9 @@ static inline void THNN_(Col2Im_shapeCheck)(
                          THNNState *state,
                          THTensor *input,
                          THTensor *gradOutput,
-                         int outputHeight, int outputWidth,
-                         int kH, int kW, int dH, int dW,
-                         int padH, int padW, int sH, int sW) {
+                         int64_t outputHeight, int64_t outputWidth,
+                         int64_t kH, int64_t kW, int64_t dH, int64_t dW,
+                         int64_t padH, int64_t padW, int64_t sH, int64_t sW) {
 
   THArgCheck(kW > 0 && kH > 0, 6,
              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
@@ -124,11 +124,11 @@ static inline void THNN_(Col2Im_shapeCheck)(
   THArgCheck(dW > 0 && dH > 0, 8,
              "dilation should be greater than zero, but got dH: %d dW: %d", dH, dW);
 
-  int ndim = THTensor_(nDimension)(input);
+  int64_t ndim = THTensor_(nDimension)(input);
   THNN_ARGCHECK(!input->is_empty() && (ndim == 2 || ndim == 3), 2, input,
                 "Expected non-empty 2D or 3D input tensor, but got input of shape %s");
 
-  int batch_dim = (ndim == 3) ? 0 : -1;
+  int64_t batch_dim = (ndim == 3) ? 0 : -1;
   int64_t nInputPlane = input->size[batch_dim + 1];
 
   if (nInputPlane % (kW * kH) != 0) {
@@ -161,11 +161,11 @@ void THNN_(Col2Im_updateOutput)(
            THNNState *state,
            THTensor *input,
            THTensor *output,
-           int outputHeight, int outputWidth,
-           int kH, int kW,
-           int dH, int dW,
-           int padH, int padW,
-           int sH, int sW) {
+           int64_t outputHeight, int64_t outputWidth,
+           int64_t kH, int64_t kW,
+           int64_t dH, int64_t dW,
+           int64_t padH, int64_t padW,
+           int64_t sH, int64_t sW) {
 
   THNN_(Col2Im_shapeCheck)(state, input, NULL, outputHeight, outputWidth,
                            kH, kW, dH, dW, padH, padW, sH, sW);
@@ -189,10 +189,10 @@ void THNN_(Col2Im_updateOutput)(
   THTensor *input_n = THTensor_(new)();
   THTensor *output_n = THTensor_(new)();
 
-  int height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
-  int width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
+  int64_t height_col = (outputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
+  int64_t width_col = (outputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
 
-  for (int elt = 0; elt < batchSize; elt++) {
+  for (int64_t elt = 0; elt < batchSize; elt++) {
     THTensor_(select)(input_n, input, 0, elt);
     THTensor_(select)(output_n, output, 0, elt);
 
@@ -220,10 +220,10 @@ void THNN_(Col2Im_updateGradInput)(
            THNNState *state,
            THTensor *gradOutput,
            THTensor *gradInput,
-           int kH, int kW,
-           int dH, int dW,
-           int padH, int padW,
-           int sH, int sW) {
+           int64_t kH, int64_t kW,
+           int64_t dH, int64_t dW,
+           int64_t padH, int64_t padW,
+           int64_t sH, int64_t sW) {
 
   THNN_(Im2Col_updateOutput)(state, gradOutput, gradInput,
                              kH, kW, dH, dW, padH, padW, sH, sW);
@@ -6,8 +6,8 @@ static inline void THNN_(Im2Col_shapeCheck)(
                          THNNState *state,
                          THTensor *input,
                          THTensor *gradOutput,
-                         int kH, int kW, int dH, int dW,
-                         int padH, int padW, int sH, int sW) {
+                         int64_t kH, int64_t kW, int64_t dH, int64_t dW,
+                         int64_t padH, int64_t padW, int64_t sH, int64_t sW) {
 
   THArgCheck(kW > 0 && kH > 0, 4,
              "kernel size should be greater than zero, but got kH: %d kW: %d", kH, kW);
@@ -16,21 +16,21 @@ static inline void THNN_(Im2Col_shapeCheck)(
   THArgCheck(sW > 0 && sH > 0, 10,
              "stride should be greater than zero, but got sH: %d sW: %d", sH, sW);
 
-  int ndim = THTensor_(nDimension)(input);
+  int64_t ndim = THTensor_(nDimension)(input);
   THNN_ARGCHECK(!input->is_empty() && (ndim == 3 || ndim == 4), 2, input,
                 "Expected non-empty 3D or 4D input tensor, but got input of shape %s");
 
-  int dim_batch = 0;
+  int64_t dim_batch = 0;
   if (ndim == 3) {
     dim_batch = -1;
   }
-  int nInputPlane = THTensor_(size)(input, dim_batch + 1);
-  int inputHeight = THTensor_(size)(input, dim_batch + 2);
-  int inputWidth = THTensor_(size)(input, dim_batch + 3);
-  int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
-  int outputWidth = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
-  int nOutputPlane = nInputPlane * kW * kH;
-  int outputLength = outputHeight * outputWidth;
+  int64_t nInputPlane = THTensor_(size)(input, dim_batch + 1);
+  int64_t inputHeight = THTensor_(size)(input, dim_batch + 2);
+  int64_t inputWidth = THTensor_(size)(input, dim_batch + 3);
+  int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
+  int64_t outputWidth = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
+  int64_t nOutputPlane = nInputPlane * kW * kH;
+  int64_t outputLength = outputHeight * outputWidth;
 
   if (outputHeight < 1 || outputWidth < 1) {
     THError("Given input with spatial size (%d, %d), kernel_size=(%d, %d), "
@@ -46,10 +46,10 @@ void THNN_(Im2Col_updateOutput)(
            THNNState *state,
            THTensor *input,
            THTensor *output,
-           int kH, int kW,
-           int dH, int dW,
-           int padH, int padW,
-           int sH, int sW) {
+           int64_t kH, int64_t kW,
+           int64_t dH, int64_t dW,
+           int64_t padH, int64_t padW,
+           int64_t sH, int64_t sW) {
 
   THNN_(Im2Col_shapeCheck)(state, input, NULL, kH, kW, dH, dW, padH, padW, sH, sW);
 
@@ -60,15 +60,15 @@ void THNN_(Im2Col_updateOutput)(
     THTensor_(resize4d)(input, 1, input->size[0], input->size[1], input->size[2]);
   }
 
-  int batchSize = THTensor_(size)(input, 0);
-  int nInputPlane = THTensor_(size)(input, 1);
-  int inputHeight = THTensor_(size)(input, 2);
-  int inputWidth = THTensor_(size)(input, 3);
+  int64_t batchSize = THTensor_(size)(input, 0);
+  int64_t nInputPlane = THTensor_(size)(input, 1);
+  int64_t inputHeight = THTensor_(size)(input, 2);
+  int64_t inputWidth = THTensor_(size)(input, 3);
 
-  int outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
-  int outputWidth = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
-  int nOutputPlane = nInputPlane * kW * kH;
-  int outputLength = outputHeight * outputWidth;
+  int64_t outputHeight = (inputHeight + 2 * padH - (dH * (kH - 1) + 1)) / sH + 1;
+  int64_t outputWidth = (inputWidth + 2 * padW - (dW * (kW - 1) + 1)) / sW + 1;
+  int64_t nOutputPlane = nInputPlane * kW * kH;
+  int64_t outputLength = outputHeight * outputWidth;
 
   THTensor_(resize3d)(output, batchSize, nOutputPlane, outputLength);
   THTensor_(zero)(output);
@@ -76,7 +76,7 @@ void THNN_(Im2Col_updateOutput)(
   THTensor *input_n = THTensor_(new)();
   THTensor *output_n = THTensor_(new)();
 
-  for (int elt = 0; elt < batchSize; elt++) {
+  for (int64_t elt = 0; elt < batchSize; elt++) {
     THTensor_(select)(input_n, input, 0, elt);
     THTensor_(select)(output_n, output, 0, elt);
 
@@ -102,11 +102,11 @@ void THNN_(Im2Col_updateGradInput)(
            THNNState *state,
            THTensor *gradOutput,
            THTensor *gradInput,
-           int inputHeight, int inputWidth,
-           int kH, int kW,
-           int dH, int dW,
-           int padH, int padW,
-           int sH, int sW) {
+           int64_t inputHeight, int64_t inputWidth,
+           int64_t kH, int64_t kW,
+           int64_t dH, int64_t dW,
+           int64_t padH, int64_t padW,
+           int64_t sH, int64_t sW) {
 
 
   THNN_(Col2Im_updateOutput)(state, gradOutput, gradInput,
@@ -220,8 +220,8 @@ void THNN_(SpatialDilatedConvolution_updateGradInput)(
       dilationH, dilationW, 0);
 
   // Params
-  int nInputPlane = weight->size[1];
-  int nOutputPlane = weight->size[0];
+  int64_t nInputPlane = weight->size[1];
+  int64_t nOutputPlane = weight->size[0];
 
   input = THTensor_(newContiguous)(input);
   weight = THTensor_(newContiguous)(weight);
@@ -221,8 +221,8 @@ void THNN_(SpatialFullDilatedConvolution_updateGradInput)(
     (input, gradOutput, weight, NULL, kH, kW, dH, dW, padH, padW,
      dilationH, dilationW, adjH, adjW, 0);
 
-  int nInputPlane = THTensor_(size)(weight,0);
-  int nOutputPlane = THTensor_(size)(weight,1);
+  int64_t nInputPlane = THTensor_(size)(weight,0);
+  int64_t nOutputPlane = THTensor_(size)(weight,1);
 
   input = THTensor_(newContiguous)(input);
   gradOutput = THTensor_(newContiguous)(gradOutput);
@@ -328,7 +328,7 @@ void THNN_(SpatialFullDilatedConvolution_accGradParameters)(
     (input, gradOutput, gradWeight, gradBias, kH, kW, dH, dW, padH, padW,
      dilationH, dilationW, adjH, adjW, 1);
 
-  int nOutputPlane;
+  int64_t nOutputPlane;
   if (gradWeight) {
     nOutputPlane = THTensor_(size)(gradWeight, 1);
   } else if (gradBias) {
@@ -147,39 +147,39 @@ TH_API void THNN_(Im2Col_updateOutput)(
           THNNState *state,
           THTensor *input,
           THTensor *output,
-          int kH, int kW,
-          int dH, int dW,
-          int padH, int padW,
-          int sH, int sW);
+          int64_t kH, int64_t kW,
+          int64_t dH, int64_t dW,
+          int64_t padH, int64_t padW,
+          int64_t sH, int64_t sW);
 
 TH_API void THNN_(Im2Col_updateGradInput)(
           THNNState *state,
           THTensor *gradOutput,
           THTensor *gradInput,
-          int inputHeight, int inputWidth,
-          int kH, int kW,
-          int dH, int dW,
-          int padH, int padW,
-          int sH, int sW);
+          int64_t inputHeight, int64_t inputWidth,
+          int64_t kH, int64_t kW,
+          int64_t dH, int64_t dW,
+          int64_t padH, int64_t padW,
+          int64_t sH, int64_t sW);
 
 TH_API void THNN_(Col2Im_updateOutput)(
           THNNState *state,
          THTensor *input,
           THTensor *output,
-          int outputHeight, int outputWidth,
-          int kH, int kW,
-          int dH, int dW,
-          int padH, int padW,
-          int sH, int sW);
+          int64_t outputHeight, int64_t outputWidth,
+          int64_t kH, int64_t kW,
+          int64_t dH, int64_t dW,
+          int64_t padH, int64_t padW,
+          int64_t sH, int64_t sW);
 
 TH_API void THNN_(Col2Im_updateGradInput)(
           THNNState *state,
          THTensor *gradOutput,
          THTensor *gradInput,
-          int kH, int kW,
-          int dH, int dW,
-          int padH, int padW,
-          int sH, int sW);
+          int64_t kH, int64_t kW,
+          int64_t dH, int64_t dW,
+          int64_t padH, int64_t padW,
+          int64_t sH, int64_t sW);
 
 TH_API void THNN_(L1Cost_updateOutput)(
           THNNState *state,            // library's state
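Each THNN_(...) declaration above is compiled once per scalar type through TH's generic-file mechanism, so a single header yields THNN_FloatIm2Col_updateOutput, THNN_DoubleIm2Col_updateOutput, and so on. A simplified sketch of the token-pasting trick (the real definitions live in the THGenerate*.h headers):

// Two-level concat so that Real expands before pasting.
#define TH_CONCAT_3_EXPAND(a, b, c) a##b##c
#define TH_CONCAT_3(a, b, c) TH_CONCAT_3_EXPAND(a, b, c)

#define Real Float
#define THNN_(NAME) TH_CONCAT_3(THNN_, Real, NAME)

// THNN_(Im2Col_updateOutput) now expands to THNN_FloatIm2Col_updateOutput.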
@@ -224,11 +224,7 @@ if(USE_CUDA)
   # it. We will then manually add the cudart library as interface libs.
   set(__tmp ${CUDA_LIBRARIES})
   set(CUDA_LIBRARIES PRIVATE ${CUDA_LIBRARIES})
-  if(CAFFE2_STATIC_LINK_CUDA)
-    torch_cuda_based_add_library(caffe2_gpu STATIC ${Caffe2_GPU_SRCS})
-  else()
-    torch_cuda_based_add_library(caffe2_gpu ${Caffe2_GPU_SRCS})
-  endif()
+  torch_cuda_based_add_library(caffe2_gpu ${Caffe2_GPU_SRCS})
   set(CUDA_LIBRARIES ${__tmp})
   target_link_libraries(caffe2_gpu INTERFACE caffe2::cudart)
 
@@ -1,4 +1,5 @@
 #include "caffe2/utils/proto_wrap.h"
+#include "caffe2/core/common.h"
 
 #include <google/protobuf/stubs/common.h>
 #include <google/protobuf/generated_message_util.h>
@@ -8,7 +9,7 @@ namespace caffe {
 // Caffe wrapper functions for protobuf's GetEmptyStringAlreadyInited() function
 // used to avoid duplicated global variable in the case when protobuf
 // is built with hidden visibility.
-const ::std::string& GetEmptyStringAlreadyInited() {
+CAFFE2_API const ::std::string& GetEmptyStringAlreadyInited() {
   return ::google::protobuf::internal::GetEmptyStringAlreadyInited();
 }
 
@@ -19,7 +20,7 @@ namespace ONNX_NAMESPACE {
 // ONNX wrapper functions for protobuf's GetEmptyStringAlreadyInited() function
 // used to avoid duplicated global variable in the case when protobuf
 // is built with hidden visibility.
-const ::std::string& GetEmptyStringAlreadyInited() {
+CAFFE2_API const ::std::string& GetEmptyStringAlreadyInited() {
   return ::google::protobuf::internal::GetEmptyStringAlreadyInited();
 }
 
@@ -30,7 +31,7 @@ namespace caffe2 {
 // Caffe2 wrapper functions for protobuf's GetEmptyStringAlreadyInited() function
 // used to avoid duplicated global variable in the case when protobuf
 // is built with hidden visibility.
-const ::std::string& GetEmptyStringAlreadyInited() {
+CAFFE2_API const ::std::string& GetEmptyStringAlreadyInited() {
   return ::google::protobuf::internal::GetEmptyStringAlreadyInited();
 }
 
@@ -1102,6 +1102,11 @@ Linear functions
 
 .. autofunction:: linear
 
+:hidden:`bilinear`
+~~~~~~~~~~~~~~~~~~
+
+.. autofunction:: bilinear
+
 Dropout functions
 -----------------
 
@@ -337,6 +337,7 @@ view of a storage and defines numeric operations on it.
    .. automethod:: rsqrt
    .. automethod:: rsqrt_
    .. automethod:: scatter_
+   .. automethod:: scatter_add_
    .. automethod:: select
    .. automethod:: set_
    .. automethod:: share_memory_
@@ -251,6 +251,7 @@ Spectral Ops
 
 Other Operations
 ~~~~~~~~~~~~~~~~~~~~~~
+.. autofunction:: bincount
 .. autofunction:: cross
 .. autofunction:: diag
 .. autofunction:: diagflat
@@ -258,6 +259,7 @@ Other Operations
 .. autofunction:: einsum
 .. autofunction:: flip
 .. autofunction:: histc
+.. autofunction:: meshgrid
 .. autofunction:: renorm
 .. autofunction:: trace
 .. autofunction:: tril
setup.py
@@ -152,6 +152,8 @@ IS_LINUX = (platform.system() == 'Linux')
 FULL_CAFFE2 = check_env_flag('FULL_CAFFE2')
 BUILD_PYTORCH = check_env_flag('BUILD_PYTORCH')
 
+USE_CUDA_STATIC_LINK = check_env_flag('USE_CUDA_STATIC_LINK')
+
 NUM_JOBS = multiprocessing.cpu_count()
 max_jobs = os.getenv("MAX_JOBS")
 if max_jobs is not None:
@@ -318,6 +320,8 @@ def build_libs(libs):
     if USE_CUDA:
         my_env["CUDA_BIN_PATH"] = CUDA_HOME
         build_libs_cmd += ['--use-cuda']
+        if USE_CUDA_STATIC_LINK:
+            build_libs_cmd += ['--cuda-static-link']
     if USE_ROCM:
         build_libs_cmd += ['--use-rocm']
     if USE_NNPACK:
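Taken together with the ATEN_STATIC_CUDA hunk earlier in this compare (which appends libcudart_static.a and libculibos.a to the CUDA dependency libs), setting the USE_CUDA_STATIC_LINK environment variable at build time now forwards a --cuda-static-link flag to the library build step.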
@@ -28,7 +28,7 @@ import errno
 import torch
 import torch.cuda
 from torch._utils_internal import get_writable_path
-from torch._six import string_classes
+from torch._six import string_classes, inf
 import torch.backends.cudnn
 import torch.backends.mkl
 
||||
@ -353,7 +353,7 @@ class TestCase(unittest.TestCase):
|
||||
elif isinstance(x, bool) and isinstance(y, bool):
|
||||
super(TestCase, self).assertEqual(x, y, message)
|
||||
elif isinstance(x, Number) and isinstance(y, Number):
|
||||
if abs(x) == float('inf') or abs(y) == float('inf'):
|
||||
if abs(x) == inf or abs(y) == inf:
|
||||
if allow_inf:
|
||||
super(TestCase, self).assertEqual(x, y, message)
|
||||
else:
|
||||
|
@@ -10,6 +10,7 @@ from collections import OrderedDict
 from itertools import product
 from operator import mul, itemgetter
 from functools import reduce, wraps
+from torch._six import inf, nan
 from torch.autograd.gradcheck import gradgradcheck, gradcheck
 from torch.autograd.function import once_differentiable
 from torch.autograd.profiler import profile
@@ -1524,12 +1525,12 @@ class TestAutograd(TestCase):
             pyscalar = -12345.1
             f[0] = pyscalar
             self.assertEqual(float(f), pyscalar)
-            f[0] = float('nan')
+            f[0] = nan
             self.assertTrue(math.isnan(float(f)))
-            f[0] = float('inf')
-            self.assertEqual(float(f), float('inf'), allow_inf=True)
-            f[0] = float('-inf')
-            self.assertEqual(float(f), float('-inf'), allow_inf=True)
+            f[0] = inf
+            self.assertEqual(float(f), inf, allow_inf=True)
+            f[0] = -inf
+            self.assertEqual(float(f), -inf, allow_inf=True)
 
             # integral -> floating point
             # check we can convert something that loses precision
@@ -1539,11 +1540,11 @@ class TestAutograd(TestCase):
             self.assertEqual(float(l), float(pyscalar))
 
             # floating point -> integral
-            f[0] = float('nan')
+            f[0] = nan
             self.assertRaises(ValueError, lambda: integral_conv(f[0]))
-            f[0] = float('inf')
+            f[0] = inf
             self.assertRaises(OverflowError, lambda: integral_conv(f[0]))
-            f[0] = float('-inf')
+            f[0] = -inf
             self.assertRaises(OverflowError, lambda: integral_conv(f[0]))
             f[0] = sys.float_info.max
             self.assertEqual(integral_conv(f), sys.float_info.max)
@@ -1558,9 +1559,9 @@ class TestAutograd(TestCase):
         test_nonzero(l, -2, True)
         test_nonzero(f, 0.0, False)
         test_nonzero(f, sys.float_info.min, True)
-        test_nonzero(f, float('nan'), bool(float('nan')))
-        test_nonzero(f, float('inf'), bool(float('inf')))
-        test_nonzero(f, float('-inf'), bool(float('-inf')))
+        test_nonzero(f, nan, bool(nan))
+        test_nonzero(f, inf, bool(inf))
+        test_nonzero(f, -inf, bool(-inf))
 
     def test_pyscalar_conversions(self):
         self._test_pyscalar_conversions(lambda x: x, lambda x: int(x))
@@ -2825,7 +2826,7 @@ method_tests = [
    ('std', (S,), (0, True, True), 'keepdim_dim_1d', [0]),
    ('renorm', (S, S, S), (2, 1, 0.5), 'dim', [1]),
    ('renorm', (S, S, S), (1, 2, 3), 'norm_1'),
-    ('renorm', (S, S, S), (float('inf'), 2, 0.5), 'norm_inf'),
+    ('renorm', (S, S, S), (inf, 2, 0.5), 'norm_inf'),
    ('repeat', (S,), (2,), 'single_number'),
    ('repeat', (), (2, 3), 'scalar'),
    ('repeat', (2, 2), (3, 2)),
@@ -2917,7 +2918,7 @@ method_tests = [
    ('norm', (S, S), (0.5,), '0_5'),
    ('norm', (S, S), (1,), '1'),
    ('norm', (S, S), (3,), '3'),
-    ('norm', (S, S), (float('inf'),), 'inf'),
+    ('norm', (S, S), (inf,), 'inf'),
    ('norm', (S, S), (-1,), 'neg_1'),
    ('norm', (S, S), (-0.5,), 'neg_0_5'),
    ('norm', (S, S), (-1.5,), 'neg_1_5'),
@@ -1,3 +1,4 @@
+import os
 import unittest
 import sys
 
@@ -15,7 +16,10 @@ import common
 
 from torch.utils.cpp_extension import CUDA_HOME
 TEST_CUDA = torch.cuda.is_available() and CUDA_HOME is not None
-TEST_CUDNN = TEST_CUDA and torch.backends.cudnn.is_available()
+TEST_CUDNN = False
+if TEST_CUDA:
+    CUDNN_HEADER_EXISTS = os.path.isfile(os.path.join(CUDA_HOME, 'include/cudnn.h'))
+    TEST_CUDNN = TEST_CUDA and CUDNN_HEADER_EXISTS and torch.backends.cudnn.is_available()
 
 
 class TestCppExtension(common.TestCase):
@@ -12,6 +12,7 @@ import torch
 import torch.cuda
 import torch.cuda.comm as comm
 from torch import multiprocessing as mp
+from torch._six import inf, nan
 
 from test_torch import TestTorch
 from common import TestCase, get_gpu_type, to_gpu, freeze_rng_state, run_tests, \
@@ -782,7 +783,7 @@ class TestCuda(TestCase):
             if not end0:
                 gen1_max_times = torch.LongTensor(1).random_(0, 3)[0]
             else:
-                gen1_max_times = float('inf')
+                gen1_max_times = inf
             t = 0
             while t < gen1_max_times and not end1:
                 end1 = advance(gen1, end1)
@@ -901,7 +902,7 @@ class TestCuda(TestCase):
                  (lambda x: x.max(0)[0], 'max_dim')]
         for f, name in tests:
             a = torch.arange(25.0).view(5, 5)
-            a[2, 2] = float('nan')
+            a[2, 2] = nan
             actual = f(a.cuda()).cpu()
             expected = f(a).cpu()
             self.assertEqual(torch.isnan(actual), torch.isnan(expected), 'nans for {}'.format(name))
@@ -1503,9 +1504,9 @@ class TestCuda(TestCase):
     def test_multinomial_invalid_probs_cuda(self):
         test_method = TestCuda._test_multinomial_invalid_probs_cuda
         self._spawn_method(test_method, torch.Tensor([0, -1]))
-        self._spawn_method(test_method, torch.Tensor([0, float('inf')]))
-        self._spawn_method(test_method, torch.Tensor([0, float('-inf')]))
-        self._spawn_method(test_method, torch.Tensor([0, float('nan')]))
+        self._spawn_method(test_method, torch.Tensor([0, inf]))
+        self._spawn_method(test_method, torch.Tensor([0, -inf]))
+        self._spawn_method(test_method, torch.Tensor([0, nan]))
 
     def test_broadcast(self):
         TestTorch._test_broadcast(self, lambda t: t.cuda())
@@ -1686,7 +1687,6 @@ class TestCuda(TestCase):
         cpu_tensor = torch.tensor([-0.999999994, -1.999999994, -2.0000000111,
                                    -100.99999994, -1931.99999994, 0.000000111,
                                    -0.000000111, 0, -1, -2, -931])
-        nan = float('nan')
         expected_errors = torch.tensor([0, 0, 0, 0, 0, 0, 0, nan, nan, nan, nan])
         gpu_tensor = cpu_tensor.cuda()
         cpu_out = cpu_tensor.digamma()
@@ -30,6 +30,7 @@ from itertools import product
 from random import shuffle
 
 import torch
+from torch._six import inf
 from common import TestCase, run_tests, set_rng_seed, TEST_WITH_UBSAN
 from common_cuda import TEST_CUDA
 from torch.autograd import grad, gradcheck
@@ -782,7 +783,7 @@ class TestDistributions(TestCase):
         s = 0.3
         self.assertEqual(Geometric(p).sample((8,)).size(), (8, 3))
         self.assertEqual(Geometric(1).sample(), 0)
-        self.assertEqual(Geometric(1).log_prob(torch.tensor(1.)), -float('inf'), allow_inf=True)
+        self.assertEqual(Geometric(1).log_prob(torch.tensor(1.)), -inf, allow_inf=True)
         self.assertEqual(Geometric(1).log_prob(torch.tensor(0.)), 0)
         self.assertFalse(Geometric(p).sample().requires_grad)
         self.assertEqual(Geometric(r).sample((8,)).size(), (8,))
@ -1162,8 +1163,8 @@ class TestDistributions(TestCase):
|
||||
uniform = Uniform(low_1d, high_1d)
|
||||
above_high = torch.tensor([4.0])
|
||||
below_low = torch.tensor([-1.0])
|
||||
self.assertEqual(uniform.log_prob(above_high).item(), -float('inf'), allow_inf=True)
|
||||
self.assertEqual(uniform.log_prob(below_low).item(), -float('inf'), allow_inf=True)
|
||||
self.assertEqual(uniform.log_prob(above_high).item(), -inf, allow_inf=True)
|
||||
self.assertEqual(uniform.log_prob(below_low).item(), -inf, allow_inf=True)
|
||||
|
||||
# check cdf computation when value outside range
|
||||
self.assertEqual(uniform.cdf(below_low).item(), 0)
|
||||
@ -1190,7 +1191,7 @@ class TestDistributions(TestCase):
|
||||
loc_1d = torch.zeros(1, requires_grad=True)
|
||||
scale_1d = torch.ones(1, requires_grad=True)
|
||||
self.assertTrue(is_all_nan(Cauchy(loc_1d, scale_1d).mean))
|
||||
self.assertEqual(Cauchy(loc_1d, scale_1d).variance, float('inf'), allow_inf=True)
|
||||
self.assertEqual(Cauchy(loc_1d, scale_1d).variance, inf, allow_inf=True)
|
||||
self.assertEqual(Cauchy(loc, scale).sample().size(), (5, 5))
|
||||
self.assertEqual(Cauchy(loc, scale).sample((7,)).size(), (7, 5, 5))
|
||||
self.assertEqual(Cauchy(loc_1d, scale_1d).sample().size(), (1,))
|
||||
@ -1216,7 +1217,7 @@ class TestDistributions(TestCase):
|
||||
scale = torch.ones(5, 5, requires_grad=True)
|
||||
scale_1d = torch.ones(1, requires_grad=True)
|
||||
self.assertTrue(is_all_nan(HalfCauchy(scale_1d).mean))
|
||||
self.assertEqual(HalfCauchy(scale_1d).variance, float('inf'), allow_inf=True)
|
||||
self.assertEqual(HalfCauchy(scale_1d).variance, inf, allow_inf=True)
|
||||
self.assertEqual(HalfCauchy(scale).sample().size(), (5, 5))
|
||||
self.assertEqual(HalfCauchy(scale).sample((7,)).size(), (7, 5, 5))
|
||||
self.assertEqual(HalfCauchy(scale_1d).sample().size(), (1,))
|
||||
@ -1714,8 +1715,8 @@ class TestDistributions(TestCase):
|
||||
alpha = torch.tensor(torch.randn(2, 3).abs(), requires_grad=True)
|
||||
scale_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True)
|
||||
alpha_1d = torch.tensor(torch.randn(1).abs(), requires_grad=True)
|
||||
self.assertEqual(Pareto(scale_1d, 0.5).mean, float('inf'), allow_inf=True)
|
||||
self.assertEqual(Pareto(scale_1d, 0.5).variance, float('inf'), allow_inf=True)
|
||||
self.assertEqual(Pareto(scale_1d, 0.5).mean, inf, allow_inf=True)
|
||||
self.assertEqual(Pareto(scale_1d, 0.5).variance, inf, allow_inf=True)
|
||||
self.assertEqual(Pareto(scale, alpha).sample().size(), (2, 3))
|
||||
self.assertEqual(Pareto(scale, alpha).sample((5,)).size(), (5, 2, 3))
|
||||
self.assertEqual(Pareto(scale_1d, alpha_1d).sample((1,)).size(), (1, 1))
|
||||
@ -1832,7 +1833,7 @@ class TestDistributions(TestCase):
|
||||
df_1d = torch.tensor(torch.exp(torch.randn(1)), requires_grad=True)
|
||||
self.assertTrue(is_all_nan(StudentT(1).mean))
|
||||
self.assertTrue(is_all_nan(StudentT(1).variance))
|
||||
self.assertEqual(StudentT(2).variance, float('inf'), allow_inf=True)
|
||||
self.assertEqual(StudentT(2).variance, inf, allow_inf=True)
|
||||
self.assertEqual(StudentT(df).sample().size(), (2, 3))
|
||||
self.assertEqual(StudentT(df).sample((5,)).size(), (5, 2, 3))
|
||||
self.assertEqual(StudentT(df_1d).sample((1,)).size(), (1, 1))
|
||||
@ -2962,7 +2963,7 @@ class TestKL(TestCase):
|
||||
|
||||
def test_kl_infinite(self):
|
||||
for p, q in self.infinite_examples:
|
||||
self.assertTrue((kl_divergence(p, q) == float('inf')).all(),
|
||||
self.assertTrue((kl_divergence(p, q) == inf).all(),
|
||||
'Incorrect KL({}, {})'.format(type(p).__name__, type(q).__name__))
|
||||
|
||||
def test_kl_edgecases(self):
|
||||
@ -2996,7 +2997,7 @@ class TestKL(TestCase):
|
||||
continue
|
||||
x = dist.sample(sample_shape=(60000,))
|
||||
expected = -dist.log_prob(x).mean(0)
|
||||
ignore = (expected == float('inf'))
|
||||
ignore = (expected == inf)
|
||||
expected[ignore] = actual[ignore]
|
||||
self.assertEqual(actual, expected, prec=0.2, message='\n'.join([
|
||||
'{} example {}/{}, incorrect .entropy().'.format(Dist.__name__, i + 1, len(params)),
|
||||
@ -3157,12 +3158,12 @@ class TestNumericalStability(TestCase):
|
||||
|
||||
def test_categorical_log_prob_with_logits(self):
|
||||
for dtype in ([torch.float, torch.double]):
|
||||
p = torch.tensor([-float('inf'), 0], dtype=dtype, requires_grad=True)
|
||||
p = torch.tensor([-inf, 0], dtype=dtype, requires_grad=True)
|
||||
categorical = OneHotCategorical(logits=p)
|
||||
log_pdf_prob_1 = categorical.log_prob(torch.tensor([0, 1], dtype=dtype))
|
||||
self.assertEqual(log_pdf_prob_1.item(), 0)
|
||||
log_pdf_prob_0 = categorical.log_prob(torch.tensor([1, 0], dtype=dtype))
|
||||
self.assertEqual(log_pdf_prob_0.item(), -float('inf'), allow_inf=True)
|
||||
self.assertEqual(log_pdf_prob_0.item(), -inf, allow_inf=True)
|
||||
|
||||
def test_multinomial_log_prob(self):
|
||||
for dtype in ([torch.float, torch.double]):
|
||||
@ -3174,12 +3175,12 @@ class TestNumericalStability(TestCase):
|
||||
|
||||
def test_multinomial_log_prob_with_logits(self):
|
||||
for dtype in ([torch.float, torch.double]):
|
||||
p = torch.tensor([-float('inf'), 0], dtype=dtype, requires_grad=True)
|
||||
p = torch.tensor([-inf, 0], dtype=dtype, requires_grad=True)
|
||||
multinomial = Multinomial(10, logits=p)
|
||||
log_pdf_prob_1 = multinomial.log_prob(torch.tensor([0, 10], dtype=dtype))
|
||||
self.assertEqual(log_pdf_prob_1.item(), 0)
|
||||
log_pdf_prob_0 = multinomial.log_prob(torch.tensor([10, 0], dtype=dtype))
|
||||
self.assertEqual(log_pdf_prob_0.item(), -float('inf'), allow_inf=True)
|
||||
self.assertEqual(log_pdf_prob_0.item(), -inf, allow_inf=True)
|
||||
|
||||
|
||||
class TestLazyLogitsInitialization(TestCase):
|
||||
|
@@ -15,6 +15,7 @@ import hashlib
 import os
 
 import torch
+from torch._six import inf, nan
 import torch.backends.cudnn as cudnn
 import torch.nn as nn
 import torch.nn.functional as F
@@ -1465,7 +1466,7 @@ class TestNN(NNTestCase):
 
 def compute_norm(norm_type):
     norm_type = float(norm_type)
-    if norm_type != float('inf'):
+    if norm_type != inf:
         total_norm = 0
         for p in l.parameters():
             total_norm += p.grad.data.abs().pow(norm_type).sum()
@@ -1560,8 +1561,6 @@ class TestNN(NNTestCase):
 # We don't want to make propagating NaN a hard requirement on ops, but for
 # these easy ones, we should make them do so.
 def _test_nonlinearity_propagate_nan(self, device):
-    nan = float('nan')
-
     def test(nonlinearity, *args, **kwargs):
         x = torch.tensor([nan], device=device)
         fn = getattr(F, nonlinearity)
@@ -2547,7 +2546,7 @@ class TestNN(NNTestCase):
 for num_dim in [1, 2, 3]:
     fn_name = '{}max_pool{}d'.format(adaptive, num_dim)
     fn = getattr(F, fn_name)
-    x = torch.full([1, 1] + num_dim * [3], float('nan'))
+    x = torch.full([1, 1] + num_dim * [3], nan)
     res = fn(x, 1 if adaptive else 3)
     self.assertTrue(math.isnan(res.item()))
 
@@ -3,6 +3,7 @@ import unittest
 import functools
 from copy import deepcopy
 import torch
+from torch._six import inf
 import torch.optim as optim
 import torch.legacy.optim as old_optim
 import torch.nn.functional as F
@@ -478,8 +479,8 @@ class TestOptim(TestCase):
 @unittest.skipIf(TEST_WITH_UBSAN, "division-by-zero error with UBSAN")
 def test_lbfgs_return_type(self):
     params = [torch.randn(10, 5), torch.randn(10)]
-    opt1 = optim.LBFGS(params, 0.01, tolerance_grad=float('inf'))
-    opt2 = optim.LBFGS(params, 0.01, tolerance_grad=-float('inf'))
+    opt1 = optim.LBFGS(params, 0.01, tolerance_grad=inf)
+    opt2 = optim.LBFGS(params, 0.01, tolerance_grad=-inf)
 
     def closure():
         return torch.Tensor([10])
@@ -16,6 +16,7 @@ import gzip
 from torch._utils_internal import get_file_path, get_file_path_2
 from torch.utils.dlpack import from_dlpack, to_dlpack
 from torch._utils import _rebuild_tensor
+from torch._six import inf, nan
 from itertools import product, combinations
 from functools import reduce
 from torch import multiprocessing as mp
@@ -241,17 +242,17 @@ class TestTorch(TestCase):
 self.assertTrue(torch.allclose(x, y, rtol=0.01, atol=0.0))
 self.assertFalse(torch.allclose(x, y))
 self.assertTrue(torch.allclose(torch.tensor([0.0]), torch.tensor([1e-8])))
-x = torch.tensor([2.0, 3.0, float('nan')])
-y = torch.tensor([2.01, 3.01, float('nan')])
+x = torch.tensor([2.0, 3.0, nan])
+y = torch.tensor([2.01, 3.01, nan])
 self.assertFalse(torch.allclose(x, y, rtol=1e-2))
 self.assertTrue(torch.allclose(x, y, rtol=1e-2, equal_nan=True))
 self.assertFalse(torch.allclose(x, y, rtol=1e-3, equal_nan=True))
-inf = torch.tensor([float('inf')])
-self.assertTrue(torch.allclose(inf, inf))
-self.assertTrue(torch.allclose(-inf, -inf))
-self.assertFalse(torch.allclose(inf, -inf))
-self.assertFalse(torch.allclose(inf, torch.tensor([1e20])))
-self.assertFalse(torch.allclose(-inf, torch.tensor([-1e20])))
+inf_t = torch.tensor([inf])
+self.assertTrue(torch.allclose(inf_t, inf_t))
+self.assertTrue(torch.allclose(-inf_t, -inf_t))
+self.assertFalse(torch.allclose(inf_t, -inf_t))
+self.assertFalse(torch.allclose(inf_t, torch.tensor([1e20])))
+self.assertFalse(torch.allclose(-inf_t, torch.tensor([-1e20])))
 
 def test_linear_algebra_scalar_raises(self):
     m = torch.randn(5, 5)
@@ -359,13 +360,13 @@ class TestTorch(TestCase):
 try:
     return math.sinh(x)
 except OverflowError:
-    return float('inf') if x > 0 else float('-inf')
+    return inf if x > 0 else -inf
 self._test_math(torch.sinh, sinh)
 
 def test_lgamma(self):
     def lgamma(x):
         if x <= 0 and x == int(x):
-            return float('inf')
+            return inf
         return math.lgamma(x)
     self._test_math(torch.lgamma, lgamma)
 
@@ -392,14 +393,14 @@ class TestTorch(TestCase):
 # scipy 1.1.0 changed when it returns +/-inf vs. NaN
 def torch_digamma_without_inf(inp):
     res = torch.digamma(inp)
-    res[(res == float('-inf')) | (res == float('inf'))] = float('nan')
+    res[(res == -inf) | (res == inf)] = nan
     return res
 
 def scipy_digamma_without_inf(inp):
     res = digamma(inp)
     if np.isscalar(res):
-        return res if np.isfinite(res) else float('nan')
-    res[np.isinf(res)] = float('nan')
+        return res if np.isfinite(res) else nan
+    res[np.isinf(res)] = nan
     return res
 
 self._test_math(torch_digamma_without_inf, scipy_digamma_without_inf, self._digamma_input())
@@ -413,7 +414,7 @@ class TestTorch(TestCase):
 self._digamma_input(test_poles=False))
 
 def test_asin(self):
-    self._test_math(torch.asin, lambda x: math.asin(x) if abs(x) <= 1 else float('nan'))
+    self._test_math(torch.asin, lambda x: math.asin(x) if abs(x) <= 1 else nan)
 
 def test_cos(self):
     self._test_math_by_name('cos')
@@ -425,11 +426,11 @@ class TestTorch(TestCase):
 except OverflowError:
     # Return inf on overflow.
     # See http://en.cppreference.com/w/cpp/numeric/math/cosh
-    return float('inf')
+    return inf
 self._test_math(torch.cosh, cosh)
 
 def test_acos(self):
-    self._test_math(torch.acos, lambda x: math.acos(x) if abs(x) <= 1 else float('nan'))
+    self._test_math(torch.acos, lambda x: math.acos(x) if abs(x) <= 1 else nan)
 
 def test_tan(self):
     self._test_math_by_name('tan')
@@ -443,36 +444,36 @@ class TestTorch(TestCase):
 def test_log(self):
     def log(x):
         if x == 0:
-            return float('-inf')
+            return -inf
         elif x < 0:
-            return float('nan')
+            return nan
         return math.log(x)
     self._test_math(torch.log, log)
 
 def test_log10(self):
     def log10(x):
         if x == 0:
-            return float('-inf')
+            return -inf
         elif x < 0:
-            return float('nan')
+            return nan
         return math.log10(x)
     self._test_math(torch.log10, log10)
 
 def test_log1p(self):
     def log1p(x):
         if x == -1:
-            return float('-inf')
+            return -inf
         elif x < -1:
-            return float('nan')
+            return nan
         return math.log1p(x)
     self._test_math(torch.log1p, log1p)
 
 def test_log2(self):
     def log2(x):
         if x == 0:
-            return float('-inf')
+            return -inf
         elif x < 0:
-            return float('nan')
+            return nan
         try:
             return math.log2(x)
         except AttributeError:
@@ -480,7 +481,7 @@ class TestTorch(TestCase):
 self._test_math(torch.log2, log2)
 
 def test_sqrt(self):
-    self._test_math(torch.sqrt, lambda x: math.sqrt(x) if x >= 0 else float('nan'))
+    self._test_math(torch.sqrt, lambda x: math.sqrt(x) if x >= 0 else nan)
 
 def test_erf(self):
     self._test_math_by_name('erf')
@@ -493,9 +494,9 @@ class TestTorch(TestCase):
 inputValues = torch.randn(4, 4, out=tensor()).clamp(-2., 2.)
 self.assertEqual(tensor(inputValues).erf().erfinv(), tensor(inputValues))
 # test inf
-self.assertTrue(torch.equal(tensor([-1, 1]).erfinv(), tensor([float('-inf'), float('inf')])))
+self.assertTrue(torch.equal(tensor([-1, 1]).erfinv(), tensor([-inf, inf])))
 # test nan
-self.assertEqual(tensor([-2, 2]).erfinv(), tensor([float('nan'), float('nan')]))
+self.assertEqual(tensor([-2, 2]).erfinv(), tensor([nan, nan]))
 
 checkType(torch.FloatTensor)
 checkType(torch.DoubleTensor)
@@ -505,7 +506,7 @@ class TestTorch(TestCase):
 try:
     return math.exp(x)
 except OverflowError:
-    return float('inf')
+    return inf
 self._test_math(torch.exp, exp)
 
 def test_expm1(self):
@@ -513,7 +514,7 @@ class TestTorch(TestCase):
 try:
     return math.expm1(x)
 except OverflowError:
-    return float('inf')
+    return inf
 self._test_math(torch.expm1, expm1)
 
 def test_floor(self):
@@ -525,9 +526,9 @@ class TestTorch(TestCase):
 def test_rsqrt(self):
     def rsqrt(x):
         if x == 0:
-            return float('inf')
+            return inf
         elif x < 0:
-            return float('nan')
+            return nan
         return 1.0 / math.sqrt(x)
 
     self._test_math(torch.rsqrt, rsqrt)
@@ -615,7 +616,7 @@ class TestTorch(TestCase):
 # NaNs
 for index in (0, 4, 99):
     m1 = torch.randn(100)
-    m1[index] = float('nan')
+    m1[index] = nan
     res1val, res1ind = torch.max(m1, 0)
     self.assertTrue(math.isnan(res1val))
     self.assertEqual(res1ind, index)
@@ -633,14 +634,14 @@ class TestTorch(TestCase):
 # full reduction
 x = torch.randn(5, device=device)
 xn = x.cpu().numpy()
-for p in [0, 1, 2, 3, 4, float('inf')]:
+for p in [0, 1, 2, 3, 4, inf]:
     res = x.norm(p).item()
     expected = np.linalg.norm(xn, p)
     self.assertEqual(res, expected, "full reduction failed for {}-norm".format(p))
 # one dimension
 x = torch.randn(5, 5, device=device)
 xn = x.cpu().numpy()
-for p in [0, 1, 2, 3, 4, float('inf')]:
+for p in [0, 1, 2, 3, 4, inf]:
     res = x.norm(p, 1).cpu().numpy()
     expected = np.linalg.norm(xn, p, 1)
     self.assertEqual(res.shape, expected.shape)
@@ -808,10 +809,10 @@ class TestTorch(TestCase):
 ('prod', lambda *args, **kwargs: torch.prod(*args, **kwargs), 1),
 ('sum', lambda *args, **kwargs: torch.sum(*args, **kwargs), 0),
 ('norm', lambda *args, **kwargs: torch.norm(*args, p=2, **kwargs), 0),
-('mean', lambda *args, **kwargs: torch.mean(*args, **kwargs), float('nan')),
-('var', lambda *args, **kwargs: torch.var(*args, **kwargs), float('nan')),
-('std', lambda *args, **kwargs: torch.std(*args, **kwargs), float('nan')),
-('logsumexp', lambda *args, **kwargs: torch.logsumexp(*args, **kwargs), float('-inf')),
+('mean', lambda *args, **kwargs: torch.mean(*args, **kwargs), nan),
+('var', lambda *args, **kwargs: torch.var(*args, **kwargs), nan),
+('std', lambda *args, **kwargs: torch.std(*args, **kwargs), nan),
+('logsumexp', lambda *args, **kwargs: torch.logsumexp(*args, **kwargs), -inf),
 ]
 
 devices = ['cpu'] if not torch.cuda.is_available() else ['cpu', 'cuda']
@@ -878,8 +879,8 @@ class TestTorch(TestCase):
 def test_logsumexp(self):
     from scipy.special import logsumexp
     a = torch.randn(5, 4)
-    a[0, 0] = float('inf')
-    a[1, :] = float('-inf')
+    a[0, 0] = inf
+    a[1, :] = -inf
     actual = a.logsumexp(1)
     expected = logsumexp(a.numpy(), 1)
     self.assertEqual(expected.shape, actual.shape)
@@ -1540,7 +1541,7 @@ class TestTorch(TestCase):
 self._test_cop(torch.mul, lambda x, y: x * y)
 
 def test_cpow(self):
-    self._test_cop(torch.pow, lambda x, y: float('nan') if x < 0 else math.pow(x, y))
+    self._test_cop(torch.pow, lambda x, y: nan if x < 0 else math.pow(x, y))
 
 @unittest.skipIf(not TEST_NUMPY, 'Numpy not found')
 def test_einsum(self):
@@ -2416,7 +2417,7 @@ class TestTorch(TestCase):
 # full reduction
 x = torch.randn(5, 5)
 xn = x.numpy()
-for p in [1, 2, 3, 4, float('inf')]:
+for p in [1, 2, 3, 4, inf]:
     res = x.renorm(p, 1, 1)
     expected = x / x.norm(p, 0, keepdim=True).clamp(min=1)
     self.assertEqual(res.numpy(), expected.numpy(), "renorm failed for {}-norm".format(p))
@@ -2532,9 +2533,9 @@ class TestTorch(TestCase):
 def test_multinomial_invalid_probs(self):
     test_method = TestTorch._test_multinomial_invalid_probs
     self._spawn_method(test_method, torch.Tensor([0, -1]))
-    self._spawn_method(test_method, torch.Tensor([0, float('inf')]))
-    self._spawn_method(test_method, torch.Tensor([0, float('-inf')]))
-    self._spawn_method(test_method, torch.Tensor([0, float('nan')]))
+    self._spawn_method(test_method, torch.Tensor([0, inf]))
+    self._spawn_method(test_method, torch.Tensor([0, -inf]))
+    self._spawn_method(test_method, torch.Tensor([0, nan]))
 
 @suppress_warnings
 def test_range(self):
@@ -4672,15 +4673,15 @@ class TestTorch(TestCase):
 self.assertEqual(x.nelement(), all.long().sum())
 
 def test_isfinite(self):
-    x = torch.Tensor([1, float('inf'), 2, float('-inf'), float('nan'), -10])
+    x = torch.Tensor([1, inf, 2, -inf, nan, -10])
     self.assertEqual(torch.isfinite(x), torch.ByteTensor([1, 0, 1, 0, 0, 1]))
 
 def test_isinf(self):
-    x = torch.Tensor([1, float('inf'), 2, float('-inf'), float('nan')])
+    x = torch.Tensor([1, inf, 2, -inf, nan])
     self.assertEqual(torch.isinf(x), torch.ByteTensor([0, 1, 0, 1, 0]))
 
 def test_isnan(self):
-    x = torch.Tensor([1, float('nan'), 2])
+    x = torch.Tensor([1, nan, 2])
     self.assertEqual(torch.isnan(x), torch.ByteTensor([0, 1, 0]))
 
 def test_RNGState(self):
@@ -7418,7 +7419,7 @@ class TestTorch(TestCase):
 self.assertExpected(str(x), subname='negint')
 
 # test inf and nan
-x = torch.tensor([4, float('inf'), 1.5, float('-inf'), 0, float('nan'), 1])
+x = torch.tensor([4, inf, 1.5, -inf, 0, nan, 1])
 self.assertEqual(x.__repr__(), str(x))
 self.assertExpected(str(x), subname='nonfinite')
 
@@ -413,6 +413,7 @@ class TestFFI(TestCase):
 @unittest.skipIf(not HAS_CFFI or not HAS_CUDA, "ffi tests require cffi package")
 @unittest.skipIf(IS_WINDOWS, "ffi doesn't currently work on Windows")
 def test_gpu(self):
+    from torch.utils.cpp_extension import CUDA_HOME
     create_extension(
         name='gpulib',
         headers=[test_dir + '/ffi/src/cuda/cudalib.h'],
@@ -421,6 +422,7 @@ class TestFFI(TestCase):
     ],
     with_cuda=True,
     verbose=False,
+    include_dirs=[os.path.join(CUDA_HOME, 'include')],
 ).build()
 import gpulib
 tensor = torch.ones(2, 2).float()
@@ -41,6 +41,9 @@ while [[ $# -gt 0 ]]; do
 --full-caffe2)
     FULL_CAFFE2=1
     ;;
+--cuda-static-link)
+    CAFFE2_STATIC_LINK_CUDA=1
+    ;;
 *)
     break
     ;;
@@ -261,6 +264,7 @@ function build_caffe2() {
 -DBUILD_SHARED_LIBS=ON \
 -DONNX_NAMESPACE=$ONNX_NAMESPACE \
 -DUSE_CUDA=$USE_CUDA \
+-DCAFFE2_STATIC_LINK_CUDA=$CAFFE2_STATIC_LINK_CUDA \
 -DUSE_ROCM=$USE_ROCM \
 -DUSE_NNPACK=$USE_NNPACK \
 -DCUDNN_INCLUDE_DIR=$CUDNN_INCLUDE_DIR \
@@ -25,6 +25,13 @@ import sys
 PY2 = sys.version_info[0] == 2
 PY3 = sys.version_info[0] == 3
 
+if PY2:
+    inf = float('inf')
+    nan = float('nan')
+else:
+    import math
+    inf = math.inf
+    nan = math.nan
 
 if PY2:
     string_classes = basestring
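The `torch._six` shim above is the pivot of this whole changeset: every `float('inf')` and `float('nan')` literal elsewhere in the tree is replaced by these module-level constants, which resolve to `math.inf`/`math.nan` on Python 3 and to the `float()` literals on Python 2. A minimal sanity check of the equivalence (illustrative sketch, not part of the diff):

    # Sketch: the torch._six constants behave exactly like the float() literals.
    import math
    from torch._six import inf, nan

    assert inf == float('inf') and -inf == float('-inf')
    assert math.isnan(nan)   # NaN never compares equal, so test via isnan
    assert nan != nan        # the defining property of IEEE-754 NaN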
@@ -1743,8 +1743,8 @@ scatter_(dim, index, src) -> Tensor
 
 Writes all values from the tensor :attr:`src` into :attr:`self` at the indices
 specified in the :attr:`index` tensor. For each value in :attr:`src`, its output
-index is specified by its index in :attr:`src` for dimension != :attr:`dim` and
-by the corresponding value in :attr:`index` for dimension = :attr:`dim`.
+index is specified by its index in :attr:`src` for ``dimension != dim`` and by
+the corresponding value in :attr:`index` for ``dimension = dim``.
 
 For a 3-D tensor, :attr:`self` is updated as::
 
@@ -1754,14 +1754,14 @@ For a 3-D tensor, :attr:`self` is updated as::
 
 This is the reverse operation of the manner described in :meth:`~Tensor.gather`.
 
-:attr:`self`, :attr:`index` and :attr:`src` should have same number of
-dimensions. It is also required that `index.size(d) <= src.size(d)` for all
-dimensions `d`, and that `index.size(d) <= self.size(d)` for all dimensions
-`d != dim`.
+:attr:`self`, :attr:`index` and :attr:`src` (if it is a Tensor) should have same
+number of dimensions. It is also required that ``index.size(d) <= src.size(d)``
+for all dimensions ``d``, and that ``index.size(d) <= self.size(d)`` for all
+dimensions ``d != dim``.
 
 Moreover, as for :meth:`~Tensor.gather`, the values of :attr:`index` must be
-between `0` and `(self.size(dim) -1)` inclusive, and all values in a row along
-the specified dimension :attr:`dim` must be unique.
+between ``0`` and ``self.size(dim) - 1`` inclusive, and all values in a row
+along the specified dimension :attr:`dim` must be unique.
 
 Args:
     dim (int): the axis along which to index
@@ -1785,6 +1785,50 @@ Example::
     [ 0.0000, 0.0000, 0.0000, 1.2300]])
 """)
 
+add_docstr_all('scatter_add_',
+               r"""
+scatter_add_(dim, index, other) -> Tensor
+
+Adds all values from the tensor :attr:`other` into :attr:`self` at the indices
+specified in the :attr:`index` tensor in a similar fashion as
+:meth:`~torch.Tensor.scatter_`. For each value in :attr:`other`, it is added to
+an index in :attr:`self` which is specified by its index in :attr:`other`
+for ``dimension != dim`` and by the corresponding value in :attr:`index` for
+``dimension = dim``.
+
+For a 3-D tensor, :attr:`self` is updated as::
+
+    self[index[i][j][k]][j][k] += other[i][j][k]  # if dim == 0
+    self[i][index[i][j][k]][k] += other[i][j][k]  # if dim == 1
+    self[i][j][index[i][j][k]] += other[i][j][k]  # if dim == 2
+
+:attr:`self`, :attr:`index` and :attr:`other` should have same number of
+dimensions. It is also required that ``index.size(d) <= other.size(d)`` for all
+dimensions ``d``, and that ``index.size(d) <= self.size(d)`` for all dimensions
+``d != dim``.
+
+Moreover, as for :meth:`~Tensor.gather`, the values of :attr:`index` must be
+between ``0`` and ``self.size(dim) - 1`` inclusive, and all values in a row along
+the specified dimension :attr:`dim` must be unique.
+
+Args:
+    dim (int): the axis along which to index
+    index (LongTensor): the indices of elements to scatter and add
+    other (Tensor): the source elements to scatter and add
+
+Example::
+
+    >>> x = torch.rand(2, 5)
+    >>> x
+    tensor([[0.7404, 0.0427, 0.6480, 0.3806, 0.8328],
+            [0.7953, 0.2009, 0.9154, 0.6782, 0.9620]])
+    >>> torch.ones(3, 5).scatter_add_(0, torch.tensor([[0, 1, 2, 0, 0], [2, 0, 0, 1, 2]]), x)
+    tensor([[1.7404, 1.2009, 1.9154, 1.3806, 1.8328],
+            [1.0000, 1.0427, 1.0000, 1.6782, 1.0000],
+            [1.7953, 1.0000, 1.6480, 1.0000, 1.9620]])
+
+""")
+
 add_docstr_all('select',
                r"""
 select(dim, index) -> Tensor
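Since the `scatter_` example itself is truncated in the hunk above, here is a small sketch of the semantics the revised wording pins down (illustrative values, not from the diff):

    # Sketch of scatter_ along dim=0: out[index[i][j]][j] = src[i][j]
    import torch

    src = torch.arange(1., 7.).view(2, 3)         # [[1., 2., 3.], [4., 5., 6.]]
    index = torch.tensor([[0, 1, 2], [2, 0, 1]])  # unique per column, i.e. along dim 0
    out = torch.zeros(3, 3).scatter_(0, index, src)
    # out = [[1., 5., 0.],
    #        [0., 2., 6.],
    #        [4., 0., 3.]]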
@@ -2,6 +2,7 @@ import math
 import torch
 from functools import reduce
 from sys import float_info
+from torch._six import inf, nan
 
 
 class __PrinterOptions(object):
@@ -50,7 +51,7 @@ def set_printoptions(
 PRINT_OPTS.linewidth = 80
 elif profile == "full":
     PRINT_OPTS.precision = 4
-    PRINT_OPTS.threshold = float('inf')
+    PRINT_OPTS.threshold = inf
     PRINT_OPTS.edgeitems = 3
     PRINT_OPTS.linewidth = 80
 
@@ -101,8 +102,8 @@ class _Formatter(object):
 
 else:
     copy_abs = copy.abs()
-    pos_inf_mask = copy_abs.eq(float('inf'))
-    neg_inf_mask = copy_abs.eq(float('-inf'))
+    pos_inf_mask = copy_abs.eq(inf)
+    neg_inf_mask = copy_abs.eq(-inf)
     nan_mask = copy_abs.ne(copy)
     invalid_value_mask = pos_inf_mask + neg_inf_mask + nan_mask
     if invalid_value_mask.all():
@@ -1,4 +1,5 @@
 import torch
+from torch._six import nan
 from torch.distributions import constraints
 from torch.distributions.distribution import Distribution
 from torch.distributions.utils import probs_to_logits, logits_to_probs, lazy_property, broadcast_all
@@ -72,11 +73,11 @@ class Categorical(Distribution):
 
 @property
 def mean(self):
-    return self.probs.new_tensor(float('nan')).expand(self._extended_shape())
+    return self.probs.new_tensor(nan).expand(self._extended_shape())
 
 @property
 def variance(self):
-    return self.probs.new_tensor(float('nan')).expand(self._extended_shape())
+    return self.probs.new_tensor(nan).expand(self._extended_shape())
 
 def sample(self, sample_shape=torch.Size()):
     sample_shape = self._extended_shape(sample_shape)
@@ -1,4 +1,5 @@
 import math
+from torch._six import inf, nan
 from numbers import Number
 
 import torch
@@ -37,11 +38,11 @@ class Cauchy(Distribution):
 
 @property
 def mean(self):
-    return self.loc.new_tensor(float('nan')).expand(self._extended_shape())
+    return self.loc.new_tensor(nan).expand(self._extended_shape())
 
 @property
 def variance(self):
-    return self.loc.new_tensor(float('inf')).expand(self._extended_shape())
+    return self.loc.new_tensor(inf).expand(self._extended_shape())
 
 def rsample(self, sample_shape=torch.Size()):
     shape = self._extended_shape(sample_shape)
@@ -1,6 +1,7 @@
 from numbers import Number
 import torch
 import math
+from torch._six import nan
 from torch.distributions import constraints
 from torch.distributions.distribution import Distribution
 from torch.distributions.gamma import Gamma
@@ -39,13 +40,13 @@ class FisherSnedecor(Distribution):
 @property
 def mean(self):
     df2 = self.df2.clone()
-    df2[df2 <= 2] = float('nan')
+    df2[df2 <= 2] = nan
     return df2 / (df2 - 2)
 
 @property
 def variance(self):
     df2 = self.df2.clone()
-    df2[df2 <= 4] = float('nan')
+    df2[df2 <= 4] = nan
     return 2 * df2.pow(2) * (self.df1 + df2 - 2) / (self.df1 * (df2 - 2).pow(2) * (df2 - 4))
 
 def rsample(self, sample_shape=torch.Size(())):
@@ -1,5 +1,6 @@
 import math
 
+from torch._six import inf
 from torch.distributions import constraints
 from torch.distributions.transforms import AbsTransform
 from torch.distributions.cauchy import Cauchy
@@ -44,7 +45,7 @@ class HalfCauchy(TransformedDistribution):
 
 def log_prob(self, value):
     log_prob = self.base_dist.log_prob(value) + math.log(2)
-    log_prob[value.expand(log_prob.shape) < 0] = -float('inf')
+    log_prob[value.expand(log_prob.shape) < 0] = -inf
     return log_prob
 
 def cdf(self, value):
@@ -1,5 +1,6 @@
 import math
 
+from torch._six import inf
 from torch.distributions import constraints
 from torch.distributions.transforms import AbsTransform
 from torch.distributions.normal import Normal
@@ -44,7 +45,7 @@ class HalfNormal(TransformedDistribution):
 
 def log_prob(self, value):
     log_prob = self.base_dist.log_prob(value) + math.log(2)
-    log_prob[value.expand(log_prob.shape) < 0] = -float('inf')
+    log_prob[value.expand(log_prob.shape) < 0] = -inf
     return log_prob
 
 def cdf(self, value):
@@ -3,6 +3,7 @@ import warnings
 from functools import total_ordering
 
 import torch
+from torch._six import inf
 
 from .bernoulli import Bernoulli
 from .beta import Beta
@@ -113,7 +114,7 @@ def _infinite_like(tensor):
 """
 Helper function for obtaining infinite KL Divergence throughout
 """
-return tensor.new_tensor(float('inf')).expand_as(tensor)
+return tensor.new_tensor(inf).expand_as(tensor)
 
 
 def _x_log_x(tensor):
@@ -173,10 +174,10 @@ _euler_gamma = 0.57721566490153286060
 @register_kl(Bernoulli, Bernoulli)
 def _kl_bernoulli_bernoulli(p, q):
     t1 = p.probs * (p.probs / q.probs).log()
-    t1[q.probs == 0] = float('inf')
+    t1[q.probs == 0] = inf
     t1[p.probs == 0] = 0
     t2 = (1 - p.probs) * ((1 - p.probs) / (1 - q.probs)).log()
-    t2[q.probs == 1] = float('inf')
+    t2[q.probs == 1] = inf
     t2[p.probs == 1] = 0
     return t1 + t2
 
@@ -208,7 +209,7 @@ def _kl_binomial_binomial(p, q):
 @register_kl(Categorical, Categorical)
 def _kl_categorical_categorical(p, q):
     t = p.probs * (p.logits - q.logits)
-    t[q.probs == 0] = float('inf')
+    t[q.probs == 0] = inf
     t[p.probs == 0] = 0
     return t.sum(-1)
 
@@ -322,7 +323,7 @@ def _kl_pareto_pareto(p, q):
 t1 = q.alpha * scale_ratio.log()
 t2 = -alpha_ratio.log()
 result = t1 + t2 + alpha_ratio - 1
-result[p.support.lower_bound < q.support.lower_bound] = float('inf')
+result[p.support.lower_bound < q.support.lower_bound] = inf
 return result
 
 
@@ -346,7 +347,7 @@ def _kl_transformed_transformed(p, q):
 @register_kl(Uniform, Uniform)
 def _kl_uniform_uniform(p, q):
     result = ((q.high - q.low) / (p.high - p.low)).log()
-    result[(q.low > p.low) | (q.high < p.high)] = float('inf')
+    result[(q.low > p.low) | (q.high < p.high)] = inf
     return result
 
 
@@ -392,7 +393,7 @@ def _kl_beta_normal(p, q):
 @register_kl(Beta, Uniform)
 def _kl_beta_uniform(p, q):
     result = -p.entropy() + (q.high - q.low).log()
-    result[(q.low > p.support.lower_bound) | (q.high < p.support.upper_bound)] = float('inf')
+    result[(q.low > p.support.lower_bound) | (q.high < p.support.upper_bound)] = inf
     return result
 
 
@@ -543,7 +544,7 @@ def _kl_pareto_exponential(p, q):
 t2 = p.alpha.reciprocal()
 t3 = p.alpha * scale_rate_prod / (p.alpha - 1)
 result = t1 - t2 + t3 - 1
-result[p.alpha <= 1] = float('inf')
+result[p.alpha <= 1] = inf
 return result
 
 
@@ -555,7 +556,7 @@ def _kl_pareto_gamma(p, q):
 t3 = (1 - q.concentration) * common_term
 t4 = q.rate * p.alpha * p.scale / (p.alpha - 1)
 result = t1 + t2 + t3 + t4 - 1
-result[p.alpha <= 1] = float('inf')
+result[p.alpha <= 1] = inf
 return result
 
 # TODO: Add Pareto-Laplace KL Divergence
@@ -570,7 +571,7 @@ def _kl_pareto_normal(p, q):
 t3 = p.alpha * common_term.pow(2) / (p.alpha - 2)
 t4 = (p.alpha * common_term - q.loc).pow(2)
 result = t1 - t2 + (t3 + t4) / var_normal - 1
-result[p.alpha <= 2] = float('inf')
+result[p.alpha <= 2] = inf
 return result
 
 
@@ -588,14 +589,14 @@ def _kl_uniform_beta(p, q):
 t3 = (q.concentration0 - 1) * (_x_log_x((1 - p.high)) - _x_log_x((1 - p.low)) + common_term) / common_term
 t4 = q.concentration1.lgamma() + q.concentration0.lgamma() - (q.concentration1 + q.concentration0).lgamma()
 result = t3 + t4 - t1 - t2
-result[(p.high > q.support.upper_bound) | (p.low < q.support.lower_bound)] = float('inf')
+result[(p.high > q.support.upper_bound) | (p.low < q.support.lower_bound)] = inf
 return result
 
 
 @register_kl(Uniform, Exponential)
 def _kl_uniform_exponetial(p, q):
     result = q.rate * (p.high + p.low) / 2 - ((p.high - p.low) * q.rate).log()
-    result[p.low < q.support.lower_bound] = float('inf')
+    result[p.low < q.support.lower_bound] = inf
     return result
 
 
@@ -607,7 +608,7 @@ def _kl_uniform_gamma(p, q):
 t3 = (1 - q.concentration) * (_x_log_x(p.high) - _x_log_x(p.low) - common_term) / common_term
 t4 = q.rate * (p.high + p.low) / 2
 result = -t1 + t2 + t3 + t4
-result[p.low < q.support.lower_bound] = float('inf')
+result[p.low < q.support.lower_bound] = inf
 return result
 
 
@@ -638,5 +639,5 @@ def _kl_uniform_pareto(p, q):
 t1 = (q.alpha * q.scale.pow(q.alpha) * (support_uniform)).log()
 t2 = (_x_log_x(p.high) - _x_log_x(p.low) - support_uniform) / support_uniform
 result = t2 * (q.alpha + 1) - t1
-result[p.low < q.support.lower_bound] = float('inf')
+result[p.low < q.support.lower_bound] = inf
 return result
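The `_kl_uniform_uniform` rule above admits a quick sanity check: KL(U(a,b) || U(c,d)) = log((d-c)/(b-a)) when [a,b] lies inside [c,d], and is infinite otherwise. A sketch exercising it through the public `kl_divergence` entry point (illustrative, not part of the diff):

    # Sketch: Uniform/Uniform KL, finite and infinite cases.
    import math
    import torch
    from torch.distributions import Uniform, kl_divergence

    p = Uniform(torch.tensor([0.0]), torch.tensor([1.0]))
    q = Uniform(torch.tensor([0.0]), torch.tensor([2.0]))
    assert torch.allclose(kl_divergence(p, q), torch.tensor([math.log(2.0)]))

    # Support violation: p puts mass where q has none, so the KL is infinite.
    r = Uniform(torch.tensor([0.5]), torch.tensor([3.0]))
    assert kl_divergence(r, q).item() == float('inf')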
@@ -1,4 +1,5 @@
 import torch
+from torch._six import inf
 from torch.distributions.distribution import Distribution
 from torch.distributions import Categorical
 from numbers import Number
@@ -93,6 +94,6 @@ class Multinomial(Distribution):
 logits, value = broadcast_all(self.logits.clone(), value)
 log_factorial_n = torch.lgamma(value.sum(-1) + 1)
 log_factorial_xs = torch.lgamma(value + 1).sum(-1)
-logits[(value == 0) & (logits == -float('inf'))] = 0
+logits[(value == 0) & (logits == -inf)] = 0
 log_powers = (logits * value).sum(-1)
 return log_factorial_n - log_factorial_xs + log_powers
@@ -1,5 +1,6 @@
 from numbers import Number
 import torch
+from torch._six import inf, nan
 import math
 from torch.distributions import constraints
 from torch.distributions.distribution import Distribution
@@ -27,15 +28,15 @@ class StudentT(Distribution):
 @property
 def mean(self):
     m = self.loc.clone()
-    m[self.df <= 1] = float('nan')
+    m[self.df <= 1] = nan
     return m
 
 @property
 def variance(self):
     m = self.df.clone()
     m[self.df > 2] = self.scale[self.df > 2].pow(2) * self.df[self.df > 2] / (self.df[self.df > 2] - 2)
-    m[(self.df <= 2) & (self.df > 1)] = float('inf')
-    m[self.df <= 1] = float('nan')
+    m[(self.df <= 2) & (self.df > 1)] = inf
+    m[self.df <= 1] = nan
     return m
 
 def __init__(self, df, loc=0., scale=1., validate_args=None):
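The piecewise assignments in `variance` implement the standard Student-t moments; written out with df as \nu and scale as \sigma (a restatement, not part of the diff):

    \mathrm{Var}(X) =
    \begin{cases}
    \sigma^2 \, \dfrac{\nu}{\nu - 2} & \nu > 2, \\
    \infty & 1 < \nu \le 2, \\
    \text{undefined (NaN)} & \nu \le 1.
    \end{cases}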
@@ -1,5 +1,6 @@
 import torch
 import torch.nn.functional as F
+from torch._six import inf
 from operator import mul
 from functools import reduce
 import math
@@ -155,7 +156,7 @@ def isfinite(tensor):
 """
 if not isinstance(tensor, torch.Tensor):
     raise ValueError("The argument is not a tensor", str(tensor))
-return (tensor == tensor) & (tensor.abs() != float('inf'))
+return (tensor == tensor) & (tensor.abs() != inf)
 
 
 def isinf(tensor):
@@ -174,7 +175,7 @@ def isinf(tensor):
 """
 if not isinstance(tensor, torch.Tensor):
     raise ValueError("The argument is not a tensor", str(tensor))
-return tensor.abs() == float('inf')
+return tensor.abs() == inf
 
 
 def stft(input, n_fft, hop_length=None, win_length=None, window=None,
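`isfinite` leans on two IEEE-754 facts: NaN is the only value that compares unequal to itself, and `abs()` maps both infinities onto +inf. A quick element-wise illustration (a sketch with hand-picked values):

    # Sketch: the identities behind isfinite/isinf.
    import torch
    from torch._six import inf, nan

    x = torch.tensor([1.0, inf, -inf, nan])
    print(x == x)                        # [1, 1, 1, 0] -> only NaN fails self-equality
    print(x.abs() == inf)                # [0, 1, 1, 0] -> both signs of infinity
    print((x == x) & (x.abs() != inf))   # [1, 0, 0, 0] -> finite entries only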
@@ -1,4 +1,5 @@
 import torch
+from torch._six import inf
 from .Module import Module
 from .utils import clear
 
@@ -34,7 +35,7 @@ class Normalize(Module):
 self._output.resize_as_(input)
 
 # specialization for the infinity norm
-if self.p == float('inf'):
+if self.p == inf:
     if not self._indices:
         self._indices = torch.cuda.FloatTensor() if torch.typename(self.output) == 'torch.cuda.FloatTensor' \
             else torch.LongTensor()
@@ -72,7 +73,7 @@ class Normalize(Module):
 self.cross = input.new()
 # compute diagonal term with gradOutput
 self._gradInput.resize_(n, d)
-if self.p == float('inf'):
+if self.p == inf:
     # specialization for the inf case
     torch.mul(self.norm.view(n, 1, 1).expand(n, d, 1), gradOutput, out=self._gradInput)
     self.buffer.resize_as_(input).zero_()
@@ -113,7 +114,7 @@ class Normalize(Module):
 self._gradInput.add_(-1, self.buffer)
 
 # reuse cross buffer for normalization
-if self.p == float('inf'):
+if self.p == inf:
     torch.mul(self.norm, self.norm, out=self.cross)
 else:
     torch.mul(self.normp, self.norm, out=self.cross)
@@ -1,10 +1,11 @@
 import math
 
 INFINITY = float('inf')
+NAN = float('nan')
 
 
 def sqrt_nothrow(x):
-    return math.sqrt(x) if x >= 0 else float('nan')
+    return math.sqrt(x) if x >= 0 else NAN
 
 
 def cg(opfunc, x, config, state=None):
@@ -145,7 +146,7 @@ def cg(opfunc, x, config, state=None):
 A = 6 * (f2 - f3) / z3 + 3 * (d2 + d3)
 B = 3 * (f3 - f2) - z3 * (d3 + 2 * d2)
 _denom = (B + sqrt_nothrow(B * B - A * d2 * z3 * z3))
-z2 = -d2 * z3 * z3 / _denom if _denom != 0 else float('nan')
+z2 = -d2 * z3 * z3 / _denom if _denom != 0 else NAN
 
 if z2 != z2 or z2 == INFINITY or z2 == -INFINITY or z2 < 0:
     if limit < -0.5:
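The guard `z2 != z2` in the context line above is the classic NaN test for plain Python floats, where `torch.isnan` does not apply. A one-line demonstration (illustrative only):

    # Sketch: why `z2 != z2` detects NaN for plain Python floats.
    nan = float('nan')
    assert nan != nan        # IEEE-754: NaN compares unequal to everything, itself included
    assert not (1.0 != 1.0)  # ordinary floats are equal to themselves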
@@ -523,7 +523,7 @@ class BCEWithLogitsLoss(_Loss):
 :math:`p_n > 1` increases the recall, :math:`p_n < 1` increases the precision.
 
 For example, if a dataset contains 100 positive and 300 negative examples of a single class,
-then `pos_weight` for the class should be equal to math:`\frac{300}{100}=3`.
+then `pos_weight` for the class should be equal to :math:`\frac{300}{100}=3`.
 The loss would act as if the dataset contains math:`3\times 100=300` positive examples.
 
 Args:
@@ -691,7 +691,7 @@ class _LPPoolNd(Module):
 self.ceil_mode = ceil_mode
 
 def extra_repr(self):
-    return 'norm_type={norm_type}, kernel_size{kernel_size}, stride={stride}, ' \
+    return 'norm_type={norm_type}, kernel_size={kernel_size}, stride={stride}, ' \
         'ceil_mode={ceil_mode}'.format(**self.__dict__)
 
 
|
||||
import warnings
|
||||
import torch
|
||||
from torch._six import inf
|
||||
|
||||
|
||||
def clip_grad_norm_(parameters, max_norm, norm_type=2):
|
||||
@ -23,7 +24,7 @@ def clip_grad_norm_(parameters, max_norm, norm_type=2):
|
||||
parameters = list(filter(lambda p: p.grad is not None, parameters))
|
||||
max_norm = float(max_norm)
|
||||
norm_type = float(norm_type)
|
||||
if norm_type == float('inf'):
|
||||
if norm_type == inf:
|
||||
total_norm = max(p.grad.data.abs().max() for p in parameters)
|
||||
else:
|
||||
total_norm = 0
|
||||
|
@ -1,4 +1,6 @@
|
||||
import math
|
||||
import torch
|
||||
from torch._six import inf
|
||||
from bisect import bisect_right
|
||||
from functools import partial
|
||||
from .optimizer import Optimizer
|
||||
@ -367,9 +369,9 @@ class ReduceLROnPlateau(object):
|
||||
raise ValueError('threshold mode ' + threshold_mode + ' is unknown!')
|
||||
|
||||
if mode == 'min':
|
||||
self.mode_worse = float('inf')
|
||||
self.mode_worse = inf
|
||||
else: # mode == 'max':
|
||||
self.mode_worse = (-float('inf'))
|
||||
self.mode_worse = -inf
|
||||
|
||||
self.is_better = partial(self._cmp, mode, threshold_mode, threshold)
|
||||
|
||||
|
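`mode_worse` seeds the scheduler's best-so-far value so that the first reported metric always counts as an improvement: +inf for `min` mode, -inf for `max` mode. A usage sketch (the optimizer below is a placeholder):

    # Sketch: the first reported metric always beats the inf/-inf seed.
    import torch
    from torch.optim import SGD
    from torch.optim.lr_scheduler import ReduceLROnPlateau

    optimizer = SGD([torch.zeros(1, requires_grad=True)], lr=0.1)  # placeholder
    scheduler = ReduceLROnPlateau(optimizer, mode='min')
    scheduler.step(0.5)  # 0.5 < inf, so it becomes the new best immediately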
@@ -65,6 +65,10 @@ CUDA_HOME = _find_cuda_home()
 BUILT_FROM_SOURCE_VERSION_PATTERN = re.compile(r'\d+\.\d+\.\d+\w+\+\w+')
 
 
+def is_binary_build():
+    return not BUILT_FROM_SOURCE_VERSION_PATTERN.match(torch.version.__version__)
+
+
 def check_compiler_abi_compatibility(compiler):
     '''
     Verifies that the given compiler is ABI-compatible with PyTorch.
@@ -77,7 +81,7 @@ def check_compiler_abi_compatibility(compiler):
 False if the compiler is (likely) ABI-incompatible with PyTorch,
 else True.
 '''
-if BUILT_FROM_SOURCE_VERSION_PATTERN.match(torch.version.__version__):
+if not is_binary_build():
     return True
 try:
     check_cmd = '{}' if sys.platform == 'win32' else '{} --version'
@@ -134,6 +138,7 @@ class BuildExtension(build_ext):
 self._check_abi()
 for extension in self.extensions:
     self._define_torch_extension_name(extension)
+    self._add_gnu_abi_flag_if_binary(extension)
 
 # Register .cu and .cuh as valid source extensions.
 self.compiler.src_extensions += ['.cu', '.cuh']
@@ -266,6 +271,21 @@ class BuildExtension(build_ext):
 else:
     extension.extra_compile_args.append(define)
 
+def _add_gnu_abi_flag_if_binary(self, extension):
+    # If the version string looks like a binary build,
+    # we know that PyTorch was compiled with gcc 4.9.2.
+    # if the extension is compiled with gcc >= 5.1,
+    # then we have to define _GLIBCXX_USE_CXX11_ABI=0
+    # so that the std::string in the API is resolved to
+    # non-C++11 symbols
+    define = '-D_GLIBCXX_USE_CXX11_ABI=0'
+    if is_binary_build():
+        if isinstance(extension.extra_compile_args, dict):
+            for args in extension.extra_compile_args.values():
+                args.append(define)
+        else:
+            extension.extra_compile_args.append(define)
+
 
 def CppExtension(name, sources, *args, **kwargs):
     '''
@@ -785,6 +805,9 @@ def _write_ninja_file(path,
 common_cflags = ['-DTORCH_EXTENSION_NAME={}'.format(name)]
 common_cflags += ['-I{}'.format(include) for include in includes]
 
+if is_binary_build():
+    common_cflags += ['-D_GLIBCXX_USE_CXX11_ABI=0']
+
 cflags = common_cflags + ['-fPIC', '-std=c++11'] + extra_cflags
 if sys.platform == 'win32':
     from distutils.spawn import _nt_quote_args
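The binary-vs-source detection above rests entirely on the version string: wheel releases carry plain versions like `0.4.1`, while source builds append a git-derived suffix such as `0.4.1a0+ab12`. A standalone sketch of that check, with `looks_like_binary_build` as a hypothetical stand-in for `is_binary_build()`:

    # Sketch: how the version regex separates wheel builds from source builds.
    import re

    BUILT_FROM_SOURCE_VERSION_PATTERN = re.compile(r'\d+\.\d+\.\d+\w+\+\w+')

    def looks_like_binary_build(version):  # hypothetical helper mirroring is_binary_build()
        return not BUILT_FROM_SOURCE_VERSION_PATTERN.match(version)

    assert looks_like_binary_build('0.4.1')             # wheel / binary release
    assert not looks_like_binary_build('0.4.1a0+ab12')  # built from source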