Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-31 20:27:50 +08:00)

Compare commits: ciflow/tru ... v0.4.0 (23 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 3749c581b7 |  |
|  | 200fb22b22 |  |
|  | 86b2165ab8 |  |
|  | 07091ad7dc |  |
|  | 92b137a9ed |  |
|  | ce0f350393 |  |
|  | 77e8c92ab9 |  |
|  | 46c534a14e |  |
|  | 58ed43d6e4 |  |
|  | 5f93a2b14c |  |
|  | 10175ed4f2 |  |
|  | 307db03fac |  |
|  | 98822f3753 |  |
|  | dd5a319055 |  |
|  | 9b90c66af8 |  |
|  | 7cba734a59 |  |
|  | 38aaa6354f |  |
|  | 8b767d2b0f |  |
|  | 068fb53fd2 |  |
|  | 06caf5d76f |  |
|  | 951cdc2b22 |  |
|  | eaba629943 |  |
|  | 33c2dc99cf |  |

.gitmodules (2 changes, vendored)

| @ -22,7 +22,7 @@ | |||||||
| 	url = https://github.com/NVlabs/cub.git | 	url = https://github.com/NVlabs/cub.git | ||||||
| [submodule "third_party/eigen"] | [submodule "third_party/eigen"] | ||||||
| 	path = third_party/eigen | 	path = third_party/eigen | ||||||
| 	url = https://github.com/RLovelett/eigen.git | 	url = https://github.com/eigenteam/eigen-git-mirror.git | ||||||
| [submodule "third_party/googletest"] | [submodule "third_party/googletest"] | ||||||
| 	path = third_party/googletest | 	path = third_party/googletest | ||||||
| 	url = https://github.com/google/googletest.git | 	url = https://github.com/google/googletest.git | ||||||
|  | |||||||
| @ -123,11 +123,6 @@ function(filter_list output input) | |||||||
| endfunction() | endfunction() | ||||||
|  |  | ||||||
|  |  | ||||||
| IF ($ENV{TH_BINARY_BUILD}) |  | ||||||
|   MESSAGE(STATUS "TH_BINARY_BUILD detected. Statically linking libstdc++") |  | ||||||
|   SET(CMAKE_CXX_FLAGS "-static-libstdc++ ${CMAKE_CXX_FLAGS}") |  | ||||||
| ENDIF() |  | ||||||
|  |  | ||||||
| # Can be compiled standalone | # Can be compiled standalone | ||||||
| IF(NOT AT_INSTALL_BIN_DIR OR NOT AT_INSTALL_LIB_DIR OR NOT AT_INSTALL_INCLUDE_DIR OR NOT AT_INSTALL_SHARE_DIR) | IF(NOT AT_INSTALL_BIN_DIR OR NOT AT_INSTALL_LIB_DIR OR NOT AT_INSTALL_INCLUDE_DIR OR NOT AT_INSTALL_SHARE_DIR) | ||||||
|   SET(AT_INSTALL_BIN_DIR "bin" CACHE PATH "AT install binary subdirectory") |   SET(AT_INSTALL_BIN_DIR "bin" CACHE PATH "AT install binary subdirectory") | ||||||
| @ -332,12 +327,55 @@ ENDIF() | |||||||
| TARGET_LINK_LIBRARIES(ATen cpuinfo) | TARGET_LINK_LIBRARIES(ATen cpuinfo) | ||||||
|  |  | ||||||
| IF(CUDA_FOUND) | IF(CUDA_FOUND) | ||||||
|   TARGET_LINK_LIBRARIES(ATen |   IF ($ENV{ATEN_STATIC_CUDA}) | ||||||
|     ${CUDA_LIBRARIES} |     # CuFFT has a complicated static story (especially around CUDA < 9) because it has device callback support | ||||||
|     ${CUDA_cusparse_LIBRARY} |     # we first have to build a fake lib that links with no device callbacks, | ||||||
|     ${CUDA_curand_LIBRARY}) |     # and then we link against this object file. | ||||||
|   CUDA_ADD_CUBLAS_TO_TARGET(ATen) |     # This was recommended by the CuFFT team at NVIDIA | ||||||
|   CUDA_ADD_CUFFT_TO_TARGET(ATen) |  | ||||||
|  |     # build fake CuFFT lib in build dir | ||||||
|  |     EXECUTE_PROCESS(COMMAND touch ${CMAKE_CURRENT_BINARY_DIR}/empty_file.cc) | ||||||
|  |     if(${CUDA_VERSION_MAJOR} EQUAL "8") | ||||||
|  |       SET(CUFFT_FAKELINK_OPTIONS | ||||||
|  | 	--generate-code arch=compute_35,code=sm_35 | ||||||
|  | 	--generate-code arch=compute_50,code=sm_50 | ||||||
|  | 	--generate-code arch=compute_60,code=sm_60) | ||||||
|  |     elseif(${CUDA_VERSION_MAJOR} EQUAL "9") | ||||||
|  |       SET(CUFFT_FAKELINK_OPTIONS | ||||||
|  | 	--generate-code arch=compute_35,code=sm_35 | ||||||
|  | 	--generate-code arch=compute_50,code=sm_50 | ||||||
|  | 	--generate-code arch=compute_60,code=sm_60 | ||||||
|  | 	--generate-code arch=compute_70,code=sm_70) | ||||||
|  |     else() | ||||||
|  |       MESSAGE(FATAL_ERROR "Unhandled major cuda version ${CUDA_VERSION_MAJOR}") | ||||||
|  |     endif() | ||||||
|  |     ADD_CUSTOM_COMMAND( | ||||||
|  |       OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/cufft_static_library.a | ||||||
|  |       COMMAND "${CUDA_TOOLKIT_ROOT_DIR}/bin/nvcc" -o ${CMAKE_CURRENT_BINARY_DIR}/cufft_static_library.a -Xcompiler -fPIC | ||||||
|  |       ${CUFFT_FAKELINK_OPTIONS} | ||||||
|  |       --device-link ${CMAKE_CURRENT_BINARY_DIR}/empty_file.cc -lcufft_static -lculibos | ||||||
|  |       ) | ||||||
|  |     ADD_CUSTOM_TARGET(FAKELINKED_CUFFT_TARGET DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/cufft_static_library.a) | ||||||
|  |     add_library(FAKELINKED_CUFFT STATIC IMPORTED GLOBAL) | ||||||
|  |     add_dependencies(FAKELINKED_CUFFT FAKELINKED_CUFFT_TARGET) | ||||||
|  |     set_target_properties(FAKELINKED_CUFFT PROPERTIES IMPORTED_LOCATION ${CMAKE_CURRENT_BINARY_DIR}/cufft_static_library.a) | ||||||
|  |  | ||||||
|  |     TARGET_LINK_LIBRARIES(ATen | ||||||
|  |       ${CUDA_LIBRARIES} | ||||||
|  |       ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcusparse_static.a | ||||||
|  |       ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcurand_static.a | ||||||
|  |       ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcublas_static.a | ||||||
|  |       FAKELINKED_CUFFT | ||||||
|  |       ${CUDA_TOOLKIT_ROOT_DIR}/lib64/libcufft_static.a | ||||||
|  |       ) | ||||||
|  |   ELSE() | ||||||
|  |     TARGET_LINK_LIBRARIES(ATen | ||||||
|  |       ${CUDA_LIBRARIES} | ||||||
|  |       ${CUDA_cusparse_LIBRARY} | ||||||
|  |       ${CUDA_curand_LIBRARY}) | ||||||
|  |     CUDA_ADD_CUBLAS_TO_TARGET(ATen) | ||||||
|  |     CUDA_ADD_CUFFT_TO_TARGET(ATen) | ||||||
|  |   ENDIF() | ||||||
|  |  | ||||||
|   if(CUDNN_FOUND) |   if(CUDNN_FOUND) | ||||||
|     target_link_libraries(ATen ${CUDNN_LIBRARIES}) |     target_link_libraries(ATen ${CUDNN_LIBRARIES}) | ||||||
|  | |||||||
| @ -3,7 +3,6 @@ | |||||||
| #include "ATen/ExpandUtils.h" | #include "ATen/ExpandUtils.h" | ||||||
| #include "ATen/NativeFunctions.h" | #include "ATen/NativeFunctions.h" | ||||||
| #include "ATen/WrapDimUtils.h" | #include "ATen/WrapDimUtils.h" | ||||||
| #include "cpu/ReduceOpsKernel.h" |  | ||||||
|  |  | ||||||
| #include <algorithm> | #include <algorithm> | ||||||
| #include <functional> | #include <functional> | ||||||
| @ -92,11 +91,6 @@ Tensor sum(const Tensor &self) { | |||||||
| } | } | ||||||
|  |  | ||||||
| Tensor _sum_cpu(const Tensor& self) { | Tensor _sum_cpu(const Tensor& self) { | ||||||
|   if (self.is_contiguous()) { |  | ||||||
|     Tensor result = self.type().tensor({}); |  | ||||||
|     sum_kernel(result, self, at::nullopt); |  | ||||||
|     return result; |  | ||||||
|   } |  | ||||||
|   return self._sumall(); |   return self._sumall(); | ||||||
| } | } | ||||||
|  |  | ||||||
| @ -113,11 +107,6 @@ Tensor prod(const Tensor &self) { | |||||||
| } | } | ||||||
|  |  | ||||||
| Tensor _prod_cpu(const Tensor &self) { | Tensor _prod_cpu(const Tensor &self) { | ||||||
|   if (self.is_contiguous()) { |  | ||||||
|     Tensor result = self.type().tensor({}); |  | ||||||
|     prod_kernel(result, self, at::nullopt); |  | ||||||
|     return result; |  | ||||||
|   } |  | ||||||
|   return self._prodall(); |   return self._prodall(); | ||||||
| } | } | ||||||
|  |  | ||||||
| @ -180,12 +169,6 @@ Tensor &_sum_out_cpu(Tensor &result, const Tensor &self, int64_t dim_, | |||||||
|   int64_t dim = maybe_wrap_dim(dim_, self.dim()); |   int64_t dim = maybe_wrap_dim(dim_, self.dim()); | ||||||
|   if (_dimreduce_return_trivial(result, self, 0)) |   if (_dimreduce_return_trivial(result, self, 0)) | ||||||
|     return result; |     return result; | ||||||
|   if (self.is_contiguous() && result.is_contiguous()) { |  | ||||||
|     _dimreduce_setup(result, self, dim); |  | ||||||
|     sum_kernel(result, self, dim); |  | ||||||
|     if (!keepdim) result.squeeze_(dim); |  | ||||||
|     return result; |  | ||||||
|   } |  | ||||||
|   return at::_th_sum_out(result, self, dim, keepdim); |   return at::_th_sum_out(result, self, dim, keepdim); | ||||||
| } | } | ||||||
|  |  | ||||||
| @ -214,12 +197,6 @@ Tensor &_prod_out_cpu(Tensor &result, const Tensor &self, int64_t dim_, | |||||||
|   int64_t dim = maybe_wrap_dim(dim_, self.dim()); |   int64_t dim = maybe_wrap_dim(dim_, self.dim()); | ||||||
|   if (_dimreduce_return_trivial(result, self, 1)) |   if (_dimreduce_return_trivial(result, self, 1)) | ||||||
|     return result; |     return result; | ||||||
|   if (self.is_contiguous() && result.is_contiguous()) { |  | ||||||
|     _dimreduce_setup(result, self, dim); |  | ||||||
|     prod_kernel(result, self, dim); |  | ||||||
|     if (!keepdim) result.squeeze_(dim); |  | ||||||
|     return result; |  | ||||||
|   } |  | ||||||
|   return at::_th_prod_out(result, self, dim, keepdim); |   return at::_th_prod_out(result, self, dim, keepdim); | ||||||
| } | } | ||||||
|  |  | ||||||
|  | |||||||
| @ -1,154 +0,0 @@ | |||||||
| #include "ATen/native/cpu/ReduceOpsKernel.h" |  | ||||||
|  |  | ||||||
| #include <numeric> |  | ||||||
|  |  | ||||||
| #include "ATen/Dispatch.h" |  | ||||||
| #include "ATen/Parallel.h" |  | ||||||
| #include "ATen/optional.h" |  | ||||||
| #include "ATen/cpu/vec256/vec256.h" |  | ||||||
|  |  | ||||||
| namespace at { namespace native { namespace { |  | ||||||
|  |  | ||||||
| using namespace vec256; |  | ||||||
|  |  | ||||||
| static inline int64_t round_down(int64_t a, int64_t m) { |  | ||||||
|   return a - (a % m); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| template<typename F> |  | ||||||
| static void parallel_for(int64_t end, int64_t step, bool parallelize, F func) { |  | ||||||
|   if (parallelize) { |  | ||||||
|     tbb::parallel_for<int64_t>(0, end, step, func); |  | ||||||
|   } else { |  | ||||||
|     for (int64_t i = 0; i != end; i += step) { |  | ||||||
|       func(i); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static tbb::affinity_partitioner ap; |  | ||||||
|  |  | ||||||
| // Vectorized reduction defined by reduce operation `Op` with identity `ident`. |  | ||||||
| // The reduction is built on top of reduce128, which reduces down a column |  | ||||||
| // 128 bytes wide (WIDTH scalar elements). The width of 128 bytes is chosen |  | ||||||
| // because of the "adjacent cache line prefetch" behavior on x86 CPUs. |  | ||||||
| template<typename scalar_t, template <class> class Op, int ident> |  | ||||||
| struct Reduction { |  | ||||||
|   // reduction width in number of scalar elements |  | ||||||
|   static constexpr int WIDTH = 128 / sizeof(scalar_t); |  | ||||||
|  |  | ||||||
|   using Vec = Vec256<scalar_t>; |  | ||||||
|   using Reduce = Op<Vec>; |  | ||||||
|   using ReduceScalar = Op<scalar_t>; |  | ||||||
|  |  | ||||||
|   static void apply(Tensor& res, const Tensor& self, at::optional<int64_t> dim) { |  | ||||||
|     internal::init_tbb_num_threads(); |  | ||||||
|  |  | ||||||
|     auto out = res.data<scalar_t>(); |  | ||||||
|     auto data = self.data<scalar_t>(); |  | ||||||
|     auto numel = self.numel(); |  | ||||||
|     if (!dim.has_value()) { |  | ||||||
|       *out = reduce_all(data, numel); |  | ||||||
|       return; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     int64_t n = self.size(*dim); |  | ||||||
|     int64_t stride = self.stride(*dim); |  | ||||||
|     int64_t batch = numel / (n * stride); |  | ||||||
|     bool paralellize = batch * n > internal::TBB_GRAIN_SIZE; |  | ||||||
|     parallel_for(batch, 1, paralellize, [=](int64_t b) { |  | ||||||
|       if (stride == 1) { |  | ||||||
|         out[b] = reduce_all(&data[b * n], n); |  | ||||||
|       } else { |  | ||||||
|         reduce2d(&data[b * n * stride], &out[b * stride], n, stride, stride); |  | ||||||
|       } |  | ||||||
|     }); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   static scalar_t reduce_all(const scalar_t* data, int64_t size) { |  | ||||||
|     int64_t k = size / WIDTH; |  | ||||||
|  |  | ||||||
|     scalar_t sum; |  | ||||||
|     if (size > internal::TBB_GRAIN_SIZE) { |  | ||||||
|       sum = tbb::parallel_reduce( |  | ||||||
|           tbb::blocked_range<int64_t>(0, k, internal::TBB_GRAIN_SIZE / WIDTH), |  | ||||||
|           scalar_t(ident), |  | ||||||
|           [=](const tbb::blocked_range<int64_t>& r, scalar_t init) { |  | ||||||
|             scalar_t buf[WIDTH]; |  | ||||||
|             reduce128(&data[r.begin() * WIDTH], buf, r.end() - r.begin(), WIDTH); |  | ||||||
|             return std::accumulate(buf, buf + WIDTH, init, ReduceScalar()); |  | ||||||
|           }, |  | ||||||
|           ReduceScalar(), |  | ||||||
|           ap); |  | ||||||
|     } else { |  | ||||||
|       scalar_t buf[WIDTH]; |  | ||||||
|       reduce128(data, buf, k, WIDTH); |  | ||||||
|       sum = std::accumulate(buf, buf + WIDTH, scalar_t(ident), ReduceScalar()); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     for (int i = k * WIDTH; i != size; i++) { |  | ||||||
|       sum = ReduceScalar()(sum, data[i]); |  | ||||||
|     } |  | ||||||
|     return sum; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Reduce down a column of WIDTH elements (128 bytes) with the given number |  | ||||||
|   // of rows. Stores the results in out[0 ... WIDTH-1]. |  | ||||||
|   static void reduce128(const scalar_t* data, scalar_t* out, int64_t rows, int64_t stride) { |  | ||||||
|     Vec acc[4] = {ident, ident, ident, ident};  // 128 bytes (two cache lines) |  | ||||||
|     static_assert(sizeof(acc) == 128, "accumulator should be 128 bytes"); |  | ||||||
|     for (int64_t row = 0; row != rows; row++) { |  | ||||||
|       for (int j = 0; j != 4; j++) { |  | ||||||
|         auto val = Vec::s_load(&data[row * stride + j * Vec::size]); |  | ||||||
|         acc[j] = Reduce()(acc[j], val); |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|     for (int j = 0; j != 4; j++) { |  | ||||||
|       acc[j].store(&out[j * Vec::size]); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // Reduce a 2d matrix down each column. Stores the results in out[0 ... cols-1] |  | ||||||
|   static void reduce2d(const scalar_t* data, scalar_t* out, int64_t rows, int64_t cols, int64_t stride) { |  | ||||||
|     int64_t cols_rounded = round_down(cols, WIDTH); |  | ||||||
|     bool paralellize = cols * rows > internal::TBB_GRAIN_SIZE; |  | ||||||
|     parallel_for(cols_rounded, WIDTH, paralellize, [=](int64_t col) { |  | ||||||
|       reduce128(&data[col], &out[col], rows, stride); |  | ||||||
|     }); |  | ||||||
|  |  | ||||||
|     if (cols_rounded != cols) { |  | ||||||
|       scalar_t buf[WIDTH]; |  | ||||||
|       for (int64_t j = 0; j != cols - cols_rounded; j++) { |  | ||||||
|         buf[j] = ident; |  | ||||||
|       } |  | ||||||
|       for (int64_t row = 0; row != rows; row++) { |  | ||||||
|         for (int64_t j = 0; j != cols - cols_rounded; j++) { |  | ||||||
|           auto val = data[row * stride + j + cols_rounded]; |  | ||||||
|           buf[j] = ReduceScalar()(buf[j], val); |  | ||||||
|         } |  | ||||||
|       } |  | ||||||
|       for (int64_t j = 0; j != cols - cols_rounded; j++) { |  | ||||||
|         out[j + cols_rounded] = buf[j]; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| static void sum_kernel_impl(Tensor& result, const Tensor& self, at::optional<int64_t> dim) { |  | ||||||
|   AT_DISPATCH_ALL_TYPES(self.type(), "sum", [&] { |  | ||||||
|     Reduction<scalar_t, std::plus, 0>::apply(result, self, dim); |  | ||||||
|   }); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static void prod_kernel_impl(Tensor& result, const Tensor& self, at::optional<int64_t> dim) { |  | ||||||
|   AT_DISPATCH_ALL_TYPES(self.type(), "prod", [&] { |  | ||||||
|     Reduction<scalar_t, std::multiplies, 1>::apply(result, self, dim); |  | ||||||
|   }); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| }  // anonymous namespace |  | ||||||
|  |  | ||||||
| REGISTER_DISPATCH(sum_kernel, &sum_kernel_impl); |  | ||||||
| REGISTER_DISPATCH(prod_kernel, &prod_kernel_impl); |  | ||||||
|  |  | ||||||
| }}  // namespace at::native |  | ||||||
| @ -1,16 +0,0 @@ | |||||||
| #pragma once |  | ||||||
|  |  | ||||||
| #include <ATen/ATen.h> |  | ||||||
| #include <ATen/optional.h> |  | ||||||
| #include "CapabilityDispatch.h" |  | ||||||
|  |  | ||||||
| namespace at { |  | ||||||
| namespace native { |  | ||||||
|  |  | ||||||
| using reduce_fn = void(*)(Tensor &, const Tensor &, at::optional<int64_t>); |  | ||||||
|  |  | ||||||
| extern DispatchStub<reduce_fn> sum_kernel; |  | ||||||
| extern DispatchStub<reduce_fn> prod_kernel; |  | ||||||
|  |  | ||||||
| } |  | ||||||
| } |  | ||||||
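The two deletions above remove the experimental vectorized CPU reduction kernel (`ReduceOpsKernel.cpp`/`.h`) that the contiguous fast paths deleted from `ReduceOps.cpp` used to call. As a rough illustration of the idea behind that kernel — reduce a 2-D array down each column in 128-byte-wide blocks (two x86 cache lines), then handle the leftover columns with a scalar tail — here is a hedged NumPy sketch; the names and the NumPy dependency are illustrative only, not the deleted C++:

```python
import numpy as np

WIDTH_BYTES = 128  # two x86 cache lines, as in the deleted kernel's comment

def reduce2d_columns(data, op=np.add):
    """Reduce a 2-D array down each column, block by block (illustrative only)."""
    rows, cols = data.shape
    width = WIDTH_BYTES // data.dtype.itemsize        # elements per 128-byte block
    cols_rounded = cols - (cols % width)
    out = np.empty(cols, dtype=data.dtype)
    # reduce full 128-byte-wide column blocks (reduce128 in the deleted file)
    for col in range(0, cols_rounded, width):
        out[col:col + width] = op.reduce(data[:, col:col + width], axis=0)
    # scalar tail for the columns that do not fill a whole block
    if cols_rounded != cols:
        out[cols_rounded:] = op.reduce(data[:, cols_rounded:], axis=0)
    return out

x = np.random.rand(1000, 70).astype(np.float32)
assert np.allclose(reduce2d_columns(x), x.sum(axis=0))
```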
| @ -392,6 +392,9 @@ THCTensor_(median)(THCState *state, | |||||||
|   THCTensor *newValues = THCTensor_(newNarrow)(state, sorted, dimension, k, 1); |   THCTensor *newValues = THCTensor_(newNarrow)(state, sorted, dimension, k, 1); | ||||||
|   THCudaLongTensor *newIndices = THCudaLongTensor_newNarrow(state, sorted_indices, dimension, k, 1); |   THCudaLongTensor *newIndices = THCudaLongTensor_newNarrow(state, sorted_indices, dimension, k, 1); | ||||||
|  |  | ||||||
|  |   THCTensor_(free)(state, sorted); | ||||||
|  |   THCudaLongTensor_free(state, sorted_indices); | ||||||
|  |  | ||||||
|   if (!keepdim) { |   if (!keepdim) { | ||||||
|     THCTensor_(squeeze1d)(state, newValues, newValues, dimension); |     THCTensor_(squeeze1d)(state, newValues, newValues, dimension); | ||||||
|     THCudaLongTensor_squeeze1d(state, newIndices, newIndices, dimension); |     THCudaLongTensor_squeeze1d(state, newIndices, newIndices, dimension); | ||||||
|  | |||||||
| @ -11,6 +11,8 @@ Automatic differentiation package - torch.autograd | |||||||
|  |  | ||||||
| .. autofunction:: grad | .. autofunction:: grad | ||||||
|  |  | ||||||
|  | .. _locally-disable-grad: | ||||||
|  |  | ||||||
| Locally disabling gradient computation | Locally disabling gradient computation | ||||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||||
|  |  | ||||||
|  | |||||||
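The autograd hunk above only adds a ``locally-disable-grad`` cross-reference label for the "Locally disabling gradient computation" section. For context, the main tool that section documents is the ``torch.no_grad`` context manager; a minimal sketch using the 0.4-style API:

```python
import torch

x = torch.ones(2, 2, requires_grad=True)

with torch.no_grad():
    y = x * 2            # computed without tracking history

print(y.requires_grad)   # False: no gradient will flow back through y
```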
| @ -1,56 +0,0 @@ | |||||||
| .. currentmodule:: torch |  | ||||||
|  |  | ||||||
| .. _device-doc: |  | ||||||
|  |  | ||||||
| torch.device |  | ||||||
| =================================== |  | ||||||
|  |  | ||||||
| A :class:`torch.device` is an object representing the device on which a :class:`torch.Tensor` is |  | ||||||
| or will be allocated. |  | ||||||
|  |  | ||||||
| The :class:`torch.device` contains a device type (``'cpu'`` or ``'cuda'``) and optional device ordinal for the |  | ||||||
| device type.  If the device ordinal is not present, this represents the current device for the device type; |  | ||||||
| e.g. a :class:`torch.Tensor` constructed with device ``'cuda'`` is equivalent to ``'cuda:X'`` where X is the result of |  | ||||||
| :func:`torch.cuda.current_device()`. |  | ||||||
|  |  | ||||||
| A :class:`torch.Tensor`'s device can be accessed via the :attr:`Tensor.device` property. |  | ||||||
|  |  | ||||||
| A :class:`torch.device` can be constructed via a string or via a string and device ordinal |  | ||||||
|  |  | ||||||
| Via a string: |  | ||||||
| :: |  | ||||||
|  |  | ||||||
|     >>> torch.device('cuda:0') |  | ||||||
|     device(type='cuda', index=0) |  | ||||||
|  |  | ||||||
|     >>> torch.device('cpu') |  | ||||||
|     device(type='cpu') |  | ||||||
|  |  | ||||||
|     >>> torch.device('cuda')  # current cuda device |  | ||||||
|     device(type='cuda') |  | ||||||
|  |  | ||||||
| Via a string and device ordinal: |  | ||||||
|  |  | ||||||
| :: |  | ||||||
|  |  | ||||||
|     >>> torch.device('cuda', 0) |  | ||||||
|     device(type='cuda', index=0) |  | ||||||
|  |  | ||||||
|     >>> torch.device('cpu', 0) |  | ||||||
|     device(type='cpu', index=0) |  | ||||||
|  |  | ||||||
| .. note:: |  | ||||||
|    For legacy reasons, a device can be constructed via a single device ordinal, which is treated |  | ||||||
|    as a cuda device.  This matches :meth:`Tensor.get_device`, which returns an ordinal for cuda |  | ||||||
|    tensors and is not supported for cpu tensors. |  | ||||||
|  |  | ||||||
|    >>> torch.device(1) |  | ||||||
|    device(type='cuda', index=1) |  | ||||||
|  |  | ||||||
| .. note:: |  | ||||||
|    Methods which take a device will generally accept a (properly formatted) string |  | ||||||
|    or (legacy) integer device ordinal, i.e. the following are all equivalent: |  | ||||||
|  |  | ||||||
|    >>> torch.randn((2,3), device=torch.device('cuda:1')) |  | ||||||
|    >>> torch.randn((2,3), device='cuda:1') |  | ||||||
|    >>> torch.randn((2,3), device=1)  # legacy |  | ||||||
| @ -24,7 +24,9 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs. | |||||||
|  |  | ||||||
|    torch |    torch | ||||||
|    tensors |    tensors | ||||||
|  |    tensor_attributes | ||||||
|    sparse |    sparse | ||||||
|  |    cuda | ||||||
|    storage |    storage | ||||||
|    nn |    nn | ||||||
|    optim |    optim | ||||||
| @ -32,9 +34,6 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs. | |||||||
|    torch.distributions <distributions> |    torch.distributions <distributions> | ||||||
|    torch.multiprocessing <multiprocessing> |    torch.multiprocessing <multiprocessing> | ||||||
|    torch.distributed <distributed> |    torch.distributed <distributed> | ||||||
|    torch.legacy <legacy> |  | ||||||
|    cuda |  | ||||||
|    device |  | ||||||
|    bottleneck |    bottleneck | ||||||
|    checkpoint |    checkpoint | ||||||
|    cpp_extension |    cpp_extension | ||||||
| @ -42,6 +41,7 @@ PyTorch is an optimized tensor library for deep learning using GPUs and CPUs. | |||||||
|    ffi |    ffi | ||||||
|    model_zoo |    model_zoo | ||||||
|    onnx |    onnx | ||||||
|  |    torch.legacy <legacy> | ||||||
|  |  | ||||||
| .. toctree:: | .. toctree:: | ||||||
|    :glob: |    :glob: | ||||||
|  | |||||||
| @ -19,17 +19,17 @@ Two tensors are "broadcastable" if the following rules hold: | |||||||
|  |  | ||||||
| For Example:: | For Example:: | ||||||
|  |  | ||||||
|     >>> x=torch.FloatTensor(5,7,3) |     >>> x=torch.empty(5,7,3) | ||||||
|     >>> y=torch.FloatTensor(5,7,3) |     >>> y=torch.empty(5,7,3) | ||||||
|     # same shapes are always broadcastable (i.e. the above rules always hold) |     # same shapes are always broadcastable (i.e. the above rules always hold) | ||||||
|  |  | ||||||
|     >>> x=torch.FloatTensor() |     >>> x=torch.empty((0,)) | ||||||
|     >>> y=torch.FloatTensor(2,2) |     >>> y=torch.empty(2,2) | ||||||
|     # x and y are not broadcastable, because x does not have at least 1 dimension |     # x and y are not broadcastable, because x does not have at least 1 dimension | ||||||
|  |  | ||||||
|     # can line up trailing dimensions |     # can line up trailing dimensions | ||||||
|     >>> x=torch.FloatTensor(5,3,4,1) |     >>> x=torch.empty(5,3,4,1) | ||||||
|     >>> y=torch.FloatTensor(  3,1,1) |     >>> y=torch.empty(  3,1,1) | ||||||
|     # x and y are broadcastable. |     # x and y are broadcastable. | ||||||
|     # 1st trailing dimension: both have size 1 |     # 1st trailing dimension: both have size 1 | ||||||
|     # 2nd trailing dimension: y has size 1 |     # 2nd trailing dimension: y has size 1 | ||||||
| @ -37,8 +37,8 @@ For Example:: | |||||||
|     # 4th trailing dimension: y dimension doesn't exist |     # 4th trailing dimension: y dimension doesn't exist | ||||||
|  |  | ||||||
|     # but: |     # but: | ||||||
|     >>> x=torch.FloatTensor(5,2,4,1) |     >>> x=torch.empty(5,2,4,1) | ||||||
|     >>> y=torch.FloatTensor(  3,1,1) |     >>> y=torch.empty(  3,1,1) | ||||||
|     # x and y are not broadcastable, because in the 3rd trailing dimension 2 != 3 |     # x and y are not broadcastable, because in the 3rd trailing dimension 2 != 3 | ||||||
|  |  | ||||||
| If two tensors :attr:`x`, :attr:`y` are "broadcastable", the resulting tensor size | If two tensors :attr:`x`, :attr:`y` are "broadcastable", the resulting tensor size | ||||||
| @ -52,19 +52,19 @@ is calculated as follows: | |||||||
| For Example:: | For Example:: | ||||||
|  |  | ||||||
|     # can line up trailing dimensions to make reading easier |     # can line up trailing dimensions to make reading easier | ||||||
|     >>> x=torch.FloatTensor(5,1,4,1) |     >>> x=torch.empty(5,1,4,1) | ||||||
|     >>> y=torch.FloatTensor(  3,1,1) |     >>> y=torch.empty(  3,1,1) | ||||||
|     >>> (x+y).size() |     >>> (x+y).size() | ||||||
|     torch.Size([5, 3, 4, 1]) |     torch.Size([5, 3, 4, 1]) | ||||||
|  |  | ||||||
|     # but not necessary: |     # but not necessary: | ||||||
|     >>> x=torch.FloatTensor(1) |     >>> x=torch.empty(1) | ||||||
|     >>> y=torch.FloatTensor(3,1,7) |     >>> y=torch.empty(3,1,7) | ||||||
|     >>> (x+y).size() |     >>> (x+y).size() | ||||||
|     torch.Size([3, 1, 7]) |     torch.Size([3, 1, 7]) | ||||||
|  |  | ||||||
|     >>> x=torch.FloatTensor(5,2,4,1) |     >>> x=torch.empty(5,2,4,1) | ||||||
|     >>> y=torch.FloatTensor(3,1,1) |     >>> y=torch.empty(3,1,1) | ||||||
|     >>> (x+y).size() |     >>> (x+y).size() | ||||||
|     RuntimeError: The size of tensor a (2) must match the size of tensor b (3) at non-singleton dimension 1 |     RuntimeError: The size of tensor a (2) must match the size of tensor b (3) at non-singleton dimension 1 | ||||||
|  |  | ||||||
| @ -75,14 +75,14 @@ as a result of the broadcast. | |||||||
|  |  | ||||||
| For Example:: | For Example:: | ||||||
|  |  | ||||||
|     >>> x=torch.FloatTensor(5,3,4,1) |     >>> x=torch.empty(5,3,4,1) | ||||||
|     >>> y=torch.FloatTensor(3,1,1) |     >>> y=torch.empty(3,1,1) | ||||||
|     >>> (x.add_(y)).size() |     >>> (x.add_(y)).size() | ||||||
|     torch.Size([5, 3, 4, 1]) |     torch.Size([5, 3, 4, 1]) | ||||||
|  |  | ||||||
|     # but: |     # but: | ||||||
|     >>> x=torch.FloatTensor(1,3,1) |     >>> x=torch.empty(1,3,1) | ||||||
|     >>> y=torch.FloatTensor(3,1,7) |     >>> y=torch.empty(3,1,7) | ||||||
|     >>> (x.add_(y)).size() |     >>> (x.add_(y)).size() | ||||||
|     RuntimeError: The expanded size of the tensor (1) must match the existing size (7) at non-singleton dimension 2. |     RuntimeError: The expanded size of the tensor (1) must match the existing size (7) at non-singleton dimension 2. | ||||||
|  |  | ||||||
|  | |||||||
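The broadcasting rules stated above boil down to: align the two shapes at their trailing dimensions, require each pair of sizes to be equal or to contain a 1, and take the larger size in the result. A small illustrative helper makes that computation explicit; ``broadcast_shape`` is a hypothetical name, not a PyTorch API of this era:

```python
def broadcast_shape(a, b):
    """Compute the broadcast result size of two shapes, per the rules above."""
    a, b = tuple(a), tuple(b)
    ndim = max(len(a), len(b))
    # prepend 1s so both shapes align at their trailing dimensions
    a = (1,) * (ndim - len(a)) + a
    b = (1,) * (ndim - len(b)) + b
    out = []
    for x, y in zip(a, b):
        if x != y and 1 not in (x, y):
            raise RuntimeError(f"size {x} must match size {y} or one of them must be 1")
        out.append(max(x, y))
    return tuple(out)

assert broadcast_shape((5, 1, 4, 1), (3, 1, 1)) == (5, 3, 4, 1)
assert broadcast_shape((1,), (3, 1, 7)) == (3, 1, 7)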
| @ -12,35 +12,47 @@ However, once a tensor is allocated, you can do operations on it irrespective | |||||||
| of the selected device, and the results will always be placed on the same | of the selected device, and the results will always be placed on the same | ||||||
| device as the tensor. | device as the tensor. | ||||||
|  |  | ||||||
| Cross-GPU operations are not allowed by default, with the only exception of | Cross-GPU operations are not allowed by default, with the exception of | ||||||
| :meth:`~torch.Tensor.copy_`. Unless you enable peer-to-peer memory access, any | :meth:`~torch.Tensor.copy_` and other methods with copy-like functionality | ||||||
| attempts to launch ops on tensors spread across different devices will raise an | such as :meth:`~torch.Tensor.to` and :meth:`~torch.Tensor.cuda`. | ||||||
| error. | Unless you enable peer-to-peer memory access, any attempts to launch ops on | ||||||
|  | tensors spread across different devices will raise an error. | ||||||
|  |  | ||||||
| Below you can find a small example showcasing this:: | Below you can find a small example showcasing this:: | ||||||
|  |  | ||||||
|     x = torch.cuda.FloatTensor(1) |     cuda = torch.device('cuda')     # Default CUDA device | ||||||
|     # x.get_device() == 0 |     cuda0 = torch.device('cuda:0') | ||||||
|     y = torch.FloatTensor(1).cuda() |     cuda2 = torch.device('cuda:2')  # GPU 2 (these are 0-indexed) | ||||||
|     # y.get_device() == 0 |  | ||||||
|  |     x = torch.tensor([1., 2.], device=cuda0) | ||||||
|  |     # x.device is device(type='cuda', index=0) | ||||||
|  |     y = torch.tensor([1., 2.]).cuda() | ||||||
|  |     # y.device is device(type='cuda', index=0) | ||||||
|  |  | ||||||
|     with torch.cuda.device(1): |     with torch.cuda.device(1): | ||||||
|         # allocates a tensor on GPU 1 |         # allocates a tensor on GPU 1 | ||||||
|         a = torch.cuda.FloatTensor(1) |         a = torch.tensor([1., 2.], device=cuda) | ||||||
|  |  | ||||||
|         # transfers a tensor from CPU to GPU 1 |         # transfers a tensor from CPU to GPU 1 | ||||||
|         b = torch.FloatTensor(1).cuda() |         b = torch.tensor([1., 2.]).cuda() | ||||||
|         # a.get_device() == b.get_device() == 1 |         # a.device and b.device are device(type='cuda', index=1) | ||||||
|  |  | ||||||
|  |         # You can also use ``Tensor.to`` to transfer a tensor: | ||||||
|  |         b2 = torch.tensor([1., 2.]).to(device=cuda) | ||||||
|  |         # b.device and b2.device are device(type='cuda', index=1) | ||||||
|  |  | ||||||
|         c = a + b |         c = a + b | ||||||
|         # c.get_device() == 1 |         # c.device is device(type='cuda', index=1) | ||||||
|  |  | ||||||
|         z = x + y |         z = x + y | ||||||
|         # z.get_device() == 0 |         # z.device is device(type='cuda', index=0) | ||||||
|  |  | ||||||
|         # even within a context, you can give a GPU id to the .cuda call |         # even within a context, you can specify the device | ||||||
|         d = torch.randn(2).cuda(2) |         # (or give a GPU index to the .cuda call) | ||||||
|         # d.get_device() == 2 |         d = torch.randn(2, device=cuda2) | ||||||
|  |         e = torch.randn(2).to(cuda2) | ||||||
|  |         f = torch.randn(2).cuda(cuda2) | ||||||
|  |         # d.device, e.device, and f.device are all device(type='cuda', index=2) | ||||||
|  |  | ||||||
| Asynchronous execution | Asynchronous execution | ||||||
| ---------------------- | ---------------------- | ||||||
| @ -79,8 +91,9 @@ relative order, unless explicit synchronization functions (such as | |||||||
| :meth:`~torch.cuda.synchronize` or :meth:`~torch.cuda.Stream.wait_stream`) are | :meth:`~torch.cuda.synchronize` or :meth:`~torch.cuda.Stream.wait_stream`) are | ||||||
| used.  For example, the following code is incorrect:: | used.  For example, the following code is incorrect:: | ||||||
|  |  | ||||||
|  |     cuda = torch.device('cuda') | ||||||
|     s = torch.cuda.stream()  # Create a new stream. |     s = torch.cuda.stream()  # Create a new stream. | ||||||
|     A = torch.cuda.FloatTensor(100, 100).normal_(0.0, 1.0) |     A = torch.empty((100, 100), device=cuda).normal_(0.0, 1.0) | ||||||
|     with torch.cuda.stream(s): |     with torch.cuda.stream(s): | ||||||
|         # sum() may start execution before normal_() finishes! |         # sum() may start execution before normal_() finishes! | ||||||
|         B = torch.sum(A) |         B = torch.sum(A) | ||||||
| @ -122,8 +135,10 @@ the initial hidden state of a recurrent neural network. | |||||||
| The first step is to determine whether the GPU should be used or not. A common | The first step is to determine whether the GPU should be used or not. A common | ||||||
| pattern is to use Python's ``argparse`` module to read in user arguments, and | pattern is to use Python's ``argparse`` module to read in user arguments, and | ||||||
| have a flag that can be used to disable CUDA, in combination with | have a flag that can be used to disable CUDA, in combination with | ||||||
| :meth:`~torch.cuda.is_available`. In the following, ``args.cuda`` results in a | :meth:`~torch.cuda.is_available`. In the following, ``args.device`` results in a | ||||||
| flag that can be used to cast tensors and modules to CUDA if desired:: | :class:`torch.device` object that can be used to move tensors to CPU or CUDA. | ||||||
|  |  | ||||||
|  | :: | ||||||
|  |  | ||||||
|     import argparse |     import argparse | ||||||
|     import torch |     import torch | ||||||
| @ -132,29 +147,35 @@ flag that can be used to cast tensors and modules to CUDA if desired:: | |||||||
|     parser.add_argument('--disable-cuda', action='store_true', |     parser.add_argument('--disable-cuda', action='store_true', | ||||||
|                         help='Disable CUDA') |                         help='Disable CUDA') | ||||||
|     args = parser.parse_args() |     args = parser.parse_args() | ||||||
|     args.cuda = not args.disable_cuda and torch.cuda.is_available() |     args.device = None | ||||||
|  |     if not args.disable_cuda and torch.cuda.is_available(): | ||||||
|  |         args.device = torch.device('cuda') | ||||||
|  |     else: | ||||||
|  |         args.device = torch.device('cpu') | ||||||
|  |  | ||||||
| If modules or tensors need to be sent to the GPU, ``args.cuda`` can be used as | Now that we have ``args.device``, we can use it to create a Tensor on the | ||||||
| follows:: | desired device. | ||||||
|  |  | ||||||
|     x = torch.Tensor(8, 42) | :: | ||||||
|     net = Network() |  | ||||||
|     if args.cuda: |  | ||||||
|       x = x.cuda() |  | ||||||
|       net.cuda() |  | ||||||
|  |  | ||||||
| When creating tensors, an alternative to the if statement is to have a default |     x = torch.empty((8, 42), device=args.device) | ||||||
| datatype defined, and cast all tensors using that. An example when using a |     net = Network().to(device=args.device) | ||||||
| dataloader would be as follows:: |  | ||||||
|  |  | ||||||
|     dtype = torch.cuda.FloatTensor | This can be used in a number of cases to produce device agnostic code. Below | ||||||
|  | is an example when using a dataloader: | ||||||
|  |  | ||||||
|  | :: | ||||||
|  |  | ||||||
|  |     cuda0 = torch.device('cuda:0')  # CUDA GPU 0 | ||||||
|     for i, x in enumerate(train_loader): |     for i, x in enumerate(train_loader): | ||||||
|         x = Variable(x.type(dtype)) |         x = x.to(cuda0) | ||||||
|  |  | ||||||
| When working with multiple GPUs on a system, you can use the | When working with multiple GPUs on a system, you can use the | ||||||
| ``CUDA_VISIBLE_DEVICES`` environment flag to manage which GPUs are available to | ``CUDA_VISIBLE_DEVICES`` environment flag to manage which GPUs are available to | ||||||
| PyTorch. As mentioned above, to manually control which GPU a tensor is created | PyTorch. As mentioned above, to manually control which GPU a tensor is created | ||||||
| on, the best practice is to use a :any:`torch.cuda.device` context manager:: | on, the best practice is to use a :any:`torch.cuda.device` context manager. | ||||||
|  |  | ||||||
|  | :: | ||||||
|  |  | ||||||
|     print("Outside device is 0")  # On device 0 (default in most scenarios) |     print("Outside device is 0")  # On device 0 (default in most scenarios) | ||||||
|     with torch.cuda.device(1): |     with torch.cuda.device(1): | ||||||
| @ -162,29 +183,52 @@ on, the best practice is to use a :any:`torch.cuda.device` context manager:: | |||||||
|     print("Outside device is still 0")  # On device 0 |     print("Outside device is still 0")  # On device 0 | ||||||
|  |  | ||||||
| If you have a tensor and would like to create a new tensor of the same type on | If you have a tensor and would like to create a new tensor of the same type on | ||||||
| the same device, then you can use the :meth:`~torch.Tensor.new` method, which | the same device, then you can use a ``torch.Tensor.new_*`` method | ||||||
| acts the same as a normal tensor constructor. Whilst the previously mentioned | (see :class:`torch.Tensor`). | ||||||
| methods depend on the current GPU context, :meth:`~torch.Tensor.new` preserves | Whilst the previously mentioned ``torch.*`` factory functions | ||||||
| the device of the original tensor. | (:ref:`tensor-creation-ops`) depend on the current GPU context and | ||||||
|  | the attribute arguments you pass in, ``torch.Tensor.new_*`` methods preserve | ||||||
|  | the device and other attributes of the tensor. | ||||||
|  |  | ||||||
| This is the recommended practice when creating modules in which new | This is the recommended practice when creating modules in which new | ||||||
| tensors/variables need to be created internally during the forward pass:: | tensors need to be created internally during the forward pass. | ||||||
|  |  | ||||||
|     x_cpu = torch.FloatTensor(1) | :: | ||||||
|     x_gpu = torch.cuda.FloatTensor(1) |  | ||||||
|     x_cpu_long = torch.LongTensor(1) |     cuda = torch.device('cuda') | ||||||
|  |     x_cpu = torch.empty(2) | ||||||
|  |     x_gpu = torch.empty(2, device=cuda) | ||||||
|  |     x_cpu_long = torch.empty(2, dtype=torch.int64) | ||||||
|  |  | ||||||
|  |     y_cpu = x_cpu.new_full([3, 2], fill_value=0.3) | ||||||
|  |     print(y_cpu) | ||||||
|  |  | ||||||
|  |         tensor([[ 0.3000,  0.3000], | ||||||
|  |                 [ 0.3000,  0.3000], | ||||||
|  |                 [ 0.3000,  0.3000]]) | ||||||
|  |  | ||||||
|  |     y_gpu = x_gpu.new_full([3, 2], fill_value=-5) | ||||||
|  |     print(y_gpu) | ||||||
|  |  | ||||||
|  |         tensor([[-5.0000, -5.0000], | ||||||
|  |                 [-5.0000, -5.0000], | ||||||
|  |                 [-5.0000, -5.0000]], device='cuda:0') | ||||||
|  |  | ||||||
|  |     y_cpu_long = x_cpu_long.new_tensor([[1, 2, 3]]) | ||||||
|  |     print(y_cpu_long) | ||||||
|  |  | ||||||
|  |         tensor([[ 1,  2,  3]]) | ||||||
|  |  | ||||||
|     y_cpu = x_cpu.new(8, 10, 10).fill_(0.3) |  | ||||||
|     y_gpu = x_gpu.new(x_gpu.size()).fill_(-5) |  | ||||||
|     y_cpu_long = x_cpu_long.new([[1, 2, 3]]) |  | ||||||
|  |  | ||||||
| If you want to create a tensor of the same type and size of another tensor, and | If you want to create a tensor of the same type and size of another tensor, and | ||||||
| fill it with either ones or zeros, :meth:`~torch.ones_like` or | fill it with either ones or zeros, :meth:`~torch.ones_like` or | ||||||
| :meth:`~torch.zeros_like` are provided as convenient helper functions (which | :meth:`~torch.zeros_like` are provided as convenient helper functions (which | ||||||
| also preserve device):: | also preserve :class:`torch.device` and :class:`torch.dtype` of a Tensor). | ||||||
|  |  | ||||||
|     x_cpu = torch.FloatTensor(1) | :: | ||||||
|     x_gpu = torch.cuda.FloatTensor(1) |  | ||||||
|  |     x_cpu = torch.empty(2, 3) | ||||||
|  |     x_gpu = torch.empty(2, 3, device='cuda') | ||||||
|  |  | ||||||
|     y_cpu = torch.ones_like(x_cpu) |     y_cpu = torch.ones_like(x_cpu) | ||||||
|     y_gpu = torch.zeros_like(x_gpu) |     y_gpu = torch.zeros_like(x_gpu) | ||||||
| @ -204,7 +248,7 @@ memory. CPU tensors and storages expose a :meth:`~torch.Tensor.pin_memory` | |||||||
| method, that returns a copy of the object, with data put in a pinned region. | method, that returns a copy of the object, with data put in a pinned region. | ||||||
|  |  | ||||||
| Also, once you pin a tensor or storage, you can use asynchronous GPU copies. | Also, once you pin a tensor or storage, you can use asynchronous GPU copies. | ||||||
| Just pass an additional ``async=True`` argument to a :meth:`~torch.Tensor.cuda` | Just pass an additional ``non_blocking=True`` argument to a :meth:`~torch.Tensor.cuda` | ||||||
| call. This can be used to overlap data transfers with computation. | call. This can be used to overlap data transfers with computation. | ||||||
|  |  | ||||||
| You can make the :class:`~torch.utils.data.DataLoader` return batches placed in | You can make the :class:`~torch.utils.data.DataLoader` return batches placed in | ||||||
|  | |||||||
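The pinned-memory paragraph above mentions passing ``non_blocking=True`` (formerly ``async=True``) so host-to-device copies can overlap with computation. A minimal sketch of that pattern, assuming a CUDA-capable machine; the dataset below is only a stand-in:

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

cuda0 = torch.device('cuda:0')

# a page-locked (pinned) CPU tensor can be copied to the GPU asynchronously
x = torch.empty(64, 128).pin_memory()
y = x.cuda(non_blocking=True)

# DataLoader can return batches that are already placed in pinned memory
dataset = TensorDataset(torch.randn(1024, 128))
loader = DataLoader(dataset, batch_size=32, pin_memory=True)
for (batch,) in loader:
    batch = batch.to(cuda0, non_blocking=True)  # overlaps transfer with compute
    # ... forward/backward work here ...
```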
| @ -9,8 +9,8 @@ memory and will only send a handle to another process. | |||||||
|  |  | ||||||
| .. note:: | .. note:: | ||||||
|  |  | ||||||
|     When a :class:`~torch.autograd.Variable` is sent to another process, both |     When a :class:`~torch.Tensor` is sent to another process, both | ||||||
|     the :attr:`Variable.data` and :attr:`Variable.grad.data` are going to be |     the :attr:`~torch.Tensor` data and :attr:`torch.Tensor.grad` are going to be | ||||||
|     shared. |     shared. | ||||||
|  |  | ||||||
| This makes it possible to implement various training methods, like Hogwild, A3C, or any | This makes it possible to implement various training methods, like Hogwild, A3C, or any | ||||||
|  | |||||||
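The multiprocessing note above says that a tensor sent to another process shares its data (and its gradient), which is what makes Hogwild-style training possible. A hedged sketch of that shared-memory behavior, with illustrative names only:

```python
import torch
import torch.multiprocessing as mp

def worker(shared_param):
    # in-place updates are visible to every process sharing this storage
    shared_param += 1.0

if __name__ == '__main__':
    param = torch.zeros(4)
    param.share_memory_()                 # move the storage into shared memory
    workers = [mp.Process(target=worker, args=(param,)) for _ in range(4)]
    for w in workers:
        w.start()
    for w in workers:
        w.join()
    print(param)  # updates from all workers are reflected here
```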
							
								
								
									
docs/source/notes/windows.rst (new file, 261 lines)

| @ -0,0 +1,261 @@ | |||||||
|  | Windows FAQ | ||||||
|  | ========================== | ||||||
|  |  | ||||||
|  | Building from source | ||||||
|  | -------------------- | ||||||
|  |  | ||||||
|  | Include optional components | ||||||
|  | ^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||||
|  |  | ||||||
|  | There are two supported components for Windows PyTorch: | ||||||
|  | MKL and MAGMA. Here are the steps to build with them. | ||||||
|  |  | ||||||
|  | .. code-block:: bat | ||||||
|  |  | ||||||
|  |     REM Make sure you have 7z and curl installed. | ||||||
|  |  | ||||||
|  |     REM Download MKL files | ||||||
|  |     curl https://s3.amazonaws.com/ossci-windows/mkl_2018.2.185.7z -k -O | ||||||
|  |     7z x -aoa mkl_2018.2.185.7z -omkl | ||||||
|  |  | ||||||
|  |     REM Download MAGMA files | ||||||
|  |     REM cuda90/cuda91 is also available in the following line. | ||||||
|  |     set CUDA_PREFIX=cuda80  | ||||||
|  |     curl -k https://s3.amazonaws.com/ossci-windows/magma_%CUDA_PREFIX%_release_mkl_2018.2.185.7z -o magma.7z | ||||||
|  |     7z x -aoa magma.7z -omagma | ||||||
|  |      | ||||||
|  |     REM Setting essential environment variables | ||||||
|  |     set "CMAKE_INCLUDE_PATH=%cd%\\mkl\\include" | ||||||
|  |     set "LIB=%cd%\\mkl\\lib;%LIB%" | ||||||
|  |     set "MAGMA_HOME=%cd%\\magma" | ||||||
|  |  | ||||||
|  | Speeding CUDA build for Windows | ||||||
|  | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||||
|  |  | ||||||
|  | Visual Studio currently doesn't support parallel custom tasks. | ||||||
|  | As an alternative, we can use ``Ninja`` to parallelize the CUDA | ||||||
|  | build tasks. It can be enabled with only a few lines of code. | ||||||
|  |  | ||||||
|  | .. code-block:: bat | ||||||
|  |      | ||||||
|  |     REM Let's install ninja first. | ||||||
|  |     pip install ninja | ||||||
|  |  | ||||||
|  |     REM Set it as the cmake generator | ||||||
|  |     set CMAKE_GENERATOR=Ninja | ||||||
|  |  | ||||||
|  |  | ||||||
|  | One key install script | ||||||
|  | ^^^^^^^^^^^^^^^^^^^^^^ | ||||||
|  |  | ||||||
|  | You can take a look at the script `here | ||||||
|  | <https://github.com/peterjc123/pytorch-scripts>`_.  | ||||||
|  | It will walk you through the process. | ||||||
|  |  | ||||||
|  | Extension | ||||||
|  | --------- | ||||||
|  |  | ||||||
|  | CFFI Extension | ||||||
|  | ^^^^^^^^^^^^^^ | ||||||
|  |  | ||||||
|  | Support for the CFFI extension is very experimental. There are | ||||||
|  | generally two steps to enable it under Windows. | ||||||
|  |  | ||||||
|  | First, specify additional ``libraries`` in the ``Extension`` | ||||||
|  | object to make it build on Windows. | ||||||
|  |  | ||||||
|  | .. code-block:: python | ||||||
|  |  | ||||||
|  |    ffi = create_extension( | ||||||
|  |        '_ext.my_lib', | ||||||
|  |        headers=headers, | ||||||
|  |        sources=sources, | ||||||
|  |        define_macros=defines, | ||||||
|  |        relative_to=__file__, | ||||||
|  |        with_cuda=with_cuda, | ||||||
|  |        extra_compile_args=["-std=c99"], | ||||||
|  |        libraries=['ATen', '_C'] # Append cuda libraries when necessary, like cudart | ||||||
|  |    ) | ||||||
|  |  | ||||||
|  | Second, here is a workaround for the "unresolved external symbol | ||||||
|  | state" error caused by ``extern THCState *state;``. | ||||||
|  |  | ||||||
|  | Change the source code from C to C++. An example is listed below. | ||||||
|  |  | ||||||
|  | .. code-block:: cpp | ||||||
|  |  | ||||||
|  |     #include <THC/THC.h> | ||||||
|  |     #include <ATen/ATen.h> | ||||||
|  |  | ||||||
|  |     THCState *state = at::globalContext().thc_state; | ||||||
|  |  | ||||||
|  |     extern "C" int my_lib_add_forward_cuda(THCudaTensor *input1, THCudaTensor *input2, | ||||||
|  |                                             THCudaTensor *output) | ||||||
|  |     { | ||||||
|  |         if (!THCudaTensor_isSameSizeAs(state, input1, input2)) | ||||||
|  |         return 0; | ||||||
|  |         THCudaTensor_resizeAs(state, output, input1); | ||||||
|  |         THCudaTensor_cadd(state, output, input1, 1.0, input2); | ||||||
|  |         return 1; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  |     extern "C" int my_lib_add_backward_cuda(THCudaTensor *grad_output, THCudaTensor *grad_input) | ||||||
|  |     { | ||||||
|  |         THCudaTensor_resizeAs(state, grad_input, grad_output); | ||||||
|  |         THCudaTensor_fill(state, grad_input, 1); | ||||||
|  |         return 1; | ||||||
|  |     } | ||||||
|  |  | ||||||
|  | Cpp Extension | ||||||
|  | ^^^^^^^^^^^^^ | ||||||
|  |  | ||||||
|  | This type of extension has better support compared with | ||||||
|  | the previous one. However, it still needs some manual | ||||||
|  | configuration. First, you should open the | ||||||
|  | **x86_x64 Cross Tools Command Prompt for VS 2017**. | ||||||
|  | And then, you can open the Git-Bash in it. It is | ||||||
|  | usually located in ``C:\Program Files\Git\git-bash.exe``. | ||||||
|  | Finally, you can start your compiling process. | ||||||
|  |  | ||||||
|  | Installation | ||||||
|  | ------------ | ||||||
|  |  | ||||||
|  | Package not found in win-32 channel. | ||||||
|  | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||||
|  |  | ||||||
|  | .. code-block:: bat | ||||||
|  |  | ||||||
|  |     Solving environment: failed | ||||||
|  |  | ||||||
|  |     PackagesNotFoundError: The following packages are not available from current channels: | ||||||
|  |  | ||||||
|  |     - pytorch | ||||||
|  |  | ||||||
|  |     Current channels: | ||||||
|  |     - https://conda.anaconda.org/pytorch/win-32 | ||||||
|  |     - https://conda.anaconda.org/pytorch/noarch | ||||||
|  |     - https://repo.continuum.io/pkgs/main/win-32 | ||||||
|  |     - https://repo.continuum.io/pkgs/main/noarch | ||||||
|  |     - https://repo.continuum.io/pkgs/free/win-32 | ||||||
|  |     - https://repo.continuum.io/pkgs/free/noarch | ||||||
|  |     - https://repo.continuum.io/pkgs/r/win-32 | ||||||
|  |     - https://repo.continuum.io/pkgs/r/noarch | ||||||
|  |     - https://repo.continuum.io/pkgs/pro/win-32 | ||||||
|  |     - https://repo.continuum.io/pkgs/pro/noarch | ||||||
|  |     - https://repo.continuum.io/pkgs/msys2/win-32 | ||||||
|  |     - https://repo.continuum.io/pkgs/msys2/noarch | ||||||
|  |  | ||||||
|  | PyTorch doesn't work on 32-bit systems. Please use 64-bit versions of | ||||||
|  | Windows and Python. | ||||||
|  |  | ||||||
|  | Why are there no Python 2 packages for Windows? | ||||||
|  | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||||
|  |  | ||||||
|  | Because it's not stable enough. There are some issues that need to | ||||||
|  | be solved before we officially release it. You can build it yourself. | ||||||
|  |  | ||||||
|  | Import error | ||||||
|  | ^^^^^^^^^^^^ | ||||||
|  |  | ||||||
|  | .. code-block:: py3tb | ||||||
|  |  | ||||||
|  |     from torch._C import * | ||||||
|  |  | ||||||
|  |     ImportError: DLL load failed: The specified module could not be found. | ||||||
|  |  | ||||||
|  |  | ||||||
|  | The problem is caused by missing essential files. Actually, | ||||||
|  | we include almost all the essential files that PyTorch needs, except the VC2017 | ||||||
|  | redistributable. You can resolve this by typing the following command. | ||||||
|  |  | ||||||
|  | .. code-block:: bat | ||||||
|  |  | ||||||
|  |     conda install -c peterjc123 vc vs2017_runtime | ||||||
|  |  | ||||||
|  | Another possible cause may be that you are using the GPU version without an | ||||||
|  | NVIDIA graphics card. Please replace your GPU package with the CPU one. | ||||||
|  |  | ||||||
|  | Usage (multiprocessing) | ||||||
|  | ------------------------------------------------------- | ||||||
|  |  | ||||||
|  | Multiprocessing error without if-clause protection | ||||||
|  | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||||
|  |  | ||||||
|  | .. code-block:: py3tb | ||||||
|  |  | ||||||
|  |     RuntimeError: | ||||||
|  |    	An attempt has been made to start a new process before the | ||||||
|  |    	current process has finished its bootstrapping phase. | ||||||
|  |  | ||||||
|  |        This probably means that you are not using fork to start your | ||||||
|  |        child processes and you have forgotten to use the proper idiom | ||||||
|  |        in the main module: | ||||||
|  |  | ||||||
|  |            if __name__ == '__main__': | ||||||
|  |                freeze_support() | ||||||
|  |                ... | ||||||
|  |  | ||||||
|  |        The "freeze_support()" line can be omitted if the program | ||||||
|  |        is not going to be frozen to produce an executable. | ||||||
|  |  | ||||||
|  | The implementation of ``multiprocessing`` is different on Windows, which | ||||||
|  | uses ``spawn`` instead of ``fork``. So we have to wrap the code with an | ||||||
|  | if-clause to protect it from executing multiple times. Refactor | ||||||
|  | your code into the following structure. | ||||||
|  |  | ||||||
|  | .. code-block:: python | ||||||
|  |  | ||||||
|  |     import torch | ||||||
|  |  | ||||||
|  |     def main(): | ||||||
|  |         for i, data in enumerate(dataloader): | ||||||
|  |             # do something here | ||||||
|  |  | ||||||
|  |     if __name__ == '__main__': | ||||||
|  |         main() | ||||||
|  |  | ||||||
|  |  | ||||||
|  | Multiprocessing error "Broken pipe" | ||||||
|  | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||||
|  |  | ||||||
|  | .. code-block:: py3tb | ||||||
|  |  | ||||||
|  |     ForkingPickler(file, protocol).dump(obj) | ||||||
|  |  | ||||||
|  |     BrokenPipeError: [Errno 32] Broken pipe | ||||||
|  |  | ||||||
|  | This issue happens when the child process ends before the parent process | ||||||
|  | finishes sending data. There may be something wrong with your code. You | ||||||
|  | can debug your code by reducing the ``num_workers`` of | ||||||
|  | :class:`~torch.utils.data.DataLoader` to zero and seeing if the issue persists. | ||||||
|  |  | ||||||
|  | Multiprocessing error "driver shut down" | ||||||
|  | ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||||
|  |  | ||||||
|  | .. code-block:: py3tb | ||||||
|  |  | ||||||
|  |     Couldn’t open shared file mapping: <torch_14808_1591070686>, error code: <1455> at torch\lib\TH\THAllocator.c:154 | ||||||
|  |  | ||||||
|  |     [windows] driver shut down | ||||||
|  |  | ||||||
|  | Please update your graphics driver. If the problem persists, your graphics | ||||||
|  | card may be too old or the calculation may be too heavy for your card. Please | ||||||
|  | update the TDR settings according to this `post | ||||||
|  | <https://www.pugetsystems.com/labs/hpc/Working-around-TDR-in-Windows-for-a-better-GPU-computing-experience-777/>`_. | ||||||
|  |  | ||||||
|  | CUDA IPC operations | ||||||
|  | ^^^^^^^^^^^^^^^^^^^ | ||||||
|  |  | ||||||
|  | .. code-block:: py3tb | ||||||
|  |  | ||||||
|  |    THCudaCheck FAIL file=torch\csrc\generic\StorageSharing.cpp line=252 error=63 : OS call failed or operation not supported on this OS | ||||||
|  |  | ||||||
|  | They are not supported on Windows. Operations like multiprocessing on CUDA | ||||||
|  | tensors cannot succeed; there are two alternatives. | ||||||
|  |  | ||||||
|  | 1. Don't use ``multiprocessing``. Set the ``num_workers`` of | ||||||
|  | :class:`~torch.utils.data.DataLoader` to zero. | ||||||
|  |  | ||||||
|  | 2. Share CPU tensors instead. Make sure your custom | ||||||
|  | :class:`~torch.utils.data.Dataset` returns CPU tensors. | ||||||
|  |  | ||||||
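Combining two of the Windows workarounds above — the ``if __name__ == '__main__'`` guard that the ``spawn`` start method requires, and dropping to zero worker processes while debugging — a minimal self-contained sketch looks like this (the dataset is a placeholder):

```python
import torch
from torch.utils.data import DataLoader, TensorDataset

dataset = TensorDataset(torch.randn(100, 10))

def main():
    # num_workers=0 keeps data loading in the main process, which sidesteps
    # the multiprocessing issues described above while you debug
    loader = DataLoader(dataset, batch_size=10, num_workers=0)
    for (batch,) in loader:
        pass  # do something with the batch here

if __name__ == '__main__':
    main()
```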
| @ -1,5 +1,7 @@ | |||||||
| .. currentmodule:: torch.sparse | .. currentmodule:: torch.sparse | ||||||
|  |  | ||||||
|  | .. _sparse-docs: | ||||||
|  |  | ||||||
| torch.sparse | torch.sparse | ||||||
| ============ | ============ | ||||||
|  |  | ||||||
|  | |||||||
							
								
								
									
docs/source/tensor_attributes.rst (new file, 131 lines)

| @ -0,0 +1,131 @@ | |||||||
|  | .. currentmodule:: torch | ||||||
|  |  | ||||||
|  | .. _tensor-attributes-doc: | ||||||
|  |  | ||||||
|  | Tensor Attributes | ||||||
|  | ================= | ||||||
|  |  | ||||||
|  | Each ``torch.Tensor`` has a :class:`torch.dtype`, :class:`torch.device`, and :class:`torch.layout`. | ||||||
|  |  | ||||||
|  | .. _dtype-doc: | ||||||
|  |  | ||||||
|  | torch.dtype | ||||||
|  | ----------- | ||||||
|  |  | ||||||
|  | .. class:: torch.dtype | ||||||
|  |  | ||||||
|  | A :class:`torch.dtype` is an object that represents the data type of a | ||||||
|  | :class:`torch.Tensor`. PyTorch has eight different data types: | ||||||
|  |  | ||||||
|  | ========================   ===========================================   =========================== | ||||||
|  | Data type                  dtype                                         Tensor types | ||||||
|  | ========================   ===========================================   =========================== | ||||||
|  | 32-bit floating point      ``torch.float32`` or ``torch.float``          ``torch.*.FloatTensor`` | ||||||
|  | 64-bit floating point      ``torch.float64`` or ``torch.double``         ``torch.*.DoubleTensor`` | ||||||
|  | 16-bit floating point      ``torch.float16`` or ``torch.half``           ``torch.*.HalfTensor`` | ||||||
|  | 8-bit integer (unsigned)   ``torch.uint8``                               ``torch.*.ByteTensor`` | ||||||
|  | 8-bit integer (signed)     ``torch.int8``                                ``torch.*.CharTensor`` | ||||||
|  | 16-bit integer (signed)    ``torch.int16`` or ``torch.short``            ``torch.*.ShortTensor`` | ||||||
|  | 32-bit integer (signed)    ``torch.int32`` or ``torch.int``              ``torch.*.IntTensor`` | ||||||
|  | 64-bit integer (signed)    ``torch.int64`` or ``torch.long``             ``torch.*.LongTensor`` | ||||||
|  | ========================   ===========================================   =========================== | ||||||
|  |  | ||||||
|  | .. _device-doc: | ||||||
|  |  | ||||||
|  | torch.device | ||||||
|  | ------------ | ||||||
|  |  | ||||||
|  | .. class:: torch.device | ||||||
|  |  | ||||||
|  | A :class:`torch.device` is an object representing the device on which a :class:`torch.Tensor` is | ||||||
|  | or will be allocated. | ||||||
|  |  | ||||||
|  | The :class:`torch.device` contains a device type (``'cpu'`` or ``'cuda'``) and optional device ordinal for the | ||||||
|  | device type.  If the device ordinal is not present, this represents the current device for the device type; | ||||||
|  | e.g. a :class:`torch.Tensor` constructed with device ``'cuda'`` is equivalent to ``'cuda:X'`` where X is the result of | ||||||
|  | :func:`torch.cuda.current_device()`. | ||||||
|  |  | ||||||
|  | A :class:`torch.Tensor`'s device can be accessed via the :attr:`Tensor.device` property. | ||||||
|  |  | ||||||
|  | A :class:`torch.device` can be constructed via a string or via a string and device ordinal | ||||||
|  |  | ||||||
|  | Via a string: | ||||||
|  | :: | ||||||
|  |  | ||||||
|  |     >>> torch.device('cuda:0') | ||||||
|  |     device(type='cuda', index=0) | ||||||
|  |  | ||||||
|  |     >>> torch.device('cpu') | ||||||
|  |     device(type='cpu') | ||||||
|  |  | ||||||
|  |     >>> torch.device('cuda')  # current cuda device | ||||||
|  |     device(type='cuda') | ||||||
|  |  | ||||||
|  | Via a string and device ordinal: | ||||||
|  |  | ||||||
|  | :: | ||||||
|  |  | ||||||
|  |     >>> torch.device('cuda', 0) | ||||||
|  |     device(type='cuda', index=0) | ||||||
|  |  | ||||||
|  |     >>> torch.device('cpu', 0) | ||||||
|  |     device(type='cpu', index=0) | ||||||
|  |  | ||||||
|  | .. note:: | ||||||
|  |    The :class:`torch.device` argument in functions can generally be substituted with a string. | ||||||
|  |    This allows for fast prototyping of code. | ||||||
|  |  | ||||||
|  |    >>> # Example of a function that takes in a torch.device | ||||||
|  |    >>> cuda1 = torch.device('cuda:1') | ||||||
|  |    >>> torch.randn((2,3), device=cuda1) | ||||||
|  |  | ||||||
|  |    >>> # You can substitute the torch.device with a string | ||||||
|  |    >>> torch.randn((2,3), 'cuda:1') | ||||||
|  |  | ||||||
|  | .. note:: | ||||||
|  |    For legacy reasons, a device can be constructed via a single device ordinal, which is treated | ||||||
|  |    as a cuda device.  This matches :meth:`Tensor.get_device`, which returns an ordinal for cuda | ||||||
|  |    tensors and is not supported for cpu tensors. | ||||||
|  |  | ||||||
|  |    >>> torch.device(1) | ||||||
|  |    device(type='cuda', index=1) | ||||||
|  |  | ||||||
|  | .. note:: | ||||||
|  |    Methods which take a device will generally accept a (properly formatted) string | ||||||
|  |    or (legacy) integer device ordinal, i.e. the following are all equivalent: | ||||||
|  |  | ||||||
|  |    >>> torch.randn((2,3), device=torch.device('cuda:1')) | ||||||
|  |    >>> torch.randn((2,3), device='cuda:1') | ||||||
|  |    >>> torch.randn((2,3), device=1)  # legacy | ||||||
|  |  | ||||||
|  |  | ||||||
|  | .. _layout-doc: | ||||||
|  |  | ||||||
|  | torch.layout | ||||||
|  | ------------ | ||||||
|  |  | ||||||
|  | .. class:: torch.layout | ||||||
|  |  | ||||||
|  | A :class:`torch.layout` is an object that represents the memory layout of a | ||||||
|  | :class:`torch.Tensor`. Currently, we support ``torch.strided`` (dense Tensors) | ||||||
|  | and have experimental support for ``torch.sparse_coo`` (sparse COO Tensors). | ||||||
|  |  | ||||||
|  | ``torch.strided`` represents dense Tensors and is the memory layout that | ||||||
|  | is most commonly used. Each strided tensor has an associated | ||||||
|  | :class:`torch.Storage`, which holds its data. These tensors provide a | ||||||
|  | multi-dimensional, `strided <https://en.wikipedia.org/wiki/Stride_of_an_array>`_ | ||||||
|  | view of a storage. Strides are a list of integers: the k-th stride | ||||||
|  | represents the jump in memory necessary to go from one element to the | ||||||
|  | next one in the k-th dimension of the Tensor. This concept makes it possible | ||||||
|  | to perform many tensor operations efficiently. | ||||||
|  |  | ||||||
|  | Example:: | ||||||
|  |  | ||||||
|  |     >>> x = torch.Tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]) | ||||||
|  |     >>> x.stride() | ||||||
|  |     (5, 1) | ||||||
|  |  | ||||||
|  |     >>> x.t().stride() | ||||||
|  |     (1, 5) | ||||||
|  |  | ||||||
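To make the stride definition concrete, here is a small sketch (assuming the default contiguous layout) that recovers an element from the underlying storage using the strides::

    >>> x = torch.Tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]])
    >>> i, j = 1, 2
    >>> offset = x.storage_offset() + i * x.stride(0) + j * x.stride(1)
    >>> x.storage()[offset]   # same element as x[i][j]
    8.0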
|  | For more information on ``torch.sparse_coo`` tensors, see :ref:`sparse-docs`. | ||||||
| @ -10,18 +10,18 @@ a single data type. | |||||||
|  |  | ||||||
| Torch defines eight CPU tensor types and eight GPU tensor types: | Torch defines eight CPU tensor types and eight GPU tensor types: | ||||||
|  |  | ||||||
| ========================   ===================   ===========================   ================================ | ========================   ===========================================   ===========================   ================================ | ||||||
| Data type                  dtype                         CPU tensor                    GPU tensor | Data type                  dtype                                         CPU tensor                    GPU tensor | ||||||
| ========================   ===================   ===========================   ================================ | ========================   ===========================================   ===========================   ================================ | ||||||
| 32-bit floating point      ``torch.float32``     :class:`torch.FloatTensor`    :class:`torch.cuda.FloatTensor` | 32-bit floating point      ``torch.float32`` or ``torch.float``          :class:`torch.FloatTensor`    :class:`torch.cuda.FloatTensor` | ||||||
| 64-bit floating point      ``torch.float64``     :class:`torch.DoubleTensor`   :class:`torch.cuda.DoubleTensor` | 64-bit floating point      ``torch.float64`` or ``torch.double``         :class:`torch.DoubleTensor`   :class:`torch.cuda.DoubleTensor` | ||||||
| 16-bit floating point      ``torch.float16``     :class:`torch.HalfTensor`     :class:`torch.cuda.HalfTensor` | 16-bit floating point      ``torch.float16`` or ``torch.half``           :class:`torch.HalfTensor`     :class:`torch.cuda.HalfTensor` | ||||||
| 8-bit integer (unsigned)   ``torch.uint8``       :class:`torch.ByteTensor`     :class:`torch.cuda.ByteTensor` | 8-bit integer (unsigned)   ``torch.uint8``                               :class:`torch.ByteTensor`     :class:`torch.cuda.ByteTensor` | ||||||
| 8-bit integer (signed)     ``torch.int8``        :class:`torch.CharTensor`     :class:`torch.cuda.CharTensor` | 8-bit integer (signed)     ``torch.int8``                                :class:`torch.CharTensor`     :class:`torch.cuda.CharTensor` | ||||||
| 16-bit integer (signed)    ``torch.int16``       :class:`torch.ShortTensor`    :class:`torch.cuda.ShortTensor` | 16-bit integer (signed)    ``torch.int16`` or ``torch.short``            :class:`torch.ShortTensor`    :class:`torch.cuda.ShortTensor` | ||||||
| 32-bit integer (signed)    ``torch.int32``       :class:`torch.IntTensor`      :class:`torch.cuda.IntTensor` | 32-bit integer (signed)    ``torch.int32`` or ``torch.int``              :class:`torch.IntTensor`      :class:`torch.cuda.IntTensor` | ||||||
| 64-bit integer (signed)    ``torch.int64``       :class:`torch.LongTensor`     :class:`torch.cuda.LongTensor` | 64-bit integer (signed)    ``torch.int64`` or ``torch.long``             :class:`torch.LongTensor`     :class:`torch.cuda.LongTensor` | ||||||
| ========================   ===================   ===========================   ================================ | ========================   ===========================================   ===========================   ================================ | ||||||
|  |  | ||||||
| :class:`torch.Tensor` is an alias for the default tensor type (:class:`torch.FloatTensor`). | :class:`torch.Tensor` is an alias for the default tensor type (:class:`torch.FloatTensor`). | ||||||
|  |  | ||||||
| @ -31,16 +31,20 @@ A tensor can be constructed from a Python :class:`list` or sequence using the | |||||||
| :: | :: | ||||||
|  |  | ||||||
|     >>> torch.tensor([[1., -1.], [1., -1.]]) |     >>> torch.tensor([[1., -1.], [1., -1.]]) | ||||||
|  |     tensor([[ 1.0000, -1.0000], | ||||||
|      1 -1 |             [ 1.0000, -1.0000]]) | ||||||
|      1 -1 |  | ||||||
|     [torch.FloatTensor of size (2,2)] |  | ||||||
|  |  | ||||||
|     >>> torch.tensor(np.array([[1, 2, 3], [4, 5, 6]])) |     >>> torch.tensor(np.array([[1, 2, 3], [4, 5, 6]])) | ||||||
|  |     tensor([[ 1,  2,  3], | ||||||
|  |             [ 4,  5,  6]]) | ||||||
|  |  | ||||||
|      1 -1 | .. warning:: | ||||||
|      1 -1 |  | ||||||
|     [torch.FloatTensor of size (2,2)] |     :func:`torch.tensor` always copies :attr:`data`. If you have a Tensor | ||||||
|  |     :attr:`data` and just want to change its ``requires_grad`` flag, use | ||||||
|  |     :meth:`~torch.Tensor.requires_grad_` or | ||||||
|  |     :meth:`~torch.Tensor.detach` to avoid a copy. | ||||||
|  |     If you have a numpy array and want to avoid a copy, use | ||||||
|  |     :func:`torch.from_numpy`. | ||||||
|  |  | ||||||
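A minimal sketch of that distinction, assuming NumPy is imported as ``np`` as in the example above::

    >>> a = np.array([1., 2., 3.])
    >>> copied = torch.tensor(a)       # always copies the data
    >>> shared = torch.from_numpy(a)   # shares memory with ``a``
    >>> a[0] = 100.
    >>> copied[0].item(), shared[0].item()
    (1.0, 100.0)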
| A tensor of a specific data type can be constructed by passing a | A tensor of a specific data type can be constructed by passing a | ||||||
| :class:`torch.dtype` and/or a :class:`torch.device` to a | :class:`torch.dtype` and/or a :class:`torch.device` to a | ||||||
| @ -49,16 +53,12 @@ constructor or tensor creation op: | |||||||
| :: | :: | ||||||
|  |  | ||||||
|     >>> torch.zeros([2, 4], dtype=torch.int32) |     >>> torch.zeros([2, 4], dtype=torch.int32) | ||||||
|  |     tensor([[ 0,  0,  0,  0], | ||||||
|     0  0  0  0 |             [ 0,  0,  0,  0]], dtype=torch.int32) | ||||||
|     0  0  0  0 |     >>> cuda0 = torch.device('cuda:0') | ||||||
|     [torch.IntTensor of size 2x4] |     >>> torch.ones([2, 4], dtype=torch.float64, device=cuda0) | ||||||
|  |     tensor([[ 1.0000,  1.0000,  1.0000,  1.0000], | ||||||
|     >>> torch.ones([2, 4], dtype=torch.float64, device=torch.device('cuda:0')) |             [ 1.0000,  1.0000,  1.0000,  1.0000]], dtype=torch.float64, device='cuda:0') | ||||||
|  |  | ||||||
|     1  1  1  1 |  | ||||||
|     1  1  1  1 |  | ||||||
|     [torch.cuda.DoubleTensor of size 2x4] |  | ||||||
|  |  | ||||||
| The contents of a tensor can be accessed and modified using Python's indexing | The contents of a tensor can be accessed and modified using Python's indexing | ||||||
| and slicing notation: | and slicing notation: | ||||||
| @ -67,14 +67,27 @@ and slicing notation: | |||||||
|  |  | ||||||
|     >>> x = torch.tensor([[1, 2, 3], [4, 5, 6]]) |     >>> x = torch.tensor([[1, 2, 3], [4, 5, 6]]) | ||||||
|     >>> print(x[1][2]) |     >>> print(x[1][2]) | ||||||
|  |     tensor(6) | ||||||
|     6.0 |  | ||||||
|     >>> x[0][1] = 8 |     >>> x[0][1] = 8 | ||||||
|     >>> print(x) |     >>> print(x) | ||||||
|  |     tensor([[ 1,  8,  3], | ||||||
|  |             [ 4,  5,  6]]) | ||||||
|  |  | ||||||
|      1  8  3 | Use :meth:`torch.Tensor.item` to get a Python number from a tensor containing a | ||||||
|      4  5  6 | single value: | ||||||
|     [torch.FloatTensor of size 2x3] |  | ||||||
|  | :: | ||||||
|  |  | ||||||
|  |     >>> x = torch.tensor([[1]]) | ||||||
|  |     >>> x | ||||||
|  |     tensor([[ 1]]) | ||||||
|  |     >>> x.item() | ||||||
|  |     1 | ||||||
|  |     >>> x = torch.tensor(2.5) | ||||||
|  |     >>> x | ||||||
|  |     tensor(2.5000) | ||||||
|  |     >>> x.item() | ||||||
|  |     2.5 | ||||||
|  |  | ||||||
| A tensor can be created with :attr:`requires_grad=True` so that | A tensor can be created with :attr:`requires_grad=True` so that | ||||||
| :mod:`torch.autograd` records operations on it for automatic differentiation. | :mod:`torch.autograd` records operations on it for automatic differentiation. | ||||||
| @ -84,26 +97,47 @@ A tensor can be created with :attr:`requires_grad=True` so that | |||||||
|     >>> x = torch.tensor([[1., -1.], [1., 1.]], requires_grad=True) |     >>> x = torch.tensor([[1., -1.], [1., 1.]], requires_grad=True) | ||||||
|     >>> out = x.pow(2).sum() |     >>> out = x.pow(2).sum() | ||||||
|     >>> out.backward() |     >>> out.backward() | ||||||
|     >>> out.grad |     >>> x.grad | ||||||
|  |     tensor([[ 2.0000, -2.0000], | ||||||
|      2 -2 |             [ 2.0000,  2.0000]]) | ||||||
|      2  2 |  | ||||||
|     [torch.FloatTensor of size (2,2)] |  | ||||||
|  |  | ||||||
| Each tensor has an associated :class:`torch.Storage`, which holds its data. | Each tensor has an associated :class:`torch.Storage`, which holds its data. | ||||||
| The tensor class provides a multi-dimensional, `strided <https://en.wikipedia.org/wiki/Stride_of_an_array>`_ | The tensor class provides a multi-dimensional, `strided <https://en.wikipedia.org/wiki/Stride_of_an_array>`_ | ||||||
| view of a storage and defines numeric operations on it. | view of a storage and defines numeric operations on it. | ||||||
|  |  | ||||||
|  | .. note:: | ||||||
|  |    For more information on the :class:`torch.dtype`, :class:`torch.device`, and | ||||||
|  |    :class:`torch.layout` attributes of a :class:`torch.Tensor`, see | ||||||
|  |    :ref:`tensor-attributes-doc`. | ||||||
|  |  | ||||||
| .. note:: | .. note:: | ||||||
|    Methods which mutate a tensor are marked with an underscore suffix. |    Methods which mutate a tensor are marked with an underscore suffix. | ||||||
|    For example, :func:`torch.FloatTensor.abs_` computes the absolute value |    For example, :func:`torch.FloatTensor.abs_` computes the absolute value | ||||||
|    in-place and returns the modified tensor, while :func:`torch.FloatTensor.abs` |    in-place and returns the modified tensor, while :func:`torch.FloatTensor.abs` | ||||||
|    computes the result in a new tensor. |    computes the result in a new tensor. | ||||||
|  |  | ||||||
|  | .. note:: | ||||||
|  |     To change an existing tensor's :class:`torch.device` and/or :class:`torch.dtype`, consider using | ||||||
|  |     :meth:`~torch.Tensor.to` method on the tensor. | ||||||
|  |  | ||||||
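For example, a minimal sketch (the last line assumes a CUDA device is available)::

    >>> x = torch.randn(2, 2)
    >>> x.to(torch.float64).dtype
    torch.float64
    >>> x.to(torch.device('cuda:0')).device
    device(type='cuda', index=0)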
| .. class:: Tensor() | .. class:: Tensor() | ||||||
|  |  | ||||||
|   Create a tensor using the :func:`torch.tensor` constructor or with |    There are a few main ways to create a tensor, depending on your use case. | ||||||
|   tensor creation ops (see :ref:`tensor-creation-ops`) |  | ||||||
|  |    - To create a tensor with pre-existing data, use :func:`torch.tensor`. | ||||||
|  |    - To create a tensor with specific size, use ``torch.*`` tensor creation | ||||||
|  |      ops (see :ref:`tensor-creation-ops`). | ||||||
|  |    - To create a tensor with the same size (and similar types) as another tensor, | ||||||
|  |      use ``torch.*_like`` tensor creation ops | ||||||
|  |      (see :ref:`tensor-creation-ops`). | ||||||
|  |    - To create a tensor with similar type but different size as another tensor, | ||||||
|  |      use ``tensor.new_*`` creation ops (a short sketch of all four routes follows this list). | ||||||
|  |  | ||||||
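   An illustrative sketch of those four routes (the variable names are placeholders, not part of the API)::

       >>> from_data = torch.tensor([[1., 2.], [3., 4.]])   # pre-existing data
       >>> sized     = torch.zeros(2, 3)                     # specific size
       >>> same_size = torch.zeros_like(from_data)           # same size as another tensor
       >>> new_sized = from_data.new_ones(5)                 # same dtype, different size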
|  |    .. automethod:: new_tensor | ||||||
|  |    .. automethod:: new_full | ||||||
|  |    .. automethod:: new_empty | ||||||
|  |    .. automethod:: new_ones | ||||||
|  |    .. automethod:: new_zeros | ||||||
|  |  | ||||||
|    .. automethod:: abs |    .. automethod:: abs | ||||||
|    .. automethod:: abs_ |    .. automethod:: abs_ | ||||||
| @ -262,7 +296,6 @@ view of a storage and defines numeric operations on it. | |||||||
|    .. automethod:: neg |    .. automethod:: neg | ||||||
|    .. automethod:: neg_ |    .. automethod:: neg_ | ||||||
|    .. automethod:: nelement |    .. automethod:: nelement | ||||||
|    .. automethod:: new |  | ||||||
|    .. automethod:: nonzero |    .. automethod:: nonzero | ||||||
|    .. automethod:: norm |    .. automethod:: norm | ||||||
|    .. automethod:: normal_ |    .. automethod:: normal_ | ||||||
| @ -289,6 +322,7 @@ view of a storage and defines numeric operations on it. | |||||||
|    .. automethod:: renorm |    .. automethod:: renorm | ||||||
|    .. automethod:: renorm_ |    .. automethod:: renorm_ | ||||||
|    .. automethod:: repeat |    .. automethod:: repeat | ||||||
|  |    .. automethod:: requires_grad_ | ||||||
|    .. automethod:: reshape |    .. automethod:: reshape | ||||||
|    .. automethod:: resize_ |    .. automethod:: resize_ | ||||||
|    .. automethod:: resize_as_ |    .. automethod:: resize_as_ | ||||||
| @ -329,6 +363,7 @@ view of a storage and defines numeric operations on it. | |||||||
|    .. automethod:: symeig |    .. automethod:: symeig | ||||||
|    .. automethod:: t |    .. automethod:: t | ||||||
|    .. automethod:: t_ |    .. automethod:: t_ | ||||||
|  |    .. automethod:: to | ||||||
|    .. automethod:: take |    .. automethod:: take | ||||||
|    .. automethod:: tan |    .. automethod:: tan | ||||||
|    .. automethod:: tan_ |    .. automethod:: tan_ | ||||||
|  | |||||||
| @ -6,8 +6,9 @@ Tensors | |||||||
| ---------------------------------- | ---------------------------------- | ||||||
| .. autofunction:: is_tensor | .. autofunction:: is_tensor | ||||||
| .. autofunction:: is_storage | .. autofunction:: is_storage | ||||||
| .. autofunction:: set_default_tensor_type |  | ||||||
| .. autofunction:: set_default_dtype | .. autofunction:: set_default_dtype | ||||||
|  | .. autofunction:: get_default_dtype | ||||||
|  | .. autofunction:: set_default_tensor_type | ||||||
| .. autofunction:: numel | .. autofunction:: numel | ||||||
| .. autofunction:: set_printoptions | .. autofunction:: set_printoptions | ||||||
| .. autofunction:: set_flush_denormal | .. autofunction:: set_flush_denormal | ||||||
| @ -27,6 +28,9 @@ Creation Ops | |||||||
|     :func:`torch.randint` |     :func:`torch.randint` | ||||||
|     :func:`torch.randint_like` |     :func:`torch.randint_like` | ||||||
|     :func:`torch.randperm` |     :func:`torch.randperm` | ||||||
|  |     You may also use :func:`torch.empty` with the :ref:`inplace-random-sampling` | ||||||
|  |     methods to create :class:`torch.Tensor` s with values sampled from a broader | ||||||
|  |     range of distributions. | ||||||
|  |  | ||||||
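    A brief sketch of that pattern (illustrative distributions only)::

        >>> torch.empty(3).uniform_(0, 1)        # Uniform(0, 1)
        >>> torch.empty(2, 2).exponential_(1)    # Exponential(rate=1)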
| .. autofunction:: tensor | .. autofunction:: tensor | ||||||
| .. autofunction:: from_numpy | .. autofunction:: from_numpy | ||||||
| @ -83,6 +87,8 @@ Random sampling | |||||||
| .. autofunction:: randn_like | .. autofunction:: randn_like | ||||||
| .. autofunction:: randperm | .. autofunction:: randperm | ||||||
|  |  | ||||||
|  | .. _inplace-random-sampling: | ||||||
|  |  | ||||||
| In-place random sampling | In-place random sampling | ||||||
| ~~~~~~~~~~~~~~~~~~~~~~~~ | ~~~~~~~~~~~~~~~~~~~~~~~~ | ||||||
|  |  | ||||||
| @ -109,6 +115,37 @@ Parallelism | |||||||
| .. autofunction:: get_num_threads | .. autofunction:: get_num_threads | ||||||
| .. autofunction:: set_num_threads | .. autofunction:: set_num_threads | ||||||
|  |  | ||||||
|  | Locally disabling gradient computation | ||||||
|  | -------------------------------------- | ||||||
|  | The context managers :func:`torch.no_grad`, :func:`torch.enable_grad`, and | ||||||
|  | :func:`torch.set_grad_enabled` are helpful for locally disabling and enabling | ||||||
|  | gradient computation. See :ref:`locally-disable-grad` for more details on | ||||||
|  | their usage. | ||||||
|  |  | ||||||
|  | Examples:: | ||||||
|  |  | ||||||
|  |   >>> x = torch.zeros(1, requires_grad=True) | ||||||
|  |   >>> with torch.no_grad(): | ||||||
|  |   ...     y = x * 2 | ||||||
|  |   >>> y.requires_grad | ||||||
|  |   False | ||||||
|  |  | ||||||
|  |   >>> is_train = False | ||||||
|  |   >>> with torch.set_grad_enabled(is_train): | ||||||
|  |   ...     y = x * 2 | ||||||
|  |   >>> y.requires_grad | ||||||
|  |   False | ||||||
|  |  | ||||||
|  |   >>> torch.set_grad_enabled(True)  # this can also be used as a function | ||||||
|  |   >>> y = x * 2 | ||||||
|  |   >>> y.requires_grad | ||||||
|  |   True | ||||||
|  |  | ||||||
|  |   >>> torch.set_grad_enabled(False) | ||||||
|  |   >>> y = x * 2 | ||||||
|  |   >>> y.requires_grad | ||||||
|  |   False | ||||||
|  |  | ||||||
|  |  | ||||||
| Math operations | Math operations | ||||||
| ---------------------------------- | ---------------------------------- | ||||||
|  | |||||||
setup.py (19 changes)
							| @ -43,10 +43,6 @@ | |||||||
| #   WITH_GLOO_IBVERBS | #   WITH_GLOO_IBVERBS | ||||||
| #     toggle features related to distributed support | #     toggle features related to distributed support | ||||||
| # | # | ||||||
| #   PYTORCH_BINARY_BUILD |  | ||||||
| #     toggle static linking against libstdc++, used when we're building |  | ||||||
| #     binaries for distribution |  | ||||||
| # |  | ||||||
| #   PYTORCH_BUILD_VERSION | #   PYTORCH_BUILD_VERSION | ||||||
| #   PYTORCH_BUILD_NUMBER | #   PYTORCH_BUILD_NUMBER | ||||||
| #     specify the version of PyTorch, rather than the hard-coded version | #     specify the version of PyTorch, rather than the hard-coded version | ||||||
| @ -780,19 +776,6 @@ if DEBUG: | |||||||
|         extra_compile_args += ['-O0', '-g'] |         extra_compile_args += ['-O0', '-g'] | ||||||
|         extra_link_args += ['-O0', '-g'] |         extra_link_args += ['-O0', '-g'] | ||||||
|  |  | ||||||
| if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux': |  | ||||||
|     print('PYTORCH_BINARY_BUILD found. Static linking libstdc++ on Linux') |  | ||||||
|     # get path of libstdc++ and link manually. |  | ||||||
|     # for reasons unknown, -static-libstdc++ doesn't fully link some symbols |  | ||||||
|     CXXNAME = os.getenv('CXX', 'g++') |  | ||||||
|     STDCPP_LIB = subprocess.check_output([CXXNAME, '-print-file-name=libstdc++.a']) |  | ||||||
|     STDCPP_LIB = STDCPP_LIB[:-1] |  | ||||||
|     if type(STDCPP_LIB) != str:  # python 3 |  | ||||||
|         STDCPP_LIB = STDCPP_LIB.decode(sys.stdout.encoding) |  | ||||||
|     main_link_args += [STDCPP_LIB] |  | ||||||
|     version_script = os.path.abspath("tools/pytorch.version") |  | ||||||
|     extra_link_args += ['-Wl,--version-script=' + version_script] |  | ||||||
|  |  | ||||||
|  |  | ||||||
| def make_relative_rpath(path): | def make_relative_rpath(path): | ||||||
|     if IS_DARWIN: |     if IS_DARWIN: | ||||||
| @ -807,7 +790,7 @@ def make_relative_rpath(path): | |||||||
| ################################################################################ | ################################################################################ | ||||||
|  |  | ||||||
| extensions = [] | extensions = [] | ||||||
| packages = find_packages(exclude=('tools', 'tools.*', 'caffe2', 'caffe')) | packages = find_packages(exclude=('tools', 'tools.*', 'caffe2', 'caffe2.*', 'caffe', 'caffe.*')) | ||||||
| C = Extension("torch._C", | C = Extension("torch._C", | ||||||
|               libraries=main_libraries, |               libraries=main_libraries, | ||||||
|               sources=main_sources, |               sources=main_sources, | ||||||
|  | |||||||
| @ -8,7 +8,7 @@ import warnings | |||||||
| from copy import deepcopy | from copy import deepcopy | ||||||
| from collections import OrderedDict | from collections import OrderedDict | ||||||
| from itertools import product | from itertools import product | ||||||
| from operator import mul | from operator import mul, itemgetter | ||||||
| from functools import reduce, wraps | from functools import reduce, wraps | ||||||
| from torch.autograd.gradcheck import gradgradcheck, gradcheck | from torch.autograd.gradcheck import gradgradcheck, gradcheck | ||||||
| from torch.autograd.function import once_differentiable | from torch.autograd.function import once_differentiable | ||||||
| @ -1289,6 +1289,12 @@ class TestAutograd(TestCase): | |||||||
|         Identity.apply(v).backward() |         Identity.apply(v).backward() | ||||||
|         self.assertEqual(device[0], 1) |         self.assertEqual(device[0], 1) | ||||||
|  |  | ||||||
|  |     @unittest.skipIf(torch.cuda.device_count() < 2, "no multi-GPU") | ||||||
|  |     def test_inputbuffer_add_multigpu(self): | ||||||
|  |         input = torch.randn(1).cuda(0).requires_grad_() | ||||||
|  |         output = input.cuda(1) + input.cuda(1) | ||||||
|  |         output.backward() | ||||||
|  |  | ||||||
|     def test_detach(self): |     def test_detach(self): | ||||||
|         x = torch.randn(10, 10, requires_grad=True) |         x = torch.randn(10, 10, requires_grad=True) | ||||||
|         y = x + 2 |         y = x + 2 | ||||||
| @ -2267,9 +2273,9 @@ S = 5 | |||||||
| #   method name, | #   method name, | ||||||
| #   input size/constructing fn, | #   input size/constructing fn, | ||||||
| #   args (tuple represents shape of a tensor arg), | #   args (tuple represents shape of a tensor arg), | ||||||
| #   test variant name (will be used at test name suffix),  // optional | #   test variant name (will be used at test name suffix),    // optional | ||||||
| #   indices for possible dim arg,                          // optional | #   indices for possible dim arg,                            // optional | ||||||
| #   output indices that should be gradcheck'ed,            // optional | #   fn mapping output to part that should be gradcheck'ed,   // optional | ||||||
| # ) | # ) | ||||||
| method_tests = [ | method_tests = [ | ||||||
|     ('add', (S, S, S), ((S, S, S),)), |     ('add', (S, S, S), ((S, S, S),)), | ||||||
| @ -2700,18 +2706,31 @@ method_tests = [ | |||||||
|      'symmetric_pd', NO_ARGS, [skipIfNoLapack]), |      'symmetric_pd', NO_ARGS, [skipIfNoLapack]), | ||||||
|     ('logdet', lambda: make_nonzero_det(random_fullrank_matrix_distinct_singular_value(S), 1, 0), NO_ARGS, |     ('logdet', lambda: make_nonzero_det(random_fullrank_matrix_distinct_singular_value(S), 1, 0), NO_ARGS, | ||||||
|      'distinct_singular_values', NO_ARGS, [skipIfNoLapack]), |      'distinct_singular_values', NO_ARGS, [skipIfNoLapack]), | ||||||
|     ('slogdet', lambda: make_nonzero_det(torch.randn(1, 1), 1), NO_ARGS, '1x1_pos_det', NO_ARGS, [skipIfNoLapack], [1]), |     ('slogdet', lambda: make_nonzero_det(torch.randn(1, 1), 1), NO_ARGS, | ||||||
|  |      '1x1_pos_det', NO_ARGS, [skipIfNoLapack], itemgetter(1)), | ||||||
|     ('slogdet', lambda: make_nonzero_det(torch.randn(1, 1), -1), NO_ARGS, |     ('slogdet', lambda: make_nonzero_det(torch.randn(1, 1), -1), NO_ARGS, | ||||||
|      '1x1_neg_det', NO_ARGS, [skipIfNoLapack], [1]), |      '1x1_neg_det', NO_ARGS, [skipIfNoLapack], itemgetter(1)), | ||||||
|     ('slogdet', lambda: make_nonzero_det(torch.randn(S, S), 1), NO_ARGS, 'pos_det', NO_ARGS, [skipIfNoLapack], [1]), |     ('slogdet', lambda: make_nonzero_det(torch.randn(S, S), 1), NO_ARGS, | ||||||
|     ('slogdet', lambda: make_nonzero_det(torch.randn(S, S), -1), NO_ARGS, 'neg_det', NO_ARGS, [skipIfNoLapack], [1]), |      'pos_det', NO_ARGS, [skipIfNoLapack], itemgetter(1)), | ||||||
|  |     ('slogdet', lambda: make_nonzero_det(torch.randn(S, S), -1), NO_ARGS, | ||||||
|  |      'neg_det', NO_ARGS, [skipIfNoLapack], itemgetter(1)), | ||||||
|     ('slogdet', lambda: make_nonzero_det(random_symmetric_matrix(S)), NO_ARGS, |     ('slogdet', lambda: make_nonzero_det(random_symmetric_matrix(S)), NO_ARGS, | ||||||
|      'symmetric', NO_ARGS, [skipIfNoLapack], [1]), |      'symmetric', NO_ARGS, [skipIfNoLapack], itemgetter(1)), | ||||||
|     ('slogdet', lambda: random_symmetric_pd_matrix(S), NO_ARGS, 'symmetric_pd', NO_ARGS, [skipIfNoLapack], [1]), |     ('slogdet', lambda: random_symmetric_pd_matrix(S), NO_ARGS, | ||||||
|  |      'symmetric_pd', NO_ARGS, [skipIfNoLapack], itemgetter(1)), | ||||||
|     ('slogdet', lambda: random_fullrank_matrix_distinct_singular_value(S), NO_ARGS, |     ('slogdet', lambda: random_fullrank_matrix_distinct_singular_value(S), NO_ARGS, | ||||||
|      'distinct_singular_values', NO_ARGS, [skipIfNoLapack], [1]), |      'distinct_singular_values', NO_ARGS, [skipIfNoLapack], itemgetter(1)), | ||||||
|     ('svd', lambda: random_fullrank_matrix_distinct_singular_value(S), NO_ARGS, '', NO_ARGS, [skipIfNoLapack]), |     ('svd', lambda: random_fullrank_matrix_distinct_singular_value(S), NO_ARGS, '', NO_ARGS, [skipIfNoLapack]), | ||||||
|     ('svd', lambda: random_fullrank_matrix_distinct_singular_value(M), NO_ARGS, 'large', NO_ARGS, [skipIfNoLapack]), |     ('svd', lambda: random_fullrank_matrix_distinct_singular_value(S)[:(S - 2)], NO_ARGS, | ||||||
|  |      'wide', NO_ARGS, [skipIfNoLapack]), | ||||||
|  |     ('svd', lambda: random_fullrank_matrix_distinct_singular_value(S)[:, :(S - 2)], NO_ARGS, | ||||||
|  |      'tall', NO_ARGS, [skipIfNoLapack]), | ||||||
|  |     ('svd', lambda: random_fullrank_matrix_distinct_singular_value(S)[:(S - 2)], (False,), | ||||||
|  |      'wide_all', NO_ARGS, [skipIfNoLapack], lambda usv: (usv[0], usv[1], usv[2][:, :(S - 2)])), | ||||||
|  |     ('svd', lambda: random_fullrank_matrix_distinct_singular_value(S)[:, :(S - 2)], (False,), | ||||||
|  |      'tall_all', NO_ARGS, [skipIfNoLapack], lambda usv: (usv[0][:, :(S - 2)], usv[1], usv[2])), | ||||||
|  |     ('svd', lambda: random_fullrank_matrix_distinct_singular_value(M), NO_ARGS, | ||||||
|  |      'large', NO_ARGS, [skipIfNoLapack]), | ||||||
|     ('gesv', (S, S), ((S, S),), '', NO_ARGS, [skipIfNoLapack]), |     ('gesv', (S, S), ((S, S),), '', NO_ARGS, [skipIfNoLapack]), | ||||||
|     ('fill_', (S, S, S), (1,), 'number'), |     ('fill_', (S, S, S), (1,), 'number'), | ||||||
|     ('fill_', (), (1,), 'number_scalar'), |     ('fill_', (), (1,), 'number_scalar'), | ||||||
| @ -3028,7 +3047,7 @@ for test in method_tests: | |||||||
|  |  | ||||||
|     skipTestIf = test[5] if len(test) >= 6 else [] |     skipTestIf = test[5] if len(test) >= 6 else [] | ||||||
|  |  | ||||||
|     test_output_indices = test[6] if len(test) >= 7 else None |     output_process_fn = test[6] if len(test) >= 7 else lambda x: x | ||||||
|  |  | ||||||
|     for dim_perm in product([-1, 1], repeat=len(dim_args_idx)): |     for dim_perm in product([-1, 1], repeat=len(dim_args_idx)): | ||||||
|         test_name = basic_test_name |         test_name = basic_test_name | ||||||
| @ -3039,7 +3058,7 @@ for test in method_tests: | |||||||
|         # for-loop bodies don't define scopes, so we have to save the variables |         # for-loop bodies don't define scopes, so we have to save the variables | ||||||
|         # we want to close over in some way |         # we want to close over in some way | ||||||
|         def do_test(self, name=name, self_size=self_size, args=new_args, test_name=test_name, |         def do_test(self, name=name, self_size=self_size, args=new_args, test_name=test_name, | ||||||
|                     test_output_indices=test_output_indices): |                     output_process_fn=output_process_fn): | ||||||
|             def check(name): |             def check(name): | ||||||
|                 is_magic_method = name[:2] == '__' and name[-2:] == '__' |                 is_magic_method = name[:2] == '__' and name[-2:] == '__' | ||||||
|                 is_inplace = name[-1] == "_" and not is_magic_method |                 is_inplace = name[-1] == "_" and not is_magic_method | ||||||
| @ -3061,10 +3080,7 @@ for test in method_tests: | |||||||
|  |  | ||||||
|                 def fn(*inputs): |                 def fn(*inputs): | ||||||
|                     output = getattr(inputs[0], name)(*inputs[1:]) |                     output = getattr(inputs[0], name)(*inputs[1:]) | ||||||
|                     if test_output_indices is None: |                     return output_process_fn(output) | ||||||
|                         return output |  | ||||||
|                     else: |  | ||||||
|                         return tuple(output[i] for i in test_output_indices) |  | ||||||
|  |  | ||||||
|                 if not is_inplace and name not in EXCLUDE_GRADCHECK: |                 if not is_inplace and name not in EXCLUDE_GRADCHECK: | ||||||
|                     run_grad_and_gradgrad_checks(self, name, test_name, fn, |                     run_grad_and_gradgrad_checks(self, name, test_name, fn, | ||||||
| @ -3074,10 +3090,7 @@ for test in method_tests: | |||||||
|                 if hasattr(torch, name) and name not in EXCLUDE_FUNCTIONAL: |                 if hasattr(torch, name) and name not in EXCLUDE_FUNCTIONAL: | ||||||
|                     def fn(*inputs): |                     def fn(*inputs): | ||||||
|                         output = getattr(torch, name)(*inputs) |                         output = getattr(torch, name)(*inputs) | ||||||
|                         if test_output_indices is None: |                         return output_process_fn(output) | ||||||
|                             return output |  | ||||||
|                         else: |  | ||||||
|                             return tuple(output[i] for i in test_output_indices) |  | ||||||
|  |  | ||||||
|                     f_args_variable = (self_variable,) + args_variable |                     f_args_variable = (self_variable,) + args_variable | ||||||
|                     f_args_tensor = (self_tensor,) + args_tensor |                     f_args_tensor = (self_tensor,) + args_tensor | ||||||
|  | |||||||
| @ -1370,22 +1370,11 @@ class TestCuda(TestCase): | |||||||
|             # test setitem |             # test setitem | ||||||
|             x_clone1 = x.clone() |             x_clone1 = x.clone() | ||||||
|             x_clone2 = x.clone() |             x_clone2 = x.clone() | ||||||
|             x_clone3 = x.clone() |  | ||||||
|             first_shape = x[:, ia, None, ib, 0].shape |             first_shape = x[:, ia, None, ib, 0].shape | ||||||
|             second_shape = x[ia].shape |             second_shape = x[ia].shape | ||||||
|             x_clone1[:, ia, None, ib, 0] = torch.randn(first_shape).to(x_clone1) |             x_clone1[:, ia, None, ib, 0] = torch.randn(first_shape).to(x_clone1) | ||||||
|             x_clone2[ia] = torch.randn(second_shape).to(x_clone2) |             x_clone2[ia] = torch.randn(second_shape).to(x_clone2) | ||||||
|  |  | ||||||
|             # fill equivalents |  | ||||||
|             x_clone1[:, ia, None, ib, 0] = 5 |  | ||||||
|             x_clone2[ia] = 7 |  | ||||||
|  |  | ||||||
|             # mask equivalents |  | ||||||
|             mask = (torch.randn(x_clone3.size()) < 0).to(ia.device) |  | ||||||
|             x_clone3[mask] |  | ||||||
|             self.assertEqual(x_clone3[mask].cpu(), x_clone3.cpu()[mask.cpu()]) |  | ||||||
|             x_clone3[mask] = 6 |  | ||||||
|  |  | ||||||
|         cpu = torch.device('cpu') |         cpu = torch.device('cpu') | ||||||
|         for device in ['cuda:0', 'cuda:1'] if torch.cuda.device_count() > 1 else ['cuda']: |         for device in ['cuda:0', 'cuda:1'] if torch.cuda.device_count() > 1 else ['cuda']: | ||||||
|             # Index cpu tensor with cuda tensor |             # Index cpu tensor with cuda tensor | ||||||
|  | |||||||
| @ -1746,6 +1746,35 @@ class TestDistributions(TestCase): | |||||||
|             x = Beta(Tensor([1e-6]), Tensor([1e-6])).sample()[0] |             x = Beta(Tensor([1e-6]), Tensor([1e-6])).sample()[0] | ||||||
|             self.assertTrue(np.isfinite(x) and x > 0, 'Invalid Beta.sample(): {}'.format(x)) |             self.assertTrue(np.isfinite(x) and x > 0, 'Invalid Beta.sample(): {}'.format(x)) | ||||||
|  |  | ||||||
|  |     def test_independent_shape(self): | ||||||
|  |         for Dist, params in EXAMPLES: | ||||||
|  |             for i, param in enumerate(params): | ||||||
|  |                 base_dist = Dist(**param) | ||||||
|  |                 x = base_dist.sample() | ||||||
|  |                 base_log_prob_shape = base_dist.log_prob(x).shape | ||||||
|  |                 for reinterpreted_batch_ndims in range(len(base_dist.batch_shape) + 1): | ||||||
|  |                     indep_dist = Independent(base_dist, reinterpreted_batch_ndims) | ||||||
|  |                     indep_log_prob_shape = base_log_prob_shape[:len(base_log_prob_shape) - reinterpreted_batch_ndims] | ||||||
|  |                     self.assertEqual(indep_dist.log_prob(x).shape, indep_log_prob_shape) | ||||||
|  |                     self.assertEqual(indep_dist.sample().shape, base_dist.sample().shape) | ||||||
|  |                     self.assertEqual(indep_dist.has_rsample, base_dist.has_rsample) | ||||||
|  |                     if indep_dist.has_rsample: | ||||||
|  |                         self.assertEqual(indep_dist.sample().shape, base_dist.sample().shape) | ||||||
|  |                     if indep_dist.has_enumerate_support: | ||||||
|  |                         self.assertEqual(indep_dist.enumerate_support().shape, base_dist.enumerate_support().shape) | ||||||
|  |                     try: | ||||||
|  |                         self.assertEqual(indep_dist.mean.shape, base_dist.mean.shape) | ||||||
|  |                     except NotImplementedError: | ||||||
|  |                         pass | ||||||
|  |                     try: | ||||||
|  |                         self.assertEqual(indep_dist.variance.shape, base_dist.variance.shape) | ||||||
|  |                     except NotImplementedError: | ||||||
|  |                         pass | ||||||
|  |                     try: | ||||||
|  |                         self.assertEqual(indep_dist.entropy().shape, indep_log_prob_shape) | ||||||
|  |                     except NotImplementedError: | ||||||
|  |                         pass | ||||||
|  |  | ||||||
|     def test_cdf_icdf_inverse(self): |     def test_cdf_icdf_inverse(self): | ||||||
|         # Tests the invertibility property on the distributions |         # Tests the invertibility property on the distributions | ||||||
|         for Dist, params in EXAMPLES: |         for Dist, params in EXAMPLES: | ||||||
|  | |||||||
| @ -254,32 +254,6 @@ class TestIndexing(TestCase): | |||||||
|             self.assertEqual(x, x[0]) |             self.assertEqual(x, x[0]) | ||||||
|             self.assertEqual(len(w), 1) |             self.assertEqual(len(w), 1) | ||||||
|  |  | ||||||
|     def test_legacy_dispatch(self): |  | ||||||
|         # compare with indexing using index_select / index_fill etc |  | ||||||
|         x = torch.arange(0, 9).view(3, 3) |  | ||||||
|         idx = torch.tensor([0, 2]) |  | ||||||
|         self.assertEqual(x[idx], x.index_select(0, idx)) |  | ||||||
|         self.assertEqual(x[:, idx], x.index_select(1, idx)) |  | ||||||
|  |  | ||||||
|         mask = x > 4 |  | ||||||
|         self.assertEqual(x[mask], x.masked_select(mask)) |  | ||||||
|  |  | ||||||
|         y = x.clone() |  | ||||||
|         yr = x.clone() |  | ||||||
|         y[idx] = 0 |  | ||||||
|         yr.index_fill_(0, idx, 0) |  | ||||||
|         self.assertEqual(y, yr) |  | ||||||
|         y[:, idx] = 2 |  | ||||||
|         yr.index_fill_(1, idx, 2) |  | ||||||
|         self.assertEqual(y, yr) |  | ||||||
|  |  | ||||||
|         mask = x > 4 |  | ||||||
|         y = x.clone() |  | ||||||
|         yr = x.clone() |  | ||||||
|         y[mask] = 10 |  | ||||||
|         yr.masked_fill_(mask, 10) |  | ||||||
|         self.assertEqual(y, yr) |  | ||||||
|  |  | ||||||
|  |  | ||||||
| # The tests below are from NumPy test_indexing.py with some modifications to | # The tests below are from NumPy test_indexing.py with some modifications to | ||||||
| # make them compatible with PyTorch. It's licensed under the BDS license below: | # make them compatible with PyTorch. It's licensed under the BDS license below: | ||||||
|  | |||||||
| @ -121,17 +121,16 @@ class TestJit(TestCase): | |||||||
|     # index-2 is not implemented in interpreter |     # index-2 is not implemented in interpreter | ||||||
|     @unittest.expectedFailure |     @unittest.expectedFailure | ||||||
|     def test_index(self): |     def test_index(self): | ||||||
|         x = Variable(torch.rand(2, 2, 2), requires_grad=True) |         x = Variable(torch.Tensor([0.4]), requires_grad=True) | ||||||
|         y = Variable(torch.LongTensor([0]), requires_grad=True) |         y = Variable(torch.LongTensor([0]), requires_grad=True) | ||||||
|         y2 = Variable(torch.LongTensor([1]), requires_grad=True) |  | ||||||
|  |  | ||||||
|         @torch.jit.compile(nderivs=0) |         @torch.jit.compile(nderivs=0) | ||||||
|         def fn(x, y, y2): |         def fn(x, y): | ||||||
|             return x[y, y2] |             return x[y] | ||||||
|  |  | ||||||
|         z = fn(x, y, y2) |         z = fn(x, y) | ||||||
|         with self.assertCompiled(fn): |         with self.assertCompiled(fn): | ||||||
|             z2 = fn(x, y, y2) |             z2 = fn(x, y) | ||||||
|         self.assertEqual(z, z2) |         self.assertEqual(z, z2) | ||||||
|  |  | ||||||
|     # Backwards tracing was broken for indexing by a constant, |     # Backwards tracing was broken for indexing by a constant, | ||||||
|  | |||||||
| @ -859,20 +859,26 @@ Tensor svd_backward(const std::vector<torch::autograd::Variable> &grads, const T | |||||||
|   auto m = self.size(0); |   auto m = self.size(0); | ||||||
|   auto n = self.size(1); |   auto n = self.size(1); | ||||||
|   auto k = sigma.size(0); |   auto k = sigma.size(0); | ||||||
|  |   auto gsigma = grads[1]; | ||||||
|  |  | ||||||
|  |   auto u = raw_u; | ||||||
|  |   auto v = raw_v; | ||||||
|  |   auto gu = grads[0]; | ||||||
|  |   auto gv = grads[2]; | ||||||
|  |  | ||||||
|   Tensor u, v; |  | ||||||
|   if (!some) { |   if (!some) { | ||||||
|     // ignore the free subspace |     // We ignore the free subspace here because possible base vectors cancel | ||||||
|  |     // each other, e.g., both -v and +v are valid base for a dimension. | ||||||
|  |     // Don't assume behavior of any particular implementation of svd. | ||||||
|     u = raw_u.narrow(1, 0, k); |     u = raw_u.narrow(1, 0, k); | ||||||
|     v = raw_v.narrow(1, 0, k); |     v = raw_v.narrow(1, 0, k); | ||||||
|   } else { |     if (gu.defined()) { | ||||||
|     u = raw_u; |       gu = gu.narrow(1, 0, k); | ||||||
|     v = raw_v; |     } | ||||||
|  |     if (gv.defined()) { | ||||||
|  |       gv = gv.narrow(1, 0, k); | ||||||
|  |     } | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   auto gu = grads[0]; |  | ||||||
|   auto gsigma = grads[1]; |  | ||||||
|   auto gv = grads[2]; |  | ||||||
|   auto vt = v.t(); |   auto vt = v.t(); | ||||||
|  |  | ||||||
|   Tensor sigma_term; |   Tensor sigma_term; | ||||||
|  | |||||||
| @ -284,12 +284,4 @@ if [ -d "$INSTALL_DIR/bin/" ]; then | |||||||
|     cp "$INSTALL_DIR/bin/"/* . |     cp "$INSTALL_DIR/bin/"/* . | ||||||
| fi | fi | ||||||
|  |  | ||||||
| # this is for binary builds |  | ||||||
| if [[ $PYTORCH_BINARY_BUILD && $PYTORCH_SO_DEPS ]] |  | ||||||
| then |  | ||||||
|     echo "Copying over dependency libraries $PYTORCH_SO_DEPS" |  | ||||||
|     # copy over dependency libraries into the current dir |  | ||||||
|     cp "$PYTORCH_SO_DEPS" . |  | ||||||
| fi |  | ||||||
|  |  | ||||||
| popd | popd | ||||||
|  | |||||||
| @ -129,21 +129,22 @@ def is_storage(obj): | |||||||
|  |  | ||||||
|  |  | ||||||
| def set_default_tensor_type(t): | def set_default_tensor_type(t): | ||||||
|     r"""Sets the default ``torch.Tensor`` type to type :attr:`t`. |     r"""Sets the default ``torch.Tensor`` type to floating point tensor type | ||||||
|  |     :attr:`t`. This type will also be used as default floating point type for | ||||||
|  |     type inference in :func:`torch.tensor`. | ||||||
|  |  | ||||||
|     The default tensor type is initially ``"torch.FloatTensor"``. |     The default floating point tensor type is initially ``torch.FloatTensor``. | ||||||
|  |  | ||||||
|     Args: |     Args: | ||||||
|         t (type or string): the tensor type or its name |         t (type or string): the floating point tensor type or its name | ||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> torch.set_default_tensor_type("torch.FloatTensor") |         >>> torch.tensor([1.2, 3]).dtype    # initial default for floating point is torch.float32 | ||||||
|         >>> torch.Tensor([1.2, 3]) |         torch.float32 | ||||||
|  |         >>> torch.set_default_tensor_type(torch.DoubleTensor) | ||||||
|          1.2000 |         >>> torch.tensor([1.2, 3]).dtype    # a new floating point tensor | ||||||
|          3.0000 |         torch.float64 | ||||||
|         [torch.FloatTensor of size (2,)] |  | ||||||
|  |  | ||||||
|     """ |     """ | ||||||
|     if isinstance(t, _string_classes): |     if isinstance(t, _string_classes): | ||||||
| @ -152,19 +153,22 @@ def set_default_tensor_type(t): | |||||||
|  |  | ||||||
|  |  | ||||||
| def set_default_dtype(d): | def set_default_dtype(d): | ||||||
|     r"""Sets the default ``torch.dtype`` type to type :attr:`d`. |     r"""Sets the default floating point dtype to :attr:`d`. This type will be | ||||||
|  |     used as default floating point type for type inference in | ||||||
|  |     :func:`torch.tensor`. | ||||||
|  |  | ||||||
|  |     The default floating point dtype is initially ``torch.float32``. | ||||||
|  |  | ||||||
|     Args: |     Args: | ||||||
|         d (dtype): the dtype to make the default |         d (:class:`torch.dtype`): the floating point dtype to make the default | ||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> torch.set_default_tensor_type(torch.double) |         >>> torch.tensor([1.2, 3]).dtype           # initial default for floating point is torch.float32 | ||||||
|         >>> torch.tensor([1.2, 3], device='cpu') |         torch.float32 | ||||||
|  |         >>> torch.set_default_dtype(torch.float64) | ||||||
|          1.2000 |         >>> torch.tensor([1.2, 3]).dtype           # a new floating point tensor | ||||||
|          3.0000 |         torch.float64 | ||||||
|         [torch.DoubleTensor of size (2,)] |  | ||||||
|  |  | ||||||
|     """ |     """ | ||||||
|     _C._set_default_dtype(d) |     _C._set_default_dtype(d) | ||||||
|  | |||||||
| @ -2,11 +2,148 @@ | |||||||
|  |  | ||||||
| import torch._C | import torch._C | ||||||
| from torch._C import _add_docstr as add_docstr | from torch._C import _add_docstr as add_docstr | ||||||
|  | from ._torch_docs import parse_kwargs | ||||||
|  |  | ||||||
|  |  | ||||||
| def add_docstr_all(method, docstr): | def add_docstr_all(method, docstr): | ||||||
|     add_docstr(getattr(torch._C._TensorBase, method), docstr) |     add_docstr(getattr(torch._C._TensorBase, method), docstr) | ||||||
|  |  | ||||||
|  | new_common_args = parse_kwargs(""" | ||||||
|  |     size (int...): a list, tuple, or :class:`torch.Size` of integers defining the | ||||||
|  |         shape of the output tensor. | ||||||
|  |     dtype (:class:`torch.dtype`, optional): the desired type of returned tensor. | ||||||
|  |         Default: if None, same :class:`torch.dtype` as this tensor. | ||||||
|  |     device (:class:`torch.device`, optional): the desired device of returned tensor. | ||||||
|  |         Default: if None, same :class:`torch.device` as this tensor. | ||||||
|  |     requires_grad (bool, optional): If autograd should record operations on the | ||||||
|  |         returned tensor. Default: ``False``. | ||||||
|  | """) | ||||||
|  |  | ||||||
|  | add_docstr_all('new_tensor', | ||||||
|  |                r""" | ||||||
|  | new_tensor(data, dtype=None, device=None, requires_grad=False) -> Tensor | ||||||
|  |  | ||||||
|  | Returns a new Tensor with :attr:`data` as the tensor data. | ||||||
|  | By default, the returned Tensor has the same :class:`torch.dtype` and | ||||||
|  | :class:`torch.device` as this tensor. | ||||||
|  |  | ||||||
|  | .. warning:: | ||||||
|  |  | ||||||
|  |     :func:`new_tensor` always copies :attr:`data`. If you have a Tensor | ||||||
|  |     ``data`` and want to avoid a copy, use :func:`torch.Tensor.requires_grad_` | ||||||
|  |     or :func:`torch.Tensor.detach`. | ||||||
|  |     If you have a numpy array and want to avoid a copy, use | ||||||
|  |     :func:`torch.from_numpy`. | ||||||
|  |  | ||||||
|  | Args: | ||||||
|  |     data (array_like): The returned Tensor copies :attr:`data`. | ||||||
|  |     {dtype} | ||||||
|  |     {device} | ||||||
|  |     {requires_grad} | ||||||
|  |  | ||||||
|  | Example:: | ||||||
|  |  | ||||||
|  |     >>> tensor = torch.ones((2,), dtype=torch.int8) | ||||||
|  |     >>> data = [[0, 1], [2, 3]] | ||||||
|  |     >>> tensor.new_tensor(data) | ||||||
|  |     tensor([[ 0,  1], | ||||||
|  |             [ 2,  3]], dtype=torch.int8) | ||||||
|  |  | ||||||
|  | """.format(**new_common_args)) | ||||||
|  |  | ||||||
|  | add_docstr_all('new_full', | ||||||
|  |                r""" | ||||||
|  | new_full(size, fill_value, dtype=None, device=None, requires_grad=False) -> Tensor | ||||||
|  |  | ||||||
|  | Returns a Tensor of size :attr:`size` filled with :attr:`fill_value`. | ||||||
|  | By default, the returned Tensor has the same :class:`torch.dtype` and | ||||||
|  | :class:`torch.device` as this tensor. | ||||||
|  |  | ||||||
|  | Args: | ||||||
|  |     fill_value (scalar): the number to fill the output tensor with. | ||||||
|  |     {dtype} | ||||||
|  |     {device} | ||||||
|  |     {requires_grad} | ||||||
|  |  | ||||||
|  | Example:: | ||||||
|  |  | ||||||
|  |     >>> tensor = torch.ones((2,), dtype=torch.float64) | ||||||
|  |     >>> tensor.new_full((3, 4), 3.141592) | ||||||
|  |     tensor([[ 3.1416,  3.1416,  3.1416,  3.1416], | ||||||
|  |             [ 3.1416,  3.1416,  3.1416,  3.1416], | ||||||
|  |             [ 3.1416,  3.1416,  3.1416,  3.1416]], dtype=torch.float64) | ||||||
|  |  | ||||||
|  | """.format(**new_common_args)) | ||||||
|  |  | ||||||
|  | add_docstr_all('new_empty', | ||||||
|  |                r""" | ||||||
|  | new_empty(size, dtype=None, device=None, requires_grad=False) -> Tensor | ||||||
|  |  | ||||||
|  | Returns a Tensor of size :attr:`size` filled with uninitialized data. | ||||||
|  | By default, the returned Tensor has the same :class:`torch.dtype` and | ||||||
|  | :class:`torch.device` as this tensor. | ||||||
|  |  | ||||||
|  | Args: | ||||||
|  |     {dtype} | ||||||
|  |     {device} | ||||||
|  |     {requires_grad} | ||||||
|  |  | ||||||
|  | Example:: | ||||||
|  |  | ||||||
|  |     >>> tensor = torch.ones(()) | ||||||
|  |     >>> tensor.new_empty((2, 3)) | ||||||
|  |     tensor([[ 5.8182e-18,  4.5765e-41, -1.0545e+30], | ||||||
|  |             [ 3.0949e-41,  4.4842e-44,  0.0000e+00]]) | ||||||
|  |  | ||||||
|  | """.format(**new_common_args)) | ||||||
|  |  | ||||||
|  | add_docstr_all('new_ones', | ||||||
|  |                r""" | ||||||
|  | new_ones(size, dtype=None, device=None, requires_grad=False) -> Tensor | ||||||
|  |  | ||||||
|  | Returns a Tensor of size :attr:`size` filled with ``1``. | ||||||
|  | By default, the returned Tensor has the same :class:`torch.dtype` and | ||||||
|  | :class:`torch.device` as this tensor. | ||||||
|  |  | ||||||
|  | Args: | ||||||
|  |     size (int...): a list, tuple, or :class:`torch.Size` of integers defining the | ||||||
|  |         shape of the output tensor. | ||||||
|  |     {dtype} | ||||||
|  |     {device} | ||||||
|  |     {requires_grad} | ||||||
|  |  | ||||||
|  | Example:: | ||||||
|  |  | ||||||
|  |     >>> tensor = torch.tensor((), dtype=torch.int32) | ||||||
|  |     >>> tensor.new_ones((2, 3)) | ||||||
|  |     tensor([[ 1,  1,  1], | ||||||
|  |             [ 1,  1,  1]], dtype=torch.int32) | ||||||
|  |  | ||||||
|  | """.format(**new_common_args)) | ||||||
|  |  | ||||||
|  | add_docstr_all('new_zeros', | ||||||
|  |                r""" | ||||||
|  | new_zeros(size, dtype=None, device=None, requires_grad=False) -> Tensor | ||||||
|  |  | ||||||
|  | Returns a Tensor of size :attr:`size` filled with ``0``. | ||||||
|  | By default, the returned Tensor has the same :class:`torch.dtype` and | ||||||
|  | :class:`torch.device` as this tensor. | ||||||
|  |  | ||||||
|  | Args: | ||||||
|  |     size (int...): a list, tuple, or :class:`torch.Size` of integers defining the | ||||||
|  |         shape of the output tensor. | ||||||
|  |     {dtype} | ||||||
|  |     {device} | ||||||
|  |     {requires_grad} | ||||||
|  |  | ||||||
|  | Example:: | ||||||
|  |  | ||||||
|  |     >>> tensor = torch.tensor((), dtype=torch.float64) | ||||||
|  |     >>> tensor.new_zeros((2, 3)) | ||||||
|  |     tensor([[ 0.,  0.,  0.], | ||||||
|  |             [ 0.,  0.,  0.]], dtype=torch.float64) | ||||||
|  |  | ||||||
|  | """.format(**new_common_args)) | ||||||
|  |  | ||||||
| add_docstr_all('abs', | add_docstr_all('abs', | ||||||
|                r""" |                r""" | ||||||
| @ -448,9 +585,9 @@ Returns the size in bytes of an individual element. | |||||||
|  |  | ||||||
| Example:: | Example:: | ||||||
|  |  | ||||||
|     >>> torch.FloatTensor().element_size() |     >>> torch.tensor([]).element_size() | ||||||
|     4 |     4 | ||||||
|     >>> torch.ByteTensor().element_size() |     >>> torch.tensor([], dtype=torch.uint8).element_size() | ||||||
|     1 |     1 | ||||||
|  |  | ||||||
| """) | """) | ||||||
| @ -691,19 +828,15 @@ Args: | |||||||
|  |  | ||||||
| Example:: | Example:: | ||||||
|  |  | ||||||
|     >>> x = torch.Tensor(5, 3).fill_(1) |     >>> x = torch.ones(5, 3) | ||||||
|     >>> t = torch.Tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) |     >>> t = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=torch.float) | ||||||
|     >>> index = torch.LongTensor([0, 4, 2]) |     >>> index = torch.tensor([0, 4, 2]) | ||||||
|     >>> x.index_add_(0, index, t) |     >>> x.index_add_(0, index, t) | ||||||
|     >>> x |     tensor([[  2.,   3.,   4.], | ||||||
|  |             [  1.,   1.,   1.], | ||||||
|       2   3   4 |             [  8.,   9.,  10.], | ||||||
|       1   1   1 |             [  1.,   1.,   1.], | ||||||
|       8   9  10 |             [  5.,   6.,   7.]]) | ||||||
|       1   1   1 |  | ||||||
|       5   6   7 |  | ||||||
|     [torch.FloatTensor of size (5,3)] |  | ||||||
|  |  | ||||||
| """) | """) | ||||||
|  |  | ||||||
| add_docstr_all('index_copy_', | add_docstr_all('index_copy_', | ||||||
| @ -727,18 +860,14 @@ Args: | |||||||
| Example:: | Example:: | ||||||
|  |  | ||||||
|     >>> x = torch.zeros(5, 3) |     >>> x = torch.zeros(5, 3) | ||||||
|     >>> t = torch.Tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) |     >>> t = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=torch.float) | ||||||
|     >>> index = torch.LongTensor([0, 4, 2]) |     >>> index = torch.tensor([0, 4, 2]) | ||||||
|     >>> x.index_copy_(0, index, t) |     >>> x.index_copy_(0, index, t) | ||||||
|     >>> x |     tensor([[ 1.,  2.,  3.], | ||||||
|  |             [ 0.,  0.,  0.], | ||||||
|      1  2  3 |             [ 7.,  8.,  9.], | ||||||
|      0  0  0 |             [ 0.,  0.,  0.], | ||||||
|      7  8  9 |             [ 4.,  5.,  6.]]) | ||||||
|      0  0  0 |  | ||||||
|      4  5  6 |  | ||||||
|     [torch.FloatTensor of size (5,3)] |  | ||||||
|  |  | ||||||
| """) | """) | ||||||
|  |  | ||||||
| add_docstr_all('index_fill_', | add_docstr_all('index_fill_', | ||||||
| @ -754,16 +883,12 @@ Args: | |||||||
|     val (float): the value to fill with |     val (float): the value to fill with | ||||||
|  |  | ||||||
| Example:: | Example:: | ||||||
|     >>> x = torch.Tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) |     >>> x = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=torch.float) | ||||||
|     >>> index = torch.LongTensor([0, 2]) |     >>> index = torch.tensor([0, 2]) | ||||||
|     >>> x.index_fill_(1, index, -1) |     >>> x.index_fill_(1, index, -1) | ||||||
|     >>> x |     tensor([[-1.,  2., -1.], | ||||||
|  |             [-1.,  5., -1.], | ||||||
|     -1  2 -1 |             [-1.,  8., -1.]]) | ||||||
|     -1  5 -1 |  | ||||||
|     -1  8 -1 |  | ||||||
|     [torch.FloatTensor of size (3,3)] |  | ||||||
|  |  | ||||||
| """) | """) | ||||||
|  |  | ||||||
| add_docstr_all('index_put_', | add_docstr_all('index_put_', | ||||||
| @ -819,7 +944,7 @@ This operation is not differentiable. | |||||||
|  |  | ||||||
| Example:: | Example:: | ||||||
|  |  | ||||||
|     >>> x = torch.Tensor([1.0]) |     >>> x = torch.tensor([1.0]) | ||||||
|     >>> x.item() |     >>> x.item() | ||||||
|     1.0 |     1.0 | ||||||
|  |  | ||||||
| @ -1081,20 +1206,14 @@ Args: | |||||||
|  |  | ||||||
| Example:: | Example:: | ||||||
|  |  | ||||||
|     >>> x = torch.Tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) |     >>> x = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]]) | ||||||
|     >>> x.narrow(0, 0, 2) |     >>> x.narrow(0, 0, 2) | ||||||
|  |     tensor([[ 1,  2,  3], | ||||||
|      1  2  3 |             [ 4,  5,  6]]) | ||||||
|      4  5  6 |  | ||||||
|     [torch.FloatTensor of size (2,3)] |  | ||||||
|  |  | ||||||
|     >>> x.narrow(1, 1, 2) |     >>> x.narrow(1, 1, 2) | ||||||
|  |     tensor([[ 2,  3], | ||||||
|      2  3 |             [ 5,  6], | ||||||
|      5  6 |             [ 8,  9]]) | ||||||
|      8  9 |  | ||||||
|     [torch.FloatTensor of size (3,2)] |  | ||||||
|  |  | ||||||
| """) | """) | ||||||
|  |  | ||||||
| add_docstr_all('ndimension', | add_docstr_all('ndimension', | ||||||
| @ -1259,13 +1378,11 @@ Args: | |||||||
|  |  | ||||||
| Example:: | Example:: | ||||||
|  |  | ||||||
|     >>> src = torch.Tensor([[4, 3, 5], |     >>> src = torch.tensor([[4, 3, 5], | ||||||
|                             [6, 7, 8]]) |                             [6, 7, 8]]) | ||||||
|     >>> src.put_(torch.LongTensor([1, 3]), torch.Tensor([9, 10])) |     >>> src.put_(torch.tensor([1, 3]), torch.tensor([9, 10])) | ||||||
|  |     tensor([[  4,   9,   5], | ||||||
|       4   9   5 |             [ 10,   7,   8]]) | ||||||
|      10   7   8 |  | ||||||
|     [torch.FloatTensor of size (2,3)] |  | ||||||
| """) | """) | ||||||
|  |  | ||||||
| add_docstr_all('qr', | add_docstr_all('qr', | ||||||
| @ -1283,8 +1400,8 @@ Fills :attr:`self` tensor with numbers sampled from the discrete uniform | |||||||
| distribution over ``[from, to - 1]``. If not specified, the values are usually | distribution over ``[from, to - 1]``. If not specified, the values are usually | ||||||
| only bounded by :attr:`self` tensor's data type. However, for floating point | only bounded by :attr:`self` tensor's data type. However, for floating point | ||||||
| types, if unspecified, range will be ``[0, 2^mantissa]`` to ensure that every | types, if unspecified, range will be ``[0, 2^mantissa]`` to ensure that every | ||||||
| value is representable. For example, `torch.DoubleTensor(1).random_()` will be | value is representable. For example, `torch.tensor(1, dtype=torch.double).random_()` | ||||||
| uniform in ``[0, 2^53]``. | will be uniform in ``[0, 2^53]``. | ||||||
| """) | """) | ||||||
|  |  | ||||||
| add_docstr_all('reciprocal', | add_docstr_all('reciprocal', | ||||||
| @ -1343,18 +1460,49 @@ Args: | |||||||
|  |  | ||||||
| Example:: | Example:: | ||||||
|  |  | ||||||
|     >>> x = torch.Tensor([1, 2, 3]) |     >>> x = torch.tensor([1, 2, 3]) | ||||||
|     >>> x.repeat(4, 2) |     >>> x.repeat(4, 2) | ||||||
|  |     tensor([[ 1,  2,  3,  1,  2,  3], | ||||||
|      1  2  3  1  2  3 |             [ 1,  2,  3,  1,  2,  3], | ||||||
|      1  2  3  1  2  3 |             [ 1,  2,  3,  1,  2,  3], | ||||||
|      1  2  3  1  2  3 |             [ 1,  2,  3,  1,  2,  3]]) | ||||||
|      1  2  3  1  2  3 |  | ||||||
|     [torch.FloatTensor of size (4,6)] |  | ||||||
|  |  | ||||||
|     >>> x.repeat(4, 2, 1).size() |     >>> x.repeat(4, 2, 1).size() | ||||||
|  |  | ||||||
|     torch.Size([4, 2, 3]) |     torch.Size([4, 2, 3]) | ||||||
|  | """) | ||||||
|  |  | ||||||
|  | add_docstr_all('requires_grad_', | ||||||
|  |                r""" | ||||||
|  | requires_grad_(requires_grad=True) -> Tensor | ||||||
|  |  | ||||||
|  | Change if autograd should record operations on this tensor: sets this tensor's | ||||||
|  | :attr:`requires_grad` attribute in-place. Returns this tensor. | ||||||
|  |  | ||||||
|  | :func:`requires_grad_`'s main use case is to tell autograd to begin recording | ||||||
|  | operations on a Tensor ``tensor``. If ``tensor`` has ``requires_grad=False`` | ||||||
|  | (because it was obtained through a DataLoader, or required preprocessing or | ||||||
|  | initialization), ``tensor.requires_grad_()`` makes it so that autograd will | ||||||
|  | begin to record operations on ``tensor``. | ||||||
|  |  | ||||||
|  | Args: | ||||||
|  |     requires_grad (bool): If autograd should record operations on this tensor. | ||||||
|  |         Default: ``True``. | ||||||
|  |  | ||||||
|  | Example:: | ||||||
|  |  | ||||||
|  |     >>> # Let's say we want to preprocess some saved weights and use | ||||||
|  |     >>> # the result as new weights. | ||||||
|  |     >>> saved_weights = [0.1, 0.2, 0.3, 0.25] | ||||||
|  |     >>> loaded_weights = torch.tensor(saved_weights) | ||||||
|  |     >>> weights = preprocess(loaded_weights)  # some function | ||||||
|  |     >>> weights | ||||||
|  |     tensor([-0.5503,  0.4926, -2.1158, -0.8303]) | ||||||
|  |  | ||||||
|  |     >>> # Now, start to record operations done to weights | ||||||
|  |     >>> weights.requires_grad_() | ||||||
|  |     >>> out = weights.pow(2).sum() | ||||||
|  |     >>> out.backward() | ||||||
|  |     >>> weights.grad | ||||||
|  |     tensor([-1.1007,  0.9853, -4.2316, -1.6606]) | ||||||
|  |  | ||||||
| """) | """) | ||||||
|  |  | ||||||
| @ -1386,14 +1534,10 @@ Args: | |||||||
|  |  | ||||||
| Example:: | Example:: | ||||||
|  |  | ||||||
|     >>> x = torch.Tensor([[1, 2], [3, 4], [5, 6]]) |     >>> x = torch.tensor([[1, 2], [3, 4], [5, 6]]) | ||||||
|     >>> x.resize_(2, 2) |     >>> x.resize_(2, 2) | ||||||
|     >>> x |     tensor([[ 1,  2], | ||||||
|  |             [ 3,  4]]) | ||||||
|      1  2 |  | ||||||
|      3  4 |  | ||||||
|     [torch.FloatTensor of size (2,2)] |  | ||||||
|  |  | ||||||
| """) | """) | ||||||
|  |  | ||||||
| add_docstr_all('resize_as_', | add_docstr_all('resize_as_', | ||||||
| @ -1468,25 +1612,17 @@ Example:: | |||||||
|  |  | ||||||
|     >>> x = torch.rand(2, 5) |     >>> x = torch.rand(2, 5) | ||||||
|     >>> x |     >>> x | ||||||
|  |     tensor([[ 0.3992,  0.2908,  0.9044,  0.4850,  0.6004], | ||||||
|  |             [ 0.5735,  0.9006,  0.6797,  0.4152,  0.1732]]) | ||||||
|  |     >>> torch.zeros(3, 5).scatter_(0, torch.tensor([[0, 1, 2, 0, 0], [2, 0, 0, 1, 2]]), x) | ||||||
|  |     tensor([[ 0.3992,  0.9006,  0.6797,  0.4850,  0.6004], | ||||||
|  |             [ 0.0000,  0.2908,  0.0000,  0.4152,  0.0000], | ||||||
|  |             [ 0.5735,  0.0000,  0.9044,  0.0000,  0.1732]]) | ||||||
|  |  | ||||||
|      0.4319  0.6500  0.4080  0.8760  0.2355 |     >>> z = torch.zeros(2, 4).scatter_(1, torch.tensor([[2], [3]]), 1.23) | ||||||
|      0.2609  0.4711  0.8486  0.8573  0.1029 |  | ||||||
|     [torch.FloatTensor of size (2,5)] |  | ||||||
|  |  | ||||||
|     >>> torch.zeros(3, 5).scatter_(0, torch.LongTensor([[0, 1, 2, 0, 0], [2, 0, 0, 1, 2]]), x) |  | ||||||
|  |  | ||||||
|      0.4319  0.4711  0.8486  0.8760  0.2355 |  | ||||||
|      0.0000  0.6500  0.0000  0.8573  0.0000 |  | ||||||
|      0.2609  0.0000  0.4080  0.0000  0.1029 |  | ||||||
|     [torch.FloatTensor of size (3,5)] |  | ||||||
|  |  | ||||||
|     >>> z = torch.zeros(2, 4).scatter_(1, torch.LongTensor([[2], [3]]), 1.23) |  | ||||||
|     >>> z |     >>> z | ||||||
|  |     tensor([[ 0.0000,  0.0000,  1.2300,  0.0000], | ||||||
|      0.0000  0.0000  1.2300  0.0000 |             [ 0.0000,  0.0000,  0.0000,  1.2300]]) | ||||||
|      0.0000  0.0000  0.0000  1.2300 |  | ||||||
|     [torch.FloatTensor of size (2,4)] |  | ||||||
|  |  | ||||||
| """) | """) | ||||||
|  |  | ||||||
| add_docstr_all('select', | add_docstr_all('select', | ||||||
| @ -1591,7 +1727,7 @@ Returns the size of the :attr:`self` tensor. The returned value is a subclass of | |||||||
|  |  | ||||||
| Example:: | Example:: | ||||||
|  |  | ||||||
|     >>> torch.Tensor(3, 4, 5).size() |     >>> torch.empty(3, 4, 5).size() | ||||||
|     torch.Size([3, 4, 5]) |     torch.Size([3, 4, 5]) | ||||||
|  |  | ||||||
| """) | """) | ||||||
| @ -1654,7 +1790,7 @@ number of storage elements (not bytes). | |||||||
|  |  | ||||||
| Example:: | Example:: | ||||||
|  |  | ||||||
|     >>> x = torch.Tensor([1, 2, 3, 4, 5]) |     >>> x = torch.tensor([1, 2, 3, 4, 5]) | ||||||
|     >>> x.storage_offset() |     >>> x.storage_offset() | ||||||
|     0 |     0 | ||||||
|     >>> x[3:].storage_offset() |     >>> x[3:].storage_offset() | ||||||
| @ -1678,7 +1814,7 @@ Args: | |||||||
|  |  | ||||||
| Example:: | Example:: | ||||||
|  |  | ||||||
|     >>> x = torch.Tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]) |     >>> x = torch.tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 10]]) | ||||||
|     >>> x.stride() |     >>> x.stride() | ||||||
|     (5, 1) |     (5, 1) | ||||||
|     >>> x.stride(0) |     >>> x.stride(0) | ||||||
| @ -1744,6 +1880,115 @@ t_() -> Tensor | |||||||
| In-place version of :meth:`~Tensor.t` | In-place version of :meth:`~Tensor.t` | ||||||
| """) | """) | ||||||
|  |  | ||||||
|  | add_docstr_all('to', | ||||||
|  |                r""" | ||||||
|  | to(*args, **kwargs) -> Tensor | ||||||
|  |  | ||||||
|  | Performs Tensor dtype and/or device conversion. A :class:`torch.dtype` and :class:`torch.device` are | ||||||
|  | inferred from the arguments of ``self.to(*args, **kwargs)``. | ||||||
|  |  | ||||||
|  | .. note:: | ||||||
|  |  | ||||||
|  |     If the ``self`` Tensor already | ||||||
|  |     has the correct :class:`torch.dtype` and :class:`torch.device`, then ``self`` is returned. | ||||||
|  |     Otherwise, the returned tensor is a copy of ``self`` with the desired | ||||||
|  |     :class:`torch.dtype` and :class:`torch.device`. | ||||||
|  |  | ||||||
|  | Here are the ways to call ``to``: | ||||||
|  |  | ||||||
|  | .. function:: to(dtype) -> Tensor | ||||||
|  |  | ||||||
|  |     Returns a Tensor with the specified :attr:`dtype` | ||||||
|  |  | ||||||
|  | .. function:: to(device, dtype=None) -> Tensor | ||||||
|  |  | ||||||
|  |     Returns a Tensor with the specified :attr:`device` and (optional) | ||||||
|  |     :attr:`dtype`. If :attr:`dtype` is ``None`` it is inferred to be ``self.dtype``. | ||||||
|  |  | ||||||
|  | .. function:: to(other) -> Tensor | ||||||
|  |  | ||||||
|  |     Returns a Tensor with the same :class:`torch.dtype` and :class:`torch.device` as the Tensor | ||||||
|  |     :attr:`other`. | ||||||
|  |  | ||||||
|  | Example:: | ||||||
|  |  | ||||||
|  |     >>> tensor = torch.randn(2, 2)  # Initially dtype=float32, device=cpu | ||||||
|  |     >>> tensor.to(torch.float64) | ||||||
|  |     tensor([[-0.5044,  0.0005], | ||||||
|  |             [ 0.3310, -0.0584]], dtype=torch.float64) | ||||||
|  |  | ||||||
|  |     >>> cuda0 = torch.device('cuda:0') | ||||||
|  |     >>> tensor.to(cuda0) | ||||||
|  |     tensor([[-0.5044,  0.0005], | ||||||
|  |             [ 0.3310, -0.0584]], device='cuda:0') | ||||||
|  |  | ||||||
|  |     >>> tensor.to(cuda0, dtype=torch.float64) | ||||||
|  |     tensor([[-0.5044,  0.0005], | ||||||
|  |             [ 0.3310, -0.0584]], dtype=torch.float64, device='cuda:0') | ||||||
|  |  | ||||||
|  |     >>> other = torch.randn((), dtype=torch.float64, device=cuda0) | ||||||
|  |     >>> tensor.to(other) | ||||||
|  |     tensor([[-0.5044,  0.0005], | ||||||
|  |             [ 0.3310, -0.0584]], dtype=torch.float64, device='cuda:0') | ||||||
|  |  | ||||||
|  | """) | ||||||
|  |  | ||||||
|  | add_docstr_all('byte', | ||||||
|  |                r""" | ||||||
|  | byte() -> Tensor | ||||||
|  |  | ||||||
|  | ``self.byte()`` is equivalent to ``self.to(torch.uint8)``. See :func:`to`. | ||||||
|  | """) | ||||||
|  |  | ||||||
|  | add_docstr_all('char', | ||||||
|  |                r""" | ||||||
|  | char() -> Tensor | ||||||
|  |  | ||||||
|  | ``self.char()`` is equivalent to ``self.to(torch.int8)``. See :func:`to`. | ||||||
|  | """) | ||||||
|  |  | ||||||
|  | add_docstr_all('double', | ||||||
|  |                r""" | ||||||
|  | double() -> Tensor | ||||||
|  |  | ||||||
|  | ``self.double()`` is equivalent to ``self.to(torch.float64)``. See :func:`to`. | ||||||
|  | """) | ||||||
|  |  | ||||||
|  | add_docstr_all('float', | ||||||
|  |                r""" | ||||||
|  | float() -> Tensor | ||||||
|  |  | ||||||
|  | ``self.float()`` is equivalent to ``self.to(torch.float32)``. See :func:`to`. | ||||||
|  | """) | ||||||
|  |  | ||||||
|  | add_docstr_all('half', | ||||||
|  |                r""" | ||||||
|  | half() -> Tensor | ||||||
|  |  | ||||||
|  | ``self.half()`` is equivalent to ``self.to(torch.float16)``. See :func:`to`. | ||||||
|  | """) | ||||||
|  |  | ||||||
|  | add_docstr_all('int', | ||||||
|  |                r""" | ||||||
|  | int() -> Tensor | ||||||
|  |  | ||||||
|  | ``self.int()`` is equivalent to ``self.to(torch.int32)``. See :func:`to`. | ||||||
|  | """) | ||||||
|  |  | ||||||
|  | add_docstr_all('long', | ||||||
|  |                r""" | ||||||
|  | long() -> Tensor | ||||||
|  |  | ||||||
|  | ``self.long()`` is equivalent to ``self.to(torch.int64)``. See :func:`to`. | ||||||
|  | """) | ||||||
|  |  | ||||||
|  | add_docstr_all('short', | ||||||
|  |                r""" | ||||||
|  | short() -> Tensor | ||||||
|  |  | ||||||
|  | ``self.short()`` is equivalent to ``self.to(torch.int16)``. See :func:`to`. | ||||||
|  | """) | ||||||
|  |  | ||||||
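A small sketch of the equivalences stated above; integer conversion truncates toward zero, and the assertions are only illustrative:

    import torch

    t = torch.tensor([0.5, 1.5, 2.5])                 # default dtype torch.float32
    assert t.int().dtype == t.to(torch.int32).dtype   # both torch.int32; values become 0, 1, 2
    assert t.double().dtype == torch.float64          # same as t.to(torch.float64)
    assert t.byte().dtype == torch.uint8              # same as t.to(torch.uint8)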
| add_docstr_all('take', | add_docstr_all('take', | ||||||
|                r""" |                r""" | ||||||
| take(indices) -> Tensor | take(indices) -> Tensor | ||||||
| @ -1907,33 +2152,18 @@ Example:: | |||||||
|  |  | ||||||
|     >>> x = torch.arange(1, 8) |     >>> x = torch.arange(1, 8) | ||||||
|     >>> x |     >>> x | ||||||
|  |     tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.]) | ||||||
|      1 |  | ||||||
|      2 |  | ||||||
|      3 |  | ||||||
|      4 |  | ||||||
|      5 |  | ||||||
|      6 |  | ||||||
|      7 |  | ||||||
|     [torch.FloatTensor of size (7,)] |  | ||||||
|  |  | ||||||
|     >>> x.unfold(0, 2, 1) |     >>> x.unfold(0, 2, 1) | ||||||
|  |     tensor([[ 1.,  2.], | ||||||
|      1  2 |             [ 2.,  3.], | ||||||
|      2  3 |             [ 3.,  4.], | ||||||
|      3  4 |             [ 4.,  5.], | ||||||
|      4  5 |             [ 5.,  6.], | ||||||
|      5  6 |             [ 6.,  7.]]) | ||||||
|      6  7 |  | ||||||
|     [torch.FloatTensor of size (6,2)] |  | ||||||
|  |  | ||||||
|     >>> x.unfold(0, 2, 2) |     >>> x.unfold(0, 2, 2) | ||||||
|  |     tensor([[ 1.,  2.], | ||||||
|      1  2 |             [ 3.,  4.], | ||||||
|      3  4 |             [ 5.,  6.]]) | ||||||
|      5  6 |  | ||||||
|     [torch.FloatTensor of size (3,2)] |  | ||||||
|  |  | ||||||
| """) | """) | ||||||
|  |  | ||||||
| add_docstr_all('uniform_', | add_docstr_all('uniform_', | ||||||
| @ -2031,23 +2261,17 @@ Args: | |||||||
|  |  | ||||||
| Example:: | Example:: | ||||||
|  |  | ||||||
|     >>> x = torch.Tensor([[1], [2], [3]]) |     >>> x = torch.tensor([[1], [2], [3]]) | ||||||
|     >>> x.size() |     >>> x.size() | ||||||
|     torch.Size([3, 1]) |     torch.Size([3, 1]) | ||||||
|     >>> x.expand(3, 4) |     >>> x.expand(3, 4) | ||||||
|  |     tensor([[ 1,  1,  1,  1], | ||||||
|      1  1  1  1 |             [ 2,  2,  2,  2], | ||||||
|      2  2  2  2 |             [ 3,  3,  3,  3]]) | ||||||
|      3  3  3  3 |  | ||||||
|     [torch.FloatTensor of size (3,4)] |  | ||||||
|  |  | ||||||
|     >>> x.expand(-1, 4)   # -1 means not changing the size of that dimension |     >>> x.expand(-1, 4)   # -1 means not changing the size of that dimension | ||||||
|  |     tensor([[ 1,  1,  1,  1], | ||||||
|      1  1  1  1 |             [ 2,  2,  2,  2], | ||||||
|      2  2  2  2 |             [ 3,  3,  3,  3]]) | ||||||
|      3  3  3  3 |  | ||||||
|     [torch.FloatTensor of size (3,4)] |  | ||||||
|  |  | ||||||
| """) | """) | ||||||
|  |  | ||||||
| add_docstr_all('zero_', | add_docstr_all('zero_', | ||||||
|  | |||||||
| @ -73,7 +73,7 @@ def _get_min_log_scale(): | |||||||
|  |  | ||||||
|  |  | ||||||
| def _number_format(tensor, min_sz=-1): | def _number_format(tensor, min_sz=-1): | ||||||
|     int_mode = not tensor.dtype.is_floating_point |     floating_dtype = tensor.dtype.is_floating_point  # save this because we cast later | ||||||
|     _min_log_scale = _get_min_log_scale() |     _min_log_scale = _get_min_log_scale() | ||||||
|     min_sz = max(min_sz, 2) |     min_sz = max(min_sz, 2) | ||||||
|     tensor = torch.DoubleTensor(tensor.size()).copy_(tensor).abs_().view(tensor.nelement()) |     tensor = torch.DoubleTensor(tensor.size()).copy_(tensor).abs_().view(tensor.nelement()) | ||||||
| @ -90,6 +90,13 @@ def _number_format(tensor, min_sz=-1): | |||||||
|     if invalid_value_mask.any(): |     if invalid_value_mask.any(): | ||||||
|         min_sz = max(min_sz, 3) |         min_sz = max(min_sz, 3) | ||||||
|  |  | ||||||
|  |     int_mode = True | ||||||
|  |     # TODO: use fmod? | ||||||
|  |     for value in tensor: | ||||||
|  |         if value != math.ceil(value.item()): | ||||||
|  |             int_mode = False | ||||||
|  |             break | ||||||
|  |  | ||||||
|     exp_min = tensor.min() |     exp_min = tensor.min() | ||||||
|     if exp_min != 0: |     if exp_min != 0: | ||||||
|         exp_min = math.floor(math.log10(exp_min)) + 1 |         exp_min = math.floor(math.log10(exp_min)) + 1 | ||||||
| @ -100,6 +107,7 @@ def _number_format(tensor, min_sz=-1): | |||||||
|         exp_max = math.floor(math.log10(exp_max)) + 1 |         exp_max = math.floor(math.log10(exp_max)) + 1 | ||||||
|     else: |     else: | ||||||
|         exp_max = 1 |         exp_max = 1 | ||||||
|  |     include_decimal_int_mode = floating_dtype and int_mode | ||||||
|  |  | ||||||
|     scale = 1 |     scale = 1 | ||||||
|     exp_max = int(exp_max) |     exp_max = int(exp_max) | ||||||
| @ -111,6 +119,9 @@ def _number_format(tensor, min_sz=-1): | |||||||
|         else: |         else: | ||||||
|             sz = max(min_sz, exp_max + 1) |             sz = max(min_sz, exp_max + 1) | ||||||
|             format = '{:' + str(sz) + '.0f}' |             format = '{:' + str(sz) + '.0f}' | ||||||
|  |             if include_decimal_int_mode: | ||||||
|  |                 format += '.' | ||||||
|  |                 sz += 1 | ||||||
|     else: |     else: | ||||||
|         if exp_max - exp_min > prec: |         if exp_max - exp_min > prec: | ||||||
|             sz = 7 + prec |             sz = 7 + prec | ||||||
| @ -179,7 +190,7 @@ def _tensor_str(self, indent, fmt, scale, sz, summarize): | |||||||
| def _str(self): | def _str(self): | ||||||
|     if self.is_sparse: |     if self.is_sparse: | ||||||
|         size_str = str(tuple(self.shape)).replace(' ', '') |         size_str = str(tuple(self.shape)).replace(' ', '') | ||||||
|         return '{} of size {} with indices:\n{}and values:\n{}'.format( |         return '{} of size {} with indices:\n{}\nand values:\n{}'.format( | ||||||
|             self.type(), size_str, self._indices(), self._values()) |             self.type(), size_str, self._indices(), self._values()) | ||||||
|  |  | ||||||
|     prefix = 'tensor(' |     prefix = 'tensor(' | ||||||
| @ -194,12 +205,16 @@ def _str(self): | |||||||
|         if self.device.type == 'cpu' or torch.cuda.current_device() != self.device.index: |         if self.device.type == 'cpu' or torch.cuda.current_device() != self.device.index: | ||||||
|             suffix = ', device=\'' + str(self.device) + '\'' + suffix |             suffix = ', device=\'' + str(self.device) + '\'' + suffix | ||||||
|  |  | ||||||
|     if self.dtype != torch.get_default_dtype() and self.dtype != torch.int64: |  | ||||||
|         suffix = ', dtype=' + str(self.dtype) + suffix |  | ||||||
|  |  | ||||||
|     if self.numel() == 0: |     if self.numel() == 0: | ||||||
|  |         # In an empty tensor, there are no elements to infer if the dtype should be int64, | ||||||
|  |         # so it must be shown explicitly. | ||||||
|  |         if self.dtype != torch.get_default_dtype(): | ||||||
|  |             suffix = ', dtype=' + str(self.dtype) + suffix | ||||||
|         tensor_str = '[]' |         tensor_str = '[]' | ||||||
|     else: |     else: | ||||||
|  |         if self.dtype != torch.get_default_dtype() and self.dtype != torch.int64: | ||||||
|  |             suffix = ', dtype=' + str(self.dtype) + suffix | ||||||
|  |  | ||||||
|         fmt, scale, sz = _number_format(self) |         fmt, scale, sz = _number_format(self) | ||||||
|         if scale != 1: |         if scale != 1: | ||||||
|             prefix = prefix + SCALE_FORMAT.format(scale) + ' ' * indent |             prefix = prefix + SCALE_FORMAT.format(scale) + ' ' * indent | ||||||
|  | |||||||
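A sketch of the printing behavior the ``_tensor_str.py`` changes above aim for; the outputs follow the 0.4 ``tensor(...)`` format shown elsewhere in this diff and are only illustrative:

    import torch

    torch.tensor([1.0, 2.0, 3.0])      # integral-valued floats keep the decimal point: tensor([ 1.,  2.,  3.])
    torch.tensor([1, 2, 3])            # a genuine integer dtype prints without decimals: tensor([ 1,  2,  3])
    torch.empty(0, dtype=torch.int32)  # empty tensors always show a non-default dtype: tensor([], dtype=torch.int32)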
							
								
								
									
torch/_torch_docs.py: 2977 lines changed (file diff suppressed because it is too large)
							| @ -16,6 +16,7 @@ void InputBuffer::add(size_t pos, Variable var) { | |||||||
|   if (!old_var.defined()) { |   if (!old_var.defined()) { | ||||||
|     buffer[pos] = std::move(var); |     buffer[pos] = std::move(var); | ||||||
|   } else { |   } else { | ||||||
|  |     AutoGPU auto_gpu(var); | ||||||
|     // ATen doesn't route sparse additions correctly... |     // ATen doesn't route sparse additions correctly... | ||||||
|     if (old_var.type().is_sparse()) { |     if (old_var.type().is_sparse()) { | ||||||
|       buffer[pos] = var + old_var; |       buffer[pos] = var + old_var; | ||||||
|  | |||||||
| @ -9,8 +9,8 @@ | |||||||
| #include "torch/csrc/autograd/variable.h" | #include "torch/csrc/autograd/variable.h" | ||||||
| #include "torch/csrc/utils/python_compat.h" | #include "torch/csrc/utils/python_compat.h" | ||||||
| #include "torch/csrc/utils/python_numbers.h" | #include "torch/csrc/utils/python_numbers.h" | ||||||
| #include "torch/csrc/utils/tensor_conversion_dispatch.h" |  | ||||||
| #include "torch/csrc/utils/tensor_new.h" | #include "torch/csrc/utils/tensor_new.h" | ||||||
|  | #include "torch/csrc/utils/tensor_conversion_dispatch.h" | ||||||
|  |  | ||||||
| #include <ATen/ExpandUtils.h> | #include <ATen/ExpandUtils.h> | ||||||
| #include <vector> | #include <vector> | ||||||
| @ -169,16 +169,6 @@ static Variable applySlicing(const Variable& self, PyObject* index, variable_lis | |||||||
|   return result; |   return result; | ||||||
| } | } | ||||||
|  |  | ||||||
| static Tensor typeConvertIndex(const Variable& self, const Variable& ind) { |  | ||||||
|   int64_t device = self.is_cuda() ? self.get_device() : -1; |  | ||||||
|   if (ind.defined()) { |  | ||||||
|     auto& new_type = ind.type().toBackend(self.type().backend()); |  | ||||||
|     return torch::utils::dispatch_type_conversion(ind, new_type, device, false); |  | ||||||
|   } else { |  | ||||||
|     return ind; |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static std::vector<Tensor> typeConvertIndices(const Variable& self, const variable_list& indices) { | static std::vector<Tensor> typeConvertIndices(const Variable& self, const variable_list& indices) { | ||||||
|   std::vector<Tensor> converted_inds(indices.size()); |   std::vector<Tensor> converted_inds(indices.size()); | ||||||
|   int64_t device = self.is_cuda() ? self.get_device() : -1; |   int64_t device = self.is_cuda() ? self.get_device() : -1; | ||||||
| @ -271,97 +261,6 @@ static PyObject* applyBoolGetitem(const Variable& self, bool index) { | |||||||
|   } |   } | ||||||
| } | } | ||||||
|  |  | ||||||
| enum class LegacyIndexingType { |  | ||||||
|   None, |  | ||||||
|   Mask, |  | ||||||
|   Index, |  | ||||||
| }; |  | ||||||
|  |  | ||||||
| static std::pair<LegacyIndexingType, int64_t> |  | ||||||
| getLegacyIndexingType(const Variable& self, const variable_list& vars) { |  | ||||||
|   // TODO: this could be that the broadcasted size is the same. |  | ||||||
|   if (vars.size() == 1 && vars[0].type().scalarType() == ScalarType::Byte && vars[0].is_same_size(self)) { |  | ||||||
|     return std::make_pair(LegacyIndexingType::Mask, -1); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // single tensor indexing |  | ||||||
|   int num_defined_variables = 0; |  | ||||||
|   int64_t index_dim = -1; |  | ||||||
|   for (size_t i = 0; i < vars.size(); i++) { |  | ||||||
|     auto& variable = vars[i]; |  | ||||||
|     auto is_defined = variable.defined(); |  | ||||||
|     num_defined_variables += is_defined; |  | ||||||
|     if (is_defined) { |  | ||||||
|       index_dim = (int64_t)i; |  | ||||||
|       if (num_defined_variables > 1) { |  | ||||||
|         break; |  | ||||||
|       } |  | ||||||
|       if (variable.dim() != 1 || variable.type().scalarType() != ScalarType::Long || variable.numel() == 0) { |  | ||||||
|         num_defined_variables = -1; |  | ||||||
|         break; |  | ||||||
|       } |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   if (num_defined_variables == 1) { |  | ||||||
|     return std::make_pair(LegacyIndexingType::Index, index_dim); |  | ||||||
|   } |  | ||||||
|   // advanced indexing |  | ||||||
|   return std::make_pair(LegacyIndexingType::None, -1); |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static Variable dispatch_legacy_index(const Variable& self, const variable_list& vars, |  | ||||||
|                                       std::pair<LegacyIndexingType, int64_t> legacyIndex) { |  | ||||||
|   LegacyIndexingType indexingType = std::get<0>(legacyIndex); |  | ||||||
|   switch(indexingType) { |  | ||||||
|     case LegacyIndexingType::Mask: { |  | ||||||
|       auto mask = vars[0]; |  | ||||||
|       auto mask_convert = typeConvertIndex(self, mask); |  | ||||||
|       AutoNoGIL no_gil; |  | ||||||
|       AutoGPU auto_gpu(self); |  | ||||||
|       return self.masked_select(mask_convert); |  | ||||||
|     } |  | ||||||
|     case LegacyIndexingType::Index: { |  | ||||||
|       int64_t index_dim = std::get<1>(legacyIndex); |  | ||||||
|       auto index = vars[index_dim]; |  | ||||||
|       auto index_convert = typeConvertIndex(self, index); |  | ||||||
|       AutoNoGIL no_gil; |  | ||||||
|       AutoGPU auto_gpu(self); |  | ||||||
|       return self.index_select(index_dim, index_convert); |  | ||||||
|     } |  | ||||||
|     case LegacyIndexingType::None: |  | ||||||
|     default: { |  | ||||||
|       throw std::runtime_error("Unexpected indexing type"); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| static Variable dispatch_legacy_index_put_(Variable& self, const variable_list& vars, const Variable& value, |  | ||||||
|                                            std::pair<LegacyIndexingType, int64_t> legacyIndex) { |  | ||||||
|   LegacyIndexingType indexingType = std::get<0>(legacyIndex); |  | ||||||
|   switch(indexingType) { |  | ||||||
|     case LegacyIndexingType::Mask: { |  | ||||||
|       auto mask = vars[0]; |  | ||||||
|       auto mask_convert = typeConvertIndex(self, mask); |  | ||||||
|       AutoNoGIL no_gil; |  | ||||||
|       AutoGPU auto_gpu(self); |  | ||||||
|       return self.masked_fill_(mask_convert, value); |  | ||||||
|     } |  | ||||||
|     case LegacyIndexingType::Index: { |  | ||||||
|       int64_t index_dim = std::get<1>(legacyIndex); |  | ||||||
|       auto index = vars[index_dim]; |  | ||||||
|       auto index_convert = typeConvertIndex(self, index); |  | ||||||
|       AutoNoGIL no_gil; |  | ||||||
|       AutoGPU auto_gpu(self); |  | ||||||
|       return self.index_fill_(index_dim, index_convert, value); |  | ||||||
|     } |  | ||||||
|     case LegacyIndexingType::None: |  | ||||||
|     default: { |  | ||||||
|       throw std::runtime_error("Unexpected indexing type"); |  | ||||||
|     } |  | ||||||
|   } |  | ||||||
| } |  | ||||||
|  |  | ||||||
| PyObject* THPVariable_getitem(PyObject* self, PyObject* index) { | PyObject* THPVariable_getitem(PyObject* self, PyObject* index) { | ||||||
|   HANDLE_TH_ERRORS |   HANDLE_TH_ERRORS | ||||||
|   auto& self_ = reinterpret_cast<THPVariable*>(self)->cdata; |   auto& self_ = reinterpret_cast<THPVariable*>(self)->cdata; | ||||||
| @ -396,12 +295,6 @@ PyObject* THPVariable_getitem(PyObject* self, PyObject* index) { | |||||||
|     return applyBoolGetitem(self_, variableIndices[0].toCByte()); |     return applyBoolGetitem(self_, variableIndices[0].toCByte()); | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   // TODO move this to ATen |  | ||||||
|   auto legacy_index = getLegacyIndexingType(sliced, variableIndices); |  | ||||||
|   if (std::get<0>(legacy_index) != LegacyIndexingType::None) { |  | ||||||
|     return wrap(dispatch_legacy_index(sliced, variableIndices, legacy_index)); |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // indexing by tensors ("advanced" indexing) |   // indexing by tensors ("advanced" indexing) | ||||||
|   return wrap(dispatch_index(sliced, variableIndices)); |   return wrap(dispatch_index(sliced, variableIndices)); | ||||||
|   Py_RETURN_NONE; |   Py_RETURN_NONE; | ||||||
| @ -468,16 +361,6 @@ int THPVariable_setitem(PyObject* self, PyObject* index, PyObject* py_value) { | |||||||
|     return 0; |     return 0; | ||||||
|   } |   } | ||||||
|  |  | ||||||
|   // TODO move this to ATen |  | ||||||
|   // we are being overly cautious here and only considering the *_fill_ variants |  | ||||||
|   // (value is a scalar), as there could be broadcasting in the value that could |  | ||||||
|   // happen and is not handled by masked_scatter_ and index_copy_ |  | ||||||
|   auto legacy_index = getLegacyIndexingType(sliced, variableIndices); |  | ||||||
|   if (std::get<0>(legacy_index) != LegacyIndexingType::None && value.dim() == 0) { |  | ||||||
|     dispatch_legacy_index_put_(sliced, variableIndices, value, legacy_index); |  | ||||||
|     return 0; |  | ||||||
|   } |  | ||||||
|  |  | ||||||
|   // indexing by tensors ("advanced" indexing) |   // indexing by tensors ("advanced" indexing) | ||||||
|   dispatch_index_put_(sliced, variableIndices, value); |   dispatch_index_put_(sliced, variableIndices, value); | ||||||
|   return 0; |   return 0; | ||||||
|  | |||||||
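For context, a sketch of the cases the removed fast paths handled; after this change they go through the general advanced-indexing dispatch and should give the same results as the old ``masked_select``/``index_select`` routes:

    import torch

    x = torch.tensor([[1.0, 2.0], [3.0, 4.0]])
    mask = x > 2                 # ByteTensor mask with the same shape as x
    x[mask]                      # same as x.masked_select(mask): tensor([ 3.,  4.])
    idx = torch.tensor([1, 0])   # single 1-D LongTensor index
    x[idx]                       # same as x.index_select(0, idx): rows 1 and 0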
| @ -112,7 +112,7 @@ def _check_capability(): | |||||||
|             warnings.warn(incorrect_binary_warn % (d, name, 8000, CUDA_VERSION)) |             warnings.warn(incorrect_binary_warn % (d, name, 8000, CUDA_VERSION)) | ||||||
|         elif CUDA_VERSION < 9000 and major >= 7: |         elif CUDA_VERSION < 9000 and major >= 7: | ||||||
|             warnings.warn(incorrect_binary_warn % (d, name, 9000, CUDA_VERSION)) |             warnings.warn(incorrect_binary_warn % (d, name, 9000, CUDA_VERSION)) | ||||||
|         elif capability == (3, 0) or capability == (5, 0) or major < 3: |         elif capability == (3, 0) or major < 3: | ||||||
|             warnings.warn(old_gpu_warn % (d, name, major, capability[1])) |             warnings.warn(old_gpu_warn % (d, name, major, capability[1])) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | |||||||
| @ -16,7 +16,7 @@ class Bernoulli(ExponentialFamily): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = Bernoulli(torch.Tensor([0.3])) |         >>> m = Bernoulli(torch.tensor([0.3])) | ||||||
|         >>> m.sample()  # 30% chance 1; 70% chance 0 |         >>> m.sample()  # 30% chance 1; 70% chance 0 | ||||||
|          0.0 |          0.0 | ||||||
|         [torch.FloatTensor of size 1] |         [torch.FloatTensor of size 1] | ||||||
|  | |||||||
| @ -13,7 +13,7 @@ class Beta(ExponentialFamily): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = Beta(torch.Tensor([0.5]), torch.Tensor([0.5])) |         >>> m = Beta(torch.tensor([0.5]), torch.tensor([0.5])) | ||||||
|         >>> m.sample()  # Beta distributed with concentration concentration1 and concentration0 |         >>> m.sample()  # Beta distributed with concentration concentration1 and concentration0 | ||||||
|          0.1046 |          0.1046 | ||||||
|         [torch.FloatTensor of size 1] |         [torch.FloatTensor of size 1] | ||||||
| @ -50,7 +50,7 @@ class Beta(ExponentialFamily): | |||||||
|     def rsample(self, sample_shape=()): |     def rsample(self, sample_shape=()): | ||||||
|         value = self._dirichlet.rsample(sample_shape).select(-1, 0) |         value = self._dirichlet.rsample(sample_shape).select(-1, 0) | ||||||
|         if isinstance(value, Number): |         if isinstance(value, Number): | ||||||
|             value = self._dirichlet.concentration.new([value]) |             value = self._dirichlet.concentration.new_tensor(value) | ||||||
|         return value |         return value | ||||||
|  |  | ||||||
|     def log_prob(self, value): |     def log_prob(self, value): | ||||||
|  | |||||||
| @ -17,7 +17,7 @@ class Binomial(Distribution): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = Binomial(100, torch.Tensor([0 , .2, .8, 1])) |         >>> m = Binomial(100, torch.tensor([0 , .2, .8, 1])) | ||||||
|         >>> x = m.sample() |         >>> x = m.sample() | ||||||
|          0 |          0 | ||||||
|          22 |          22 | ||||||
|  | |||||||
| @ -27,7 +27,7 @@ class Categorical(Distribution): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = Categorical(torch.Tensor([ 0.25, 0.25, 0.25, 0.25 ])) |         >>> m = Categorical(torch.tensor([ 0.25, 0.25, 0.25, 0.25 ])) | ||||||
|         >>> m.sample()  # equal probability of 0, 1, 2, 3 |         >>> m.sample()  # equal probability of 0, 1, 2, 3 | ||||||
|          3 |          3 | ||||||
|         [torch.LongTensor of size 1] |         [torch.LongTensor of size 1] | ||||||
|  | |||||||
| @ -15,7 +15,7 @@ class Cauchy(Distribution): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = Cauchy(torch.Tensor([0.0]), torch.Tensor([1.0])) |         >>> m = Cauchy(torch.tensor([0.0]), torch.tensor([1.0])) | ||||||
|         >>> m.sample()  # sample from a Cauchy distribution with loc=0 and scale=1 |         >>> m.sample()  # sample from a Cauchy distribution with loc=0 and scale=1 | ||||||
|          2.3214 |          2.3214 | ||||||
|         [torch.FloatTensor of size 1] |         [torch.FloatTensor of size 1] | ||||||
| @ -38,11 +38,11 @@ class Cauchy(Distribution): | |||||||
|  |  | ||||||
|     @property |     @property | ||||||
|     def mean(self): |     def mean(self): | ||||||
|         return self.loc.new([float('nan')]).expand(self._extended_shape()) |         return self.loc.new_tensor(float('nan')).expand(self._extended_shape()) | ||||||
|  |  | ||||||
|     @property |     @property | ||||||
|     def variance(self): |     def variance(self): | ||||||
|         return self.loc.new([float('inf')]).expand(self._extended_shape()) |         return self.loc.new_tensor(float('inf')).expand(self._extended_shape()) | ||||||
|  |  | ||||||
|     def rsample(self, sample_shape=torch.Size()): |     def rsample(self, sample_shape=torch.Size()): | ||||||
|         shape = self._extended_shape(sample_shape) |         shape = self._extended_shape(sample_shape) | ||||||
|  | |||||||
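The ``new([...])`` to ``new_tensor(...)`` change above leans on 0.4's zero-dimensional (scalar) tensors; a brief sketch, with dtype and device following ``loc``:

    import torch

    loc = torch.tensor([0.0, 1.0])
    loc.new([float('nan')])                  # old style: a 1-element tensor
    loc.new_tensor(float('nan'))             # new style: a 0-dim scalar tensor
    loc.new_tensor(float('nan')).expand(2)   # broadcasts to the batch shape, as in Cauchy.mean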
| @ -9,7 +9,7 @@ class Chi2(Gamma): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = Chi2(torch.Tensor([1.0])) |         >>> m = Chi2(torch.tensor([1.0])) | ||||||
|         >>> m.sample()  # Chi2 distributed with shape df=1 |         >>> m.sample()  # Chi2 distributed with shape df=1 | ||||||
|          0.1046 |          0.1046 | ||||||
|         [torch.FloatTensor of size 1] |         [torch.FloatTensor of size 1] | ||||||
|  | |||||||
| @ -42,7 +42,7 @@ class Dirichlet(ExponentialFamily): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = Dirichlet(torch.Tensor([0.5, 0.5])) |         >>> m = Dirichlet(torch.tensor([0.5, 0.5])) | ||||||
|         >>> m.sample()  # Dirichlet distributed with concentration parameter concentration |         >>> m.sample()  # Dirichlet distributed with concentration parameter concentration | ||||||
|          0.1046 |          0.1046 | ||||||
|          0.8954 |          0.8954 | ||||||
| @ -77,11 +77,11 @@ class Dirichlet(ExponentialFamily): | |||||||
|  |  | ||||||
|     @property |     @property | ||||||
|     def mean(self): |     def mean(self): | ||||||
|         return self.concentration / self.concentration.sum(-1) |         return self.concentration / self.concentration.sum(-1, True) | ||||||
|  |  | ||||||
|     @property |     @property | ||||||
|     def variance(self): |     def variance(self): | ||||||
|         con0 = self.concentration.sum(-1) |         con0 = self.concentration.sum(-1, True) | ||||||
|         return self.concentration * (con0 - self.concentration) / (con0.pow(2) * (con0 + 1)) |         return self.concentration * (con0 - self.concentration) / (con0.pow(2) * (con0 + 1)) | ||||||
|  |  | ||||||
|     def entropy(self): |     def entropy(self): | ||||||
|  | |||||||
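Why ``keepdim=True`` matters in the lines above: with a batched ``concentration`` the summed axis has to survive as a size-1 dimension so the division broadcasts per row; a minimal sketch:

    import torch

    conc = torch.tensor([[0.5, 0.5, 1.0],
                         [2.0, 3.0, 5.0]])   # two Dirichlets over 3 categories
    con0 = conc.sum(-1, True)                # shape (2, 1); without keepdim it would be (2,)
    mean = conc / con0                       # shape (2, 3); each row sums to 1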
| @ -12,7 +12,7 @@ class Exponential(ExponentialFamily): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = Exponential(torch.Tensor([1.0])) |         >>> m = Exponential(torch.tensor([1.0])) | ||||||
|         >>> m.sample()  # Exponential distributed with rate=1 |         >>> m.sample()  # Exponential distributed with rate=1 | ||||||
|          0.1046 |          0.1046 | ||||||
|         [torch.FloatTensor of size 1] |         [torch.FloatTensor of size 1] | ||||||
|  | |||||||
| @ -13,7 +13,7 @@ class FisherSnedecor(Distribution): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = FisherSnedecor(torch.Tensor([1.0]), torch.Tensor([2.0])) |         >>> m = FisherSnedecor(torch.tensor([1.0]), torch.tensor([2.0])) | ||||||
|         >>> m.sample()  # Fisher-Snedecor-distributed with df1=1 and df2=2 |         >>> m.sample()  # Fisher-Snedecor-distributed with df1=1 and df2=2 | ||||||
|          0.2453 |          0.2453 | ||||||
|         [torch.FloatTensor of size 1] |         [torch.FloatTensor of size 1] | ||||||
|  | |||||||
| @ -18,7 +18,7 @@ class Gamma(ExponentialFamily): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = Gamma(torch.Tensor([1.0]), torch.Tensor([1.0])) |         >>> m = Gamma(torch.tensor([1.0]), torch.tensor([1.0])) | ||||||
|         >>> m.sample()  # Gamma distributed with concentration=1 and rate=1 |         >>> m.sample()  # Gamma distributed with concentration=1 and rate=1 | ||||||
|          0.1046 |          0.1046 | ||||||
|         [torch.FloatTensor of size 1] |         [torch.FloatTensor of size 1] | ||||||
|  | |||||||
| @ -17,7 +17,7 @@ class Geometric(Distribution): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = Geometric(torch.Tensor([0.3])) |         >>> m = Geometric(torch.tensor([0.3])) | ||||||
|         >>> m.sample()  # underlying Bernoulli has 30% chance 1; 70% chance 0 |         >>> m.sample()  # underlying Bernoulli has 30% chance 1; 70% chance 0 | ||||||
|          2 |          2 | ||||||
|         [torch.FloatTensor of size 1] |         [torch.FloatTensor of size 1] | ||||||
|  | |||||||
| @ -16,7 +16,7 @@ class Gumbel(TransformedDistribution): | |||||||
|  |  | ||||||
|     Examples:: |     Examples:: | ||||||
|  |  | ||||||
|         >>> m = Gumbel(torch.Tensor([1.0]), torch.Tensor([2.0])) |         >>> m = Gumbel(torch.tensor([1.0]), torch.tensor([2.0])) | ||||||
|         >>> m.sample()  # sample from Gumbel distribution with loc=1, scale=2 |         >>> m.sample()  # sample from Gumbel distribution with loc=1, scale=2 | ||||||
|          1.0124 |          1.0124 | ||||||
|         [torch.FloatTensor of size 1] |         [torch.FloatTensor of size 1] | ||||||
|  | |||||||
| @ -52,6 +52,8 @@ class Independent(Distribution): | |||||||
|  |  | ||||||
|     @property |     @property | ||||||
|     def has_enumerate_support(self): |     def has_enumerate_support(self): | ||||||
|  |         if self.reinterpreted_batch_ndims > 0: | ||||||
|  |             return False | ||||||
|         return self.base_dist.has_enumerate_support |         return self.base_dist.has_enumerate_support | ||||||
|  |  | ||||||
|     @constraints.dependent_property |     @constraints.dependent_property | ||||||
| @ -70,7 +72,7 @@ class Independent(Distribution): | |||||||
|         return self.base_dist.sample(sample_shape) |         return self.base_dist.sample(sample_shape) | ||||||
|  |  | ||||||
|     def rsample(self, sample_shape=torch.Size()): |     def rsample(self, sample_shape=torch.Size()): | ||||||
|         return self.base_dist.rsample(self, sample_shape) |         return self.base_dist.rsample(sample_shape) | ||||||
|  |  | ||||||
|     def log_prob(self, value): |     def log_prob(self, value): | ||||||
|         log_prob = self.base_dist.log_prob(value) |         log_prob = self.base_dist.log_prob(value) | ||||||
| @ -81,4 +83,6 @@ class Independent(Distribution): | |||||||
|         return _sum_rightmost(entropy, self.reinterpreted_batch_ndims) |         return _sum_rightmost(entropy, self.reinterpreted_batch_ndims) | ||||||
|  |  | ||||||
|     def enumerate_support(self): |     def enumerate_support(self): | ||||||
|  |         if self.reinterpreted_batch_ndims > 0: | ||||||
|  |             raise NotImplementedError("Enumeration over cartesian product is not implemented") | ||||||
|         return self.base_dist.enumerate_support() |         return self.base_dist.enumerate_support() | ||||||
|  | |||||||
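A short sketch of the two fixes above: ``rsample`` now forwards only ``sample_shape``, and enumeration is refused once batch dimensions are reinterpreted as event dimensions (class names as in ``torch.distributions``):

    import torch
    from torch.distributions import Bernoulli, Independent, Normal

    d = Independent(Normal(torch.zeros(3), torch.ones(3)), 1)
    d.rsample((2,))          # shape (2, 3); previously base_dist.rsample was also handed `self` and failed

    b = Independent(Bernoulli(torch.tensor([0.3, 0.7])), 1)
    b.has_enumerate_support  # False, even though the base Bernoulli supports enumeration
    # b.enumerate_support()  # would raise NotImplementedError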
| @ -11,7 +11,7 @@ class Laplace(Distribution): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = Laplace(torch.Tensor([0.0]), torch.Tensor([1.0])) |         >>> m = Laplace(torch.tensor([0.0]), torch.tensor([1.0])) | ||||||
|         >>> m.sample()  # Laplace distributed with loc=0, scale=1 |         >>> m.sample()  # Laplace distributed with loc=0, scale=1 | ||||||
|          0.1046 |          0.1046 | ||||||
|         [torch.FloatTensor of size 1] |         [torch.FloatTensor of size 1] | ||||||
|  | |||||||
| @ -14,7 +14,7 @@ class LogNormal(TransformedDistribution): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = LogNormal(torch.Tensor([0.0]), torch.Tensor([1.0])) |         >>> m = LogNormal(torch.tensor([0.0]), torch.tensor([1.0])) | ||||||
|         >>> m.sample()  # log-normal distributed with mean=0 and stddev=1 |         >>> m.sample()  # log-normal distributed with mean=0 and stddev=1 | ||||||
|          0.1046 |          0.1046 | ||||||
|         [torch.FloatTensor of size 1] |         [torch.FloatTensor of size 1] | ||||||
|  | |||||||
| @ -14,20 +14,18 @@ class LogisticNormal(TransformedDistribution): | |||||||
|         X ~ LogisticNormal(loc, scale) |         X ~ LogisticNormal(loc, scale) | ||||||
|         Y = log(X / (1 - X.cumsum(-1)))[..., :-1] ~ Normal(loc, scale) |         Y = log(X / (1 - X.cumsum(-1)))[..., :-1] ~ Normal(loc, scale) | ||||||
|  |  | ||||||
|     Example:: |  | ||||||
|  |  | ||||||
|         >>> m = LogisticNormal(torch.Tensor([0.0] * 3), torch.Tensor([1.0] * 3)) |  | ||||||
|         >>> m.sample()  # logistic-normal distributed with mean=(0, 0, 0) and |  | ||||||
|                         # stddev=(1, 1, 1) of the base Normal distribution |  | ||||||
|          0.4163 |  | ||||||
|          0.1386 |  | ||||||
|          0.3539 |  | ||||||
|          0.0912 |  | ||||||
|         [torch.FloatTensor of size (4,)] |  | ||||||
|  |  | ||||||
|     Args: |     Args: | ||||||
|         loc (float or Tensor): mean of the base distribution |         loc (float or Tensor): mean of the base distribution | ||||||
|         scale (float or Tensor): standard deviation of the base distribution |         scale (float or Tensor): standard deviation of the base distribution | ||||||
|  |  | ||||||
|  |     Example:: | ||||||
|  |  | ||||||
|  |         >>> # logistic-normal distributed with mean=(0, 0, 0) and stddev=(1, 1, 1) | ||||||
|  |         >>> # of the base Normal distribution | ||||||
|  |         >>> m = distributions.LogisticNormal(torch.tensor([0.0] * 3), torch.tensor([1.0] * 3)) | ||||||
|  |         >>> m.sample() | ||||||
|  |         tensor([ 0.7653,  0.0341,  0.0579,  0.1427]) | ||||||
|  |  | ||||||
|     """ |     """ | ||||||
|     arg_constraints = {'loc': constraints.real, 'scale': constraints.positive} |     arg_constraints = {'loc': constraints.real, 'scale': constraints.positive} | ||||||
|     support = constraints.simplex |     support = constraints.simplex | ||||||
|  | |||||||
| @ -24,7 +24,7 @@ class Multinomial(Distribution): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = Multinomial(100, torch.Tensor([ 1, 1, 1, 1])) |         >>> m = Multinomial(100, torch.tensor([ 1, 1, 1, 1])) | ||||||
|         >>> x = m.sample()  # equal probability of 0, 1, 2, 3 |         >>> x = m.sample()  # equal probability of 0, 1, 2, 3 | ||||||
|          21 |          21 | ||||||
|          24 |          24 | ||||||
| @ -32,7 +32,7 @@ class Multinomial(Distribution): | |||||||
|          25 |          25 | ||||||
|         [torch.FloatTensor of size 4] |         [torch.FloatTensor of size 4] | ||||||
|  |  | ||||||
|         >>> Multinomial(probs=torch.Tensor([1, 1, 1, 1])).log_prob(x) |         >>> Multinomial(probs=torch.tensor([1, 1, 1, 1])).log_prob(x) | ||||||
|         -4.1338 |         -4.1338 | ||||||
|         [torch.FloatTensor of size 1] |         [torch.FloatTensor of size 1] | ||||||
|  |  | ||||||
|  | |||||||
| @ -14,7 +14,7 @@ class Normal(ExponentialFamily): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = Normal(torch.Tensor([0.0]), torch.Tensor([1.0])) |         >>> m = Normal(torch.tensor([0.0]), torch.tensor([1.0])) | ||||||
|         >>> m.sample()  # normally distributed with loc=0 and scale=1 |         >>> m.sample()  # normally distributed with loc=0 and scale=1 | ||||||
|          0.1046 |          0.1046 | ||||||
|         [torch.FloatTensor of size 1] |         [torch.FloatTensor of size 1] | ||||||
|  | |||||||
| @ -18,7 +18,7 @@ class OneHotCategorical(Distribution): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = OneHotCategorical(torch.Tensor([ 0.25, 0.25, 0.25, 0.25 ])) |         >>> m = OneHotCategorical(torch.tensor([ 0.25, 0.25, 0.25, 0.25 ])) | ||||||
|         >>> m.sample()  # equal probability of 0, 1, 2, 3 |         >>> m.sample()  # equal probability of 0, 1, 2, 3 | ||||||
|          0 |          0 | ||||||
|          0 |          0 | ||||||
|  | |||||||
| @ -16,7 +16,7 @@ class Pareto(TransformedDistribution): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = Pareto(torch.Tensor([1.0]), torch.Tensor([1.0])) |         >>> m = Pareto(torch.tensor([1.0]), torch.tensor([1.0])) | ||||||
|         >>> m.sample()  # sample from a Pareto distribution with scale=1 and alpha=1 |         >>> m.sample()  # sample from a Pareto distribution with scale=1 and alpha=1 | ||||||
|          1.5623 |          1.5623 | ||||||
|         [torch.FloatTensor of size 1] |         [torch.FloatTensor of size 1] | ||||||
|  | |||||||
| @ -15,7 +15,7 @@ class Poisson(ExponentialFamily): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = Poisson(torch.Tensor([4])) |         >>> m = Poisson(torch.tensor([4])) | ||||||
|         >>> m.sample() |         >>> m.sample() | ||||||
|          3 |          3 | ||||||
|         [torch.LongTensor of size 1] |         [torch.LongTensor of size 1] | ||||||
|  | |||||||
| @ -82,8 +82,8 @@ class RelaxedBernoulli(TransformedDistribution): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = RelaxedBernoulli(torch.Tensor([2.2]), |         >>> m = RelaxedBernoulli(torch.tensor([2.2]), | ||||||
|                                  torch.Tensor([0.1, 0.2, 0.3, 0.99])) |                                  torch.tensor([0.1, 0.2, 0.3, 0.99])) | ||||||
|         >>> m.sample() |         >>> m.sample() | ||||||
|          0.2951 |          0.2951 | ||||||
|          0.3442 |          0.3442 | ||||||
|  | |||||||
| @ -80,8 +80,8 @@ class RelaxedOneHotCategorical(TransformedDistribution): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = RelaxedOneHotCategorical(torch.Tensor([2.2]), |         >>> m = RelaxedOneHotCategorical(torch.tensor([2.2]), | ||||||
|                                          torch.Tensor([0.1, 0.2, 0.3, 0.4])) |                                          torch.tensor([0.1, 0.2, 0.3, 0.4])) | ||||||
|         >>> m.sample()  # samples a relaxed one-hot vector with probs 0.1, 0.2, 0.3, 0.4 |         >>> m.sample()  # samples a relaxed one-hot vector with probs 0.1, 0.2, 0.3, 0.4 | ||||||
|          0.1294 |          0.1294 | ||||||
|          0.2324 |          0.2324 | ||||||
|  | |||||||
| @ -13,7 +13,7 @@ class StudentT(Distribution): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = StudentT(torch.Tensor([2.0])) |         >>> m = StudentT(torch.tensor([2.0])) | ||||||
|         >>> m.sample()  # Student's t-distributed with degrees of freedom=2 |         >>> m.sample()  # Student's t-distributed with degrees of freedom=2 | ||||||
|          0.1046 |          0.1046 | ||||||
|         [torch.FloatTensor of size 1] |         [torch.FloatTensor of size 1] | ||||||
|  | |||||||
| @ -14,7 +14,7 @@ class Uniform(Distribution): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> m = Uniform(torch.Tensor([0.0]), torch.Tensor([5.0])) |         >>> m = Uniform(torch.tensor([0.0]), torch.tensor([5.0])) | ||||||
|         >>> m.sample()  # uniformly distributed in the range [0.0, 5.0) |         >>> m.sample()  # uniformly distributed in the range [0.0, 5.0) | ||||||
|          2.3418 |          2.3418 | ||||||
|         [torch.FloatTensor of size 1] |         [torch.FloatTensor of size 1] | ||||||
|  | |||||||
| @ -72,24 +72,17 @@ def btrifact(A, info=None, pivot=True): | |||||||
|         >>> A = torch.randn(2, 3, 3) |         >>> A = torch.randn(2, 3, 3) | ||||||
|         >>> A_LU, pivots = torch.btrifact(A) |         >>> A_LU, pivots = torch.btrifact(A) | ||||||
|         >>> A_LU |         >>> A_LU | ||||||
|  |         tensor([[[ 1.3506,  2.5558, -0.0816], | ||||||
|  |                  [ 0.1684,  1.1551,  0.1940], | ||||||
|  |                  [ 0.1193,  0.6189, -0.5497]], | ||||||
|  |  | ||||||
|         (0 ,.,.) = |                 [[ 0.4526,  1.2526, -0.3285], | ||||||
|           0.7908 -0.0854  0.1522 |                  [-0.7988,  0.7175, -0.9701], | ||||||
|           0.2757 -1.2942 -1.3715 |                  [ 0.2634, -0.9255, -0.3459]]]) | ||||||
|          -0.6029  0.3609  0.3210 |  | ||||||
|  |  | ||||||
|         (1 ,.,.) = |  | ||||||
|           0.9091  0.1719  0.7741 |  | ||||||
|           0.1625  0.6720  0.1687 |  | ||||||
|          -0.1927 -0.9420 -0.4891 |  | ||||||
|         [torch.FloatTensor of size (2,3,3)] |  | ||||||
|  |  | ||||||
|         >>> pivots |         >>> pivots | ||||||
|  |         tensor([[ 3,  3,  3], | ||||||
|          2  2  3 |                 [ 3,  3,  3]], dtype=torch.int32) | ||||||
|          1  3  3 |  | ||||||
|         [torch.IntTensor of size (2,3)] |  | ||||||
|  |  | ||||||
|     """ |     """ | ||||||
|     # Overwriting reason: |     # Overwriting reason: | ||||||
|     # `info` is being deprecated in favor of `btrifact_with_info`. This warning |     # `info` is being deprecated in favor of `btrifact_with_info`. This warning | ||||||
| @ -124,11 +117,10 @@ def btriunpack(LU_data, LU_pivots, unpack_data=True, unpack_pivots=True): | |||||||
|  |  | ||||||
|         >>> A = torch.randn(2, 3, 3) |         >>> A = torch.randn(2, 3, 3) | ||||||
|         >>> A_LU, pivots = A.btrifact() |         >>> A_LU, pivots = A.btrifact() | ||||||
|         >>> P, a_L, a_U = torch.btriunpack(A_LU, pivots) |         >>> P, A_L, A_U = torch.btriunpack(A_LU, pivots) | ||||||
|         >>> |         >>> | ||||||
|         >>> # test that (P, A_L, A_U) gives LU factorization |         >>> # can recover A from factorization | ||||||
|         >>> A_ = torch.bmm(P, torch.bmm(A_L, A_U)) |         >>> A_ = torch.bmm(P, torch.bmm(A_L, A_U)) | ||||||
|         >>> assert torch.equal(A_, A) == True  # can recover A |  | ||||||
|     """ |     """ | ||||||
|  |  | ||||||
|     nBatch, sz, _ = LU_data.size() |     nBatch, sz, _ = LU_data.size() | ||||||
| @ -311,11 +303,8 @@ def isnan(tensor): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> torch.isnan(torch.Tensor([1, float('nan'), 2])) |         >>> torch.isnan(torch.tensor([1, float('nan'), 2])) | ||||||
|          0 |         tensor([ 0,  1,  0], dtype=torch.uint8) | ||||||
|          1 |  | ||||||
|          0 |  | ||||||
|         [torch.ByteTensor of size 3] |  | ||||||
|     """ |     """ | ||||||
|     if not isinstance(tensor, torch.Tensor): |     if not isinstance(tensor, torch.Tensor): | ||||||
|         raise ValueError("The argument is not a tensor") |         raise ValueError("The argument is not a tensor") | ||||||
| @ -344,45 +333,25 @@ def unique(input, sorted=False, return_inverse=False): | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>>> output = torch.unique(torch.LongTensor([1, 3, 2, 3])) |         >>> output = torch.unique(torch.tensor([1, 3, 2, 3], dtype=torch.long)) | ||||||
|         >>>> output |         >>> output | ||||||
|  |         tensor([ 2,  3,  1]) | ||||||
|  |  | ||||||
|          2 |         >>> output, inverse_indices = torch.unique( | ||||||
|          3 |                 torch.tensor([1, 3, 2, 3], dtype=torch.long), sorted=True, return_inverse=True) | ||||||
|          1 |         >>> output | ||||||
|         [torch.LongTensor of size (3,)] |         tensor([ 1,  2,  3]) | ||||||
|  |         >>> inverse_indices | ||||||
|  |         tensor([ 0,  2,  1,  2]) | ||||||
|  |  | ||||||
|         >>>> output, inverse_indices = torch.unique( |         >>> output, inverse_indices = torch.unique( | ||||||
|                  torch.LongTensor([1, 3, 2, 3]), sorted=True, return_inverse=True) |                 torch.tensor([[1, 3], [2, 3]], dtype=torch.long), sorted=True, return_inverse=True) | ||||||
|         >>>> output |         >>> output | ||||||
|  |         tensor([ 1,  2,  3]) | ||||||
|  |         >>> inverse_indices | ||||||
|  |         tensor([[ 0,  2], | ||||||
|  |                 [ 1,  2]]) | ||||||
|  |  | ||||||
|          1 |  | ||||||
|          2 |  | ||||||
|          3 |  | ||||||
|         [torch.LongTensor of size (3,)] |  | ||||||
|  |  | ||||||
|         >>>> inverse_indices |  | ||||||
|  |  | ||||||
|          0 |  | ||||||
|          2 |  | ||||||
|          1 |  | ||||||
|          2 |  | ||||||
|         [torch.LongTensor of size (4,)] |  | ||||||
|  |  | ||||||
|         >>>> output, inverse_indices = torch.unique( |  | ||||||
|                  torch.LongTensor([[1, 3], [2, 3]]), sorted=True, return_inverse=True) |  | ||||||
|         >>>> output |  | ||||||
|  |  | ||||||
|          1 |  | ||||||
|          2 |  | ||||||
|          3 |  | ||||||
|         [torch.LongTensor of size (3,)] |  | ||||||
|  |  | ||||||
|         >>>> inverse_indices |  | ||||||
|  |  | ||||||
|          0  2 |  | ||||||
|          1  2 |  | ||||||
|         [torch.LongTensor of size (2,2)] |  | ||||||
|     """ |     """ | ||||||
|     output, inverse_indices = torch._unique( |     output, inverse_indices = torch._unique( | ||||||
|         input, |         input, | ||||||
| @ -412,19 +381,14 @@ def argmax(input, dim=None, keepdim=False): | |||||||
|  |  | ||||||
|         >>> a = torch.randn(4, 4) |         >>> a = torch.randn(4, 4) | ||||||
|         >>> a |         >>> a | ||||||
|  |         tensor([[ 1.3398,  0.2663, -0.2686,  0.2450], | ||||||
|  |                 [-0.7401, -0.8805, -0.3402, -1.1936], | ||||||
|  |                 [ 0.4907, -1.3948, -1.0691, -0.3132], | ||||||
|  |                 [-1.6092,  0.5419, -0.2993,  0.3195]]) | ||||||
|  |  | ||||||
|          2.3461  0.0056  1.4846  0.3911 |  | ||||||
|         -1.3584 -1.0066  0.0530  1.1754 |  | ||||||
|         -0.7929 -0.3194 -1.4865  0.4020 |  | ||||||
|          0.1101  0.6694  1.3456  0.8235 |  | ||||||
|         [torch.FloatTensor of size (4,4)] |  | ||||||
|  |  | ||||||
|         >>> torch.argmax(a, dim=1) |         >>> torch.argmax(a, dim=1) | ||||||
|         0 |         tensor([ 0,  2,  0,  1]) | ||||||
|         3 |  | ||||||
|         3 |  | ||||||
|         2 |  | ||||||
|         [torch.LongTensor of size (4,)] |  | ||||||
|     """ |     """ | ||||||
|     if dim is None: |     if dim is None: | ||||||
|         return torch._argmax(input.contiguous().view(-1), dim=0, keepdim=False) |         return torch._argmax(input.contiguous().view(-1), dim=0, keepdim=False) | ||||||
| @ -448,19 +412,14 @@ def argmin(input, dim=None, keepdim=False): | |||||||
|  |  | ||||||
|         >>> a = torch.randn(4, 4) |         >>> a = torch.randn(4, 4) | ||||||
|         >>> a |         >>> a | ||||||
|  |         tensor([[ 0.1139,  0.2254, -0.1381,  0.3687], | ||||||
|  |                 [ 1.0100, -1.1975, -0.0102, -0.4732], | ||||||
|  |                 [-0.9240,  0.1207, -0.7506, -1.0213], | ||||||
|  |                 [ 1.7809, -1.2960,  0.9384,  0.1438]]) | ||||||
|  |  | ||||||
|          2.3461  0.0056  1.4846  0.3911 |  | ||||||
|         -1.3584 -1.0066  0.0530  1.1754 |  | ||||||
|         -0.7929 -0.3194 -1.4865  0.4020 |  | ||||||
|          0.1101  0.6694  1.3456  0.8235 |  | ||||||
|         [torch.FloatTensor of size (4,4)] |  | ||||||
|  |  | ||||||
|         >>> torch.argmin(a, dim=1) |         >>> torch.argmin(a, dim=1) | ||||||
|          1 |         tensor([ 2,  1,  3,  1]) | ||||||
|          0 |  | ||||||
|          2 |  | ||||||
|          0 |  | ||||||
|         [torch.LongTensor of size (4,)] |  | ||||||
|     """ |     """ | ||||||
|     if dim is None: |     if dim is None: | ||||||
|         return torch._argmin(input.contiguous().view(-1), dim=0, keepdim=False) |         return torch._argmin(input.contiguous().view(-1), dim=0, keepdim=False) | ||||||
|  | |||||||
| @ -21,17 +21,6 @@ ELSE () | |||||||
|   SET(CMAKE_CXX_STANDARD 11) |   SET(CMAKE_CXX_STANDARD 11) | ||||||
| ENDIF () | ENDIF () | ||||||
|  |  | ||||||
| IF ($ENV{PYTORCH_BINARY_BUILD}) |  | ||||||
|   MESSAGE(STATUS "PYTORCH_BINARY_BUILD detected. Statically linking libstdc++") |  | ||||||
|   SET(CMAKE_CXX_FLAGS "-static-libstdc++ ${CMAKE_CXX_FLAGS}") |  | ||||||
|  |  | ||||||
|   IF (UNIX AND NOT APPLE) |  | ||||||
|     # hiding statically linked library symbols, this flag is not available for the linker under macOS |  | ||||||
|     SET(CMAKE_CXX_FLAGS "-Wl,--exclude-libs,libstdc++.a ${CMAKE_CXX_FLAGS}") |  | ||||||
|   ENDIF(UNIX AND NOT APPLE) |  | ||||||
|  |  | ||||||
| ENDIF() |  | ||||||
|  |  | ||||||
| ADD_LIBRARY(shm SHARED core.cpp) | ADD_LIBRARY(shm SHARED core.cpp) | ||||||
| ADD_EXECUTABLE(torch_shm_manager manager.cpp) | ADD_EXECUTABLE(torch_shm_manager manager.cpp) | ||||||
| INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) | INCLUDE_DIRECTORIES(${CMAKE_CURRENT_SOURCE_DIR}) | ||||||
|  | |||||||
| @ -232,11 +232,9 @@ def avg_pool1d(input, kernel_size, stride=None, padding=0, | |||||||
|  |  | ||||||
|     Example:: |     Example:: | ||||||
|         >>> # pool of square window of size=3, stride=2 |         >>> # pool of square window of size=3, stride=2 | ||||||
|         >>> input = torch.Tensor([[[1,2,3,4,5,6,7]]]) |         >>> input = torch.tensor([[[1.,2,3,4,5,6,7]]]) | ||||||
|         >>> F.avg_pool1d(input, kernel_size=3, stride=2) |         >>> F.avg_pool1d(input, kernel_size=3, stride=2) | ||||||
|         (0 ,.,.) = |         tensor([[[ 2.,  4.,  6.]]]) | ||||||
|           2  4  6 |  | ||||||
|         [torch.FloatTensor of size (1,1,3)] |  | ||||||
|     """ |     """ | ||||||
|     if input.dim() != 3: |     if input.dim() != 3: | ||||||
|         raise ValueError('expected 3D input (got {} dimensions)' |         raise ValueError('expected 3D input (got {} dimensions)' | ||||||
| @ -1038,38 +1036,30 @@ def embedding(input, weight, padding_idx=None, max_norm=None, norm_type=2, | |||||||
|     Examples:: |     Examples:: | ||||||
|  |  | ||||||
|         >>> # a batch of 2 samples of 4 indices each |         >>> # a batch of 2 samples of 4 indices each | ||||||
|         >>> input = torch.LongTensor([[1,2,4,5],[4,3,2,9]]) |         >>> input = torch.tensor([[1,2,4,5],[4,3,2,9]]) | ||||||
|         >>> # an embedding matrix containing 10 tensors of size 3 |         >>> # an embedding matrix containing 10 tensors of size 3 | ||||||
|         >>> embedding_matrix = torch.rand(10, 3) |         >>> embedding_matrix = torch.rand(10, 3) | ||||||
|         >>> F.embedding(input, embedding_matrix) |         >>> F.embedding(input, embedding_matrix) | ||||||
|  |         tensor([[[ 0.8490,  0.9625,  0.6753], | ||||||
|  |                  [ 0.9666,  0.7761,  0.6108], | ||||||
|  |                  [ 0.6246,  0.9751,  0.3618], | ||||||
|  |                  [ 0.4161,  0.2419,  0.7383]], | ||||||
|  |  | ||||||
|         (0 ,.,.) = |                 [[ 0.6246,  0.9751,  0.3618], | ||||||
|          -1.0822  1.2522  0.2434 |                  [ 0.0237,  0.7794,  0.0528], | ||||||
|           0.8393 -0.6062 -0.3348 |                  [ 0.9666,  0.7761,  0.6108], | ||||||
|           0.6597  0.0350  0.0837 |                  [ 0.3385,  0.8612,  0.1867]]]) | ||||||
|           0.5521  0.9447  0.0498 |  | ||||||
|  |  | ||||||
|         (1 ,.,.) = |  | ||||||
|           0.6597  0.0350  0.0837 |  | ||||||
|          -0.1527  0.0877  0.4260 |  | ||||||
|           0.8393 -0.6062 -0.3348 |  | ||||||
|          -0.8738 -0.9054  0.4281 |  | ||||||
|         [torch.FloatTensor of size (2,4,3)] |  | ||||||
|  |  | ||||||
|         >>> # example with padding_idx |         >>> # example with padding_idx | ||||||
|         >>> weights = torch.rand(10, 3) |         >>> weights = torch.rand(10, 3) | ||||||
|         >>> weights[0, :].zero_() |         >>> weights[0, :].zero_() | ||||||
|         >>> embedding_matrix = weights |         >>> embedding_matrix = weights | ||||||
|         >>> input = torch.LongTensor([[0,2,0,5]]) |         >>> input = torch.tensor([[0,2,0,5]]) | ||||||
|         >>> F.embedding(input, embedding_matrix, padding_idx=0) |         >>> F.embedding(input, embedding_matrix, padding_idx=0) | ||||||
|  |         tensor([[[ 0.0000,  0.0000,  0.0000], | ||||||
|         (0 ,.,.) = |                  [ 0.5609,  0.5384,  0.8720], | ||||||
|           0.0000  0.0000  0.0000 |                  [ 0.0000,  0.0000,  0.0000], | ||||||
|           0.3452  0.4937 -0.9361 |                  [ 0.6262,  0.2438,  0.7471]]]) | ||||||
|           0.0000  0.0000  0.0000 |  | ||||||
|           0.0706 -2.1962 -0.6276 |  | ||||||
|         [torch.FloatTensor of size (1,4,3)] |  | ||||||
|  |  | ||||||
|     """ |     """ | ||||||
|     input = input.contiguous() |     input = input.contiguous() | ||||||
|     if padding_idx is not None: |     if padding_idx is not None: | ||||||
| @ -1133,14 +1123,11 @@ def embedding_bag(embedding_matrix, indices, offsets=None, | |||||||
|             >>> # an Embedding module containing 10 tensors of size 3 |             >>> # an Embedding module containing 10 tensors of size 3 | ||||||
|             >>> embedding_matrix = torch.rand(10, 3) |             >>> embedding_matrix = torch.rand(10, 3) | ||||||
|             >>> # a batch of 2 samples of 4 indices each |             >>> # a batch of 2 samples of 4 indices each | ||||||
|             >>> input = torch.LongTensor([1,2,4,5,4,3,2,9]) |             >>> input = torch.tensor([1,2,4,5,4,3,2,9]) | ||||||
|             >>> offsets = torch.LongTensor([0,4]) |             >>> offsets = torch.tensor([0,4]) | ||||||
|             >>> embedding_bag(embedding_matrix, input, offsets) |             >>> F.embedding_bag(embedding_matrix, input, offsets) | ||||||
|  |             tensor([[ 0.3397,  0.3552,  0.5545], | ||||||
|             -1.1840 -0.2547 -0.5860 |                     [ 0.5893,  0.4386,  0.5882]]) | ||||||
|             -0.7126  0.0002 -0.3411 |  | ||||||
|             [torch.FloatTensor of size (2,3)] |  | ||||||
|  |  | ||||||
|         """ |         """ | ||||||
|     if indices.dim() == 2: |     if indices.dim() == 2: | ||||||
|         if offsets is not None: |         if offsets is not None: | ||||||
| @ -1328,9 +1315,9 @@ def nll_loss(input, target, weight=None, size_average=True, ignore_index=-100, r | |||||||
|     Example:: |     Example:: | ||||||
|  |  | ||||||
|         >>> # input is of size N x C = 3 x 5 |         >>> # input is of size N x C = 3 x 5 | ||||||
|         >>> input = torch.randn(3, 5) |         >>> input = torch.randn(3, 5, requires_grad=True) | ||||||
|         >>> # each element in target has to have 0 <= value < C |         >>> # each element in target has to have 0 <= value < C | ||||||
|         >>> target = torch.LongTensor([1, 0, 4]) |         >>> target = torch.tensor([1, 0, 4]) | ||||||
|         >>> output = F.nll_loss(F.log_softmax(input), target) |         >>> output = F.nll_loss(F.log_softmax(input), target) | ||||||
|         >>> output.backward() |         >>> output.backward() | ||||||
|     """ |     """ | ||||||
| @ -1448,7 +1435,7 @@ def cross_entropy(input, target, weight=None, size_average=True, ignore_index=-1 | |||||||
|     Examples:: |     Examples:: | ||||||
|  |  | ||||||
|         >>> input = torch.randn(3, 5, requires_grad=True) |         >>> input = torch.randn(3, 5, requires_grad=True) | ||||||
|         >>> target = torch.LongTensor(3).random_(5) |         >>> target = torch.randint(5, (3,), dtype=torch.int64) | ||||||
|         >>> loss = F.cross_entropy(input, target) |         >>> loss = F.cross_entropy(input, target) | ||||||
|         >>> loss.backward() |         >>> loss.backward() | ||||||
|     """ |     """ | ||||||
| @ -1477,8 +1464,8 @@ def binary_cross_entropy(input, target, weight=None, size_average=True, reduce=T | |||||||
|  |  | ||||||
|     Examples:: |     Examples:: | ||||||
|  |  | ||||||
|         >>> input = torch.randn(3, requires_grad=True) |         >>> input = torch.randn((3, 2), requires_grad=True) | ||||||
|         >>> target = torch.LongTensor(3).random_(2) |         >>> target = torch.rand((3, 2), requires_grad=False) | ||||||
|         >>> loss = F.binary_cross_entropy(F.sigmoid(input), target) |         >>> loss = F.binary_cross_entropy(F.sigmoid(input), target) | ||||||
|         >>> loss.backward() |         >>> loss.backward() | ||||||
|     """ |     """ | ||||||
| @ -1519,7 +1506,7 @@ def binary_cross_entropy_with_logits(input, target, weight=None, size_average=Tr | |||||||
|     Examples:: |     Examples:: | ||||||
|  |  | ||||||
|          >>> input = torch.randn(3, requires_grad=True) |          >>> input = torch.randn(3, requires_grad=True) | ||||||
|          >>> target = torch.FloatTensor(3).random_(2) |          >>> target = torch.empty(3).random_(2) | ||||||
|          >>> loss = F.binary_cross_entropy_with_logits(input, target) |          >>> loss = F.binary_cross_entropy_with_logits(input, target) | ||||||
|          >>> loss.backward() |          >>> loss.backward() | ||||||
|     """ |     """ | ||||||
| @ -1657,7 +1644,7 @@ def pixel_shuffle(input, upscale_factor): | |||||||
|     Examples:: |     Examples:: | ||||||
|  |  | ||||||
|         >>> ps = nn.PixelShuffle(3) |         >>> ps = nn.PixelShuffle(3) | ||||||
|         >>> input = torch.Tensor(1, 9, 4, 4) |         >>> input = torch.empty(1, 9, 4, 4) | ||||||
|         >>> output = ps(input) |         >>> output = ps(input) | ||||||
|         >>> print(output.size()) |         >>> print(output.size()) | ||||||
|         torch.Size([1, 1, 12, 12]) |         torch.Size([1, 1, 12, 12]) | ||||||
| @ -1920,7 +1907,7 @@ def pad(input, pad, mode='constant', value=0): | |||||||
|  |  | ||||||
|     Examples:: |     Examples:: | ||||||
|  |  | ||||||
|         >>> t4d = torch.Tensor(3, 3, 4, 2) |         >>> t4d = torch.empty(3, 3, 4, 2) | ||||||
|         >>> p1d = (1, 1) # pad last dim by 1 on each side |         >>> p1d = (1, 1) # pad last dim by 1 on each side | ||||||
|         >>> out = F.pad(t4d, p1d, "constant", 0)  # effectively zero padding |         >>> out = F.pad(t4d, p1d, "constant", 0)  # effectively zero padding | ||||||
|         >>> print(out.data.size()) |         >>> print(out.data.size()) | ||||||
| @ -1929,7 +1916,7 @@ def pad(input, pad, mode='constant', value=0): | |||||||
|         >>> out = F.pad(t4d, p2d, "constant", 0) |         >>> out = F.pad(t4d, p2d, "constant", 0) | ||||||
|         >>> print(out.data.size()) |         >>> print(out.data.size()) | ||||||
|         torch.Size([3, 3, 8, 4]) |         torch.Size([3, 3, 8, 4]) | ||||||
|         >>> t4d = torch.Tensor(3, 3, 4, 2) |         >>> t4d = torch.empty(3, 3, 4, 2) | ||||||
|         >>> p3d = (0, 1, 2, 1, 3, 3) # pad by (0, 1), (2, 1), and (3, 3) |         >>> p3d = (0, 1, 2, 1, 3, 3) # pad by (0, 1), (2, 1), and (3, 3) | ||||||
|         >>> out = F.pad(t4d, p3d, "constant", 0) |         >>> out = F.pad(t4d, p3d, "constant", 0) | ||||||
|         >>> print(out.data.size()) |         >>> print(out.data.size()) | ||||||
|  | |||||||
| @ -57,7 +57,7 @@ def uniform_(tensor, a=0, b=1): | |||||||
|         b: the upper bound of the uniform distribution |         b: the upper bound of the uniform distribution | ||||||
|  |  | ||||||
|     Examples: |     Examples: | ||||||
|         >>> w = torch.Tensor(3, 5) |         >>> w = torch.empty(3, 5) | ||||||
|         >>> nn.init.uniform_(w) |         >>> nn.init.uniform_(w) | ||||||
|     """ |     """ | ||||||
|     with torch.no_grad(): |     with torch.no_grad(): | ||||||
| @ -74,7 +74,7 @@ def normal_(tensor, mean=0, std=1): | |||||||
|         std: the standard deviation of the normal distribution |         std: the standard deviation of the normal distribution | ||||||
|  |  | ||||||
|     Examples: |     Examples: | ||||||
|         >>> w = torch.Tensor(3, 5) |         >>> w = torch.empty(3, 5) | ||||||
|         >>> nn.init.normal_(w) |         >>> nn.init.normal_(w) | ||||||
|     """ |     """ | ||||||
|     with torch.no_grad(): |     with torch.no_grad(): | ||||||
| @ -89,7 +89,7 @@ def constant_(tensor, val): | |||||||
|         val: the value to fill the tensor with |         val: the value to fill the tensor with | ||||||
|  |  | ||||||
|     Examples: |     Examples: | ||||||
|         >>> w = torch.Tensor(3, 5) |         >>> w = torch.empty(3, 5) | ||||||
|         >>> nn.init.constant_(w, 0.3) |         >>> nn.init.constant_(w, 0.3) | ||||||
|     """ |     """ | ||||||
|     with torch.no_grad(): |     with torch.no_grad(): | ||||||
| @ -105,7 +105,7 @@ def eye_(tensor): | |||||||
|         tensor: a 2-dimensional `torch.Tensor` |         tensor: a 2-dimensional `torch.Tensor` | ||||||
|  |  | ||||||
|     Examples: |     Examples: | ||||||
|         >>> w = torch.Tensor(3, 5) |         >>> w = torch.empty(3, 5) | ||||||
|         >>> nn.init.eye_(w) |         >>> nn.init.eye_(w) | ||||||
|     """ |     """ | ||||||
|     if tensor.ndimension() != 2: |     if tensor.ndimension() != 2: | ||||||
| @ -125,7 +125,7 @@ def dirac_(tensor): | |||||||
|         tensor: a {3, 4, 5}-dimensional `torch.Tensor` |         tensor: a {3, 4, 5}-dimensional `torch.Tensor` | ||||||
|  |  | ||||||
|     Examples: |     Examples: | ||||||
|         >>> w = torch.Tensor(3, 16, 5, 5) |         >>> w = torch.empty(3, 16, 5, 5) | ||||||
|         >>> nn.init.dirac_(w) |         >>> nn.init.dirac_(w) | ||||||
|     """ |     """ | ||||||
|     dimensions = tensor.ndimension() |     dimensions = tensor.ndimension() | ||||||
| @ -184,7 +184,7 @@ def xavier_uniform_(tensor, gain=1): | |||||||
|         gain: an optional scaling factor |         gain: an optional scaling factor | ||||||
|  |  | ||||||
|     Examples: |     Examples: | ||||||
|         >>> w = torch.Tensor(3, 5) |         >>> w = torch.empty(3, 5) | ||||||
|         >>> nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain('relu')) |         >>> nn.init.xavier_uniform_(w, gain=nn.init.calculate_gain('relu')) | ||||||
|     """ |     """ | ||||||
|     fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) |     fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) | ||||||
| @ -211,7 +211,7 @@ def xavier_normal_(tensor, gain=1): | |||||||
|         gain: an optional scaling factor |         gain: an optional scaling factor | ||||||
|  |  | ||||||
|     Examples: |     Examples: | ||||||
|         >>> w = torch.Tensor(3, 5) |         >>> w = torch.empty(3, 5) | ||||||
|         >>> nn.init.xavier_normal_(w) |         >>> nn.init.xavier_normal_(w) | ||||||
|     """ |     """ | ||||||
|     fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) |     fan_in, fan_out = _calculate_fan_in_and_fan_out(tensor) | ||||||
| @ -254,7 +254,7 @@ def kaiming_uniform_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu'): | |||||||
|             recommended to use only with 'relu' or 'leaky_relu' (default). |             recommended to use only with 'relu' or 'leaky_relu' (default). | ||||||
|  |  | ||||||
|     Examples: |     Examples: | ||||||
|         >>> w = torch.Tensor(3, 5) |         >>> w = torch.empty(3, 5) | ||||||
|         >>> nn.init.kaiming_uniform_(w, mode='fan_in', nonlinearity='relu') |         >>> nn.init.kaiming_uniform_(w, mode='fan_in', nonlinearity='relu') | ||||||
|     """ |     """ | ||||||
|     fan = _calculate_correct_fan(tensor, mode) |     fan = _calculate_correct_fan(tensor, mode) | ||||||
| @ -289,7 +289,7 @@ def kaiming_normal_(tensor, a=0, mode='fan_in', nonlinearity='leaky_relu'): | |||||||
|             recommended to use only with 'relu' or 'leaky_relu' (default). |             recommended to use only with 'relu' or 'leaky_relu' (default). | ||||||
|  |  | ||||||
|     Examples: |     Examples: | ||||||
|         >>> w = torch.Tensor(3, 5) |         >>> w = torch.empty(3, 5) | ||||||
|         >>> nn.init.kaiming_normal_(w, mode='fan_out', nonlinearity='relu') |         >>> nn.init.kaiming_normal_(w, mode='fan_out', nonlinearity='relu') | ||||||
|     """ |     """ | ||||||
|     fan = _calculate_correct_fan(tensor, mode) |     fan = _calculate_correct_fan(tensor, mode) | ||||||
| @ -311,7 +311,7 @@ def orthogonal_(tensor, gain=1): | |||||||
|         gain: optional scaling factor |         gain: optional scaling factor | ||||||
|  |  | ||||||
|     Examples: |     Examples: | ||||||
|         >>> w = torch.Tensor(3, 5) |         >>> w = torch.empty(3, 5) | ||||||
|         >>> nn.init.orthogonal_(w) |         >>> nn.init.orthogonal_(w) | ||||||
|     """ |     """ | ||||||
|     if tensor.ndimension() < 2: |     if tensor.ndimension() < 2: | ||||||
| @ -353,7 +353,7 @@ def sparse_(tensor, sparsity, std=0.01): | |||||||
|             the non-zero values |             the non-zero values | ||||||
|  |  | ||||||
|     Examples: |     Examples: | ||||||
|         >>> w = torch.Tensor(3, 5) |         >>> w = torch.empty(3, 5) | ||||||
|         >>> nn.init.sparse_(w, sparsity=0.1) |         >>> nn.init.sparse_(w, sparsity=0.1) | ||||||
|     """ |     """ | ||||||
|     if tensor.ndimension() != 2: |     if tensor.ndimension() != 2: | ||||||
|  | |||||||
| @ -166,7 +166,7 @@ class NLLLoss(_WeightedLoss): | |||||||
|         >>> # input is of size N x C = 3 x 5 |         >>> # input is of size N x C = 3 x 5 | ||||||
|         >>> input = torch.randn(3, 5, requires_grad=True) |         >>> input = torch.randn(3, 5, requires_grad=True) | ||||||
|         >>> # each element in target has to have 0 <= value < C |         >>> # each element in target has to have 0 <= value < C | ||||||
|         >>> target = torch.LongTensor([1, 0, 4]) |         >>> target = torch.tensor([1, 0, 4]) | ||||||
|         >>> output = loss(m(input), target) |         >>> output = loss(m(input), target) | ||||||
|         >>> output.backward() |         >>> output.backward() | ||||||
|         >>> |         >>> | ||||||
| @ -178,7 +178,7 @@ class NLLLoss(_WeightedLoss): | |||||||
|         >>> data = torch.randn(N, 16, 10, 10) |         >>> data = torch.randn(N, 16, 10, 10) | ||||||
|         >>> m = nn.Conv2d(16, C, (3, 3)) |         >>> m = nn.Conv2d(16, C, (3, 3)) | ||||||
|         >>> # each element in target has to have 0 <= value < C |         >>> # each element in target has to have 0 <= value < C | ||||||
|         >>> target = torch.LongTensor(N, 8, 8).random_(0, C) |         >>> target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, C) | ||||||
|         >>> output = loss(m(data), target) |         >>> output = loss(m(data), target) | ||||||
|         >>> output.backward() |         >>> output.backward() | ||||||
|     """ |     """ | ||||||
| @ -419,7 +419,7 @@ class BCELoss(_WeightedLoss): | |||||||
|         >>> m = nn.Sigmoid() |         >>> m = nn.Sigmoid() | ||||||
|         >>> loss = nn.BCELoss() |         >>> loss = nn.BCELoss() | ||||||
|         >>> input = torch.randn(3, requires_grad=True) |         >>> input = torch.randn(3, requires_grad=True) | ||||||
|         >>> target = torch.FloatTensor(3).random_(2) |         >>> target = torch.empty(3).random_(2) | ||||||
|         >>> output = loss(m(input), target) |         >>> output = loss(m(input), target) | ||||||
|         >>> output.backward() |         >>> output.backward() | ||||||
|     """ |     """ | ||||||
| @ -480,7 +480,7 @@ class BCEWithLogitsLoss(_Loss): | |||||||
|  |  | ||||||
|         >>> loss = nn.BCEWithLogitsLoss() |         >>> loss = nn.BCEWithLogitsLoss() | ||||||
|         >>> input = torch.randn(3, requires_grad=True) |         >>> input = torch.randn(3, requires_grad=True) | ||||||
|         >>> target = torch.FloatTensor(3).random_(2) |         >>> target = torch.empty(3).random_(2) | ||||||
|         >>> output = loss(input, target) |         >>> output = loss(input, target) | ||||||
|         >>> output.backward() |         >>> output.backward() | ||||||
|     """ |     """ | ||||||
| @ -744,7 +744,7 @@ class CrossEntropyLoss(_WeightedLoss): | |||||||
|  |  | ||||||
|         >>> loss = nn.CrossEntropyLoss() |         >>> loss = nn.CrossEntropyLoss() | ||||||
|         >>> input = torch.randn(3, 5, requires_grad=True) |         >>> input = torch.randn(3, 5, requires_grad=True) | ||||||
|         >>> target = torch.LongTensor(3).random_(5) |         >>> target = torch.empty(3, dtype=torch.long).random_(5) | ||||||
|         >>> output = loss(input, target) |         >>> output = loss(input, target) | ||||||
|         >>> output.backward() |         >>> output.backward() | ||||||
|     """ |     """ | ||||||
|  | |||||||
| @ -211,17 +211,13 @@ class Module(object): | |||||||
|             >>> net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2)) |             >>> net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2)) | ||||||
|             >>> net.apply(init_weights) |             >>> net.apply(init_weights) | ||||||
|             Linear(in_features=2, out_features=2, bias=True) |             Linear(in_features=2, out_features=2, bias=True) | ||||||
|  |             Parameter containing: | ||||||
|              1  1 |             tensor([[ 1.,  1.], | ||||||
|              1  1 |                     [ 1.,  1.]]) | ||||||
|             [torch.FloatTensor of size (2,2)] |  | ||||||
|  |  | ||||||
|             Linear(in_features=2, out_features=2, bias=True) |             Linear(in_features=2, out_features=2, bias=True) | ||||||
|  |             Parameter containing: | ||||||
|              1  1 |             tensor([[ 1.,  1.], | ||||||
|              1  1 |                     [ 1.,  1.]]) | ||||||
|             [torch.FloatTensor of size (2,2)] |  | ||||||
|  |  | ||||||
|             Sequential( |             Sequential( | ||||||
|               (0): Linear(in_features=2, out_features=2, bias=True) |               (0): Linear(in_features=2, out_features=2, bias=True) | ||||||
|               (1): Linear(in_features=2, out_features=2, bias=True) |               (1): Linear(in_features=2, out_features=2, bias=True) | ||||||
| @ -230,7 +226,6 @@ class Module(object): | |||||||
|               (0): Linear(in_features=2, out_features=2, bias=True) |               (0): Linear(in_features=2, out_features=2, bias=True) | ||||||
|               (1): Linear(in_features=2, out_features=2, bias=True) |               (1): Linear(in_features=2, out_features=2, bias=True) | ||||||
|             ) |             ) | ||||||
|  |  | ||||||
|         """ |         """ | ||||||
|         for module in self.children(): |         for module in self.children(): | ||||||
|             module.apply(fn) |             module.apply(fn) | ||||||
|  | |||||||
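The `net.apply(init_weights)` example in the hunk above relies on an `init_weights` helper defined earlier in the docstring, outside this hunk. A minimal sketch consistent with the printed output (each `nn.Linear` echoed and its weights filled with ones) might look like:

    import torch.nn as nn

    def init_weights(m):
        print(m)
        if type(m) == nn.Linear:
            # fill the Linear weights with 1.0, matching the tensors printed above
            m.weight.data.fill_(1.0)
            print(m.weight)

    net = nn.Sequential(nn.Linear(2, 2), nn.Linear(2, 2))
    net.apply(init_weights)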
| @ -23,7 +23,7 @@ class PixelShuffle(Module): | |||||||
|     Examples:: |     Examples:: | ||||||
|  |  | ||||||
|         >>> ps = nn.PixelShuffle(3) |         >>> ps = nn.PixelShuffle(3) | ||||||
|         >>> input = torch.Tensor(1, 9, 4, 4) |         >>> input = torch.empty(1, 9, 4, 4) | ||||||
|         >>> output = ps(input) |         >>> output = ps(input) | ||||||
|         >>> print(output.size()) |         >>> print(output.size()) | ||||||
|         torch.Size([1, 1, 12, 12]) |         torch.Size([1, 1, 12, 12]) | ||||||
|  | |||||||
| @ -256,29 +256,19 @@ class MaxUnpool1d(_MaxUnpoolNd): | |||||||
|  |  | ||||||
|         >>> pool = nn.MaxPool1d(2, stride=2, return_indices=True) |         >>> pool = nn.MaxPool1d(2, stride=2, return_indices=True) | ||||||
|         >>> unpool = nn.MaxUnpool1d(2, stride=2) |         >>> unpool = nn.MaxUnpool1d(2, stride=2) | ||||||
|         >>> input = torch.Tensor([[[1, 2, 3, 4, 5, 6, 7, 8]]]) |         >>> input = torch.tensor([[[1., 2, 3, 4, 5, 6, 7, 8]]]) | ||||||
|         >>> output, indices = pool(input) |         >>> output, indices = pool(input) | ||||||
|         >>> unpool(output, indices) |         >>> unpool(output, indices) | ||||||
|  |         tensor([[[ 0.,  2.,  0.,  4.,  0.,  6.,  0., 8.]]]) | ||||||
|         (0 ,.,.) = |  | ||||||
|            0   2   0   4   0   6   0   8 |  | ||||||
|         [torch.FloatTensor of size (1,1,8)] |  | ||||||
|  |  | ||||||
|         >>> # Example showcasing the use of output_size |         >>> # Example showcasing the use of output_size | ||||||
|         >>> input = torch.Tensor([[[1, 2, 3, 4, 5, 6, 7, 8, 9]]]) |         >>> input = torch.tensor([[[1., 2, 3, 4, 5, 6, 7, 8, 9]]]) | ||||||
|         >>> output, indices = pool(input) |         >>> output, indices = pool(input) | ||||||
|         >>> unpool(output, indices, output_size=input.size()) |         >>> unpool(output, indices, output_size=input.size()) | ||||||
|  |         tensor([[[ 0.,  2.,  0.,  4.,  0.,  6.,  0., 8.,  0.]]]) | ||||||
|         (0 ,.,.) = |  | ||||||
|            0   2   0   4   0   6   0   8   0 |  | ||||||
|         [torch.FloatTensor of size (1,1,9)] |  | ||||||
|  |  | ||||||
|         >>> unpool(output, indices) |         >>> unpool(output, indices) | ||||||
|  |         tensor([[[ 0.,  2.,  0.,  4.,  0.,  6.,  0., 8.]]]) | ||||||
|         (0 ,.,.) = |  | ||||||
|            0   2   0   4   0   6   0   8 |  | ||||||
|         [torch.FloatTensor of size (1,1,8)] |  | ||||||
|  |  | ||||||
|     """ |     """ | ||||||
|  |  | ||||||
|     def __init__(self, kernel_size, stride=None, padding=0): |     def __init__(self, kernel_size, stride=None, padding=0): | ||||||
| @ -333,31 +323,24 @@ class MaxUnpool2d(_MaxUnpoolNd): | |||||||
|  |  | ||||||
|         >>> pool = nn.MaxPool2d(2, stride=2, return_indices=True) |         >>> pool = nn.MaxPool2d(2, stride=2, return_indices=True) | ||||||
|         >>> unpool = nn.MaxUnpool2d(2, stride=2) |         >>> unpool = nn.MaxUnpool2d(2, stride=2) | ||||||
|         >>> input = torch.Tensor([[[[ 1,  2,  3,  4], |         >>> input = torch.tensor([[[[ 1.,  2,  3,  4], | ||||||
|                                     [ 5,  6,  7,  8], |                                     [ 5,  6,  7,  8], | ||||||
|                                     [ 9, 10, 11, 12], |                                     [ 9, 10, 11, 12], | ||||||
|                                     [13, 14, 15, 16]]]]) |                                     [13, 14, 15, 16]]]]) | ||||||
|         >>> output, indices = pool(input) |         >>> output, indices = pool(input) | ||||||
|         >>> unpool(output, indices) |         >>> unpool(output, indices) | ||||||
|  |         tensor([[[[  0.,   0.,   0.,   0.], | ||||||
|         (0 ,0 ,.,.) = |                   [  0.,   6.,   0.,   8.], | ||||||
|            0   0   0   0 |                   [  0.,   0.,   0.,   0.], | ||||||
|            0   6   0   8 |                   [  0.,  14.,   0.,  16.]]]]) | ||||||
|            0   0   0   0 |  | ||||||
|            0  14   0  16 |  | ||||||
|         [torch.FloatTensor of size (1,1,4,4)] |  | ||||||
|  |  | ||||||
|         >>> # specify a different output size than input size |         >>> # specify a different output size than input size | ||||||
|         >>> unpool(output, indices, output_size=torch.Size([1, 1, 5, 5])) |         >>> unpool(output, indices, output_size=torch.Size([1, 1, 5, 5])) | ||||||
|  |         tensor([[[[  0.,   0.,   0.,   0.,   0.], | ||||||
|         (0 ,0 ,.,.) = |                   [  6.,   0.,   8.,   0.,   0.], | ||||||
|            0   0   0   0   0 |                   [  0.,   0.,   0.,  14.,   0.], | ||||||
|            6   0   8   0   0 |                   [ 16.,   0.,   0.,   0.,   0.], | ||||||
|            0   0   0  14   0 |                   [  0.,   0.,   0.,   0.,   0.]]]]) | ||||||
|           16   0   0   0   0 |  | ||||||
|            0   0   0   0   0 |  | ||||||
|         [torch.FloatTensor of size (1,1,5,5)] |  | ||||||
|  |  | ||||||
|     """ |     """ | ||||||
|  |  | ||||||
|     def __init__(self, kernel_size, stride=None, padding=0): |     def __init__(self, kernel_size, stride=None, padding=0): | ||||||
| @ -479,11 +462,8 @@ class AvgPool1d(_AvgPoolNd): | |||||||
|  |  | ||||||
|         >>> # pool with window of size=3, stride=2 |         >>> # pool with window of size=3, stride=2 | ||||||
|         >>> m = nn.AvgPool1d(3, stride=2) |         >>> m = nn.AvgPool1d(3, stride=2) | ||||||
|         >>> m(torch.Tensor([[[1,2,3,4,5,6,7]]])) |         >>> m(torch.tensor([[[1.,2,3,4,5,6,7]]])) | ||||||
|  |         tensor([[[ 2.,  4.,  6.]]]) | ||||||
|         (0 ,.,.) = |  | ||||||
|           2  4  6 |  | ||||||
|         [torch.FloatTensor of size (1,1,3)] |  | ||||||
|     """ |     """ | ||||||
|  |  | ||||||
|     def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False, |     def __init__(self, kernel_size, stride=None, padding=0, ceil_mode=False, | ||||||
|  | |||||||
| @ -51,32 +51,25 @@ class Embedding(Module): | |||||||
|         >>> # a batch of 2 samples of 4 indices each |         >>> # a batch of 2 samples of 4 indices each | ||||||
|         >>> input = torch.LongTensor([[1,2,4,5],[4,3,2,9]]) |         >>> input = torch.LongTensor([[1,2,4,5],[4,3,2,9]]) | ||||||
|         >>> embedding(input) |         >>> embedding(input) | ||||||
|  |         tensor([[[-0.0251, -1.6902,  0.7172], | ||||||
|  |                  [-0.6431,  0.0748,  0.6969], | ||||||
|  |                  [ 1.4970,  1.3448, -0.9685], | ||||||
|  |                  [-0.3677, -2.7265, -0.1685]], | ||||||
|  |  | ||||||
|         (0 ,.,.) = |                 [[ 1.4970,  1.3448, -0.9685], | ||||||
|          -1.0822  1.2522  0.2434 |                  [ 0.4362, -0.4004,  0.9400], | ||||||
|           0.8393 -0.6062 -0.3348 |                  [-0.6431,  0.0748,  0.6969], | ||||||
|           0.6597  0.0350  0.0837 |                  [ 0.9124, -2.3616,  1.1151]]]) | ||||||
|           0.5521  0.9447  0.0498 |  | ||||||
|  |  | ||||||
|         (1 ,.,.) = |  | ||||||
|           0.6597  0.0350  0.0837 |  | ||||||
|          -0.1527  0.0877  0.4260 |  | ||||||
|           0.8393 -0.6062 -0.3348 |  | ||||||
|          -0.8738 -0.9054  0.4281 |  | ||||||
|         [torch.FloatTensor of size (2,4,3)] |  | ||||||
|  |  | ||||||
|         >>> # example with padding_idx |         >>> # example with padding_idx | ||||||
|         >>> embedding = nn.Embedding(10, 3, padding_idx=0) |         >>> embedding = nn.Embedding(10, 3, padding_idx=0) | ||||||
|         >>> input = torch.LongTensor([[0,2,0,5]]) |         >>> input = torch.LongTensor([[0,2,0,5]]) | ||||||
|         >>> embedding(input) |         >>> embedding(input) | ||||||
|  |         tensor([[[ 0.0000,  0.0000,  0.0000], | ||||||
|         (0 ,.,.) = |                  [ 0.1535, -2.0309,  0.9315], | ||||||
|           0.0000  0.0000  0.0000 |                  [ 0.0000,  0.0000,  0.0000], | ||||||
|           0.3452  0.4937 -0.9361 |                  [-0.1655,  0.9897,  0.0635]]]) | ||||||
|           0.0000  0.0000  0.0000 |  | ||||||
|           0.0706 -2.1962 -0.6276 |  | ||||||
|         [torch.FloatTensor of size (1,4,3)] |  | ||||||
|  |  | ||||||
|     """ |     """ | ||||||
|  |  | ||||||
|     def __init__(self, num_embeddings, embedding_dim, padding_idx=None, |     def __init__(self, num_embeddings, embedding_dim, padding_idx=None, | ||||||
| @ -140,15 +133,13 @@ class Embedding(Module): | |||||||
|  |  | ||||||
|         Examples:: |         Examples:: | ||||||
|  |  | ||||||
|             >> # FloatTensor containing pretrained weights |             >>> # FloatTensor containing pretrained weights | ||||||
|             >> weight = torch.FloatTensor([[1, 2.3, 3], [4, 5.1, 6.3]]) |             >>> weight = torch.FloatTensor([[1, 2.3, 3], [4, 5.1, 6.3]]) | ||||||
|             >> embedding = nn.Embedding.from_pretrained(weight) |             >>> embedding = nn.Embedding.from_pretrained(weight) | ||||||
|             >> # Get embeddings for index 1 |             >>> # Get embeddings for index 1 | ||||||
|             >> input = torch.LongTensor([1]) |             >>> input = torch.LongTensor([1]) | ||||||
|             >> embedding(input) |             >>> embedding(input) | ||||||
|  |             tensor([[ 4.0000,  5.1000,  6.3000]]) | ||||||
|              4.0000  5.1000  6.3000 |  | ||||||
|             [torch.FloatTensor of size (1,3)] |  | ||||||
|         """ |         """ | ||||||
|         assert embeddings.dim() == 2, \ |         assert embeddings.dim() == 2, \ | ||||||
|             'Embeddings parameter is expected to be 2-dimensional' |             'Embeddings parameter is expected to be 2-dimensional' | ||||||
| @ -215,11 +206,8 @@ class EmbeddingBag(Module): | |||||||
|         >>> input = torch.LongTensor([1,2,4,5,4,3,2,9]) |         >>> input = torch.LongTensor([1,2,4,5,4,3,2,9]) | ||||||
|         >>> offsets = torch.LongTensor([0,4]) |         >>> offsets = torch.LongTensor([0,4]) | ||||||
|         >>> embedding_sum(input, offsets) |         >>> embedding_sum(input, offsets) | ||||||
|  |         tensor([[-0.8861, -5.4350, -0.0523], | ||||||
|         -0.7296 -4.6926  0.3295 |                 [ 1.1306, -2.5798, -1.0044]]) | ||||||
|         -0.5186 -0.5631 -0.2792 |  | ||||||
|         [torch.FloatTensor of size (2,3)] |  | ||||||
|  |  | ||||||
|     """ |     """ | ||||||
|  |  | ||||||
|     def __init__(self, num_embeddings, embedding_dim, |     def __init__(self, num_embeddings, embedding_dim, | ||||||
|  | |||||||
| @ -52,80 +52,60 @@ class Upsample(Module): | |||||||
|  |  | ||||||
|         >>> input = torch.arange(1, 5).view(1, 1, 2, 2) |         >>> input = torch.arange(1, 5).view(1, 1, 2, 2) | ||||||
|         >>> input |         >>> input | ||||||
|  |         tensor([[[[ 1.,  2.], | ||||||
|         (0 ,0 ,.,.) = |                   [ 3.,  4.]]]]) | ||||||
|           1  2 |  | ||||||
|           3  4 |  | ||||||
|         [torch.FloatTensor of size (1,1,2,2)] |  | ||||||
|  |  | ||||||
|         >>> m = nn.Upsample(scale_factor=2, mode='nearest') |         >>> m = nn.Upsample(scale_factor=2, mode='nearest') | ||||||
|         >>> m(input) |         >>> m(input) | ||||||
|  |         tensor([[[[ 1.,  1.,  2.,  2.], | ||||||
|         (0 ,0 ,.,.) = |                   [ 1.,  1.,  2.,  2.], | ||||||
|           1  1  2  2 |                   [ 3.,  3.,  4.,  4.], | ||||||
|           1  1  2  2 |                   [ 3.,  3.,  4.,  4.]]]]) | ||||||
|           3  3  4  4 |  | ||||||
|           3  3  4  4 |  | ||||||
|         [torch.FloatTensor of size (1,1,4,4)] |  | ||||||
|  |  | ||||||
|         >>> m = nn.Upsample(scale_factor=2, mode='bilinear')  # align_corners=False |         >>> m = nn.Upsample(scale_factor=2, mode='bilinear')  # align_corners=False | ||||||
|         >>> m(input) |         >>> m(input) | ||||||
|  |         tensor([[[[ 1.0000,  1.2500,  1.7500,  2.0000], | ||||||
|         (0 ,0 ,.,.) = |                   [ 1.5000,  1.7500,  2.2500,  2.5000], | ||||||
|           1.0000  1.2500  1.7500  2.0000 |                   [ 2.5000,  2.7500,  3.2500,  3.5000], | ||||||
|           1.5000  1.7500  2.2500  2.5000 |                   [ 3.0000,  3.2500,  3.7500,  4.0000]]]]) | ||||||
|           2.5000  2.7500  3.2500  3.5000 |  | ||||||
|           3.0000  3.2500  3.7500  4.0000 |  | ||||||
|         [torch.FloatTensor of size (1,1,4,4)] |  | ||||||
|  |  | ||||||
|         >>> m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) |         >>> m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) | ||||||
|         >>> m(input) |         >>> m(input) | ||||||
|  |         tensor([[[[ 1.0000,  1.3333,  1.6667,  2.0000], | ||||||
|         (0 ,0 ,.,.) = |                   [ 1.6667,  2.0000,  2.3333,  2.6667], | ||||||
|           1.0000  1.3333  1.6667  2.0000 |                   [ 2.3333,  2.6667,  3.0000,  3.3333], | ||||||
|           1.6667  2.0000  2.3333  2.6667 |                   [ 3.0000,  3.3333,  3.6667,  4.0000]]]]) | ||||||
|           2.3333  2.6667  3.0000  3.3333 |  | ||||||
|           3.0000  3.3333  3.6667  4.0000 |  | ||||||
|         [torch.FloatTensor of size (1,1,4,4)] |  | ||||||
|  |  | ||||||
|         >>> # Try scaling the same data in a larger tensor |         >>> # Try scaling the same data in a larger tensor | ||||||
|         >>> |         >>> | ||||||
|         >>> input_3x3 = torch.zeros(3, 3).view(1, 1, 3, 3) |         >>> input_3x3 = torch.zeros(3, 3).view(1, 1, 3, 3) | ||||||
|         >>> input_3x3[:, :, :2, :2].copy_(input) |         >>> input_3x3[:, :, :2, :2].copy_(input) | ||||||
|  |         tensor([[[[ 1.,  2.], | ||||||
|  |                   [ 3.,  4.]]]]) | ||||||
|         >>> input_3x3 |         >>> input_3x3 | ||||||
|  |         tensor([[[[ 1.,  2.,  0.], | ||||||
|         (0 ,0 ,.,.) = |                   [ 3.,  4.,  0.], | ||||||
|           1  2  0 |                   [ 0.,  0.,  0.]]]]) | ||||||
|           3  4  0 |  | ||||||
|           0  0  0 |  | ||||||
|         [torch.FloatTensor of size (1,1,3,3)] |  | ||||||
|  |  | ||||||
|         >>> m = nn.Upsample(scale_factor=2, mode='bilinear')  # align_corners=False |         >>> m = nn.Upsample(scale_factor=2, mode='bilinear')  # align_corners=False | ||||||
|         >>> # Notice that values in top left corner are the same with the small input (except at boundary) |         >>> # Notice that values in top left corner are the same with the small input (except at boundary) | ||||||
|         >>> m(input_3x3) |         >>> m(input_3x3) | ||||||
|  |         tensor([[[[ 1.0000,  1.2500,  1.7500,  1.5000,  0.5000,  0.0000], | ||||||
|         (0 ,0 ,.,.) = |                   [ 1.5000,  1.7500,  2.2500,  1.8750,  0.6250,  0.0000], | ||||||
|           1.0000  1.2500  1.7500  1.5000  0.5000  0.0000 |                   [ 2.5000,  2.7500,  3.2500,  2.6250,  0.8750,  0.0000], | ||||||
|           1.5000  1.7500  2.2500  1.8750  0.6250  0.0000 |                   [ 2.2500,  2.4375,  2.8125,  2.2500,  0.7500,  0.0000], | ||||||
|           2.5000  2.7500  3.2500  2.6250  0.8750  0.0000 |                   [ 0.7500,  0.8125,  0.9375,  0.7500,  0.2500,  0.0000], | ||||||
|           2.2500  2.4375  2.8125  2.2500  0.7500  0.0000 |                   [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]]]]) | ||||||
|           0.7500  0.8125  0.9375  0.7500  0.2500  0.0000 |  | ||||||
|           0.0000  0.0000  0.0000  0.0000  0.0000  0.0000 |  | ||||||
|         [torch.FloatTensor of size (1,1,6,6)] |  | ||||||
|  |  | ||||||
|         >>> m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) |         >>> m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) | ||||||
|         >>> # Notice that values in top left corner are now changed |         >>> # Notice that values in top left corner are now changed | ||||||
|         >>> m(input_3x3) |         >>> m(input_3x3) | ||||||
|  |         tensor([[[[ 1.0000,  1.4000,  1.8000,  1.6000,  0.8000,  0.0000], | ||||||
|         (0 ,0 ,.,.) = |                   [ 1.8000,  2.2000,  2.6000,  2.2400,  1.1200,  0.0000], | ||||||
|           1.0000  1.4000  1.8000  1.6000  0.8000  0.0000 |                   [ 2.6000,  3.0000,  3.4000,  2.8800,  1.4400,  0.0000], | ||||||
|           1.8000  2.2000  2.6000  2.2400  1.1200  0.0000 |                   [ 2.4000,  2.7200,  3.0400,  2.5600,  1.2800,  0.0000], | ||||||
|           2.6000  3.0000  3.4000  2.8800  1.4400  0.0000 |                   [ 1.2000,  1.3600,  1.5200,  1.2800,  0.6400,  0.0000], | ||||||
|           2.4000  2.7200  3.0400  2.5600  1.2800  0.0000 |                   [ 0.0000,  0.0000,  0.0000,  0.0000,  0.0000,  0.0000]]]]) | ||||||
|           1.2000  1.3600  1.5200  1.2800  0.6400  0.0000 |  | ||||||
|           0.0000  0.0000  0.0000  0.0000  0.0000  0.0000 |  | ||||||
|         [torch.FloatTensor of size (1,1,6,6)] |  | ||||||
|  |  | ||||||
|     """ |     """ | ||||||
|  |  | ||||||
|     def __init__(self, size=None, scale_factor=None, mode='nearest', align_corners=None): |     def __init__(self, size=None, scale_factor=None, mode='nearest', align_corners=None): | ||||||
| @ -176,22 +156,15 @@ class UpsamplingNearest2d(Upsample): | |||||||
|  |  | ||||||
|         >>> input = torch.arange(1, 5).view(1, 1, 2, 2) |         >>> input = torch.arange(1, 5).view(1, 1, 2, 2) | ||||||
|         >>> input |         >>> input | ||||||
|  |         tensor([[[[ 1.,  2.], | ||||||
|         (0 ,0 ,.,.) = |                   [ 3.,  4.]]]]) | ||||||
|           1  2 |  | ||||||
|           3  4 |  | ||||||
|         [torch.FloatTensor of size (1,1,2,2)] |  | ||||||
|  |  | ||||||
|         >>> m = nn.UpsamplingNearest2d(scale_factor=2) |         >>> m = nn.UpsamplingNearest2d(scale_factor=2) | ||||||
|         >>> m(input) |         >>> m(input) | ||||||
|  |         tensor([[[[ 1.,  1.,  2.,  2.], | ||||||
|         (0 ,0 ,.,.) = |                   [ 1.,  1.,  2.,  2.], | ||||||
|           1  1  2  2 |                   [ 3.,  3.,  4.,  4.], | ||||||
|           1  1  2  2 |                   [ 3.,  3.,  4.,  4.]]]]) | ||||||
|           3  3  4  4 |  | ||||||
|           3  3  4  4 |  | ||||||
|         [torch.FloatTensor of size (1,1,4,4)] |  | ||||||
|  |  | ||||||
|     """ |     """ | ||||||
|     def __init__(self, size=None, scale_factor=None): |     def __init__(self, size=None, scale_factor=None): | ||||||
|         super(UpsamplingNearest2d, self).__init__(size, scale_factor, mode='nearest') |         super(UpsamplingNearest2d, self).__init__(size, scale_factor, mode='nearest') | ||||||
| @ -231,22 +204,15 @@ class UpsamplingBilinear2d(Upsample): | |||||||
|  |  | ||||||
|         >>> input = torch.arange(1, 5).view(1, 1, 2, 2) |         >>> input = torch.arange(1, 5).view(1, 1, 2, 2) | ||||||
|         >>> input |         >>> input | ||||||
|  |         tensor([[[[ 1.,  2.], | ||||||
|         (0 ,0 ,.,.) = |                   [ 3.,  4.]]]]) | ||||||
|           1  2 |  | ||||||
|           3  4 |  | ||||||
|         [torch.FloatTensor of size (1,1,2,2)] |  | ||||||
|  |  | ||||||
|         >>> m = nn.UpsamplingBilinear2d(scale_factor=2) |         >>> m = nn.UpsamplingBilinear2d(scale_factor=2) | ||||||
|         >>> m(input) |         >>> m(input) | ||||||
|  |         tensor([[[[ 1.0000,  1.3333,  1.6667,  2.0000], | ||||||
|         (0 ,0 ,.,.) = |                   [ 1.6667,  2.0000,  2.3333,  2.6667], | ||||||
|           1.0000  1.3333  1.6667  2.0000 |                   [ 2.3333,  2.6667,  3.0000,  3.3333], | ||||||
|           1.6667  2.0000  2.3333  2.6667 |                   [ 3.0000,  3.3333,  3.6667,  4.0000]]]]) | ||||||
|           2.3333  2.6667  3.0000  3.3333 |  | ||||||
|           3.0000  3.3333  3.6667  4.0000 |  | ||||||
|         [torch.FloatTensor of size (1,1,4,4)] |  | ||||||
|  |  | ||||||
|     """ |     """ | ||||||
|     def __init__(self, size=None, scale_factor=None): |     def __init__(self, size=None, scale_factor=None): | ||||||
|         super(UpsamplingBilinear2d, self).__init__(size, scale_factor, mode='bilinear', align_corners=True) |         super(UpsamplingBilinear2d, self).__init__(size, scale_factor, mode='bilinear', align_corners=True) | ||||||
|  | |||||||
| @ -318,19 +318,11 @@ def pack_sequence(sequences): | |||||||
|  |  | ||||||
|     Example: |     Example: | ||||||
|         >>> from torch.nn.utils.rnn import pack_sequence |         >>> from torch.nn.utils.rnn import pack_sequence | ||||||
|         >>> a = torch.Tensor([1,2,3]) |         >>> a = torch.tensor([1,2,3]) | ||||||
|         >>> b = torch.Tensor([4,5]) |         >>> b = torch.tensor([4,5]) | ||||||
|         >>> c = torch.Tensor([6]) |         >>> c = torch.tensor([6]) | ||||||
|         >>> pack_sequence([a, b, c]) |         >>> pack_sequence([a, b, c]) | ||||||
|         PackedSequence(data= |         PackedSequence(data=tensor([ 1,  4,  6,  2,  5,  3]), batch_sizes=tensor([ 3,  2,  1])) | ||||||
|          1 |  | ||||||
|          4 |  | ||||||
|          6 |  | ||||||
|          2 |  | ||||||
|          5 |  | ||||||
|          3 |  | ||||||
|         [torch.FloatTensor of size 6] |  | ||||||
|         , batch_sizes=[3, 2, 1]) |  | ||||||
|  |  | ||||||
|  |  | ||||||
|     Arguments: |     Arguments: | ||||||
|  | |||||||
| @ -152,7 +152,7 @@ def save(obj, f, pickle_module=pickle, pickle_protocol=DEFAULT_PROTOCOL): | |||||||
|  |  | ||||||
|     Example: |     Example: | ||||||
|         >>> # Save to file |         >>> # Save to file | ||||||
|         >>> x = torch.Tensor([0, 1, 2, 3, 4]) |         >>> x = torch.tensor([0, 1, 2, 3, 4]) | ||||||
|         >>> torch.save(x, 'tensor.pt') |         >>> torch.save(x, 'tensor.pt') | ||||||
|         >>> # Save to io.BytesIO buffer |         >>> # Save to io.BytesIO buffer | ||||||
|         >>> buffer = io.BytesIO() |         >>> buffer = io.BytesIO() | ||||||
|  | |||||||
| @ -1,4 +1,5 @@ | |||||||
| import torch | import torch | ||||||
|  | import warnings | ||||||
|  |  | ||||||
|  |  | ||||||
| def detach_variable(inputs): | def detach_variable(inputs): | ||||||
| @ -14,10 +15,16 @@ def detach_variable(inputs): | |||||||
|             "Only tuple of tensors is supported. Got Unsupported input type: ", type(inputs).__name__) |             "Only tuple of tensors is supported. Got Unsupported input type: ", type(inputs).__name__) | ||||||
|  |  | ||||||
|  |  | ||||||
|  | def check_backward_validity(inputs): | ||||||
|  |     if not any(inp.requires_grad for inp in inputs): | ||||||
|  |         warnings.warn("None of the inputs have requires_grad=True. Gradients will be None") | ||||||
|  |  | ||||||
|  |  | ||||||
| class CheckpointFunction(torch.autograd.Function): | class CheckpointFunction(torch.autograd.Function): | ||||||
|  |  | ||||||
|     @staticmethod |     @staticmethod | ||||||
|     def forward(ctx, run_function, *args): |     def forward(ctx, run_function, *args): | ||||||
|  |         check_backward_validity(args) | ||||||
|         ctx.run_function = run_function |         ctx.run_function = run_function | ||||||
|         ctx.save_for_backward(*args) |         ctx.save_for_backward(*args) | ||||||
|         with torch.no_grad(): |         with torch.no_grad(): | ||||||
| @ -66,6 +73,11 @@ def checkpoint(function, *args): | |||||||
|         checkpointed version won't be equivalent, and unfortunately it can't be |         checkpointed version won't be equivalent, and unfortunately it can't be | ||||||
|         detected. |         detected. | ||||||
|  |  | ||||||
|  |     .. warning:: | ||||||
|  |         At least one of the inputs needs to have :code:`requires_grad=True` if | ||||||
|  |         grads are needed for model inputs, otherwise the checkpointed part of the | ||||||
|  |         model won't have gradients. | ||||||
|  |  | ||||||
|     Args: |     Args: | ||||||
|         function: describes what to run in the forward pass of the model or |         function: describes what to run in the forward pass of the model or | ||||||
|             part of the model. It should also know how to handle the inputs |             part of the model. It should also know how to handle the inputs | ||||||
| @ -96,6 +108,11 @@ def checkpoint_sequential(functions, segments, *inputs): | |||||||
|         Checkpointing doesn't work with :func:`torch.autograd.grad`, but only |         Checkpointing doesn't work with :func:`torch.autograd.grad`, but only | ||||||
|         with :func:`torch.autograd.backward`. |         with :func:`torch.autograd.backward`. | ||||||
|  |  | ||||||
|  |     .. warning:: | ||||||
|  |         At least one of the inputs needs to have :code:`requires_grad=True` if | ||||||
|  |         grads are needed for model inputs, otherwise the checkpointed part of the | ||||||
|  |         model won't have gradients. | ||||||
|  |  | ||||||
|     Args: |     Args: | ||||||
|         functions: A :class:`torch.nn.Sequential` or the list of modules or |         functions: A :class:`torch.nn.Sequential` or the list of modules or | ||||||
|             functions (comprising the model) to run sequentially. |             functions (comprising the model) to run sequentially. | ||||||
|  | |||||||