Mirror of https://github.com/pytorch/pytorch.git (synced 2025-10-31 12:15:03 +08:00)

Compare commits: ciflow/tru... v0.3.1 (177 commits)
Commits in range (177, SHA1 only): 2b4748011b ... c0931a3a4d.
| @ -202,9 +202,9 @@ MACOSX_DEPLOYMENT_TARGET=10.9 CC=clang CXX=clang++ python setup.py install | ||||
| Dockerfile is supplied to build images with cuda support and cudnn v6. Build as usual | ||||
| ``` | ||||
| docker build -t pytorch . | ||||
|  | ||||
| ``` | ||||
| Dockerfile to build with cuda 9 and cudnn v7 (with Volta support) is in tools/docker, the build command is | ||||
|  | ||||
| ``` | ||||
| docker build -t pytorch_cuda9 -f tools/docker/Dockerfile9 . | ||||
| ``` | ||||
| Alternatively, if you want to use a runtime image, you can use the pre-built one from Docker Hub and run with nvidia-docker: | ||||
|  | ||||
| @ -56,6 +56,12 @@ gradients are correct. | ||||
| Profiler | ||||
| -------- | ||||
|  | ||||
| Autograd includes a profiler that lets you inspect the cost of different | ||||
| operators inside your model - both on the CPU and GPU. There are two modes | ||||
| implemented at the moment - CPU-only using :class:`~torch.autograd.profiler.profile` | ||||
| and nvprof-based (registers both CPU and GPU activity) using | ||||
| :class:`~torch.autograd.profiler.emit_nvtx`. | ||||
|  | ||||
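
A minimal sketch of how the CPU-only mode might be used, assuming a small Variable-based computation (the shapes and the printed summary are purely illustrative):

```python
import torch
from torch.autograd import Variable, profiler

x = Variable(torch.randn(64, 128), requires_grad=True)
w = Variable(torch.randn(128, 32), requires_grad=True)

with profiler.profile() as prof:      # CPU-only mode; emit_nvtx() is the nvprof-based analogue
    y = x.mm(w).tanh().sum()
    y.backward()

print(prof)                           # per-operator timing summary
```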
| .. autoclass:: torch.autograd.profiler.profile | ||||
|     :members: | ||||
|  | ||||
|  | ||||
| @ -37,6 +37,10 @@ Streams and events | ||||
| .. autoclass:: Event | ||||
|    :members: | ||||
|  | ||||
| Memory management | ||||
| ----------------- | ||||
| .. autofunction:: empty_cache | ||||
|  | ||||
| NVIDIA Tools Extension (NVTX) | ||||
| ----------------------------- | ||||
|  | ||||
|  | ||||
| @ -19,10 +19,10 @@ Probability distributions - torch.distributions | ||||
| .. autoclass:: Bernoulli | ||||
|     :members: | ||||
|  | ||||
| :hidden:`Multinomial` | ||||
| :hidden:`Categorical` | ||||
| ~~~~~~~~~~~~~~~~~~~~~~~ | ||||
|  | ||||
| .. autoclass:: Multinomial | ||||
| .. autoclass:: Categorical | ||||
|     :members: | ||||
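
A small usage sketch, assuming the renamed class keeps the ``sample``/``log_prob`` interface (the probabilities and the REINFORCE-style surrogate loss are illustrative):

```python
import torch
from torch.autograd import Variable
from torch.distributions import Categorical

probs = Variable(torch.Tensor([0.1, 0.2, 0.3, 0.4]))
m = Categorical(probs)
action = m.sample()             # draws an index according to probs
loss = -m.log_prob(action)      # surrogate loss for score-function gradients
```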
|  | ||||
| :hidden:`Normal` | ||||
|  | ||||
| @ -3,18 +3,19 @@ | ||||
| CUDA semantics | ||||
| ============== | ||||
|  | ||||
| :mod:`torch.cuda` keeps track of currently selected GPU, and all CUDA tensors | ||||
| you allocate will be created on it. The selected device can be changed with a | ||||
| :mod:`torch.cuda` is used to set up and run CUDA operations. It keeps track of | ||||
| the currently selected GPU, and all CUDA tensors you allocate will by default be | ||||
| created on that device. The selected device can be changed with a | ||||
| :any:`torch.cuda.device` context manager. | ||||
|  | ||||
| However, once a tensor is allocated, you can do operations on it irrespectively | ||||
| of your selected device, and the results will be always placed in on the same | ||||
| However, once a tensor is allocated, you can do operations on it irrespective | ||||
| of the selected device, and the results will always be placed on the same | ||||
| device as the tensor. | ||||
|  | ||||
| Cross-GPU operations are not allowed by default, with the only exception of | ||||
| :meth:`~torch.Tensor.copy_`. Unless you enable peer-to-peer memory accesses, | ||||
| any attempts to launch ops on tensors spread across different devices will | ||||
| raise an error. | ||||
| :meth:`~torch.Tensor.copy_`. Unless you enable peer-to-peer memory access, any | ||||
| attempts to launch ops on tensors spread across different devices will raise an | ||||
| error. | ||||
|  | ||||
| Below you can find a small example showcasing this:: | ||||
|  | ||||
| @ -41,6 +42,66 @@ Below you can find a small example showcasing this:: | ||||
|         d = torch.randn(2).cuda(2) | ||||
|         # d.get_device() == 2 | ||||
|  | ||||
| Asynchronous execution | ||||
| ---------------------- | ||||
|  | ||||
| By default, GPU operations are asynchronous.  When you call a function that | ||||
| uses the GPU, the operations are *enqueued* to the particular device, but not | ||||
| necessarily executed until later.  This allows us to execute more computations | ||||
| in parallel, including operations on CPU or other GPUs. | ||||
|  | ||||
| In general, the effect of asynchronous computation is invisible to the caller, | ||||
| because (1) each device executes operations in the order they are queued, and | ||||
| (2) PyTorch automatically performs necessary synchronization when copying data | ||||
| between CPU and GPU or between two GPUs.  Hence, computation will proceed as if | ||||
| every operation was executed synchronously. | ||||
|  | ||||
| You can force synchronous computation by setting environment variable | ||||
| `CUDA_LAUNCH_BLOCKING=1`.  This can be handy when an error occurs on the GPU. | ||||
| (With asynchronous execution, such an error isn't reported until after the | ||||
| operation is actually executed, so the stack trace does not show where it was | ||||
| requested.) | ||||
|  | ||||
| As an exception, several functions such as :meth:`~torch.Tensor.copy_` admit | ||||
| an explicit :attr:`async` argument, which lets the caller bypass synchronization | ||||
| when it is unnecessary.  Another exception is CUDA streams, explained below. | ||||
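
As an illustration of why this matters in practice, here is a minimal timing sketch; without the explicit synchronization, only the (cheap) kernel launch would be measured. The sizes are arbitrary:

```python
import time
import torch

a = torch.cuda.FloatTensor(4096, 4096).normal_()
b = torch.cuda.FloatTensor(4096, 4096).normal_()

torch.cuda.synchronize()                 # finish any pending work first
start = time.time()
c = a.mm(b)                              # enqueued asynchronously; returns almost immediately
torch.cuda.synchronize()                 # wait until the matmul has actually run
print('elapsed: %.4f s' % (time.time() - start))
```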
|  | ||||
| CUDA streams | ||||
| ^^^^^^^^^^^^ | ||||
|  | ||||
| A `CUDA stream`_ is a linear sequence of execution that belongs to a specific | ||||
| device.  You normally do not need to create one explicitly: by default, each | ||||
| device uses its own "default" stream. | ||||
|  | ||||
| Operations inside each stream are serialized in the order they are created, | ||||
| but operations from different streams can execute concurrently in any | ||||
| relative order, unless explicit synchronization functions (such as | ||||
| :meth:`~torch.cuda.synchronize` or :meth:`~torch.cuda.Stream.wait_stream`) are | ||||
| used.  For example, the following code is incorrect:: | ||||
|  | ||||
|     s = torch.cuda.Stream()  # Create a new stream. | ||||
|     A = torch.cuda.FloatTensor(100, 100).normal_(0.0, 1.0) | ||||
|     with torch.cuda.stream(s): | ||||
|         # sum() may start execution before normal_() finishes! | ||||
|         B = torch.sum(A) | ||||
|  | ||||
| When the "current stream" is the default stream, PyTorch automatically performs | ||||
| necessary synchronization when data is moved around, as explained above. | ||||
| However, when using non-default streams, it is the user's responsibility to | ||||
| ensure proper synchronization. | ||||
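
A sketch of one way to make the example above safe, assuming the usual pattern of ordering the side stream after the current stream (``Stream.wait_stream`` and ``torch.cuda.current_stream`` are the pieces relied on here):

```python
import torch

s = torch.cuda.Stream()                        # a new, non-default stream
A = torch.cuda.FloatTensor(100, 100).normal_(0.0, 1.0)

s.wait_stream(torch.cuda.current_stream())     # s waits for work already queued on the default stream
with torch.cuda.stream(s):
    B = torch.sum(A)                           # now guaranteed to run after normal_()
```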
|  | ||||
| .. _CUDA stream: http://docs.nvidia.com/cuda/cuda-c-programming-guide/index.html#streams | ||||
|  | ||||
| Memory management | ||||
| ----------------- | ||||
|  | ||||
| PyTorch uses a caching memory allocator to speed up memory allocations. This | ||||
| allows fast memory deallocation without device synchronizations. However, | ||||
| unused memory managed by the allocator will still show up as used in | ||||
| `nvidia-smi`. Calling :meth:`~torch.cuda.empty_cache` releases all unused | ||||
| cached memory from PyTorch so that it can be used by other GPU applications. | ||||
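
A short sketch, assuming a single throwaway allocation, of how cached memory can be handed back to the driver (the tensor size is arbitrary):

```python
import torch

x = torch.cuda.FloatTensor(1024, 1024)   # allocated through the caching allocator
del x                                     # the block returns to PyTorch's cache;
                                          # nvidia-smi still reports it as in use
torch.cuda.empty_cache()                  # release unused cached blocks back to the driver
```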
|  | ||||
|  | ||||
| Best practices | ||||
| -------------- | ||||
|  | ||||
| @ -49,13 +110,13 @@ Device-agnostic code | ||||
|  | ||||
| Due to the structure of PyTorch, you may need to explicitly write | ||||
| device-agnostic (CPU or GPU) code; an example may be creating a new tensor as | ||||
| the initial hidden state of a recurrent neural network.  | ||||
| the initial hidden state of a recurrent neural network. | ||||
|  | ||||
| The first step is to determine whether the GPU should be used or not. A common | ||||
| pattern is to use Python's `argparse` module to read in user arguments, and | ||||
| pattern is to use Python's ``argparse`` module to read in user arguments, and | ||||
| have a flag that can be used to disable CUDA, in combination with | ||||
| `torch.cuda.is_available()`. In the following, `args.cuda` results in a flag | ||||
| that can be used to cast tensors and modules to CUDA if desired:: | ||||
| :meth:`~torch.cuda.is_available`. In the following, ``args.cuda`` results in a | ||||
| flag that can be used to cast tensors and modules to CUDA if desired:: | ||||
|  | ||||
|     import argparse | ||||
|     import torch | ||||
| @ -66,7 +127,7 @@ that can be used to cast tensors and modules to CUDA if desired:: | ||||
|     args = parser.parse_args() | ||||
|     args.cuda = not args.disable_cuda and torch.cuda.is_available() | ||||
|  | ||||
| If modules or tensors need to be sent to the GPU, `args.cuda` can be used as | ||||
| If modules or tensors need to be sent to the GPU, ``args.cuda`` can be used as | ||||
| follows:: | ||||
|  | ||||
|     x = torch.Tensor(8, 42) | ||||
| @ -84,9 +145,9 @@ dataloader would be as follows:: | ||||
|         x = Variable(x.type(dtype)) | ||||
|  | ||||
| When working with multiple GPUs on a system, you can use the | ||||
| `CUDA_VISIBLE_DEVICES` environment flag to manage which GPUs are available to | ||||
| PyTorch. To manually control which GPU a tensor is created on, the best practice | ||||
| is to use the `torch.cuda.device()` context manager:: | ||||
| ``CUDA_VISIBLE_DEVICES`` environment flag to manage which GPUs are available to | ||||
| PyTorch. As mentioned above, to manually control which GPU a tensor is created | ||||
| on, the best practice is to use a :any:`torch.cuda.device` context manager:: | ||||
|  | ||||
|     print("Outside device is 0")  # On device 0 (default in most scenarios) | ||||
|     with torch.cuda.device(1): | ||||
| @ -94,9 +155,10 @@ is to use the `torch.cuda.device()` context manager:: | ||||
|     print("Outside device is still 0")  # On device 0 | ||||
|  | ||||
| If you have a tensor and would like to create a new tensor of the same type on | ||||
| the same device, then you can use the `.new()` function, which acts the same as | ||||
| a normal tensor constructor. Whilst the previously mentioned methods depend on | ||||
| the current GPU context, `new()` preserves the device of the original tensor. | ||||
| the same device, then you can use the :meth:`~torch.Tensor.new` method, which | ||||
| acts the same as a normal tensor constructor. Whilst the previously mentioned | ||||
| methods depend on the current GPU context, :meth:`~torch.Tensor.new` preserves | ||||
| the device of the original tensor. | ||||
|  | ||||
| This is the recommended practice when creating modules in which new | ||||
| tensors/variables need to be created internally during the forward pass:: | ||||
| @ -110,8 +172,9 @@ tensors/variables need to be created internally during the forward pass:: | ||||
|     y_cpu_long = x_cpu_long.new([[1, 2, 3]]) | ||||
|  | ||||
| If you want to create a tensor of the same type and size of another tensor, and | ||||
| fill it with either ones or zeros, `torch.ones_like()` or `torch.zeros_like()` | ||||
| are provided as more convenient functions (which also preserve device):: | ||||
| fill it with either ones or zeros, :meth:`~torch.ones_like` or | ||||
| :meth:`~torch.zeros_like` are provided as convenient helper functions (which | ||||
| also preserve device):: | ||||
|  | ||||
|     x_cpu = torch.FloatTensor(1) | ||||
|     x_gpu = torch.cuda.FloatTensor(1) | ||||
| @ -145,9 +208,9 @@ pinned memory by passing ``pin_memory=True`` to its constructor. | ||||
| Use nn.DataParallel instead of multiprocessing | ||||
| ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ | ||||
|  | ||||
| Most use cases involving batched input and multiple GPUs should default to using | ||||
| :class:`~torch.nn.DataParallel` to utilize more than one GPU. Even with the GIL, | ||||
| a single python process can saturate multiple GPUs. | ||||
| Most use cases involving batched inputs and multiple GPUs should default to | ||||
| using :class:`~torch.nn.DataParallel` to utilize more than one GPU. Even with | ||||
| the GIL, a single Python process can saturate multiple GPUs. | ||||
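
A minimal sketch of this default, assuming a toy module; ``DataParallel`` splits the batch dimension across the visible GPUs and gathers the outputs back on the default device:

```python
import torch
import torch.nn as nn
from torch.autograd import Variable

model = nn.Linear(100, 10)
if torch.cuda.device_count() > 1:
    model = nn.DataParallel(model)        # replicate the module, scatter the batch
model = model.cuda()

x = Variable(torch.randn(32, 100).cuda())
y = model(x)                              # outputs gathered on the default device
```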
|  | ||||
| As of version 0.1.9, large numbers of GPUs (8+) might not be fully utilized. | ||||
| However, this is a known issue that is under active development. As always, | ||||
|  | ||||
| @ -53,7 +53,7 @@ exporter to print out a human-readable representation of the network:: | ||||
| You can also verify the protobuf using the `onnx <https://github.com/onnx/onnx/>`_ library. | ||||
| You can install ``onnx`` with conda:: | ||||
|  | ||||
|     conda install -c ezyang onnx | ||||
|     conda install -c conda-forge onnx | ||||
|  | ||||
| Then, you can run:: | ||||
|  | ||||
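
The snippet that follows ``Then, you can run::`` falls outside the diff context; a plausible sketch, assuming the exported file is named ``alexnet.proto`` and using only the basic ``onnx`` API:

```python
import onnx
import onnx.checker
import onnx.helper

model = onnx.load("alexnet.proto")                   # file name is an assumption
onnx.checker.check_model(model)                      # check that the IR is well formed
print(onnx.helper.printable_graph(model.graph))      # human-readable graph dump
```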
| @ -75,10 +75,8 @@ To run the exported script with `caffe2 <https://caffe2.ai/>`_, you will need th | ||||
|  | ||||
| 2. You'll need `onnx-caffe2 <https://github.com/onnx/onnx-caffe2>`_, a | ||||
|    pure-Python library which provides a Caffe2 backend for ONNX.  You can install ``onnx-caffe2`` | ||||
|    with conda or pip:: | ||||
|    with pip:: | ||||
|  | ||||
|       conda install -c ezyang onnx-caffe2 | ||||
|       # OR | ||||
|       pip install onnx-caffe2 | ||||
|  | ||||
| Once these are installed, you can use the backend for Caffe2:: | ||||
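
Again outside the diff context; a hedged sketch of what running the exported model through the Caffe2 backend could look like. The module path ``onnx_caffe2.backend``, the file name, the input shape, and the exact input format accepted by ``run`` are all assumptions here:

```python
import numpy as np
import onnx
import onnx_caffe2.backend

model = onnx.load("alexnet.proto")                        # file name is an assumption
prepared = onnx_caffe2.backend.prepare(model)             # build a Caffe2 representation
x = np.random.randn(1, 3, 224, 224).astype(np.float32)    # dummy image batch
outputs = prepared.run([x])                               # input format (list of arrays) is an assumption
print(outputs[0].shape)
```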
| @ -122,34 +120,48 @@ Limitations | ||||
| Supported operators | ||||
| ------------------- | ||||
|  | ||||
| In this tech preview, only the following operators are supported: | ||||
| The following operators are supported: | ||||
|  | ||||
| * Add (inplace is discarded) | ||||
| * Sub (inplace is discarded) | ||||
| * Mul (inplace is discarded) | ||||
| * Negate (inplace is discarded) | ||||
| * Addmm (inplace is discarded, alpha and beta must be 1) | ||||
| * Tanh (inplace is discarded) | ||||
| * Sigmoid (inplace is discarded) | ||||
| * Transpose | ||||
| * View | ||||
| * Permute | ||||
| * Concat | ||||
| * Squeeze (inplace is discarded) | ||||
| * add (nonzero alpha not supported) | ||||
| * sub (nonzero alpha not supported) | ||||
| * mul | ||||
| * div | ||||
| * cat | ||||
| * mm | ||||
| * addmm | ||||
| * neg | ||||
| * tanh | ||||
| * sigmoid | ||||
| * mean | ||||
| * t | ||||
| * expand (only when used before a broadcasting ONNX operator; e.g., add) | ||||
| * transpose | ||||
| * view | ||||
| * split | ||||
| * squeeze | ||||
| * prelu (single weight shared among input channels not supported) | ||||
| * threshold (non-zero threshold/non-zero value not supported) | ||||
| * leaky_relu | ||||
| * glu | ||||
| * softmax | ||||
| * avg_pool2d (ceil_mode not supported) | ||||
| * log_softmax | ||||
| * unfold (experimental support with ATen-Caffe2 integration) | ||||
| * elu | ||||
| * Conv | ||||
| * BatchNorm | ||||
| * Convolution | ||||
| * Embedding (only optional argument that is supported is ``padding_idx``) | ||||
| * Slice (only integer indexing is supported) | ||||
| * Dropout (inplace is discarded) | ||||
| * Relu (inplace is discarded) | ||||
| * PReLU (inplace is discarded, sharing a single weight among all channels is not supported) | ||||
| * LeakyRelu (inplace is discarded) | ||||
| * MaxPool1d (ceil_mode must be False) | ||||
| * MaxPool2d (ceil_mode must be False) | ||||
| * AvgPool2d (ceil_mode must be False) | ||||
| * MaxPool1d (ceil_mode not supported) | ||||
| * MaxPool2d (ceil_mode not supported) | ||||
| * MaxPool3d (ceil_mode not supported) | ||||
| * Embedding (no optional arguments supported) | ||||
| * RNN | ||||
| * ConstantPadNd | ||||
| * Dropout | ||||
| * FeatureDropout (training mode not supported) | ||||
| * Index (constant integer and tuple indices supported) | ||||
| * Negate | ||||
|  | ||||
| We plan on expanding support to more operators; RNNs are high on our priority | ||||
| list.  The operator set above is sufficient to export the following models: | ||||
| The operator set above is sufficient to export the following models: | ||||
|  | ||||
| * AlexNet | ||||
| * DCGAN | ||||
|  | ||||
| @ -18,11 +18,11 @@ you can specify optimizer-specific options such as the learning rate, weight dec | ||||
|  | ||||
| .. note:: | ||||
|  | ||||
|     If you need to move a model to GPU via `.cuda()`, please do so before  | ||||
|     If you need to move a model to GPU via `.cuda()`, please do so before | ||||
|     constructing optimizers for it. Parameters of a model after `.cuda()` will | ||||
|     be different objects with those before the call.  | ||||
|     be different objects from those before the call. | ||||
|  | ||||
|     In general, you should make sure that optimized parameters live in   | ||||
|     In general, you should make sure that optimized parameters live in | ||||
|     consistent locations when optimizers are constructed and used. | ||||
|  | ||||
| Example:: | ||||
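
The example body is cut off by the diff context; a typical construction, assuming a ``model`` whose parameters already live on the GPU, would be along these lines:

```python
import torch.nn as nn
import torch.optim as optim

model = nn.Linear(10, 2).cuda()    # move to GPU *before* building the optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
```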
| @ -111,6 +111,8 @@ Algorithms | ||||
|     :members: | ||||
| .. autoclass:: Adam | ||||
|     :members: | ||||
| .. autoclass:: SparseAdam | ||||
|     :members: | ||||
| .. autoclass:: Adamax | ||||
|     :members: | ||||
| .. autoclass:: ASGD | ||||
| @ -128,7 +130,7 @@ How to adjust Learning Rate | ||||
| --------------------------- | ||||
|  | ||||
| :mod:`torch.optim.lr_scheduler` provides several methods to adjust the learning | ||||
| rate based on the number of epoches. :class:`torch.optim.lr_scheduler.ReduceLROnPlateau` | ||||
| rate based on the number of epochs. :class:`torch.optim.lr_scheduler.ReduceLROnPlateau` | ||||
| allows dynamic learning rate reducing based on some validation measurements. | ||||
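
A brief sketch of the epoch-based pattern, assuming ``StepLR`` (the other schedulers below slot in the same way; the loss here is only a stand-in for a real objective):

```python
import torch
import torch.nn as nn
import torch.optim as optim
from torch.autograd import Variable
from torch.optim import lr_scheduler

model = nn.Linear(10, 2)
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9)
scheduler = lr_scheduler.StepLR(optimizer, step_size=30, gamma=0.1)   # lr *= 0.1 every 30 epochs

for epoch in range(100):
    scheduler.step()                       # adjust the learning rate once per epoch
    x = Variable(torch.randn(16, 10))
    loss = model(x).pow(2).mean()          # stand-in training objective
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()
```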
|  | ||||
| .. autoclass:: torch.optim.lr_scheduler.LambdaLR | ||||
| @ -139,5 +141,7 @@ allows dynamic learning rate reducing based on some validation measurements. | ||||
|     :members: | ||||
| .. autoclass:: torch.optim.lr_scheduler.ExponentialLR | ||||
|     :members: | ||||
| .. autoclass:: torch.optim.lr_scheduler.CosineAnnealingLR | ||||
|     :members: | ||||
| .. autoclass:: torch.optim.lr_scheduler.ReduceLROnPlateau | ||||
|     :members: | ||||
|  | ||||
| @ -1,5 +1,7 @@ | ||||
| .. currentmodule:: torch | ||||
|  | ||||
| .. _tensor-doc: | ||||
|  | ||||
| torch.Tensor | ||||
| =================================== | ||||
|  | ||||
|  | ||||
							
								
								
									
setup.py
							| @ -542,7 +542,7 @@ if os.getenv('PYTORCH_BINARY_BUILD') and platform.system() == 'Linux': | ||||
|     STDCPP_LIB = STDCPP_LIB[:-1] | ||||
|     if type(STDCPP_LIB) != str:  # python 3 | ||||
|         STDCPP_LIB = STDCPP_LIB.decode(sys.stdout.encoding) | ||||
|     main_link_args += [STDCPP_LIB] | ||||
|     extra_link_args += [STDCPP_LIB] | ||||
|     version_script = os.path.abspath("tools/pytorch.version") | ||||
|     extra_link_args += ['-Wl,--version-script=' + version_script] | ||||
|  | ||||
| @ -593,9 +593,11 @@ extensions.append(THNN) | ||||
| if WITH_CUDA: | ||||
|     thnvrtc_link_flags = extra_link_args + [make_relative_rpath('lib')] | ||||
|     if platform.system() == 'Linux': | ||||
|         thnvrtc_link_flags = ['-Wl,--no-as-needed'] + thnvrtc_link_flags | ||||
|         thnvrtc_link_flags = thnvrtc_link_flags + ['-Wl,--no-as-needed'] | ||||
|     # these have to be specified as -lcuda in link_flags because they | ||||
|     # have to come right after the `no-as-needed` option | ||||
|     thnvrtc_link_flags += ['-lcuda', '-lnvrtc'] | ||||
|     THNVRTC = Extension("torch._nvrtc", | ||||
|                         libraries=['nvrtc', 'cuda'], | ||||
|                         sources=['torch/csrc/nvrtc.cpp'], | ||||
|                         language='c++', | ||||
|                         include_dirs=include_dirs, | ||||
| @ -618,11 +620,13 @@ if WITH_CUDA: | ||||
|                        ) | ||||
|     extensions.append(THCUNN) | ||||
|  | ||||
| version = '0.2.0' | ||||
| version = '0.3.1b0' | ||||
| if os.getenv('PYTORCH_BUILD_VERSION'): | ||||
|     assert os.getenv('PYTORCH_BUILD_NUMBER') is not None | ||||
|     version = os.getenv('PYTORCH_BUILD_VERSION') \ | ||||
|         + '_' + os.getenv('PYTORCH_BUILD_NUMBER') | ||||
|     build_number = int(os.getenv('PYTORCH_BUILD_NUMBER')) | ||||
|     version = os.getenv('PYTORCH_BUILD_VERSION') | ||||
|     if build_number > 1: | ||||
|         version += '.post' + str(build_number) | ||||
| else: | ||||
|     try: | ||||
|         sha = subprocess.check_output(['git', 'rev-parse', 'HEAD'], cwd=cwd).decode('ascii').strip() | ||||
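
To make the effect of the new version logic concrete, here is a standalone re-implementation (not the actual ``setup.py`` code) showing what it produces for a few hypothetical environment settings:

```python
def compute_version(env, default='0.3.1b0'):
    # Mirrors the branch added above: PYTORCH_BUILD_VERSION plus an optional .postN suffix.
    version = default
    if env.get('PYTORCH_BUILD_VERSION'):
        assert env.get('PYTORCH_BUILD_NUMBER') is not None
        build_number = int(env['PYTORCH_BUILD_NUMBER'])
        version = env['PYTORCH_BUILD_VERSION']
        if build_number > 1:
            version += '.post' + str(build_number)
    return version

print(compute_version({'PYTORCH_BUILD_VERSION': '0.3.1', 'PYTORCH_BUILD_NUMBER': '1'}))  # 0.3.1
print(compute_version({'PYTORCH_BUILD_VERSION': '0.3.1', 'PYTORCH_BUILD_NUMBER': '2'}))  # 0.3.1.post2
print(compute_version({}))                                                               # 0.3.1b0
```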
|  | ||||
| @ -31,6 +31,7 @@ UNITTEST_ARGS = [sys.argv[0]] + remaining | ||||
| def run_tests(): | ||||
|     unittest.main(argv=UNITTEST_ARGS) | ||||
|  | ||||
| IS_WINDOWS = sys.platform == "win32" | ||||
|  | ||||
| TEST_NUMPY = True | ||||
| try: | ||||
| @ -170,6 +171,9 @@ class TestCase(unittest.TestCase): | ||||
|         return x, y | ||||
|  | ||||
|     def assertEqual(self, x, y, prec=None, message=''): | ||||
|         if isinstance(prec, str) and message == '': | ||||
|             message = prec | ||||
|             prec = None | ||||
|         if prec is None: | ||||
|             prec = self.precision | ||||
|  | ||||
| @ -329,6 +333,8 @@ class TestCase(unittest.TestCase): | ||||
|                 self.assertEqual(s, expected) | ||||
|  | ||||
|     if sys.version_info < (3, 2): | ||||
|         # assertRegexpMatches renamed assertRegex in 3.2 | ||||
|         assertRegex = unittest.TestCase.assertRegexpMatches | ||||
|         # assertRaisesRegexp renamed assertRaisesRegex in 3.2 | ||||
|         assertRaisesRegex = unittest.TestCase.assertRaisesRegexp | ||||
|  | ||||
|  | ||||
| @ -246,10 +246,24 @@ module_tests = [ | ||||
| ] | ||||
|  | ||||
|  | ||||
| def nllloss2d_reference(input, target, weight=None, ignore_index=-100, | ||||
| def kldivloss_reference(input, target, size_average=True, reduce=True): | ||||
|     safe_target = target * (target > 0).type_as(target) | ||||
|     safe_target_log = (safe_target + (target <= 0).type_as(target)).log() | ||||
|     result = safe_target * (safe_target_log - input) | ||||
|     if reduce and size_average: | ||||
|         return result.mean() | ||||
|     elif reduce: | ||||
|         return result.sum() | ||||
|     return result | ||||
|  | ||||
|  | ||||
| def nlllossNd_reference(input, target, weight=None, ignore_index=-100, | ||||
|                         size_average=True, reduce=True): | ||||
|     N, C, H, W = input.size() | ||||
|     output = torch.zeros(N, H, W).type_as(input) | ||||
|     assert input.dim() >= 3 | ||||
|     N = input.size(0) | ||||
|     C = input.size(1) | ||||
|     out_size = (N,) + input.size()[2:] | ||||
|     output = torch.zeros(out_size).type_as(input) | ||||
|     if isinstance(target, Variable): | ||||
|         target = target.data | ||||
|  | ||||
| @ -257,13 +271,13 @@ def nllloss2d_reference(input, target, weight=None, ignore_index=-100, | ||||
|         weight = torch.ones(C).type_as(input) | ||||
|  | ||||
|     total_weight_data = 0 | ||||
|     for n in range(0, N): | ||||
|         for h in range(0, H): | ||||
|             for w in range(0, W): | ||||
|                 t_nhw = target[n][h][w] | ||||
|                 norm = 0. if ignore_index == t_nhw else weight[t_nhw] | ||||
|                 output[n][h][w] = -input[n][t_nhw][h][w] * norm | ||||
|                 total_weight_data += norm | ||||
|     for tup in product(*[range(size) for size in out_size]): | ||||
|         t_nx = target[tup] | ||||
|         norm = 0. if ignore_index == t_nx else weight[t_nx] | ||||
|         input_index = list(tup) | ||||
|         input_index.insert(1, t_nx) | ||||
|         output[tup] = -input[tuple(input_index)] * norm | ||||
|         total_weight_data += norm | ||||
|  | ||||
|     if reduce and size_average: | ||||
|         return output.sum() / total_weight_data | ||||
| @ -309,8 +323,9 @@ def smoothl1loss_reference(input, target, size_average=True, reduce=True): | ||||
|  | ||||
|  | ||||
| loss_reference_fns = { | ||||
|     'KLDivLoss': kldivloss_reference, | ||||
|     'NLLLoss': nllloss_reference, | ||||
|     'NLLLoss2d': nllloss2d_reference, | ||||
|     'NLLLossNd': nlllossNd_reference, | ||||
|     'SmoothL1Loss': smoothl1loss_reference, | ||||
| } | ||||
|  | ||||
| @ -370,6 +385,8 @@ criterion_tests = [ | ||||
|         module_name='KLDivLoss', | ||||
|         input_fn=lambda: torch.rand(10, 10).log(), | ||||
|         target_fn=lambda: torch.rand(10, 10), | ||||
|         reference_fn=lambda i, t, m: | ||||
|             kldivloss_reference(i, t, get_size_average(m), reduce=True), | ||||
|         check_no_size_average=True, | ||||
|     ), | ||||
|     dict( | ||||
| @ -410,7 +427,7 @@ criterion_tests = [ | ||||
|         input_size=(2, 3, 5, 5), | ||||
|         target_fn=lambda: torch.rand(2, 5, 5).mul(3).floor().long(), | ||||
|         reference_fn=lambda i, t, m: | ||||
|             nllloss2d_reference(i, t, size_average=get_size_average(m)), | ||||
|             nlllossNd_reference(i, t, size_average=get_size_average(m)), | ||||
|         check_no_size_average=True, | ||||
|     ), | ||||
|     dict( | ||||
| @ -419,7 +436,7 @@ criterion_tests = [ | ||||
|         input_size=(2, 3, 5, 5), | ||||
|         target=torch.rand(2, 5, 5).mul(3).floor().long(), | ||||
|         reference_fn=lambda i, t, m: | ||||
|             nllloss2d_reference(i, t, weight=get_weight(m)), | ||||
|             nlllossNd_reference(i, t, weight=get_weight(m)), | ||||
|         desc='weights', | ||||
|     ), | ||||
|     dict( | ||||
| @ -428,7 +445,7 @@ criterion_tests = [ | ||||
|         input_size=(2, 3, 5, 5), | ||||
|         target_fn=lambda: torch.rand(2, 5, 5).mul(3).floor().long(), | ||||
|         reference_fn=lambda i, t, m: | ||||
|             nllloss2d_reference(i, t, ignore_index=1), | ||||
|             nlllossNd_reference(i, t, ignore_index=1), | ||||
|         desc='ignore_index', | ||||
|     ), | ||||
|     dict( | ||||
|  | ||||
| @ -3,6 +3,6 @@ graph(%1 : Double(2, 2) | ||||
|       %3 : Double(2) | ||||
|       %4 : Double(2) | ||||
|       %5 : Double(2)) { | ||||
|   %7 : Double(2, 2), %8 : Handle = CppOp[N5torch8autograd16BatchNormForwardE](%1, %2, %3), uses = [[%0.i0], []]; | ||||
|   %7 : Double(2, 2), %8 : Handle = CppOp[N5torch8autograd16BatchNormForwardE](%1, %2, %3), uses = [[%0.i0], []], scope: BatchNorm2d; | ||||
|   return (%7); | ||||
| } | ||||
|  | ||||
| @ -1,6 +1,6 @@ | ||||
| graph(%1 : Double(20, 16, 50, 40) | ||||
|       %2 : Double(13, 16, 3, 3)) { | ||||
|   %4 : UNKNOWN_TYPE = Undefined(), uses = [%3.i2]; | ||||
|   %5 : Double(20, 13, 48, 38), %6 : Handle = CppOp[ConvForward](%1, %2, %4), uses = [[%0.i0], []]; | ||||
|   %4 : UNKNOWN_TYPE = Undefined(), uses = [%3.i2], scope: Conv2d; | ||||
|   %5 : Double(20, 13, 48, 38), %6 : Handle = CppOp[ConvForward](%1, %2, %4), uses = [[%0.i0], []], scope: Conv2d; | ||||
|   return (%5); | ||||
| } | ||||
|  | ||||
| @ -1,4 +1,4 @@ | ||||
| graph(%1 : Double(2, 2)) { | ||||
|   %3 : Double(2, 2), %4 : Handle = ^Dropout(0.6, True, False)(%1), uses = [[%0.i0], []]; | ||||
|   %3 : Double(2, 2), %4 : Handle = ^Dropout(0.6, True, False)(%1), uses = [[%0.i0], []], scope: Dropout; | ||||
|   return (%3); | ||||
| } | ||||
|  | ||||
							
								
								
									
test/expect/TestJit.test_scopes.expect (new file)
							| @ -0,0 +1,8 @@ | ||||
| graph(%1 : Double(1) | ||||
|       %2 : Double(1)) { | ||||
|   %3 : Double(1) = add[alpha={1}](%1, %2), uses = [%4.i1]; | ||||
|   %4 : Double(1) = mul(%1, %3), uses = [%5.i0], scope: Foo; | ||||
|   %5 : Double(1) = tanh(%4), uses = [%6.i0], scope: Foo/Bar; | ||||
|   %6 : Double(1) = sigmoid(%5), uses = [%0.i0], scope: Foo; | ||||
|   return (%6); | ||||
| } | ||||
							
								
								
									
test/expect/TestJit.test_scopes_identity_node.expect (new file)
							| @ -0,0 +1,9 @@ | ||||
| graph(%1 : Double(1, 3, 227, 227) | ||||
|       %2 : Double(64, 3, 11, 11) | ||||
|       %3 : Double(64)) { | ||||
|   %5 : UNKNOWN_TYPE = Conv[kernel_shape=[11, 11], strides=[4, 4], pads=[2, 2, 2, 2], dilations=[1, 1], group=1](%1, %2), uses = [[%6.i0]], scope: Net/Sequential[features]/Conv2d[0]; | ||||
|   %6 : Double(1, 64, 56, 56) = Add[broadcast=1, axis=1](%5, %3), uses = [%7.i0], scope: Net/Sequential[features]/Conv2d[0]; | ||||
|   %7 : Double(1, 64, 56, 56) = Relu(%6), uses = [%8.i0], scope: Net/Sequential[features]/ReLU[1]; | ||||
|   %8 : Double(1, 64, 27, 27) = MaxPool[kernel_shape=[3, 3], pads=[0, 0], strides=[2, 2]](%7), uses = [%0.i0], scope: Net/Sequential[features]/MaxPool2d[2]; | ||||
|   return (%8); | ||||
| } | ||||
							
								
								
									
test/expect/TestJit.test_scopes_intermediate_node.expect (new file)
							| @ -0,0 +1,5 @@ | ||||
| graph(%1 : Double(2)) { | ||||
|   %2 : Double(2) = Softmax[axis=0](%1), uses = [%3.i0], scope: Net; | ||||
|   %3 : Double(2) = Log(%2), uses = [%0.i0], scope: Net; | ||||
|   return (%3); | ||||
| } | ||||
| @ -345,32 +345,6 @@ class TestAutograd(TestCase): | ||||
|         self.assertEqual(counter[0], 1, 'bw_hook not called') | ||||
|         self.assertEqual(x.grad.data, torch.ones(5, 5) * 2) | ||||
|  | ||||
|     @unittest.skipIf(sys.version_info[0] == 2, "Python 2 doesn't collect cycles involving __del__") | ||||
|     def test_hooks_cycle(self): | ||||
|         import gc | ||||
|         counter = [0] | ||||
|  | ||||
|         class GradHook(object): | ||||
|             def __init__(self, var): | ||||
|                 self.var = var | ||||
|  | ||||
|             def __del__(self): | ||||
|                 counter[0] += 1 | ||||
|  | ||||
|             def __call__(self, *args): | ||||
|                 pass | ||||
|  | ||||
|         def run_test(): | ||||
|             x = Variable(torch.ones(5, 5), requires_grad=True) | ||||
|             y = x * 2 | ||||
|             x.register_hook(GradHook(x)) | ||||
|             y.register_hook(GradHook(y)) | ||||
|             y._backward_hooks[1] = GradHook(y) | ||||
|  | ||||
|         run_test() | ||||
|         gc.collect() | ||||
|         self.assertEqual(counter[0], 3) | ||||
|  | ||||
|     def test_hook_none(self): | ||||
|         # WARNING: this is a test for autograd internals. | ||||
|         # You should never have to use such things in your code. | ||||
| @ -995,6 +969,16 @@ class TestAutograd(TestCase): | ||||
|         self._test_setitem_tensor((5, 5), Variable(mask)) | ||||
|         self._test_setitem_tensor((5,), Variable(mask[0])) | ||||
|  | ||||
|     def test_select_sum(self): | ||||
|         # both select and sum return Scalars in ATen; ensure they work together. | ||||
|         x = Variable(torch.randn(10), requires_grad=True) | ||||
|  | ||||
|         def func(x): | ||||
|             return x.select(0, 1).sum() | ||||
|  | ||||
|         gradcheck(func, [x]) | ||||
|         gradgradcheck(func, [x]) | ||||
|  | ||||
|     def test_stack(self): | ||||
|         x = Variable(torch.randn(10, 10), requires_grad=True) | ||||
|         y = Variable(torch.randn(10, 10), requires_grad=True) | ||||
| @ -1006,6 +990,43 @@ class TestAutograd(TestCase): | ||||
|         self.assertEqual(y.grad.data, grad[1]) | ||||
|         self.assertEqual(z.grad.data, grad[2]) | ||||
|  | ||||
|     def test_put(self): | ||||
|         root = Variable(torch.randn(4, 5), requires_grad=True) | ||||
|         values = Variable(torch.randn(6), requires_grad=True) | ||||
|         idx = Variable(torch.LongTensor([1, 2, 3, -1, -2, -3])) | ||||
|  | ||||
|         def func(root, values): | ||||
|             x = root.clone() | ||||
|             x.put_(idx, values) | ||||
|             return x | ||||
|  | ||||
|         gradcheck(func, [root, values]) | ||||
|         gradgradcheck(func, [root, values]) | ||||
|  | ||||
|     def test_put_accumulate(self): | ||||
|         root = Variable(torch.randn(4, 5), requires_grad=True) | ||||
|         values = Variable(torch.randn(6), requires_grad=True) | ||||
|         idx = Variable(torch.LongTensor([1, 2, 3, 1, 2, 3])) | ||||
|  | ||||
|         def func(root, values): | ||||
|             x = root.clone() | ||||
|             x.put_(idx, values, accumulate=True) | ||||
|             return x | ||||
|  | ||||
|         gradcheck(func, [root, values]) | ||||
|         gradgradcheck(func, [root, values]) | ||||
|  | ||||
|     def test_fill(self): | ||||
|         root = Variable(torch.randn(4, 5), requires_grad=True) | ||||
|  | ||||
|         def func(root): | ||||
|             x = root.clone() | ||||
|             x.fill_(2) | ||||
|             return x | ||||
|  | ||||
|         gradcheck(func, [root]) | ||||
|         gradgradcheck(func, [root]) | ||||
|  | ||||
|     def test_unused_output(self): | ||||
|         x = Variable(torch.randn(10, 10), requires_grad=True) | ||||
|         outputs = x.chunk(5) | ||||
| @ -1461,13 +1482,14 @@ class TestAutograd(TestCase): | ||||
|     def test_norm_subgradient(self): | ||||
|         def run_test(input_size, norm_deg): | ||||
|             input = Variable(torch.zeros(*input_size), requires_grad=True) | ||||
|             out = input.norm(norm_deg) | ||||
|             out.backward() | ||||
|             input.norm(norm_deg).backward() | ||||
|             self.assertEqual(input.grad.data.abs().sum(), 0) | ||||
|  | ||||
|         run_test((10,), 2) | ||||
|         run_test((10, 10), 2) | ||||
|         run_test((10,), 3) | ||||
|         run_test((10,), 1) | ||||
|         run_test((10,), 1.5) | ||||
|  | ||||
|     def test_profiler(self): | ||||
|         x = Variable(torch.randn(10, 10)) | ||||
| @ -1764,8 +1786,14 @@ method_tests = [ | ||||
|     ('addcdiv', (S, S), (0.5, (S, 1), (1, S)), 'scale_broadcast_rhs'), | ||||
|     ('addcdiv', (1,), (0.5, (S, S, 1), (1, S)), 'scale_broadcast_all'), | ||||
|     ('zero_', (S, S, S), ()), | ||||
|     ('norm', (S, S, S), (2,)), | ||||
|     ('norm', (S, S, S), (3,), '3'), | ||||
|     ('norm', (S, S), (2,)), | ||||
|     ('norm', (S, S), (0,), '0'), | ||||
|     ('norm', (S, S), (0.5,), '0_5'), | ||||
|     ('norm', (S, S), (1,), '1'), | ||||
|     ('norm', (S, S), (3,), '3'), | ||||
|     ('norm', (S, S), (-1,), 'neg_1'), | ||||
|     ('norm', (S, S), (-0.5,), 'neg_0_5'), | ||||
|     ('norm', (S, S), (-1.5,), 'neg_1_5'), | ||||
|     ('norm', torch.rand(S, S, S) + 5e-2, (1.5,), '1_5'), | ||||
|     ('norm', (S, S, S), (2, 1), '2_dim', [1]), | ||||
|     ('norm', (S, S, S), (3, 1), '3_dim', [1]), | ||||
| @ -1842,6 +1870,7 @@ method_tests = [ | ||||
|     ('squeeze', (S, 1, S, 1), ()), | ||||
|     ('squeeze', (S, 1, S, 1), (1,), '1_dim', [0]), | ||||
|     ('squeeze', (S, 1, S, 1), (2,), 'not_1_dim', [0]), | ||||
|     ('squeeze', (1,), (0,), '1d_dim0', [0]), | ||||
|     ('unsqueeze', (S, S, S), (0,), 'first', [0]), | ||||
|     ('unsqueeze', (S, S, S), (1,), 'middle', [0]), | ||||
|     ('unsqueeze', (S, S, S), (3,), 'last', [0]), | ||||
| @ -1875,6 +1904,7 @@ method_tests = [ | ||||
|     ('topk', (S, M, S), (3, 1), 'dim'), | ||||
|     ('topk', (S, M, S), (3, 1, True), 'dim_desc'), | ||||
|     ('topk', (S, M, S), (3, 1, True, True), 'dim_desc_sort'), | ||||
|     ('take', (S, S, S), (Variable(torch.LongTensor([[-3, 2], [20, 2]])),)), | ||||
|     ('__getitem__', torch.randn(S, S, S), (dont_convert([1, 2]),)), | ||||
|     ('__getitem__', torch.randn(S, S, S), (slice(0, 3),), 'slice'), | ||||
|     ('__getitem__', torch.randn(S, S, S), (dont_convert([slice(0, 3), 1]),), 'slice_index'), | ||||
|  | ||||
| @ -1,5 +1,6 @@ | ||||
| import math | ||||
| import tempfile | ||||
| import re | ||||
| import unittest | ||||
| from itertools import repeat | ||||
|  | ||||
| @ -16,6 +17,11 @@ if not torch.cuda.is_available(): | ||||
|     TestCase = object  # noqa: F811 | ||||
|     HAS_CUDA = False | ||||
|  | ||||
| HAS_MAGMA = HAS_CUDA | ||||
| if HAS_CUDA: | ||||
|     torch.ones(1).cuda()  # has_magma shows up after cuda is initialized | ||||
|     HAS_MAGMA = torch.cuda.has_magma | ||||
|  | ||||
|  | ||||
| def is_floating(t): | ||||
|     return type(t) in [torch.FloatTensor, torch.DoubleTensor, | ||||
| @ -91,6 +97,10 @@ def medium_2d(t): | ||||
|     return make_tensor(t, M, M) | ||||
|  | ||||
|  | ||||
| def medium_2d_expanded(t): | ||||
|     return t(1).expand(M, M) | ||||
|  | ||||
|  | ||||
| def medium_2d_scaled(t, scale=10): | ||||
|     return make_tensor(t, M, M).mul(scale) | ||||
|  | ||||
| @ -137,6 +147,13 @@ def new_t(*sizes): | ||||
|         return t(*sizes).copy_(torch.randn(*sizes)) | ||||
|     return tmp | ||||
|  | ||||
| # Content of each tuple: | ||||
| # - function name | ||||
| # - constructor for the tensor,    signature: fn(tensor_type) -> tensor | ||||
| # - constructor for the arguments, signature: fn(tensor_type) -> list | ||||
| # - postfix name for the test (must be unique for a given function) (default='') | ||||
| # - tensor types to use (default=types) | ||||
| # - disable inplace test, if set to True, no inplace test will be done (default=False) | ||||
| tests = [ | ||||
|     ('add', small_3d, lambda t: [number(3.14, 3, t)]), | ||||
|     ('add', small_3d, lambda t: [small_3d_positive(t)], 'tensor'), | ||||
| @ -289,9 +306,11 @@ tests = [ | ||||
|     ('topk', small_3d_unique, lambda t: [2, 1, True, True], 'dim_desc_sort'), | ||||
|     ('trace', medium_2d, lambda t: [],), | ||||
|     ('tril', medium_2d, lambda t: [],), | ||||
|     ('tril', medium_2d_expanded, lambda t: [], 'zero_stride', types, True), | ||||
|     ('tril', medium_2d, lambda t: [2], 'positive'), | ||||
|     ('tril', medium_2d, lambda t: [-2], 'negative'), | ||||
|     ('triu', medium_2d, lambda t: [],), | ||||
|     ('triu', medium_2d_expanded, lambda t: [], 'zero_stride', types, True), | ||||
|     ('triu', medium_2d, lambda t: [2], 'positive'), | ||||
|     ('triu', medium_2d, lambda t: [-2], 'negative'), | ||||
|     ('unsqueeze', new_t(2, 3, 4), lambda t: [2],), | ||||
| @ -378,18 +397,24 @@ def get_cycles_per_ms(): | ||||
|     return _cycles_per_ms | ||||
|  | ||||
|  | ||||
| def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5): | ||||
| def compare_cpu_gpu(tensor_constructor, arg_constructor, fn, t, precision=1e-5, force_gpu_half=False): | ||||
|     def tmp(self): | ||||
|         cpu_tensor = tensor_constructor(t) | ||||
|         gpu_tensor = to_gpu(cpu_tensor) | ||||
|         type_map = {} | ||||
|         if force_gpu_half: | ||||
|             type_map = { | ||||
|                 'torch.FloatTensor': 'torch.cuda.HalfTensor', | ||||
|                 'torch.DoubleTensor': 'torch.cuda.HalfTensor', | ||||
|             } | ||||
|         gpu_tensor = to_gpu(cpu_tensor, type_map) | ||||
|         cpu_args = arg_constructor(t) | ||||
|         gpu_args = [to_gpu(arg) for arg in cpu_args] | ||||
|         gpu_args = [to_gpu(arg, type_map) for arg in cpu_args] | ||||
|         cpu_result = getattr(cpu_tensor, fn)(*cpu_args) | ||||
|         try: | ||||
|             gpu_result = getattr(gpu_tensor, fn)(*gpu_args) | ||||
|         except RuntimeError as e: | ||||
|             reason = e.args[0] | ||||
|             if 'unimplemented data type' in reason: | ||||
|             if 'only supports floating-point types' in reason or 'unimplemented data type' in reason: | ||||
|                 raise unittest.SkipTest('unimplemented data type') | ||||
|             raise | ||||
|         except AttributeError as e: | ||||
| @ -707,6 +732,38 @@ class TestCuda(TestCase): | ||||
|         z = torch.cat([x, y], 0) | ||||
|         self.assertEqual(z.get_device(), x.get_device()) | ||||
|  | ||||
|     def test_cat(self): | ||||
|         SIZE = 10 | ||||
|         for dim in range(-3, 3): | ||||
|             pos_dim = dim if dim >= 0 else 3 + dim | ||||
|             x = torch.rand(13, SIZE, SIZE).transpose(0, pos_dim).cuda() | ||||
|             y = torch.rand(17, SIZE, SIZE).transpose(0, pos_dim).cuda() | ||||
|             z = torch.rand(19, SIZE, SIZE).transpose(0, pos_dim).cuda() | ||||
|  | ||||
|             res1 = torch.cat((x, y, z), dim) | ||||
|             self.assertEqual(res1.narrow(pos_dim, 0, 13), x, 0) | ||||
|             self.assertEqual(res1.narrow(pos_dim, 13, 17), y, 0) | ||||
|             self.assertEqual(res1.narrow(pos_dim, 30, 19), z, 0) | ||||
|  | ||||
|         x = torch.randn(20, SIZE, SIZE).cuda() | ||||
|         self.assertEqual(torch.cat(torch.split(x, 7)), x) | ||||
|         self.assertEqual(torch.cat(torch.chunk(x, 7)), x) | ||||
|  | ||||
|         y = torch.randn(1, SIZE, SIZE).cuda() | ||||
|         z = torch.cat([x, y]) | ||||
|         self.assertEqual(z.size(), (21, SIZE, SIZE)) | ||||
|  | ||||
|     def test_cat_bad_input_sizes(self): | ||||
|         x = torch.randn(2, 1).cuda() | ||||
|         y = torch.randn(2, 1, 1).cuda() | ||||
|         z = torch.randn(2, 1, 1).cuda() | ||||
|         self.assertRaises(RuntimeError, lambda: torch.cat([x, y, z])) | ||||
|  | ||||
|         x = torch.randn(2, 1, 2).cuda() | ||||
|         y = torch.randn(2, 1, 1).cuda() | ||||
|         z = torch.randn(2, 2, 1).cuda() | ||||
|         self.assertRaises(RuntimeError, lambda: torch.cat([x, y, z], dim=1)) | ||||
|  | ||||
|     def test_serialization(self): | ||||
|         x = torch.randn(4, 4).cuda() | ||||
|         with tempfile.NamedTemporaryFile() as f: | ||||
| @ -968,6 +1025,69 @@ class TestCuda(TestCase): | ||||
|     def test_tensor_scatterFill(self): | ||||
|         TestTorch._test_scatter_base(self, lambda t: t.cuda(), 'scatter_', True, test_bounds=False) | ||||
|  | ||||
|     def test_var(self): | ||||
|         cpu_tensor = torch.randn(2, 3, 3) | ||||
|         gpu_tensor = cpu_tensor.cuda() | ||||
|         self.assertEqual(gpu_tensor.var(), cpu_tensor.var()) | ||||
|         self.assertEqual(gpu_tensor.var(1), cpu_tensor.var(1)) | ||||
|         self.assertEqual(gpu_tensor.var(2), cpu_tensor.var(2)) | ||||
|         self.assertEqual(gpu_tensor.std(), cpu_tensor.std()) | ||||
|         self.assertEqual(gpu_tensor.std(1), cpu_tensor.std(1)) | ||||
|         self.assertEqual(gpu_tensor.var(2), cpu_tensor.var(2)) | ||||
|  | ||||
|         cpu_tensor = torch.randn(100) | ||||
|         gpu_tensor = cpu_tensor.cuda() | ||||
|         self.assertEqual(gpu_tensor.var(), cpu_tensor.var()) | ||||
|  | ||||
|     def test_var_unbiased(self): | ||||
|         tensor = torch.randn(100).cuda() | ||||
|         self.assertEqual(tensor.var(0), tensor.var(0, unbiased=True)) | ||||
|         self.assertEqual(tensor.var(), tensor.var(unbiased=True)) | ||||
|         self.assertEqual(tensor.var(unbiased=False), tensor.var(0, unbiased=False)[0]) | ||||
|  | ||||
|         tensor = torch.FloatTensor([1.0, 2.0]).cuda() | ||||
|         self.assertEqual(tensor.var(unbiased=True), 0.5) | ||||
|         self.assertEqual(tensor.var(unbiased=False), 0.25) | ||||
|  | ||||
|         tensor = torch.randn(100).cuda() | ||||
|         self.assertEqual(tensor.std(0), tensor.std(0, unbiased=True)) | ||||
|         self.assertEqual(tensor.std(), tensor.std(unbiased=True)) | ||||
|         self.assertEqual(tensor.std(unbiased=False), tensor.std(0, unbiased=False)[0]) | ||||
|  | ||||
|     def test_var_large_input(self): | ||||
|         # Large, not-nice input | ||||
|         tensor_cpu = torch.randn(2 * 32 * 1024 + 1, 2, 67) | ||||
|         tensor_cuda = tensor_cpu.cuda() | ||||
|  | ||||
|         self.assertEqual(tensor_cpu.var(2), tensor_cuda.var(2).cpu()) | ||||
|  | ||||
|     def test_var_stability(self): | ||||
|         tensor = torch.FloatTensor([2281.5, 2281.25]).cuda() | ||||
|  | ||||
|         # Stability for inner dim | ||||
|         self.assertEqual(tensor.var(0)[0], 0.03125) | ||||
|  | ||||
|         # General stability | ||||
|         self.assertEqual(tensor.var(), 0.03125) | ||||
|  | ||||
|         # Stability for outer dimensions | ||||
|         tensor = tensor.unsqueeze(1) | ||||
|         self.assertEqual(tensor.var(0)[0], 0.03125) | ||||
|  | ||||
|     @unittest.skipIf(not HAS_MAGMA, "no MAGMA library detected") | ||||
|     def test_symeig(self): | ||||
|         # Small case | ||||
|         tensor = torch.randn(3, 3).cuda() | ||||
|         tensor = torch.mm(tensor, tensor.t()) | ||||
|         eigval, eigvec = torch.symeig(tensor, eigenvectors=True) | ||||
|         self.assertEqual(tensor, torch.mm(torch.mm(eigvec, eigval.diag()), eigvec.t())) | ||||
|  | ||||
|         # Large case | ||||
|         tensor = torch.randn(257, 257).cuda() | ||||
|         tensor = torch.mm(tensor, tensor.t()) | ||||
|         eigval, eigvec = torch.symeig(tensor, eigenvectors=True) | ||||
|         self.assertEqual(tensor, torch.mm(torch.mm(eigvec, eigval.diag()), eigvec.t())) | ||||
|  | ||||
|     def test_arange(self): | ||||
|         for t in ['IntTensor', 'LongTensor', 'FloatTensor', 'DoubleTensor']: | ||||
|             a = torch.cuda.__dict__[t]() | ||||
| @ -999,18 +1119,27 @@ if HAS_CUDA: | ||||
|         for t in types: | ||||
|             tensor = t() | ||||
|             gpu_tensor = get_gpu_type(t)() | ||||
|  | ||||
|             # Default values | ||||
|             desc = '' | ||||
|             type_subset = types | ||||
|             no_inplace = False | ||||
|             if len(decl) == 3: | ||||
|                 name, constr, arg_constr = decl | ||||
|                 desc = '' | ||||
|             elif len(decl) == 4: | ||||
|                 name, constr, arg_constr, desc = decl | ||||
|             elif len(decl) == 5: | ||||
|                 name, constr, arg_constr, desc, type_subset = decl | ||||
|                 if t not in type_subset: | ||||
|                     continue | ||||
|             elif len(decl) == 6: | ||||
|                 name, constr, arg_constr, desc, type_subset, no_inplace = decl | ||||
|  | ||||
|             if t not in type_subset: | ||||
|                 continue | ||||
|  | ||||
|             precision = custom_precision.get(name, TestCuda.precision) | ||||
|             for inplace in (True, False): | ||||
|                 if inplace and no_inplace: | ||||
|                     continue | ||||
|                 if inplace: | ||||
|                     name_inner = name + '_' | ||||
|                 else: | ||||
| @ -1027,7 +1156,15 @@ if HAS_CUDA: | ||||
|                     test_name += '_' + desc | ||||
|  | ||||
|                 assert not hasattr(TestCuda, test_name), "Duplicated test name: " + test_name | ||||
|                 setattr(TestCuda, test_name, compare_cpu_gpu(constr, arg_constr, name_inner, t, precision)) | ||||
|                 setattr(TestCuda, | ||||
|                         test_name, | ||||
|                         compare_cpu_gpu(constr, arg_constr, name_inner, t, precision)) | ||||
|                 if t == torch.FloatTensor: | ||||
|                     assert not hasattr(TestCuda, test_name + '_gpu_half'), "Duplicated test name: " + test_name | ||||
|                     setattr(TestCuda, | ||||
|                             test_name + '_gpu_half', | ||||
|                             compare_cpu_gpu(constr, arg_constr, name_inner, t, | ||||
|                                             precision, force_gpu_half=True)) | ||||
|  | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|  | ||||
| @ -1,13 +1,46 @@ | ||||
| import math | ||||
| import sys | ||||
| import errno | ||||
| import os | ||||
| import ctypes | ||||
| import signal | ||||
| import torch | ||||
| import time | ||||
| import traceback | ||||
| import unittest | ||||
| from torch import multiprocessing | ||||
| from torch.utils.data import Dataset, TensorDataset, DataLoader, ConcatDataset | ||||
| from common import TestCase, run_tests, TEST_NUMPY | ||||
| from torch.utils.data.dataset import random_split | ||||
| from torch.utils.data.dataloader import default_collate, ExceptionWrapper | ||||
| from common import TestCase, run_tests, TEST_NUMPY, IS_WINDOWS | ||||
| from common_nn import TEST_CUDA | ||||
|  | ||||
|  | ||||
| JOIN_TIMEOUT = 17.0 if IS_WINDOWS else 4.5 | ||||
|  | ||||
|  | ||||
| class TestDatasetRandomSplit(TestCase): | ||||
|     def test_lengths_must_equal_datset_size(self): | ||||
|         with self.assertRaises(ValueError): | ||||
|             random_split([1, 2, 3, 4], [1, 2]) | ||||
|  | ||||
|     def test_splits_have_correct_size(self): | ||||
|         splits = random_split([1, 2, 3, 4, 5, 6], [2, 4]) | ||||
|         self.assertEqual(len(splits), 2) | ||||
|         self.assertEqual(len(splits[0]), 2) | ||||
|         self.assertEqual(len(splits[1]), 4) | ||||
|  | ||||
|     def test_splits_are_mutually_exclusive(self): | ||||
|         data = [5, 2, 3, 4, 1, 6] | ||||
|         splits = random_split(data, [2, 4]) | ||||
|         all_values = [] | ||||
|         all_values.extend(list(splits[0])) | ||||
|         all_values.extend(list(splits[1])) | ||||
|         data.sort() | ||||
|         all_values.sort() | ||||
|         self.assertListEqual(data, all_values) | ||||
|  | ||||
|  | ||||
| class TestTensorDataset(TestCase): | ||||
|  | ||||
|     def test_len(self): | ||||
| @ -73,6 +106,46 @@ class TestConcatDataset(TestCase): | ||||
|         self.assertEqual(0, (d3[0][0] - result[14][0]).abs().sum()) | ||||
|  | ||||
|  | ||||
| # Stores the first encountered exception in .exception. | ||||
| # Inspired by https://stackoverflow.com/a/33599967 | ||||
| class ErrorTrackingProcess(multiprocessing.Process): | ||||
|  | ||||
|     def __init__(self, *args, **kwargs): | ||||
|         super(ErrorTrackingProcess, self).__init__(*args, **kwargs) | ||||
|         self._pconn, self._cconn = multiprocessing.Pipe() | ||||
|         self._exception = None | ||||
|  | ||||
|     def run(self): | ||||
|         # Disable stderr printing at the OS level so that the workers do not | ||||
|         # print to stderr. | ||||
|         # Can't use sys.stderr.close, otherwise Python `raise` will error with | ||||
|         # ValueError: I/O operation on closed file. | ||||
|         os.close(sys.stderr.fileno()) | ||||
|         try: | ||||
|             super(ErrorTrackingProcess, self).run() | ||||
|             self._cconn.send(None) | ||||
|         except Exception as e: | ||||
|             self._cconn.send(ExceptionWrapper(sys.exc_info())) | ||||
|             raise | ||||
|  | ||||
|     @property | ||||
|     def exception(self): | ||||
|         if self._pconn.poll(): | ||||
|             self._exception = self._pconn.recv() | ||||
|         if self._exception is None: | ||||
|             return None | ||||
|         else: | ||||
|             return self._exception.exc_type(self._exception.exc_msg) | ||||
|  | ||||
|     # ESRCH means that os.kill could not find a live process with that pid | ||||
|     def send_signal(self, signum, ignore_ESRCH=False): | ||||
|         try: | ||||
|             os.kill(self.pid, signum) | ||||
|         except OSError as e: | ||||
|             if not ignore_ESRCH or e.errno != errno.ESRCH: | ||||
|                 raise | ||||
|  | ||||
|  | ||||
| class ErrorDataset(Dataset): | ||||
|  | ||||
|     def __init__(self, size): | ||||
| @ -82,6 +155,84 @@ class ErrorDataset(Dataset): | ||||
|         return self.size | ||||
|  | ||||
|  | ||||
| class SegfaultDataset(Dataset): | ||||
|  | ||||
|     def __init__(self, size): | ||||
|         self.size = size | ||||
|  | ||||
|     def __getitem__(self, idx): | ||||
|         return ctypes.string_at(0) | ||||
|  | ||||
|     def __len__(self): | ||||
|         return self.size | ||||
|  | ||||
|  | ||||
| class SleepDataset(Dataset): | ||||
|  | ||||
|     def __init__(self, size, sleep_sec): | ||||
|         self.size = size | ||||
|         self.sleep_sec = sleep_sec | ||||
|  | ||||
|     def __getitem__(self, idx): | ||||
|         time.sleep(self.sleep_sec) | ||||
|         return idx | ||||
|  | ||||
|     def __len__(self): | ||||
|         return self.size | ||||
|  | ||||
|  | ||||
| class SeedDataset(Dataset): | ||||
|  | ||||
|     def __init__(self, size): | ||||
|         self.size = size | ||||
|  | ||||
|     def __getitem__(self, idx): | ||||
|         return torch.initial_seed() | ||||
|  | ||||
|     def __len__(self): | ||||
|         return self.size | ||||
|  | ||||
|  | ||||
| # Inspired by https://stackoverflow.com/a/26703365 | ||||
| # This will ensure that each worker processes at least one sample | ||||
| class SynchronizedSeedDataset(Dataset): | ||||
|  | ||||
|     def __init__(self, size, num_workers): | ||||
|         assert size >= num_workers | ||||
|         self.count = multiprocessing.Value('i', 0) | ||||
|         self.barrier = multiprocessing.Semaphore(0) | ||||
|         self.num_workers = num_workers | ||||
|         self.size = size | ||||
|  | ||||
|     def __getitem__(self, idx): | ||||
|         self.count.value += 1 | ||||
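|         # Simple two-phase barrier: the last worker to arrive releases the | ||||
|         # semaphore, and every worker re-releases it after acquiring, so all | ||||
|         # workers block here until each has fetched at least one item. | ||||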
|         if self.count.value == self.num_workers: | ||||
|             self.barrier.release() | ||||
|         self.barrier.acquire() | ||||
|         self.barrier.release() | ||||
|         return torch.initial_seed() | ||||
|  | ||||
|     def __len__(self): | ||||
|         return self.size | ||||
|  | ||||
|  | ||||
| def _test_timeout(): | ||||
|     dataset = SleepDataset(10, 10) | ||||
|     dataloader = DataLoader(dataset, batch_size=2, num_workers=2, timeout=1) | ||||
|     _ = next(iter(dataloader)) | ||||
|  | ||||
|  | ||||
| def _test_segfault(): | ||||
|     dataset = SegfaultDataset(10) | ||||
|     dataloader = DataLoader(dataset, batch_size=2, num_workers=2) | ||||
|     _ = next(iter(dataloader)) | ||||
|  | ||||
|  | ||||
| # Custom worker init function used by test_worker_init_fn below. | ||||
| def init_fn(worker_id): | ||||
|     torch.manual_seed(12345) | ||||
|  | ||||
|  | ||||
| class TestDataLoader(TestCase): | ||||
|  | ||||
|     def setUp(self): | ||||
| @ -148,6 +299,62 @@ class TestDataLoader(TestCase): | ||||
|             self.assertTrue(input.is_pinned()) | ||||
|             self.assertTrue(target.is_pinned()) | ||||
|  | ||||
|     def test_multiple_dataloaders(self): | ||||
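|         # Interleave iteration over two multi-worker loaders to check that | ||||
|         # their worker pools do not interfere with each other. | ||||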
|         loader1_it = iter(DataLoader(self.dataset, num_workers=1)) | ||||
|         loader2_it = iter(DataLoader(self.dataset, num_workers=2)) | ||||
|         next(loader1_it) | ||||
|         next(loader1_it) | ||||
|         next(loader2_it) | ||||
|         next(loader2_it) | ||||
|         next(loader1_it) | ||||
|         next(loader2_it) | ||||
|  | ||||
|     @unittest.skipIf(True, "flaky test") | ||||
|     def test_segfault(self): | ||||
|         p = ErrorTrackingProcess(target=_test_segfault) | ||||
|         p.start() | ||||
|         p.join(JOIN_TIMEOUT) | ||||
|         try: | ||||
|             self.assertFalse(p.is_alive()) | ||||
|             self.assertNotEqual(p.exitcode, 0) | ||||
|             if IS_WINDOWS: | ||||
|                 self.assertIsInstance(p.exception, OSError) | ||||
|                 self.assertRegex(str(p.exception), r'access violation reading ') | ||||
|             else: | ||||
|                 self.assertIsInstance(p.exception, RuntimeError) | ||||
|                 self.assertRegex(str(p.exception), r'DataLoader worker \(pid \d+\) is killed by signal: ') | ||||
|         finally: | ||||
|             p.terminate() | ||||
|  | ||||
|     def test_timeout(self): | ||||
|         p = ErrorTrackingProcess(target=_test_timeout) | ||||
|         p.start() | ||||
|         p.join(JOIN_TIMEOUT) | ||||
|         try: | ||||
|             self.assertFalse(p.is_alive()) | ||||
|             self.assertNotEqual(p.exitcode, 0) | ||||
|             self.assertIsInstance(p.exception, RuntimeError) | ||||
|             self.assertRegex(str(p.exception), r'DataLoader timed out after \d+ seconds') | ||||
|         finally: | ||||
|             p.terminate() | ||||
|  | ||||
|     def test_worker_seed(self): | ||||
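|         # Each worker is seeded differently, so collecting | ||||
|         # torch.initial_seed() from every worker must yield num_workers | ||||
|         # distinct values. | ||||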
|         num_workers = 6 | ||||
|         dataset = SynchronizedSeedDataset(num_workers, num_workers) | ||||
|         dataloader = DataLoader(dataset, batch_size=1, num_workers=num_workers) | ||||
|         seeds = set() | ||||
|         for batch in dataloader: | ||||
|             seeds.add(batch[0]) | ||||
|         self.assertEqual(len(seeds), num_workers) | ||||
|  | ||||
|     def test_worker_init_fn(self): | ||||
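|         # init_fn reseeds every worker to 12345, so every sample returned by | ||||
|         # SeedDataset (which yields torch.initial_seed()) must equal 12345. | ||||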
|         dataset = SeedDataset(4) | ||||
|         dataloader = DataLoader(dataset, batch_size=2, num_workers=2, | ||||
|                                 worker_init_fn=init_fn) | ||||
|         for batch in dataloader: | ||||
|             self.assertEqual(12345, batch[0]) | ||||
|             self.assertEqual(12345, batch[1]) | ||||
|  | ||||
|     def test_shuffle(self): | ||||
|         self._test_shuffle(DataLoader(self.dataset, shuffle=True)) | ||||
|  | ||||
| @ -223,17 +430,17 @@ class TestDataLoader(TestCase): | ||||
|         "check that workers exit even if the iterator is not exhausted" | ||||
|         loader = iter(DataLoader(self.dataset, batch_size=2, num_workers=4, pin_memory=True)) | ||||
|         workers = loader.workers | ||||
|         pin_thread = loader.pin_thread | ||||
|         worker_manager_thread = loader.worker_manager_thread | ||||
|         for i, sample in enumerate(loader): | ||||
|             if i == 3: | ||||
|                 break | ||||
|         del loader | ||||
|         for w in workers: | ||||
|             w.join(1.0)  # timeout of one second | ||||
|             w.join(JOIN_TIMEOUT) | ||||
|             self.assertFalse(w.is_alive(), 'subprocess not terminated') | ||||
|             self.assertEqual(w.exitcode, 0) | ||||
|         pin_thread.join(1.0) | ||||
|         self.assertFalse(pin_thread.is_alive()) | ||||
|         worker_manager_thread.join(JOIN_TIMEOUT) | ||||
|         self.assertFalse(worker_manager_thread.is_alive()) | ||||
|  | ||||
|     def test_len(self): | ||||
|         def check_len(dl, expected): | ||||
| @ -276,6 +483,23 @@ class TestDataLoader(TestCase): | ||||
|             batch = next(iter(loader)) | ||||
|             self.assertIsInstance(batch, tt) | ||||
|  | ||||
|     @unittest.skipIf(not TEST_NUMPY, "numpy unavailable") | ||||
|     def test_default_collate_bad_numpy_types(self): | ||||
|         import numpy as np | ||||
|  | ||||
|         # Should be a no-op | ||||
|         arr = np.array(['a', 'b', 'c']) | ||||
|         default_collate(arr) | ||||
|  | ||||
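|         # Nested string arrays and object arrays cannot be converted to | ||||
|         # tensors, so default_collate is expected to raise TypeError. | ||||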
|         arr = np.array([[['a', 'b', 'c']]]) | ||||
|         self.assertRaises(TypeError, lambda: default_collate(arr)) | ||||
|  | ||||
|         arr = np.array([object(), object(), object()]) | ||||
|         self.assertRaises(TypeError, lambda: default_collate(arr)) | ||||
|  | ||||
|         arr = np.array([[[object(), object(), object()]]]) | ||||
|         self.assertRaises(TypeError, lambda: default_collate(arr)) | ||||
|  | ||||
|  | ||||
| class StringDataset(Dataset): | ||||
|     def __init__(self): | ||||
|  | ||||
| @ -2,7 +2,7 @@ from common import TestCase, run_tests | ||||
| import math | ||||
| import torch | ||||
| from torch.autograd import Variable, gradcheck | ||||
| from torch.distributions import Bernoulli, Multinomial, Normal | ||||
| from torch.distributions import Bernoulli, Categorical, Normal | ||||
|  | ||||
|  | ||||
| class TestDistributions(TestCase): | ||||
| @ -47,22 +47,22 @@ class TestDistributions(TestCase): | ||||
|     def test_multinomial_1d(self): | ||||
|         p = Variable(torch.Tensor([0.1, 0.2, 0.3]), requires_grad=True) | ||||
|         # TODO: this should return a 0-dim tensor once we have Scalar support | ||||
|         self.assertEqual(Multinomial(p).sample().size(), (1,)) | ||||
|         self.assertEqual(Multinomial(p).sample_n(1).size(), (1, 1)) | ||||
|         self._gradcheck_log_prob(Multinomial, (p,)) | ||||
|         self.assertEqual(Categorical(p).sample().size(), (1,)) | ||||
|         self.assertEqual(Categorical(p).sample_n(1).size(), (1, 1)) | ||||
|         self._gradcheck_log_prob(Categorical, (p,)) | ||||
|  | ||||
|     def test_multinomial_2d(self): | ||||
|         probabilities = [[0.1, 0.2, 0.3], [0.5, 0.3, 0.2]] | ||||
|         p = Variable(torch.Tensor(probabilities), requires_grad=True) | ||||
|         self.assertEqual(Multinomial(p).sample().size(), (2,)) | ||||
|         self.assertEqual(Multinomial(p).sample_n(6).size(), (6, 2)) | ||||
|         self._gradcheck_log_prob(Multinomial, (p,)) | ||||
|         self.assertEqual(Categorical(p).sample().size(), (2,)) | ||||
|         self.assertEqual(Categorical(p).sample_n(6).size(), (6, 2)) | ||||
|         self._gradcheck_log_prob(Categorical, (p,)) | ||||
|  | ||||
|         def ref_log_prob(idx, val, log_prob): | ||||
|             sample_prob = p.data[idx][val] / p.data[idx].sum() | ||||
|             self.assertEqual(log_prob, math.log(sample_prob)) | ||||
|  | ||||
|         self._check_log_prob(Multinomial(p), ref_log_prob) | ||||
|         self._check_log_prob(Categorical(p), ref_log_prob) | ||||
|  | ||||
|     def test_normal(self): | ||||
|         mean = Variable(torch.randn(5, 5), requires_grad=True) | ||||
|  | ||||
| @ -15,6 +15,15 @@ try: | ||||
| except ImportError: | ||||
|     HAS_TORCHVISION = False | ||||
|  | ||||
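| # Disable the CUDA-dependent (fuser/cpp) tests when the CUDA toolkit that | ||||
| # PyTorch was compiled against is too old for an installed GPU: compute | ||||
| # capability 6.x requires CUDA >= 8.0 and 7.x requires CUDA >= 9.0. | ||||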
| RUN_CUDA = torch.cuda.is_available() | ||||
| if torch.cuda.is_available(): | ||||
|     CUDA_VERSION = torch._C._cuda_getCompiledVersion() | ||||
|     for d in range(torch.cuda.device_count()): | ||||
|         major = torch.cuda.get_device_capability(d)[0] | ||||
|         if (CUDA_VERSION < 8000 and major >= 6) or (CUDA_VERSION < 9000 and major >= 7): | ||||
|             RUN_CUDA = False | ||||
|  | ||||
|  | ||||
| skipIfNoTorchVision = unittest.skipIf(not HAS_TORCHVISION, "no torchvision") | ||||
|  | ||||
|  | ||||
| @ -41,6 +50,12 @@ def LSTMCellC(*args, **kwargs): | ||||
| class TestJit(TestCase): | ||||
|     maxDiff = None | ||||
|  | ||||
|     def assertExpectedTrace(self, trace, *args, **kwargs): | ||||
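|         # Lint, run dead-code elimination, then lint again so the expect | ||||
|         # file only records live nodes of the trace. | ||||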
|         torch._C._jit_pass_lint(trace) | ||||
|         torch._C._jit_pass_dce(trace) | ||||
|         torch._C._jit_pass_lint(trace) | ||||
|         self.assertExpected(str(trace), *args, **kwargs) | ||||
|  | ||||
|     def test_simple(self): | ||||
|         x = Variable(torch.Tensor([0.4]), requires_grad=True) | ||||
|         y = Variable(torch.Tensor([0.7]), requires_grad=True) | ||||
| @ -52,7 +67,64 @@ class TestJit(TestCase): | ||||
|         torch._C._jit_pass_lint(trace) | ||||
|         self.assertExpected(str(trace)) | ||||
|  | ||||
|     @unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA") | ||||
|     def test_scopes(self): | ||||
|         x = Variable(torch.Tensor([0.4]), requires_grad=True) | ||||
|         y = Variable(torch.Tensor([0.7]), requires_grad=True) | ||||
|  | ||||
|         def f(x, y): | ||||
|             out = x + y | ||||
|             with torch.jit.scope('Foo', out): | ||||
|                 out = x * out | ||||
|                 with torch.jit.scope('Bar', out): | ||||
|                     out = torch.tanh(out) | ||||
|                 out = torch.sigmoid(out) | ||||
|             return out | ||||
|  | ||||
|         trace, z = torch.jit.trace(f, (x, y), nderivs=0) | ||||
|         torch._C._jit_pass_lint(trace) | ||||
|         self.assertExpected(str(trace)) | ||||
|  | ||||
|     def test_scopes_intermediate_node(self): | ||||
|  | ||||
|         class Net(nn.Module): | ||||
|             def forward(self, x): | ||||
|                 return F.log_softmax(x, dim=0) | ||||
|  | ||||
|         net = Net() | ||||
|         t = Variable(torch.ones(2), requires_grad=True) | ||||
|         trace, _ = torch.jit.trace(net, (t, )) | ||||
|         torch.onnx._optimize_trace(trace) | ||||
|  | ||||
|         self.assertExpectedTrace(trace) | ||||
|  | ||||
|     def test_scopes_identity_node(self): | ||||
|  | ||||
|         class Net(nn.Module): | ||||
|  | ||||
|             def __init__(self): | ||||
|                 super(Net, self).__init__() | ||||
|                 self.features = nn.Sequential( | ||||
|                     nn.Conv2d(3, 64, kernel_size=11, stride=4, padding=2), | ||||
|                     nn.ReLU(inplace=True), | ||||
|                     nn.MaxPool2d(kernel_size=3, stride=2), | ||||
|                 ) | ||||
|  | ||||
|             def forward(self, x): | ||||
|                 x = self.features(x) | ||||
|                 return x | ||||
|  | ||||
|         model = Net() | ||||
|  | ||||
|         t = Variable(torch.ones(1, 3, 227, 227), requires_grad=True) | ||||
|  | ||||
|         with torch.onnx.set_training(model, False): | ||||
|             trace, _ = torch.jit.trace(model, (t, )) | ||||
|  | ||||
|         torch.onnx._optimize_trace(trace) | ||||
|  | ||||
|         self.assertExpectedTrace(trace) | ||||
|  | ||||
|     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA") | ||||
|     def test_lstm_fusion(self): | ||||
|         input = Variable(torch.randn(3, 10).cuda()) | ||||
|         hx = Variable(torch.randn(3, 20).cuda()) | ||||
| @ -65,7 +137,7 @@ class TestJit(TestCase): | ||||
|         torch._C._jit_pass_lint(trace) | ||||
|         self.assertExpected(str(trace)) | ||||
|  | ||||
|     @unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA") | ||||
|     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA") | ||||
|     def test_run_lstm_fusion(self): | ||||
|         input = Variable(torch.randn(3, 10).cuda()) | ||||
|         hx = Variable(torch.randn(3, 20).cuda()) | ||||
| @ -78,7 +150,7 @@ class TestJit(TestCase): | ||||
|         z2 = CompiledLSTMCell(input, (hx, cx), *module.parameters(), _assert_compiled=True) | ||||
|         self.assertEqual(z, z2) | ||||
|  | ||||
|     @unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA") | ||||
|     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA") | ||||
|     def test_run_lstm_fusion_concat(self): | ||||
|         input = Variable(torch.randn(3, 10).cuda()) | ||||
|         hx = Variable(torch.randn(3, 20).cuda()) | ||||
| @ -91,7 +163,7 @@ class TestJit(TestCase): | ||||
|         z2 = CompiledLSTMCell(input, (hx, cx), *module.parameters(), _assert_compiled=True) | ||||
|         self.assertEqual(z, z2) | ||||
|  | ||||
|     @unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA") | ||||
|     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA") | ||||
|     def test_concat_fusion(self): | ||||
|         hx = Variable(torch.randn(3, 20).cuda()) | ||||
|         cx = Variable(torch.randn(3, 20).cuda()) | ||||
| @ -105,7 +177,7 @@ class TestJit(TestCase): | ||||
|         torch._C._jit_pass_lint(trace) | ||||
|         self.assertExpected(str(trace)) | ||||
|  | ||||
|     @unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA") | ||||
|     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA") | ||||
|     def test_fusion_distribute(self): | ||||
|         def f(x, y): | ||||
|             z1, z2 = (x + y).chunk(2, dim=1) | ||||
| @ -146,7 +218,7 @@ class TestJit(TestCase): | ||||
|         self.assertEqual(z, torch.sigmoid(torch.tanh(x * (x + y)))) | ||||
|         self.assertEqual(z, z2) | ||||
|  | ||||
|     @unittest.skipIf(not torch.cuda.is_available(), "fuser requires CUDA") | ||||
|     @unittest.skipIf(not RUN_CUDA, "fuser requires CUDA") | ||||
|     def test_compile_addc(self): | ||||
|         x = Variable(torch.Tensor([0.4]), requires_grad=True).cuda() | ||||
|         y = Variable(torch.Tensor([0.7]), requires_grad=True).cuda() | ||||
| @ -613,7 +685,7 @@ class TestJit(TestCase): | ||||
|         assert(torch.equal(torch.ones([2, 2]), t_node.t("a"))) | ||||
|         self.assertExpected(str(g2)) | ||||
|  | ||||
|     @unittest.skipIf(not torch.cuda.is_available(), "cpp tests require CUDA") | ||||
|     @unittest.skipIf(not RUN_CUDA, "cpp tests require CUDA") | ||||
|     def test_cpp(self): | ||||
|         torch._C._jit_run_cpp_tests() | ||||
|  | ||||
|  | ||||
| @ -11,14 +11,15 @@ import torch.cuda | ||||
| import torch.multiprocessing as mp | ||||
| from torch.autograd import Variable | ||||
| from torch.nn import Parameter | ||||
| from common import TestCase, run_tests | ||||
| from common import TestCase, run_tests, IS_WINDOWS | ||||
|  | ||||
|  | ||||
| TEST_REPEATS = 30 | ||||
| HAS_SHM_FILES = os.path.isdir('/dev/shm') | ||||
| TEST_CUDA_IPC = torch.cuda.is_available() and \ | ||||
|     sys.version_info[0] == 3 and \ | ||||
|     sys.platform != 'darwin' | ||||
|     sys.platform != 'darwin' and \ | ||||
|     sys.platform != 'win32' | ||||
| TEST_MULTIGPU = TEST_CUDA_IPC and torch.cuda.device_count() > 1 | ||||
|  | ||||
|  | ||||
| @ -318,6 +319,7 @@ class TestMultiprocessing(TestCase): | ||||
|             self.assertEqual(tensor_size, 5) | ||||
|             self.assertEqual(storage_size, 5) | ||||
|  | ||||
|     @unittest.skipIf(IS_WINDOWS, 'not applicable to Windows (only fails with fork)') | ||||
|     @unittest.skipIf(not torch.cuda.is_available(), 'CUDA not available') | ||||
|     def test_cuda_bad_call(self): | ||||
|         # Initialize CUDA | ||||
|  | ||||
test/test_nn.py (416 lines changed)
							| @ -27,7 +27,7 @@ from torch.nn import Parameter | ||||
| from torch.nn.parallel._functions import Broadcast | ||||
| from common_nn import NNTestCase, ModuleTest, CriterionTest, TestBase, \ | ||||
|     module_tests, criterion_tests, TEST_CUDA, TEST_MULTIGPU, TEST_CUDNN, \ | ||||
|     TEST_CUDNN_VERSION, loss_reference_fns | ||||
|     TEST_CUDNN_VERSION, loss_reference_fns, get_size_average | ||||
| from common import freeze_rng_state, run_tests, TestCase, skipIfNoLapack, \ | ||||
|     TEST_SCIPY, download_file | ||||
|  | ||||
| @ -934,6 +934,12 @@ class TestNN(NNTestCase): | ||||
|         self.assertEqual(output[0][0].sum().data[0], 0) | ||||
|         self.assertEqual(output[1][2].sum().data[0], 0) | ||||
|  | ||||
|         embedding = nn.Embedding(10, 20, padding_idx=0, sparse=True) | ||||
|         input = Variable(torch.LongTensor([[0, 2, 4, 5], [4, 3, 0, 9]])) | ||||
|         output = embedding(input) | ||||
|         self.assertEqual(output[0][0].sum().data[0], 0) | ||||
|         self.assertEqual(output[1][2].sum().data[0], 0) | ||||
|  | ||||
|     def test_embedding_max_norm(self): | ||||
|         embedding = nn.Embedding(22, 5, max_norm=1.0) | ||||
|         input = Variable(torch.LongTensor([2, 8, 8, 6])) | ||||
| @ -1060,6 +1066,26 @@ class TestNN(NNTestCase): | ||||
|         offset[-1] = 100 | ||||
|         self.assertRaises(ValueError, lambda: es(input.view(-1), offset)) | ||||
|  | ||||
|     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") | ||||
|     def test_pool3d_size_one_feature_dim(self): | ||||
|         # Tests unusual (non-contiguous) strides for a feature dim of size 1 | ||||
|         x = torch.randn(7, 1, 5, 3, 2).cuda() | ||||
|         strange_strides = (30, 1234, 6, 2, 1) | ||||
|         y = x.new().set_(x.storage(), x.storage_offset(), x.size(), strange_strides) | ||||
|         x = x.cpu().set_(x.cpu().storage(), x.storage_offset(), x.size(), strange_strides) | ||||
|         x, y = Variable(x), Variable(y) | ||||
|  | ||||
|         to_test = { | ||||
|             'max_pool3d': lambda t: F.max_pool3d(t, (5, 1, 1), stride=(5, 1, 1)), | ||||
|             'avg_pool3d': lambda t: F.avg_pool3d(t, (5, 1, 1), stride=(5, 1, 1)), | ||||
|         } | ||||
|  | ||||
|         for test, fn in to_test.items(): | ||||
|             # Should not crash | ||||
|             out_y = fn(y) | ||||
|             out_x = fn(x) | ||||
|             self.assertEqual(out_y, out_x.cuda(), test) | ||||
|  | ||||
|     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") | ||||
|     def test_AvgPool3d_backward_after_cat_dim1_cuda(self): | ||||
|         # x has to have batch_size 1 to test contiguous checks | ||||
| @ -1609,6 +1635,60 @@ class TestNN(NNTestCase): | ||||
|         self.assertEqual(out.get_device(), 0) | ||||
|         self.assertEqual(out.data, expected_out) | ||||
|  | ||||
|     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") | ||||
|     def test_data_parallel_module_kwargs_only_empty_list(self): | ||||
|         class Net(nn.Module): | ||||
|             def __init__(self): | ||||
|                 super(Net, self).__init__() | ||||
|                 self.l = l | ||||
|  | ||||
|             def forward(self, input): | ||||
|                 return self.l(input['data']) | ||||
|  | ||||
|         l = nn.Linear(10, 5).float().cuda() | ||||
|         i = Variable(torch.randn(20, 10).float().cuda()) | ||||
|         expected_out = l(i).data | ||||
|         n = nn.DataParallel(Net()) | ||||
|         out = n(input={'data': i, 'unused': []}) | ||||
|         self.assertEqual(out.get_device(), 0) | ||||
|         self.assertEqual(out.data, expected_out) | ||||
|  | ||||
|     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") | ||||
|     def test_data_parallel_module_kwargs_only_empty_dict(self): | ||||
|         class Net(nn.Module): | ||||
|             def __init__(self): | ||||
|                 super(Net, self).__init__() | ||||
|                 self.l = l | ||||
|  | ||||
|             def forward(self, input): | ||||
|                 return self.l(input['data']) | ||||
|  | ||||
|         l = nn.Linear(10, 5).float().cuda() | ||||
|         i = Variable(torch.randn(20, 10).float().cuda()) | ||||
|         expected_out = l(i).data | ||||
|         n = nn.DataParallel(Net()) | ||||
|         out = n(input={'data': i, 'unused': {}}) | ||||
|         self.assertEqual(out.get_device(), 0) | ||||
|         self.assertEqual(out.data, expected_out) | ||||
|  | ||||
|     @unittest.skipIf(not TEST_CUDA, "CUDA unavailable") | ||||
|     def test_data_parallel_module_kwargs_only_empty_tuple(self): | ||||
|         class Net(nn.Module): | ||||
|             def __init__(self): | ||||
|                 super(Net, self).__init__() | ||||
|                 self.l = l | ||||
|  | ||||
|             def forward(self, input): | ||||
|                 return self.l(input['data']) | ||||
|  | ||||
|         l = nn.Linear(10, 5).float().cuda() | ||||
|         i = Variable(torch.randn(20, 10).float().cuda()) | ||||
|         expected_out = l(i).data | ||||
|         n = nn.DataParallel(Net()) | ||||
|         out = n(input={'data': i, 'unused': ()}) | ||||
|         self.assertEqual(out.get_device(), 0) | ||||
|         self.assertEqual(out.data, expected_out) | ||||
|  | ||||
|     def test_state_dict(self): | ||||
|         l = nn.Linear(5, 5) | ||||
|         block = nn.Module() | ||||
| @ -1909,6 +1989,32 @@ class TestNN(NNTestCase): | ||||
|                 input = Variable(torch.Tensor(torch.Size((3, ) * dims))) | ||||
|                 self.assertRaises(ValueError, lambda: module(input)) | ||||
|  | ||||
|     def test_conv_shapecheck(self): | ||||
|         def test(should_raise, module, input_size): | ||||
|             input = Variable(torch.Tensor(3, *input_size)) | ||||
|             if should_raise: | ||||
|                 self.assertRaises(RuntimeError, lambda: module(input)) | ||||
|             else: | ||||
|                 # just run it to ensure no exception raised. | ||||
|                 module(input) | ||||
|  | ||||
|         # Conv1d | ||||
|         test(True, nn.Conv1d(1, 1, 3), (1, 2)) | ||||
|         test(True, nn.Conv1d(1, 1, 3, stride=2), (1, 2)) | ||||
|         test(False, nn.Conv1d(1, 1, 2), (1, 2)) | ||||
|         test(False, nn.Conv1d(1, 1, 2, stride=2), (1, 2)) | ||||
|         test(False, nn.Conv1d(1, 1, 3, stride=2, padding=1), (1, 2)) | ||||
|  | ||||
|         # Conv2d | ||||
|         test(True, nn.Conv2d(1, 1, (3, 3)), (1, 2, 2)) | ||||
|         test(False, nn.Conv2d(1, 1, (3, 3)), (1, 3, 3)) | ||||
|         test(False, nn.Conv2d(1, 1, (3, 3), padding=1), (1, 2, 2)) | ||||
|  | ||||
|         # Conv3D | ||||
|         test(True, nn.Conv3d(1, 1, (3, 3, 3)), (1, 2, 2, 2)) | ||||
|         test(False, nn.Conv3d(1, 1, (3, 3, 3)), (1, 3, 3, 3)) | ||||
|         test(False, nn.Conv3d(1, 1, (3, 3, 3), padding=1), (1, 2, 2, 2)) | ||||
|  | ||||
|     def test_ConvTranspose2d_output_size(self): | ||||
|         m = nn.ConvTranspose2d(3, 4, 3, 3, 0, 2) | ||||
|         i = Variable(torch.randn(2, 3, 6, 6)) | ||||
| @ -2249,6 +2355,38 @@ class TestNN(NNTestCase): | ||||
|             weight_data[:] = 4 | ||||
|             self.assertEqual(weight_data, all_vars[4].data) | ||||
|  | ||||
|     @unittest.skipIf(not TEST_CUDNN, 'CUDNN not available') | ||||
|     def test_cudnn_weight_tying(self): | ||||
|         rnns = [ | ||||
|             nn.LSTM(10, 20, batch_first=True, bidirectional=True), | ||||
|             nn.GRU(10, 20, batch_first=True, bidirectional=True), | ||||
|             nn.RNN(10, 20, batch_first=True, bidirectional=True) | ||||
|         ] | ||||
|         for rnn in rnns: | ||||
|             rnn.bias_ih_l0_reverse = rnn.bias_ih_l0 | ||||
|             rnn.cuda() | ||||
|             input = Variable(torch.randn(5, 4, 10).cuda(), requires_grad=True) | ||||
|             hx = Variable(torch.randn(2, 5, 20).cuda(), requires_grad=True) | ||||
|             all_vars = [input, hx] + list(rnn.parameters()) | ||||
|             opt = torch.optim.SGD(rnn.parameters(), lr=0.1) | ||||
|             opt.zero_grad() | ||||
|             if isinstance(rnn, nn.LSTM): | ||||
|                 cx = Variable(torch.randn(2, 5, 20).cuda(), requires_grad=True) | ||||
|                 all_vars[2:2] = [cx] | ||||
|                 hx = (hx, cx) | ||||
|  | ||||
|             with warnings.catch_warnings(record=True) as w: | ||||
|                 output = rnn(input, hx) | ||||
|             output[0].sum().backward() | ||||
|  | ||||
|             opt.step() | ||||
|             with warnings.catch_warnings(record=True) as w: | ||||
|                 output_cuda = rnn(input, hx) | ||||
|             rnn.cpu() | ||||
|             hx = (hx[0].cpu(), hx[1].cpu()) if isinstance(rnn, nn.LSTM) else hx.cpu() | ||||
|             output_cpu = rnn(input.cpu(), hx) | ||||
|             self.assertEqual(output_cuda, output_cpu) | ||||
|  | ||||
|     @unittest.skipIf(not TEST_CUDA, 'CUDA not available') | ||||
|     def test_cuda_rnn_fused(self): | ||||
|         def copy_rnn(rnn1, rnn2): | ||||
| @ -2318,6 +2456,69 @@ class TestNN(NNTestCase): | ||||
|         finally: | ||||
|             torch.backends.cudnn.enabled = prev | ||||
|  | ||||
|     def test_rnn_args_check(self): | ||||
|         input_size = 3 | ||||
|         hidden_size = 5 | ||||
|         num_layers = 2 | ||||
|         batch_size = 4 | ||||
|         seq_len = 6 | ||||
|         num_directions = 1 | ||||
|  | ||||
|         def test(input_shape, hidden_shape, mode): | ||||
|             for input, hidden in get_inputs(input_shape, hidden_shape, mode): | ||||
|                 model = getattr(nn, mode)(input_size, hidden_size, num_layers) | ||||
|                 self.assertRaises(RuntimeError, lambda: model(input, hidden)) | ||||
|  | ||||
|         correct_input_shape = (seq_len, batch_size, input_size) | ||||
|         correct_hidden_shape = (num_layers * num_directions, batch_size, hidden_size) | ||||
|  | ||||
|         def update_tuple(tup, dim, delta): | ||||
|             new_tup = list(tup) | ||||
|             new_tup[dim] = delta | ||||
|             return tuple(new_tup) | ||||
|  | ||||
|         def get_inputs(input_shape, hidden_shape, mode): | ||||
|             '''Returns a list of (input, hidden) pairs to feed to the model.''' | ||||
|             input = Variable(torch.randn(input_shape)) | ||||
|             hidden = Variable(torch.randn(hidden_shape)) | ||||
|             if mode != 'LSTM': | ||||
|                 return [(input, hidden)] | ||||
|             if hidden_shape == correct_hidden_shape: | ||||
|                 return [(input, (hidden, hidden))] | ||||
|             good_hidden = Variable(torch.randn(correct_hidden_shape)) | ||||
|             return [ | ||||
|                 (input, (hidden, good_hidden)), | ||||
|                 (input, (good_hidden, hidden)), | ||||
|             ] | ||||
|  | ||||
|         rnn_modes = ['RNN', 'GRU', 'LSTM'] | ||||
|         for mode in rnn_modes: | ||||
|             # Incorrect input batch size | ||||
|             input_shape = update_tuple(correct_input_shape, 1, -1) | ||||
|             hidden_shape = correct_hidden_shape | ||||
|             test(input_shape, hidden_shape, mode) | ||||
|  | ||||
|             # Incorrect hidden batch size | ||||
|             input_shape = correct_input_shape | ||||
|             hidden_shape = update_tuple(correct_hidden_shape, 1, -1) | ||||
|             test(input_shape, hidden_shape, mode) | ||||
|  | ||||
|             # Incorrect input size | ||||
|             input_shape = update_tuple(correct_input_shape, 2, -1) | ||||
|             hidden_shape = correct_hidden_shape | ||||
|             test(input_shape, hidden_shape, mode) | ||||
|  | ||||
|             # Incorrect hidden size | ||||
|             input_shape = correct_input_shape | ||||
|             hidden_shape = update_tuple(correct_hidden_shape, 2, -1) | ||||
|             test(input_shape, hidden_shape, mode) | ||||
|  | ||||
|             # Incorrect hidden[0] | ||||
|             input_shape = correct_input_shape | ||||
|             hidden_shape = update_tuple(correct_hidden_shape, 0, -1) | ||||
|             test(input_shape, hidden_shape, mode) | ||||
|  | ||||
|     def test_rnn_initial_hidden_state(self): | ||||
|         rnn_modes = ['RNN', 'GRU', 'LSTM'] | ||||
|         for mode in rnn_modes: | ||||
| @ -2759,6 +2960,26 @@ class TestNN(NNTestCase): | ||||
|  | ||||
|         self.assertEqual(out1, out2) | ||||
|  | ||||
|     def test_elu_inplace_gradgrad(self): | ||||
|         v = Variable(torch.randn(8), requires_grad=True) | ||||
|  | ||||
|         def func(root): | ||||
|             x = root.clone() | ||||
|             return F.elu(x, inplace=True) | ||||
|  | ||||
|         gradcheck(func, [v]) | ||||
|         gradgradcheck(func, [v]) | ||||
|  | ||||
|     def test_hardtanh_inplace_gradgrad(self): | ||||
|         v = Variable(torch.randn(8), requires_grad=True) | ||||
|  | ||||
|         def func(root): | ||||
|             x = root.clone() | ||||
|             return F.hardtanh(x, inplace=True) | ||||
|  | ||||
|         gradcheck(func, [v]) | ||||
|         gradgradcheck(func, [v]) | ||||
|  | ||||
|     def test_batchnorm_raises_error_if_running_mean_is_not_same_size_as_input(self): | ||||
|         input = Variable(torch.rand(2, 10)) | ||||
|         running_var = torch.rand(10) | ||||
| @ -2844,39 +3065,25 @@ class TestNN(NNTestCase): | ||||
|         self.assertTrue(gradcheck(lambda x, y: F.cosine_similarity(x, y, dim=0), (input1, input2))) | ||||
|         self.assertTrue(gradcheck(lambda x, y: F.cosine_similarity(x, y, dim=-1), (input1, input2))) | ||||
|  | ||||
|         # Check cosine_similarity input/output shapes | ||||
|         input_size = (1, 3, 2, 1) | ||||
|         expected_size = (1, 2, 1) | ||||
|         input1 = Variable(torch.randn(input_size), requires_grad=True) | ||||
|         input2 = Variable(torch.randn(input_size), requires_grad=True) | ||||
|         self.assertEqual(F.cosine_similarity(input1, input2, dim=1).size(), expected_size) | ||||
|  | ||||
|     def test_grid_sample(self): | ||||
|         # test known input on CPU | ||||
|         input = Variable(torch.arange(1, 11).view(1, 1, 2, 5)) | ||||
|         grid = Variable(torch.Tensor( | ||||
|             [[-1, -0.5, 0, 0.2, 1], | ||||
|              [-1, -0.333, 0, 0.5, 1], | ||||
|              [-1, -0.5, 0, 0.3333, 1], | ||||
|              [-1, -0.2, 0, 0.2, 1]]).view(1, 2, 5, 2)) | ||||
|         output = F.grid_sample(input, grid) | ||||
|         groundtruth = torch.Tensor( | ||||
|             [[2.2500, 6.0000000000, 5.0000, 4.8340, 9.0000], | ||||
|              [2.2500, 6.333250045, 5.0000, 5.1000, 8.4000]]).view(1, 1, 2, 5) | ||||
|         self.assertEqual(output.data, groundtruth) | ||||
|         def test_cpu_against_cuda(N, C, H, W, padding_mode): | ||||
|             def test_shape(N, C, IH, IW, H, W, padding_mode): | ||||
|  | ||||
|         # do gradcheck | ||||
|         N = random.randint(1, 8) | ||||
|         C = random.randint(1, 8) | ||||
|         H = random.randint(1, 8) | ||||
|         W = random.randint(1, 8) | ||||
|         input = Variable(torch.randn(N, C, H, W), requires_grad=True) | ||||
|         grid = Variable(torch.randn(N, H, W, 2), requires_grad=True) | ||||
|         self.assertTrue(gradcheck(lambda inp, grid: F.grid_sample(inp, grid), (input, grid))) | ||||
|  | ||||
|         def test_cpu_against_cuda(N, C, H, W): | ||||
|             def test_shape(N, C, IH, IW, H, W): | ||||
|                 input_cpu = Variable(torch.randn(C, N, IH, IW).transpose(0, 1), requires_grad=True) | ||||
|                 grid_cpu = Variable(torch.randn(H, N, W, 2).transpose(0, 1), requires_grad=True) | ||||
|                 out_cpu = F.grid_sample(input_cpu, grid_cpu) | ||||
|                 out_cpu = F.grid_sample(input_cpu, grid_cpu, padding_mode=padding_mode) | ||||
|                 self.assertTrue(out_cpu.size() == torch.Size([N, C, H, W])) | ||||
|  | ||||
|                 input_cuda = Variable(input_cpu.data.transpose(0, 1).cuda().transpose(0, 1), requires_grad=True) | ||||
|                 grid_cuda = Variable(grid_cpu.data.transpose(0, 1).cuda().transpose(0, 1), requires_grad=True) | ||||
|                 out_cuda = F.grid_sample(input_cuda, grid_cuda) | ||||
|                 out_cuda = F.grid_sample(input_cuda, grid_cuda, padding_mode=padding_mode) | ||||
|                 self.assertEqual(out_cpu, out_cuda) | ||||
|  | ||||
|                 gradients = out_cpu.data.new(out_cpu.size()).normal_() | ||||
| @ -2889,15 +3096,15 @@ class TestNN(NNTestCase): | ||||
|                 base_input = torch.randn(C, IH, IW) | ||||
|                 input_cpu = Variable(base_input.expand(input_cuda.size()), requires_grad=True) | ||||
|                 grid_cpu = Variable(torch.randn(N, H, W, 2), requires_grad=True) | ||||
|                 out_cpu = F.grid_sample(input_cpu, grid_cpu) | ||||
|                 out_cpu = F.grid_sample(input_cpu, grid_cpu, padding_mode=padding_mode) | ||||
|  | ||||
|                 input_cuda = Variable(base_input.cuda().expand(input_cuda.size()), requires_grad=True) | ||||
|                 grid_cuda = Variable(grid_cpu.data.cuda(), requires_grad=True) | ||||
|                 out_cuda = F.grid_sample(input_cuda, grid_cuda) | ||||
|                 out_cuda = F.grid_sample(input_cuda, grid_cuda, padding_mode=padding_mode) | ||||
|                 self.assertEqual(out_cpu, out_cuda) | ||||
|  | ||||
|             # test same size output | ||||
|             test_shape(N, C, H, W, H, W) | ||||
|             test_shape(N, C, H, W, H, W, padding_mode) | ||||
|  | ||||
|             # test larger output | ||||
|             N = random.randint(1, 8) | ||||
| @ -2906,7 +3113,7 @@ class TestNN(NNTestCase): | ||||
|             IW = random.randint(1, 8) | ||||
|             H = random.randint(IH + 1, 12) | ||||
|             W = random.randint(IH + 1, 12) | ||||
|             test_shape(N, C, IH, IW, H, W) | ||||
|             test_shape(N, C, IH, IW, H, W, padding_mode) | ||||
|  | ||||
|             # test smaller output | ||||
|             N = random.randint(1, 8) | ||||
| @ -2915,21 +3122,44 @@ class TestNN(NNTestCase): | ||||
|             IW = random.randint(1, 8) | ||||
|             H = random.randint(1, IH) | ||||
|             W = random.randint(1, IW) | ||||
|             test_shape(N, C, IH, IW, H, W) | ||||
|             test_shape(N, C, IH, IW, H, W, padding_mode) | ||||
|  | ||||
|         # test CUDNN against CPU | ||||
|         if TEST_CUDNN: | ||||
|             test_cpu_against_cuda(N, C, H, W) | ||||
|         # test known input on CPU | ||||
|         for padding_mode in ['zeros', 'border']: | ||||
|  | ||||
|         # test CUDA (without CUDNN) against CPU | ||||
|         if TEST_CUDA: | ||||
|             input = Variable(torch.arange(1, 11).view(1, 1, 2, 5)) | ||||
|             grid = Variable(torch.Tensor( | ||||
|                 [[-0.9, -1.4, 0, 0.2, 1], | ||||
|                  [-1, -0.333, 0, 0.5, 1], | ||||
|                  [-1, -0.5, 0, 0.3333, 1], | ||||
|                  [-1, -0.2, 0, 1.1, 0.5]]).view(1, 2, 5, 2)) | ||||
|             output = F.grid_sample(input, grid, padding_mode=padding_mode) | ||||
|  | ||||
|             # GridSampler will automatically use CUDNN if it is available | ||||
|             # so we disable CUDNN temporarily | ||||
|             original_cudnn_enabled = cudnn.enabled | ||||
|             cudnn.enabled = False | ||||
|             test_cpu_against_cuda(N, C, H, W) | ||||
|             cudnn.enabled = original_cudnn_enabled | ||||
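|             # 'zeros' pads out-of-range grid locations with zeros, while | ||||
|             # 'border' clamps them to the nearest edge pixel, hence the | ||||
|             # slightly different reference values below. | ||||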
|             if padding_mode == 'zeros': | ||||
|                 groundtruth = torch.Tensor( | ||||
|                     [[0.9600, 6.0000000000, 5.0000, 4.8340, 9.0000], | ||||
|                      [2.2500, 6.333250045, 5.0000, 5.1000, 7.0000]]).view(1, 1, 2, 5) | ||||
|             else: | ||||
|                 groundtruth = torch.Tensor( | ||||
|                     [[1.2000, 6.0000000000, 5.0000, 4.8340, 9.0000], | ||||
|                      [2.2500, 6.333250045, 5.0000, 5.1000, 8.7500]]).view(1, 1, 2, 5) | ||||
|  | ||||
|             self.assertEqual(output.data, groundtruth) | ||||
|  | ||||
|             # do gradcheck | ||||
|             N = random.randint(1, 8) | ||||
|             C = random.randint(1, 8) | ||||
|             H = random.randint(1, 8) | ||||
|             W = random.randint(1, 8) | ||||
|             input = Variable(torch.randn(N, C, H, W), requires_grad=True) | ||||
|             grid = Variable(torch.randn(N, H, W, 2), requires_grad=True) | ||||
|             self.assertTrue(gradcheck( | ||||
|                 lambda inp, grid: F.grid_sample(inp, grid, padding_mode=padding_mode), | ||||
|                 (input, grid))) | ||||
|  | ||||
|             # test CUDA against CPU | ||||
|             if TEST_CUDA: | ||||
|                 test_cpu_against_cuda(N, C, H, W, padding_mode) | ||||
|  | ||||
|     def test_affine_grid(self): | ||||
|         # test known input on CPU | ||||
| @ -3637,22 +3867,62 @@ new_criterion_tests = [ | ||||
|         target_fn=lambda: torch.randn(15, 10).gt(0).double(), | ||||
|         desc='weights' | ||||
|     ), | ||||
|     dict( | ||||
|         module_name='NLLLoss', | ||||
|         input_size=(2, 3, 5, 5, 2, 2), | ||||
|         target_fn=lambda: torch.rand(2, 5, 5, 2, 2).mul(3).floor().long(), | ||||
|         reference_fn=lambda i, t, m: | ||||
|             loss_reference_fns['NLLLossNd'](i, t, size_average=get_size_average(m)), | ||||
|         check_no_size_average=True, | ||||
|         desc='higher_dim' | ||||
|     ), | ||||
|     dict( | ||||
|         module_name='NLLLoss', | ||||
|         input_size=(2, 3, 5), | ||||
|         target_fn=lambda: torch.rand(2, 5).mul(3).floor().long(), | ||||
|         reference_fn=lambda i, t, m: | ||||
|             loss_reference_fns['NLLLossNd'](i, t, size_average=get_size_average(m)), | ||||
|         check_no_size_average=True, | ||||
|         desc='dim_is_3' | ||||
|     ), | ||||
|     dict( | ||||
|         module_name='PoissonNLLLoss', | ||||
|         input_size=(2, 3, 4, 5), | ||||
|         target_fn=lambda: torch.randn(2, 3, 4, 5).floor_().abs_(), | ||||
|         desc='reduced_loss', | ||||
|         desc='no_full_loss',  # without the Stirling approximation term | ||||
|     ), | ||||
|     dict( | ||||
|         module_name='PoissonNLLLoss', | ||||
|         constructor_args=(False, True, True), | ||||
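|         # constructor_args are (log_input, full, size_average); full=True | ||||
|         # adds the Stirling approximation term to the loss. | ||||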
|         input_fn=lambda: torch.randn(2, 3, 4, 5).abs_().add_(0.001), | ||||
|         target_fn=lambda: torch.randn(2, 3, 4, 5).floor_().abs_(), | ||||
|         desc='full_loss', | ||||
|         desc='full_loss',  # with the Stirling approximation term | ||||
|     ), | ||||
| ] | ||||
|  | ||||
|  | ||||
| def poissonnllloss_no_reduce_test(): | ||||
|     t = Variable(torch.randn(10, 10)) | ||||
|     return dict( | ||||
|         fullname='PoissonNLLLoss_no_reduce', | ||||
|         constructor=wrap_functional( | ||||
|             lambda i: F.poisson_nll_loss(i, t.type_as(i), reduce=False)), | ||||
|         input_fn=lambda: torch.rand(10, 10), | ||||
|         pickle=False) | ||||
|  | ||||
|  | ||||
| def kldivloss_no_reduce_test(): | ||||
|     t = Variable(torch.randn(10, 10)) | ||||
|     return dict( | ||||
|         fullname='KLDivLoss_no_reduce', | ||||
|         constructor=wrap_functional( | ||||
|             lambda i: F.kl_div(i, t.type_as(i), reduce=False)), | ||||
|         input_fn=lambda: torch.rand(10, 10).log(), | ||||
|         reference_fn=lambda i, _: | ||||
|             loss_reference_fns['KLDivLoss'](i, t.data.type_as(i), reduce=False), | ||||
|         pickle=False) | ||||
|  | ||||
|  | ||||
| def l1loss_no_reduce_test(): | ||||
|     t = Variable(torch.randn(2, 3, 4)) | ||||
|     return dict( | ||||
| @ -3764,7 +4034,7 @@ def nllloss2d_no_reduce_test(): | ||||
|             lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)), | ||||
|         input_fn=lambda: torch.rand(2, 3, 5, 5).log(), | ||||
|         reference_fn=lambda i, _: | ||||
|             loss_reference_fns['NLLLoss2d'](i, t.type_as(i).long(), **kwargs), | ||||
|             loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs), | ||||
|         pickle=False) | ||||
|  | ||||
|  | ||||
| @ -3777,7 +4047,7 @@ def nllloss2d_no_reduce_ignore_index_test(): | ||||
|             lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)), | ||||
|         input_fn=lambda: torch.rand(2, 3, 5, 5).log(), | ||||
|         reference_fn=lambda i, _: | ||||
|             loss_reference_fns['NLLLoss2d'](i, t.type_as(i).long(), **kwargs), | ||||
|             loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs), | ||||
|         pickle=False) | ||||
|  | ||||
|  | ||||
| @ -3794,7 +4064,50 @@ def nllloss2d_no_reduce_weights_test(): | ||||
|             lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs(i.data))), | ||||
|         input_fn=lambda: torch.rand(2, 3, 5, 5).log(), | ||||
|         reference_fn=lambda i, _: | ||||
|             loss_reference_fns['NLLLoss2d'](i, t.type_as(i).long(), **kwargs(i)), | ||||
|             loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs(i)), | ||||
|         pickle=False) | ||||
|  | ||||
|  | ||||
| def nlllossNd_no_reduce_test(): | ||||
|     t = Variable(torch.rand(2, 5, 5, 2, 2).mul(3).floor().long()) | ||||
|     kwargs = {'reduce': False} | ||||
|     return dict( | ||||
|         fullname='NLLLossNd_no_reduce', | ||||
|         constructor=wrap_functional( | ||||
|             lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)), | ||||
|         input_fn=lambda: torch.rand(2, 3, 5, 5, 2, 2).log(), | ||||
|         reference_fn=lambda i, _: | ||||
|             loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs), | ||||
|         pickle=False) | ||||
|  | ||||
|  | ||||
| def nlllossNd_no_reduce_ignore_index_test(): | ||||
|     t = Variable(torch.rand(2, 5, 5, 2, 2).mul(3).floor().long()) | ||||
|     kwargs = {'ignore_index': 1, 'reduce': False} | ||||
|     return dict( | ||||
|         fullname='NLLLossNd_no_reduce_ignore_index', | ||||
|         constructor=wrap_functional( | ||||
|             lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs)), | ||||
|         input_fn=lambda: torch.rand(2, 3, 5, 5, 2, 2).log(), | ||||
|         reference_fn=lambda i, _: | ||||
|             loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs), | ||||
|         pickle=False) | ||||
|  | ||||
|  | ||||
| def nlllossNd_no_reduce_weights_test(): | ||||
|     t = Variable(torch.rand(2, 5, 5, 2, 2).mul(3).floor().long()) | ||||
|     weight = torch.rand(3) | ||||
|  | ||||
|     def kwargs(i): | ||||
|         return {'weight': weight.type_as(i), 'reduce': False} | ||||
|  | ||||
|     return dict( | ||||
|         fullname='NLLLossNd_no_reduce_weights', | ||||
|         constructor=wrap_functional( | ||||
|             lambda i: F.nll_loss(i, t.type_as(i).long(), **kwargs(i.data))), | ||||
|         input_fn=lambda: torch.rand(2, 3, 5, 5, 2, 2).log(), | ||||
|         reference_fn=lambda i, _: | ||||
|             loss_reference_fns['NLLLossNd'](i, t.type_as(i).long(), **kwargs(i)), | ||||
|         pickle=False) | ||||
|  | ||||
|  | ||||
| @ -3811,6 +4124,8 @@ def smoothl1loss_no_reduce_test(): | ||||
|  | ||||
|  | ||||
| new_module_tests = [ | ||||
|     poissonnllloss_no_reduce_test(), | ||||
|     kldivloss_no_reduce_test(), | ||||
|     l1loss_no_reduce_test(), | ||||
|     mseloss_no_reduce_test(), | ||||
|     nllloss_no_reduce_test(), | ||||
| @ -3821,6 +4136,9 @@ new_module_tests = [ | ||||
|     nllloss2d_no_reduce_test(), | ||||
|     nllloss2d_no_reduce_weights_test(), | ||||
|     nllloss2d_no_reduce_ignore_index_test(), | ||||
|     nlllossNd_no_reduce_test(), | ||||
|     nlllossNd_no_reduce_weights_test(), | ||||
|     nlllossNd_no_reduce_ignore_index_test(), | ||||
|     smoothl1loss_no_reduce_test(), | ||||
|     dict( | ||||
|         module_name='BatchNorm1d', | ||||
| @ -4553,7 +4871,7 @@ new_module_tests = [ | ||||
|         desc='dim' | ||||
|     ), | ||||
|     dict( | ||||
|         constructor=wrap_functional(F.softmax, dim=1), | ||||
|         constructor=wrap_functional(F.softmax, dim=-1), | ||||
|         input_size=(2, 128),  # trigger the last-dim algo in CUDA | ||||
|         fullname='softmax_lastdim', | ||||
|         pickle=False, | ||||
| @ -4585,7 +4903,7 @@ new_module_tests = [ | ||||
|         pickle=False, | ||||
|     ), | ||||
|     dict( | ||||
|         constructor=wrap_functional(F.log_softmax, dim=1), | ||||
|         constructor=wrap_functional(F.log_softmax, dim=-1), | ||||
|         input_size=(2, 128),  # trigger the last-dim algo in CUDA | ||||
|         fullname='log_softmax_lastdim', | ||||
|         pickle=False, | ||||
|  | ||||
| @ -1,3 +1,4 @@ | ||||
| import math | ||||
| import unittest | ||||
| import functools | ||||
| from copy import deepcopy | ||||
| @ -8,7 +9,7 @@ import torch.nn.functional as F | ||||
| from torch.optim import SGD | ||||
| from torch.autograd import Variable | ||||
| from torch import sparse | ||||
| from torch.optim.lr_scheduler import LambdaLR, StepLR, MultiStepLR, ExponentialLR, ReduceLROnPlateau | ||||
| from torch.optim.lr_scheduler import LambdaLR, StepLR, MultiStepLR, ExponentialLR, CosineAnnealingLR, ReduceLROnPlateau | ||||
| from common import TestCase, run_tests | ||||
|  | ||||
|  | ||||
| @ -61,13 +62,14 @@ class TestOptim(TestCase): | ||||
|  | ||||
|         self.assertLessEqual(params.data.dist(solution), initial_dist) | ||||
|  | ||||
|     def _test_rosenbrock_sparse(self, constructor): | ||||
|     def _test_rosenbrock_sparse(self, constructor, sparse_only=False): | ||||
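|         # sparse_only skips the dense reference optimizer; used for | ||||
|         # optimizers such as SparseAdam that only accept sparse gradients. | ||||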
|         params_t = torch.Tensor([1.5, 1.5]) | ||||
|  | ||||
|         params = Variable(torch.Tensor([1.5, 1.5]), requires_grad=True) | ||||
|         params_c = Variable(torch.Tensor([1.5, 1.5]), requires_grad=True) | ||||
|         params = Variable(params_t, requires_grad=True) | ||||
|         optimizer = constructor([params]) | ||||
|         optimizer_c = constructor([params_c]) | ||||
|         if not sparse_only: | ||||
|             params_c = Variable(params_t.clone(), requires_grad=True) | ||||
|             optimizer_c = constructor([params_c]) | ||||
|  | ||||
|         solution = torch.Tensor([1, 1]) | ||||
|         initial_dist = params.data.dist(solution) | ||||
| @ -99,8 +101,9 @@ class TestOptim(TestCase): | ||||
|             # Do cyclic coordinate descent | ||||
|             w = i % 2 | ||||
|             optimizer.step(functools.partial(eval, params, True, w)) | ||||
|             optimizer_c.step(functools.partial(eval, params_c, False, w)) | ||||
|             self.assertEqual(params.data, params_c.data) | ||||
|             if not sparse_only: | ||||
|                 optimizer_c.step(functools.partial(eval, params_c, False, w)) | ||||
|                 self.assertEqual(params.data, params_c.data) | ||||
|  | ||||
|         self.assertLessEqual(params.data.dist(solution), initial_dist) | ||||
|  | ||||
| @ -229,6 +232,11 @@ class TestOptim(TestCase): | ||||
|                 lr=1e-3) | ||||
|         ) | ||||
|  | ||||
|     def test_sgd_sparse(self): | ||||
|         self._test_rosenbrock_sparse( | ||||
|             lambda params: optim.SGD(params, lr=5e-3) | ||||
|         ) | ||||
|  | ||||
|     def test_adam(self): | ||||
|         self._test_rosenbrock( | ||||
|             lambda params: optim.Adam(params, lr=1e-2), | ||||
| @ -247,6 +255,12 @@ class TestOptim(TestCase): | ||||
|                 lr=1e-3) | ||||
|         ) | ||||
|  | ||||
|     def test_sparse_adam(self): | ||||
|         self._test_rosenbrock_sparse( | ||||
|             lambda params: optim.SparseAdam(params, lr=4e-2), | ||||
|             True | ||||
|         ) | ||||
|  | ||||
|     def test_adadelta(self): | ||||
|         self._test_rosenbrock( | ||||
|             lambda params: optim.Adadelta(params), | ||||
| @ -423,10 +437,10 @@ class TestLRScheduler(TestCase): | ||||
|         # lr = 0.05     if epoch < 3 | ||||
|         # lr = 0.005    if 3 <= epoch < 6 | ||||
|         # lr = 0.0005   if 6 <= epoch < 9 | ||||
|         # lr = 0.00005  if epoch >= 9 | ||||
|         single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005] * 3 | ||||
|         targets = [single_targets, list(map(lambda x: x * 10, single_targets))] | ||||
|         scheduler = StepLR(self.opt, gamma=0.1, step_size=3) | ||||
|         epochs = 10 | ||||
|         single_targets = [0.05] * 3 + [0.005] * 3 + [0.0005] * 3 + [0.00005] * 3 | ||||
|         targets = [single_targets, list(map(lambda x: x * epochs, single_targets))] | ||||
|         scheduler = StepLR(self.opt, gamma=0.1, step_size=3) | ||||
|         self._test(scheduler, targets, epochs) | ||||
|  | ||||
|     def test_multi_step_lr(self): | ||||
| @ -434,106 +448,116 @@ class TestLRScheduler(TestCase): | ||||
|         # lr = 0.005    if 2 <= epoch < 5 | ||||
|         # lr = 0.0005   if 5 <= epoch < 9 | ||||
|         # lr = 0.00005  if epoch >= 9 | ||||
|         single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005] * 3 | ||||
|         targets = [single_targets, list(map(lambda x: x * 10, single_targets))] | ||||
|         scheduler = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9]) | ||||
|         epochs = 10 | ||||
|         single_targets = [0.05] * 2 + [0.005] * 3 + [0.0005] * 4 + [0.00005] * 3 | ||||
|         targets = [single_targets, list(map(lambda x: x * epochs, single_targets))] | ||||
|         scheduler = MultiStepLR(self.opt, gamma=0.1, milestones=[2, 5, 9]) | ||||
|         self._test(scheduler, targets, epochs) | ||||
|  | ||||
|     def test_exp_lr(self): | ||||
|         single_targets = [0.05 * (0.9 ** x) for x in range(10)] | ||||
|         targets = [single_targets, list(map(lambda x: x * 10, single_targets))] | ||||
|         scheduler = ExponentialLR(self.opt, gamma=0.9) | ||||
|         epochs = 10 | ||||
|         single_targets = [0.05 * (0.9 ** x) for x in range(epochs)] | ||||
|         targets = [single_targets, list(map(lambda x: x * epochs, single_targets))] | ||||
|         scheduler = ExponentialLR(self.opt, gamma=0.9) | ||||
|         self._test(scheduler, targets, epochs) | ||||
|  | ||||
|     def test_cos_anneal_lr(self): | ||||
|         epochs = 10 | ||||
|         eta_min = 1e-10 | ||||
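|         # Closed-form cosine annealing schedule: | ||||
|         #   lr_t = eta_min + (base_lr - eta_min) * (1 + cos(pi * t / T_max)) / 2 | ||||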
|         single_targets = [eta_min + (0.05 - eta_min) * | ||||
|                           (1 + math.cos(math.pi * x / epochs)) / 2 | ||||
|                           for x in range(epochs)] | ||||
|         targets = [single_targets, list(map(lambda x: x * epochs, single_targets))] | ||||
|         scheduler = CosineAnnealingLR(self.opt, T_max=epochs, eta_min=eta_min) | ||||
|         self._test(scheduler, targets, epochs) | ||||
|  | ||||
|     def test_reduce_lr_on_plateau1(self): | ||||
|         epochs = 10 | ||||
|         for param_group in self.opt.param_groups: | ||||
|             param_group['lr'] = 0.5 | ||||
|         targets = [[0.5] * 20] | ||||
|         metrics = [10 - i * 0.0167 for i in range(20)] | ||||
|         scheduler = ReduceLROnPlateau(self.opt, threshold_mode='abs', mode='min', | ||||
|                                       threshold=0.01, patience=5, cooldown=5) | ||||
|         epochs = 10 | ||||
|         self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs) | ||||
|  | ||||
|     def test_reduce_lr_on_plateau2(self): | ||||
|         epochs = 22 | ||||
|         for param_group in self.opt.param_groups: | ||||
|             param_group['lr'] = 0.5 | ||||
|         targets = [[0.5] * 6 + [0.05] * 7 + [0.005] * 7 + [0.0005] * 2] | ||||
|         metrics = [10 - i * 0.0165 for i in range(22)] | ||||
|         scheduler = ReduceLROnPlateau(self.opt, patience=5, cooldown=0, threshold_mode='abs', | ||||
|                                       mode='min', threshold=0.1) | ||||
|         epochs = 22 | ||||
|         self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs) | ||||
|  | ||||
|     def test_reduce_lr_on_plateau3(self): | ||||
|         epochs = 22 | ||||
|         for param_group in self.opt.param_groups: | ||||
|             param_group['lr'] = 0.5 | ||||
|         targets = [[0.5] * (2 + 6) + [0.05] * (5 + 6) + [0.005] * 4] | ||||
|         metrics = [-0.8] * 2 + [-0.234] * 20 | ||||
|         scheduler = ReduceLROnPlateau(self.opt, mode='max', patience=5, cooldown=5, | ||||
|                                       threshold_mode='abs') | ||||
|         epochs = 22 | ||||
|         self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs) | ||||
|  | ||||
|     def test_reduce_lr_on_plateau4(self): | ||||
|         epochs = 20 | ||||
|         for param_group in self.opt.param_groups: | ||||
|             param_group['lr'] = 0.5 | ||||
|         targets = [[0.5] * 20] | ||||
|         metrics = [1.5 * (1.025 ** i) for i in range(20)]  # 1.025 > 1.1**0.25 | ||||
|         scheduler = ReduceLROnPlateau(self.opt, mode='max', patience=3, | ||||
|                                       threshold_mode='rel', threshold=0.1) | ||||
|         epochs = 20 | ||||
|         self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs) | ||||
|  | ||||
|     def test_reduce_lr_on_plateau5(self): | ||||
|         epochs = 20 | ||||
|         for param_group in self.opt.param_groups: | ||||
|             param_group['lr'] = 0.5 | ||||
|         targets = [[0.5] * 6 + [0.05] * (5 + 6) + [0.005] * 4] | ||||
|         metrics = [1.5 * (1.005 ** i) for i in range(20)] | ||||
|         scheduler = ReduceLROnPlateau(self.opt, mode='max', threshold_mode='rel', | ||||
|                                       threshold=0.1, patience=5, cooldown=5) | ||||
|         epochs = 20 | ||||
|         self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs) | ||||
|  | ||||
|     def test_reduce_lr_on_plateau6(self): | ||||
|         epochs = 20 | ||||
|         for param_group in self.opt.param_groups: | ||||
|             param_group['lr'] = 0.5 | ||||
|         targets = [[0.5] * 20] | ||||
|         metrics = [1.5 * (0.85 ** i) for i in range(20)] | ||||
|         scheduler = ReduceLROnPlateau(self.opt, mode='min', threshold_mode='rel', | ||||
|                                       threshold=0.1) | ||||
|         epochs = 20 | ||||
|         self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs) | ||||
|  | ||||
|     def test_reduce_lr_on_plateau7(self): | ||||
|         epochs = 20 | ||||
|         for param_group in self.opt.param_groups: | ||||
|             param_group['lr'] = 0.5 | ||||
|         targets = [[0.5] * 6 + [0.05] * (5 + 6) + [0.005] * 4] | ||||
|         metrics = [1] * 7 + [0.6] + [0.5] * 12 | ||||
|         scheduler = ReduceLROnPlateau(self.opt, mode='min', threshold_mode='rel', | ||||
|                                       threshold=0.1, patience=5, cooldown=5) | ||||
|         epochs = 20 | ||||
|         self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs) | ||||
|  | ||||
|     def test_reduce_lr_on_plateau8(self): | ||||
|         epochs = 20 | ||||
|         for param_group in self.opt.param_groups: | ||||
|             param_group['lr'] = 0.5 | ||||
|         targets = [[0.5] * 6 + [0.4] * 14, [0.5] * 6 + [0.3] * 14] | ||||
|         metrics = [1.5 * (1.005 ** i) for i in range(20)] | ||||
|         scheduler = ReduceLROnPlateau(self.opt, mode='max', threshold_mode='rel', min_lr=[0.4, 0.3], | ||||
|                                       threshold=0.1, patience=5, cooldown=5) | ||||
|         epochs = 20 | ||||
|         self._test_reduce_lr_on_plateau(scheduler, targets, metrics, epochs) | ||||
|  | ||||
|     def test_lambda_lr(self): | ||||
|         epochs = 10 | ||||
|         self.opt.param_groups[0]['lr'] = 0.05 | ||||
|         self.opt.param_groups[1]['lr'] = 0.4 | ||||
|         targets = [[0.05 * (0.9 ** x) for x in range(10)], [0.4 * (0.8 ** x) for x in range(10)]] | ||||
|         targets = [[0.05 * (0.9 ** x) for x in range(epochs)], [0.4 * (0.8 ** x) for x in range(epochs)]] | ||||
|         scheduler = LambdaLR(self.opt, | ||||
|                              lr_lambda=[lambda x1: 0.9 ** x1, lambda x2: 0.8 ** x2]) | ||||
|         epochs = 10 | ||||
|         self._test(scheduler, targets, epochs) | ||||
|  | ||||
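Similarly, the per-parameter-group lambdas in test_lambda_lr correspond to usage like the following sketch (the parameter names are illustrative):

    import torch
    from torch.optim.lr_scheduler import LambdaLR

    w1 = torch.nn.Parameter(torch.randn(2, 2))
    w2 = torch.nn.Parameter(torch.randn(2, 2))
    opt = torch.optim.SGD([{'params': [w1], 'lr': 0.05},
                           {'params': [w2], 'lr': 0.4}])
    # One lambda per param group; each group's lr becomes base_lr * lambda(epoch).
    scheduler = LambdaLR(opt, lr_lambda=[lambda e: 0.9 ** e, lambda e: 0.8 ** e])

    for epoch in range(10):
        scheduler.step()  # each group's lr decays geometrically at its own rate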
|     def _test(self, scheduler, targets, epochs=10): | ||||
|  | ||||
| @ -8,6 +8,7 @@ import torch.cuda | ||||
| import tempfile | ||||
| import unittest | ||||
| import warnings | ||||
| import pickle | ||||
| from torch.utils.dlpack import from_dlpack, to_dlpack | ||||
| from itertools import product, combinations | ||||
| from common import TestCase, iter_indices, TEST_NUMPY, run_tests, download_file, skipIfNoLapack, \ | ||||
| @ -71,6 +72,34 @@ class TestTorch(TestCase): | ||||
|                     res2[i, j] = v1[i] * v2[j] | ||||
|             self.assertEqual(res1, res2) | ||||
|  | ||||
|     def test_addr(self): | ||||
|         types = { | ||||
|             'torch.DoubleTensor': 1e-8, | ||||
|             'torch.FloatTensor': 1e-4, | ||||
|         } | ||||
|  | ||||
|         def run_test(m, v1, v2, m_transform=lambda x: x): | ||||
|             m = m_transform(m.clone()) | ||||
|             ref = m.clone() | ||||
|             torch.addr(m, v1, v2, out=m) | ||||
|             for i in range(m.size(0)): | ||||
|                 for j in range(m.size(1)): | ||||
|                     ref[i, j] += v1[i] * v2[j] | ||||
|             self.assertEqual(m, ref) | ||||
|  | ||||
|         for tname, _prec in types.items(): | ||||
|             for h, w in [(100, 110), (1, 20), (200, 2)]: | ||||
|                 m = torch.randn(h, w).type(tname) | ||||
|                 v1 = torch.randn(h).type(tname) | ||||
|                 v2 = torch.randn(w).type(tname) | ||||
|                 run_test(m, v1, v2) | ||||
|                 # test transpose | ||||
|                 run_test(m, v2, v1, lambda x: x.transpose(0, 1)) | ||||
|                 # test 0 strided | ||||
|                 v1 = torch.randn(1).type(tname).expand(h) | ||||
|                 run_test(m, v1, v2) | ||||
|                 run_test(m, v2, v1, lambda x: x.transpose(0, 1)) | ||||
|  | ||||
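For context, the reference loop in run_test mirrors what torch.addr computes: a rank-1 (outer-product) update of the matrix,

    out[i][j] = beta * m[i][j] + alpha * v1[i] * v2[j]

with beta and alpha defaulting to 1, which is the case exercised by this test.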
|     def test_addmv(self): | ||||
|         types = { | ||||
|             'torch.DoubleTensor': 1e-8, | ||||
| @ -320,17 +349,20 @@ class TestTorch(TestCase): | ||||
|             "mean", "median", "mode", "norm", "prod", | ||||
|             "std", "sum", "var", "max", "min"] | ||||
|  | ||||
|         def normfn_attr(t, dim, keepdim=False): | ||||
|         def normfn_attr(t, dim, keepdim=False, out=None): | ||||
|             attr = getattr(torch, "norm") | ||||
|             return attr(t, 2, dim, keepdim) | ||||
|             return attr(t, 2, dim, keepdim, out=out) | ||||
|  | ||||
|         for fn_name in dim_red_fns: | ||||
|             fn_attr = getattr(torch, fn_name) if fn_name != "norm" else normfn_attr | ||||
|  | ||||
|             def fn(x, dim, keepdim=False): | ||||
|                 ans = fn_attr(x, dim, keepdim=keepdim) | ||||
|             def fn(x, dim, keepdim=False, out=None): | ||||
|                 ans = fn_attr(x, dim, keepdim=keepdim, out=out) | ||||
|                 return ans if not isinstance(ans, tuple) else ans[0] | ||||
|  | ||||
|             def fn_tuple(x, dim, keepdim=False, out=None): | ||||
|                 return fn_attr(x, dim, keepdim=keepdim, out=out) | ||||
|  | ||||
|             def test_multidim(x, dim): | ||||
|                 self.assertEqual(fn(x, dim).unsqueeze(dim), fn(x, dim, keepdim=True)) | ||||
|                 self.assertEqual(x.ndimension() - 1, fn(x, dim).ndimension()) | ||||
| @ -355,6 +387,25 @@ class TestTorch(TestCase): | ||||
|             x = cast(torch.randn(dims)) | ||||
|             test_multidim(x, singleton_dim) | ||||
|  | ||||
|             # check reducing with output kwargs | ||||
|             if fn_name in ['median', 'mode', 'max', 'min']: | ||||
|                 y = cast(torch.randn(5, 3)) | ||||
|                 values = cast(torch.randn(5, 3)) | ||||
|                 indices = cast(torch.zeros(5, 3).long() - 1) | ||||
|                 fn_tuple(y, 1, keepdim=False, out=(values[:, 1], indices[:, 1])) | ||||
|                 values_expected, indices_expected = fn_tuple(y, 1, keepdim=False) | ||||
|                 self.assertEqual(values[:, 1], values_expected, | ||||
|                                  '{} values with out= kwarg'.format(fn_name)) | ||||
|                 self.assertEqual(indices[:, 1], indices_expected, | ||||
|                                  '{} indices with out= kwarg'.format(fn_name)) | ||||
|                 continue | ||||
|  | ||||
|             x = cast(torch.randn(5, 3)) | ||||
|             y = cast(torch.randn(5, 3)) | ||||
|             fn(y, 1, keepdim=False, out=x[:, 1]) | ||||
|             expected = fn(y, 1, keepdim=False) | ||||
|             self.assertEqual(x[:, 1], expected, '{} with out= kwarg'.format(fn_name)) | ||||
|  | ||||
|     def test_dim_reduction(self): | ||||
|         self._test_dim_reduction(self, lambda t: t) | ||||
|  | ||||
| @ -408,6 +459,17 @@ class TestTorch(TestCase): | ||||
|         test((10,)) | ||||
|         test((5, 5)) | ||||
|  | ||||
|     def test_all_any_empty(self): | ||||
|         x = torch.ByteTensor() | ||||
|         self.assertTrue(x.all()) | ||||
|         self.assertFalse(x.any()) | ||||
|  | ||||
|     @unittest.skipIf(not torch.cuda.is_available(), 'no CUDA') | ||||
|     def test_all_any_empty_cuda(self): | ||||
|         x = torch.cuda.ByteTensor() | ||||
|         self.assertTrue(x.all()) | ||||
|         self.assertFalse(x.any()) | ||||
|  | ||||
|     def test_mv(self): | ||||
|         m1 = torch.randn(100, 100) | ||||
|         v1 = torch.randn(100) | ||||
| @ -1111,6 +1173,11 @@ class TestTorch(TestCase): | ||||
|         torch.arange(0, 1, out=res2) | ||||
|         self.assertEqual(res1, res2, 0) | ||||
|  | ||||
|         # Check arange with only one argument | ||||
|         res1 = torch.arange(10) | ||||
|         res2 = torch.arange(0, 10) | ||||
|         self.assertEqual(res1, res2, 0) | ||||
|  | ||||
|         # Check arange for non-contiguous tensors. | ||||
|         x = torch.zeros(2, 3) | ||||
|         torch.arange(0, 4, out=x.narrow(1, 1, 2)) | ||||
| @ -1873,6 +1940,17 @@ class TestTorch(TestCase): | ||||
|  | ||||
|         self.assertRaises(RuntimeError, lambda: torch.cat([])) | ||||
|  | ||||
|     def test_cat_bad_input_sizes(self): | ||||
|         x = torch.randn(2, 1) | ||||
|         y = torch.randn(2, 1, 1) | ||||
|         z = torch.randn(2, 1, 1) | ||||
|         self.assertRaises(RuntimeError, lambda: torch.cat([x, y, z])) | ||||
|  | ||||
|         x = torch.randn(2, 1, 2) | ||||
|         y = torch.randn(2, 1, 1) | ||||
|         z = torch.randn(2, 2, 1) | ||||
|         self.assertRaises(RuntimeError, lambda: torch.cat([x, y, z], dim=1)) | ||||
|  | ||||
|     def test_stack(self): | ||||
|         x = torch.rand(2, 3, 4) | ||||
|         y = torch.rand(2, 3, 4) | ||||
| @ -3429,6 +3507,24 @@ class TestTorch(TestCase): | ||||
|             dest2[idx[i]] = dest2[idx[i]] + src[i] | ||||
|         self.assertEqual(dest, dest2) | ||||
|  | ||||
|     def test_index_select(self): | ||||
|         src = torch.randn(3, 4, 5) | ||||
|         # Index can be duplicated. | ||||
|         idx = torch.LongTensor([2, 1, 0, 1, 2]) | ||||
|         dest = torch.index_select(src, 0, idx) | ||||
|         self.assertEqual(dest.shape, (5, 4, 5)) | ||||
|         for i in range(idx.size(0)): | ||||
|             self.assertEqual(dest[i], src[idx[i]]) | ||||
|  | ||||
|         # Check that 'out' is used correctly. | ||||
|         out = torch.randn(5 * 4 * 5) | ||||
|         dest = torch.index_select(src, 0, idx, out=out.view(5, 4, 5)) | ||||
|         self.assertEqual(dest.shape, (5, 4, 5)) | ||||
|         for i in range(idx.size(0)): | ||||
|             self.assertEqual(dest[i], src[idx[i]]) | ||||
|         out.fill_(0.123) | ||||
|         self.assertEqual(out, dest.view(-1))  # Must point to the same storage. | ||||
|  | ||||
|     def test_take(self): | ||||
|         def check(src, idx): | ||||
|             expected = src.contiguous().view(-1).index_select( | ||||
| @ -3643,6 +3739,11 @@ class TestTorch(TestCase): | ||||
|         self.assertEqual(tensor.std(), tensor.std(unbiased=True)) | ||||
|         self.assertEqual(tensor.std(unbiased=False), tensor.std(0, unbiased=False)[0]) | ||||
|  | ||||
|     def test_var_stability(self): | ||||
|         tensor = torch.FloatTensor([2281.5, 2281.25]) | ||||
|         self.assertEqual(tensor.var(0)[0], 0.03125) | ||||
|         self.assertEqual(tensor.var(), 0.03125) | ||||
|  | ||||
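The expected value 0.03125 follows from the unbiased (n - 1) variance formula, presumably guarding against catastrophic cancellation in a naive single-pass E[x^2] - E[x]^2 computation on float32 values of this magnitude:

    mean = (2281.5 + 2281.25) / 2 = 2281.375
    var  = ((2281.5 - 2281.375)**2 + (2281.25 - 2281.375)**2) / (2 - 1)
         = (0.125**2 + 0.125**2) / 1
         = 0.03125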
|     def test_view(self): | ||||
|         tensor = torch.rand(15) | ||||
|         template = torch.rand(3, 5) | ||||
| @ -3698,18 +3799,47 @@ class TestTorch(TestCase): | ||||
|         self.assertEqual(torch.randn(()).expand(()), torch.randn(())) | ||||
|  | ||||
|     def test_repeat(self): | ||||
|         result = torch.Tensor() | ||||
|         tensor = torch.rand(8, 4) | ||||
|  | ||||
|         initial_shape = (8, 4) | ||||
|         tensor = torch.rand(*initial_shape) | ||||
|  | ||||
|         size = (3, 1, 1) | ||||
|         torchSize = torch.Size(size) | ||||
|         target = [3, 8, 4] | ||||
|         self.assertEqual(tensor.repeat(*size).size(), target, 'Error in repeat') | ||||
|         self.assertEqual(tensor.repeat(torchSize).size(), target, 'Error in repeat using LongStorage') | ||||
|         self.assertEqual(tensor.repeat(torchSize).size(), target, | ||||
|                          'Error in repeat using LongStorage') | ||||
|         result = tensor.repeat(*size) | ||||
|         self.assertEqual(result.size(), target, 'Error in repeat using result') | ||||
|         result = tensor.repeat(torchSize) | ||||
|         self.assertEqual(result.size(), target, 'Error in repeat using result and LongStorage') | ||||
|         self.assertEqual((result.mean(0).view(8, 4) - tensor).abs().max(), 0, 'Error in repeat (not equal)') | ||||
|         self.assertEqual(result.mean(0).view(8, 4), tensor, 'Error in repeat (not equal)') | ||||
|  | ||||
|     @unittest.skipIf(not TEST_NUMPY, "Numpy not found") | ||||
|     def test_repeat_tile(self): | ||||
|  | ||||
|         initial_shape = (8, 4) | ||||
|  | ||||
|         repeats = ((3, 1, 1), | ||||
|                    (3, 3, 3), | ||||
|                    (1, 2, 1), | ||||
|                    (2, 2, 2, 2)) | ||||
|  | ||||
|         def _generate_noncontiguous_input(): | ||||
|  | ||||
|             out = np.broadcast_to(np.random.random((1, 4)), | ||||
|                                   initial_shape) | ||||
|  | ||||
|             assert not (out.flags.c_contiguous or out.flags.f_contiguous) | ||||
|  | ||||
|             return out | ||||
|  | ||||
|         for repeat in repeats: | ||||
|             for tensor in (torch.from_numpy(np.random.random(initial_shape)), | ||||
|                            torch.from_numpy(_generate_noncontiguous_input()),): | ||||
|  | ||||
|                 self.assertEqual(tensor.repeat(*repeat).numpy(), | ||||
|                                  np.tile(tensor.numpy(), repeat)) | ||||
|  | ||||
|     def test_is_same_size(self): | ||||
|         t1 = torch.Tensor(3, 4, 9, 10) | ||||
| @ -4071,6 +4201,18 @@ class TestTorch(TestCase): | ||||
|             rootview = c[8] | ||||
|             self.assertEqual(rootview.data_ptr(), c[0].data_ptr()) | ||||
|  | ||||
|     def test_serialization_offset(self): | ||||
|         a = torch.randn(5, 5) | ||||
|         i = 41 | ||||
|         with tempfile.TemporaryFile() as f: | ||||
|             pickle.dump(i, f) | ||||
|             torch.save(a, f) | ||||
|             f.seek(0) | ||||
|             j = pickle.load(f) | ||||
|             b = torch.load(f) | ||||
|             self.assertTrue(torch.equal(a, b)) | ||||
|             self.assertEqual(i, j) | ||||
|  | ||||
|     def test_half_tensor(self): | ||||
|         x = torch.randn(5, 5).float() | ||||
|         y = torch.randn(5, 5).float() | ||||
| @ -4186,6 +4328,10 @@ class TestTorch(TestCase): | ||||
|         self.assertEqual(type(tensor), torch.FloatTensor) | ||||
|         self.assertEqual(tensor, torch.FloatTensor([[1.0, 2.0], [3.0, 4.0]])) | ||||
|  | ||||
|         tensor = torch.load(test_file_path, map_location='cpu') | ||||
|         self.assertEqual(type(tensor), torch.FloatTensor) | ||||
|         self.assertEqual(tensor, torch.FloatTensor([[1.0, 2.0], [3.0, 4.0]])) | ||||
|  | ||||
|     def test_from_buffer(self): | ||||
|         a = bytearray([1, 2, 3, 4]) | ||||
|         self.assertEqual(torch.ByteStorage.from_buffer(a).tolist(), [1, 2, 3, 4]) | ||||
| @ -4247,6 +4393,19 @@ class TestTorch(TestCase): | ||||
|         x.__repr__() | ||||
|         str(x), | ||||
|  | ||||
|     def test_sizeof(self): | ||||
|         sizeof_empty = torch.randn(0).storage().__sizeof__() | ||||
|         sizeof_10 = torch.randn(10).storage().__sizeof__() | ||||
|         sizeof_100 = torch.randn(100).storage().__sizeof__() | ||||
|         self.assertEqual((sizeof_100 - sizeof_empty) // (sizeof_10 - sizeof_empty), 10) | ||||
|         self.assertEqual((sizeof_100 - sizeof_empty) % (sizeof_10 - sizeof_empty), 0) | ||||
|  | ||||
|         sizeof_empty = torch.randn(0).type(torch.ByteTensor).storage().__sizeof__() | ||||
|         sizeof_10 = torch.randn(10).type(torch.ByteTensor).storage().__sizeof__() | ||||
|         sizeof_100 = torch.randn(100).type(torch.ByteTensor).storage().__sizeof__() | ||||
|         self.assertEqual((sizeof_100 - sizeof_empty) // (sizeof_10 - sizeof_empty), 10) | ||||
|         self.assertEqual((sizeof_100 - sizeof_empty) % (sizeof_10 - sizeof_empty), 0) | ||||
|  | ||||
|     def test_unsqueeze(self): | ||||
|         x = torch.randn(2, 3, 4) | ||||
|         y = x.unsqueeze(1) | ||||
| @ -4511,6 +4670,19 @@ class TestTorch(TestCase): | ||||
|             for i in range(len(x)): | ||||
|                 self.assertEqual(geq2_x[i], geq2_array[i]) | ||||
|  | ||||
|     def test_error_msg_type_translation(self): | ||||
|         with self.assertRaisesRegex( | ||||
|                 RuntimeError, | ||||
|                 # message includes both torch.DoubleTensor and torch.LongTensor | ||||
|                 '(?=.*torch\.DoubleTensor)(?=.*torch\.LongTensor)'): | ||||
|  | ||||
|             # Calls model with a DoubleTensor input but LongTensor weights | ||||
|             input = torch.autograd.Variable(torch.randn(1, 1, 1, 6).double()) | ||||
|             weight = torch.zeros(1, 1, 1, 3).long() | ||||
|             model = torch.nn.Conv2d(1, 1, (1, 3), stride=1, padding=0, bias=False) | ||||
|             model.weight.data = weight | ||||
|             out = model(input) | ||||
|  | ||||
|     def test_comparison_ops(self): | ||||
|         x = torch.randn(5, 5) | ||||
|         y = torch.randn(5, 5) | ||||
|  | ||||
| @ -386,7 +386,7 @@ class TestONNXUtils(TestCase): | ||||
|         sizes = [2, 3, 4] | ||||
|         pad = [1, 2, 3, 4] | ||||
|         paddings = prepare_onnx_paddings(len(sizes), pad) | ||||
|         self.assertEqual(paddings, [0, 0, 3, 4, 1, 2]) | ||||
|         self.assertEqual(paddings, [0, 3, 1, 0, 4, 2]) | ||||
|  | ||||
|     def test_check_onnx_broadcast(self): | ||||
|  | ||||
|  | ||||
| @ -13,10 +13,10 @@ | ||||
|  | ||||
| - name: add(Tensor self, Tensor other, *, Scalar alpha=1) | ||||
|   self: grad | ||||
|   other: grad * alpha | ||||
|   other: maybe_multiply(grad, alpha) | ||||
|  | ||||
| - name: addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) | ||||
|   self: grad * beta | ||||
|   self: maybe_multiply(grad, beta) | ||||
|   batch1: grad.unsqueeze(0).expand({ batch1.size(0), batch1.size(1), batch2.size(2) }).bmm(batch2.transpose(1, 2)) * alpha | ||||
|   batch2: batch1.transpose(1, 2).bmm(grad.unsqueeze(0).expand({ batch1.size(0), batch1.size(1), batch2.size(2) })) * alpha | ||||
|  | ||||
| @ -36,12 +36,12 @@ | ||||
|   mat2: mm_mat2_backward(grad, mat1, mat2.sizes(), mat2.strides(), alpha) | ||||
|  | ||||
| - name: addmv(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) | ||||
|   self: grad * beta | ||||
|   self: maybe_multiply(grad, beta) | ||||
|   mat: grad.ger(vec) * alpha | ||||
|   vec: mat.t().mv(grad) * alpha | ||||
|  | ||||
| - name: addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) | ||||
|   self: grad * beta | ||||
|   self: maybe_multiply(grad, beta) | ||||
|   vec1: grad.mv(vec2) * alpha | ||||
|   vec2: grad.t().mv(vec1) * alpha | ||||
|  | ||||
| @ -62,7 +62,7 @@ | ||||
|   other: grad * -self * ((self * self + other * other).reciprocal()) | ||||
|  | ||||
| - name: baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) | ||||
|   self: grad * beta | ||||
|   self: maybe_multiply(grad, beta) | ||||
|   batch1: grad.bmm(batch2.transpose(1, 2)) * alpha | ||||
|   batch2: batch1.transpose(1, 2).bmm(grad) * alpha | ||||
|  | ||||
| @ -108,8 +108,8 @@ | ||||
|   self: grad.diag(diagonal) | ||||
|  | ||||
| - name: dist(Tensor self, Tensor other, Scalar p=2) | ||||
|   self: norm_backward(grad, self - other, p) | ||||
|   other: -norm_backward(grad, self - other, p) | ||||
|   self: norm_backward(grad, self - other, p, result) | ||||
|   other: -norm_backward(grad, self - other, p, result) | ||||
|  | ||||
| - name: div(Tensor self, Scalar other) | ||||
|   self: grad / other | ||||
| @ -149,7 +149,8 @@ | ||||
|  | ||||
| - name: eye  # fallthrough | ||||
|  | ||||
| - name: fill(Tensor self, Scalar value)  # FIXME | ||||
| - name: fill(Tensor self, Scalar value) | ||||
|   self: zeros_like(grad) | ||||
|  | ||||
| - name: floor(Tensor self) | ||||
|   self: zeros_like(grad) | ||||
| @ -217,7 +218,6 @@ | ||||
|  | ||||
| - name: index_select(Tensor self, int64_t dim, Tensor index) | ||||
|   self: grad.type().zeros(self.sizes()).index_add_(dim, index, grad) | ||||
|   __view__: True | ||||
|  | ||||
| - name: inverse(Tensor self) | ||||
|   self: -at::mm(output.t(), at::mm(grad, output.t())) | ||||
| @ -348,10 +348,10 @@ | ||||
|   self: zeros_like(grad) | ||||
|  | ||||
| - name: norm(Tensor self, Scalar p=2) | ||||
|   self: norm_backward(grad, self, p) | ||||
|   self: norm_backward(grad, self, p, result) | ||||
|  | ||||
| - name: norm(Tensor self, Scalar p, int64_t dim, bool keepdim=False) | ||||
|   self: norm_backward(grad, self, p, dim, keepdim) | ||||
|   self: norm_backward(grad, self, p, destination, dim, keepdim) | ||||
|  | ||||
| - name: numel  # fallthrough | ||||
| - name: ones  # fallthrough | ||||
| @ -395,7 +395,7 @@ | ||||
|   self: not_implemented("pstrf") | ||||
|  | ||||
| - name: put(Tensor self, Tensor index, Tensor source, bool accumulate) | ||||
|   self: zeros_like(self).put_(index, source, accumulate) | ||||
|   self: grad.clone().put_(index, zeros_like(source), accumulate) | ||||
|   source: grad.take(index) | ||||
|  | ||||
| - name: qr(Tensor self) | ||||
| @ -468,7 +468,7 @@ | ||||
|   __view__: True | ||||
|  | ||||
| - name: squeeze(Tensor self, int64_t dim) | ||||
|   self: maybe_unsqueeze(grad, dim, self.size(dim) == 1) | ||||
|   self: maybe_unsqueeze(grad, dim, self.size(dim) == 1 && self.sizes().size() != 1) | ||||
|   __view__: True | ||||
|  | ||||
| - name: std | ||||
| @ -563,9 +563,9 @@ | ||||
|   grad_output: avg_pool3d(grad, kernel_size, stride, padding, ceil_mode, count_include_pad) | ||||
|   input: zeros_like(input) | ||||
|  | ||||
| - name: elu_backward(Tensor grad_output, Tensor input, Scalar alpha, bool inplace, Tensor output) | ||||
|   grad_output: elu_backward(grad, input, alpha, inplace, output) | ||||
|   input: grad * grad_input * (input < 0).toType(grad.type()) | ||||
| - name: elu_backward(Tensor grad_output, Scalar alpha, Tensor output) | ||||
|   grad_output: elu_backward(grad, alpha, output) | ||||
|   output: grad * grad_output * (output < 0).toType(grad.type()) | ||||
|  | ||||
| - name: glu_backward(Tensor grad_output, Tensor input, int64_t dim) | ||||
|   grad_output: glu_double_backward_grad_output(grad, input, dim) | ||||
| @ -575,11 +575,12 @@ | ||||
|   grad_output: hardshrink_backward(grad, input, lambd) | ||||
|   input: zeros_like(grad) | ||||
|  | ||||
| - name: hardtanh_backward(Tensor grad_output, Tensor input, Scalar min_val, Scalar max_val, bool inplace) | ||||
|   grad_output: hardtanh_backward(grad, input, min_val, max_val, false) | ||||
| - name: hardtanh_backward(Tensor grad_output, Tensor input, Scalar min_val, Scalar max_val) | ||||
|   grad_output: hardtanh_backward(grad, input, min_val, max_val) | ||||
|   input: zeros_like(grad) | ||||
|  | ||||
| - name: kl_div_backward(Tensor input, Tensor target, bool size_average) | ||||
| - name: kl_div_backward(Tensor grad_output, Tensor input, Tensor target, bool size_average, bool reduce) | ||||
|   grad_output: kl_div_double_backward_grad_output(grad, input, target, size_average, reduce) | ||||
|   input: zeros_like(grad) | ||||
|  | ||||
| - name: l1_loss_backward(Tensor grad_output, Tensor input, Tensor target, bool size_average, bool reduce) | ||||
| @ -594,8 +595,8 @@ | ||||
|   grad_output: grad - (grad * output.exp()).sum(dim, true) | ||||
|   input: log_softmax_double_backward(grad, grad_output, dim, output) | ||||
|  | ||||
| - name: leaky_relu_backward(Tensor grad_output, Tensor input, Scalar negative_slope, bool inplace) | ||||
|   grad_output: leaky_relu_backward(grad, input, negative_slope, false) | ||||
| - name: leaky_relu_backward(Tensor grad_output, Tensor input, Scalar negative_slope) | ||||
|   grad_output: leaky_relu_backward(grad, input, negative_slope) | ||||
|   input: zeros_like(grad) | ||||
|  | ||||
| - name: max_pool2d_backward(Tensor grad_output, Tensor input, IntList kernel_size, IntList stride, IntList padding, IntList dilation, bool ceil_mode, Tensor indices) | ||||
| @ -623,8 +624,8 @@ | ||||
|   input: zeros_like(input) | ||||
|   weight: zeros_like(weight) | ||||
|  | ||||
| - name: rrelu_backward(Tensor grad_output, Tensor input, Scalar lower, Scalar upper, bool training, bool inplace, Tensor noise) | ||||
|   grad_output: rrelu_backward(grad, input, lower, upper, training, false, noise) | ||||
| - name: rrelu_backward(Tensor grad_output, Tensor input, Scalar lower, Scalar upper, bool training, Tensor noise) | ||||
|   grad_output: rrelu_backward(grad, input, lower, upper, training, noise) | ||||
|   input: zeros_like(grad) | ||||
|  | ||||
| - name: smooth_l1_loss_backward(Tensor grad_output, Tensor input, Tensor target, bool size_average, bool reduce) | ||||
| @ -646,8 +647,8 @@ | ||||
|   grad_output: softshrink_backward(grad, input, lambd) | ||||
|   input: zeros_like(grad) | ||||
|  | ||||
| - name: threshold_backward(Tensor grad_output, Tensor input, Scalar threshold, Scalar value, bool inplace) | ||||
|   grad_output: threshold_backward(grad, input, threshold, value, false) | ||||
| - name: threshold_backward(Tensor grad_output, Tensor input, Scalar threshold, Scalar value) | ||||
|   grad_output: threshold_backward(grad, input, threshold, value) | ||||
|   input: zeros_like(grad) | ||||
|  | ||||
| - name: _sigmoid_backward(Tensor grad_output, Tensor output) | ||||
|  | ||||
| @ -49,6 +49,16 @@ PY_VARIABLE_METHOD_DEF = CodeTemplate("""\ | ||||
| UNPACK_SELF = "auto& self_ = reinterpret_cast<THPVariable*>(self)->cdata;" | ||||
|  | ||||
|  | ||||
| # XXX: if you got here because of an assertion failure, it doesn't mean | ||||
| # it's enough to just extend the list here. Before you do this, make sure | ||||
| # to add an appropriate wrap() overload in torch/csrc/autograd/utils/wrap_outputs.h. | ||||
| SUPPORTED_RETURN_TYPES = { | ||||
|     'Tensor', 'std::tuple<Tensor,Tensor>', | ||||
|     'std::tuple<Tensor,Tensor,Tensor>', 'std::vector<Tensor>', | ||||
|     'Scalar', 'bool', 'int64_t', 'void*' | ||||
| } | ||||
|  | ||||
|  | ||||
| def create_python_bindings( | ||||
|         python_functions, py_methods, py_method_defs, py_method_dispatch, | ||||
|         is_class): | ||||
| @ -80,6 +90,9 @@ def create_python_bindings( | ||||
|  | ||||
|     def emit_dispatch(i, function): | ||||
|         env = {} | ||||
|         simple_return_type = function['return_type'].replace(' &', '') | ||||
|         assert simple_return_type in SUPPORTED_RETURN_TYPES, \ | ||||
|             function['name'] + ' returns unsupported type: ' + simple_return_type | ||||
|  | ||||
|         actuals = [] | ||||
|         formal_args = [] | ||||
|  | ||||
| @ -39,7 +39,11 @@ return baseType->${method_prefix}${api_name}(${unpacked_args});""") | ||||
|  | ||||
| METHOD_DEFINITION_FALLTHROUGH_VARIABLE = CodeTemplate("""\ | ||||
| ${unpack_args} | ||||
| return as_variable(baseType->${method_prefix}${api_name}(${unpacked_args}));""") | ||||
| auto flags = compute_flags({ ${args_with_derivatives} }); | ||||
| auto var = as_variable(baseType->${method_prefix}${api_name}(${unpacked_args})); | ||||
| var.is_volatile() = flags.is_volatile; | ||||
| return var; | ||||
| """) | ||||
|  | ||||
| METHOD_DEFINITION_FALLTHROUGH_INPLACE = CodeTemplate("""\ | ||||
| ${unpack_args} | ||||
| @ -67,6 +71,7 @@ FUNCTION_DEFINITION = CodeTemplate("""\ | ||||
| variable_list ${op}::apply(const variable_list& grads) { | ||||
|   variable_list grad_inputs{${num_inputs}}; | ||||
|   ${body} | ||||
|   ensure_no_aten_scalars(grad_inputs); | ||||
|   return grad_inputs; | ||||
| } | ||||
| """) | ||||
| @ -682,11 +687,6 @@ def create_variable_type(top_env, aten_declarations): | ||||
|         if declaration['return_type'] in FALLTHROUGH_RETURN_TYPES: | ||||
|             body.extend(METHOD_DEFINITION_FALLTHROUGH.substitute(combined).split('\n')) | ||||
|             return body | ||||
|         elif declaration['name'] in FALLTHROUGH_FUNCTIONS: | ||||
|             tmpl = (METHOD_DEFINITION_FALLTHROUGH_INPLACE if declaration['inplace'] | ||||
|                     else METHOD_DEFINITION_FALLTHROUGH_VARIABLE) | ||||
|             body.extend(tmpl.substitute(combined).split('\n')) | ||||
|             return body | ||||
|  | ||||
|         arguments = declaration['arguments'] | ||||
|         tensor_args = [arg for arg in arguments if arg['simple_type'] in {'Tensor', 'TensorList'}] | ||||
| @ -752,6 +752,12 @@ def create_variable_type(top_env, aten_declarations): | ||||
|         elif is_view: | ||||
|             env['version_counter'] = 'take_version_counter(ret, self);' | ||||
|  | ||||
|         if declaration['name'] in FALLTHROUGH_FUNCTIONS: | ||||
|             tmpl = (METHOD_DEFINITION_FALLTHROUGH_INPLACE if declaration['inplace'] | ||||
|                     else METHOD_DEFINITION_FALLTHROUGH_VARIABLE) | ||||
|             body.extend(tmpl.substitute(combined).split('\n')) | ||||
|             return body | ||||
|  | ||||
|         base_call = BASE_CALL.substitute(combined) | ||||
|         if not declaration['inplace']: | ||||
|             base_call = 'auto ret = as_variable({})'.format(base_call) | ||||
|  | ||||
| @ -34,41 +34,44 @@ Tensor maybe_multiply(const Tensor & t, const Scalar & s) { | ||||
|   } | ||||
| } | ||||
|  | ||||
| Tensor norm_backward(const Tensor & grad, const Tensor & self, const Scalar & p_) { | ||||
|   auto p = p_.toDouble(); | ||||
|   auto norm = self.norm(p_); | ||||
|  | ||||
|   if (norm.toDouble() == 0.0) { | ||||
|     // handle case at 0 where we return a subgradient containing 0 | ||||
|     return zeros_like(self); | ||||
|   } | ||||
|  | ||||
|   if (p == 2.0) { | ||||
|     return self * (grad / norm); | ||||
|   } else { | ||||
|     auto pow_ = self.abs().pow(p - 2); | ||||
|     auto scale_v = grad / norm.toTensor().pow(p - 1); | ||||
|     return self * pow_ * scale_v; | ||||
| // Don't expose ATen scalars to Variable API, because they are not supported yet. | ||||
| void ensure_no_aten_scalars(variable_list &vars) { | ||||
|   for (auto& v : vars) { | ||||
|     if (v.defined() && v.dim() == 0) { | ||||
|       v.data().as_strided_({1}, {1}); | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| Tensor norm_backward(Tensor grad, const Tensor & self, const Scalar & p_, int64_t dim, bool keepdim) { | ||||
|   if (!keepdim && self.dim() > 1) { | ||||
|     grad = grad.unsqueeze(dim); | ||||
|   } | ||||
|   auto p = p_.toDouble(); | ||||
|   auto norm = self.norm(p, dim, true); | ||||
|   Tensor grad_input; | ||||
|   if (p == 2.0) { | ||||
|     grad_input = self * (grad / norm); | ||||
| Tensor norm_backward(const Tensor & grad, const Tensor & self, const Scalar & p_, const Tensor & norm) { | ||||
|   double p = p_.toDouble(); | ||||
|   Tensor self_scaled; | ||||
|   Tensor scale_v; | ||||
|   if (p == 0.0) { | ||||
|     return zeros_like(self); | ||||
|   } else if (p == 1.0) { | ||||
|     return self.sign() * grad; | ||||
|   } else if (p < 2.0) { | ||||
|     self_scaled = self.sign() * self.abs().pow(p - 1); | ||||
|     scale_v = grad / norm.pow(p - 1); | ||||
|   } else if (p == 2.0) { | ||||
|     self_scaled = self; | ||||
|     scale_v = grad / norm; | ||||
|   } else { | ||||
|     auto pow_ = self.abs().pow(p - 2); | ||||
|     auto scale_v = grad / norm.pow(p - 1); | ||||
|     grad_input = self * pow_ * scale_v; | ||||
|     self_scaled = self * self.abs().pow(p - 2); | ||||
|     scale_v = grad / norm.pow(p - 1); | ||||
|   } | ||||
|   // handle case at 0 where we return a subgradient containing 0 | ||||
|   grad_input.masked_fill_(norm == 0, 0); | ||||
|   return grad_input; | ||||
|   scale_v.masked_fill_(norm == 0, 0); | ||||
|   return self_scaled * scale_v; | ||||
| } | ||||
|  | ||||
| Tensor norm_backward(Tensor grad, const Tensor & self, const Scalar & p_, Tensor norm, int64_t dim, bool keepdim) { | ||||
|   if (!keepdim && self.dim() > 1) { | ||||
|     grad = grad.unsqueeze(dim); | ||||
|     norm = norm.unsqueeze(dim); | ||||
|   } | ||||
|   return norm_backward(grad, self, p_, norm); | ||||
| } | ||||
|  | ||||
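Written out, the branches of norm_backward implement the (sub)gradient of the p-norm, reusing the saved forward result norm = ||x||_p instead of recomputing it (for p >= 1):

    d||x||_p / dx_i = sign(x_i) * |x_i|^(p-1) / ||x||_p^(p-1)

so grad_input = grad * sign(x) * |x|^(p-1) / norm^(p-1); p = 1 reduces to sign(x) * grad, p = 2 to x * grad / norm, and entries where norm == 0 get the subgradient value 0 via masked_fill_.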
| Tensor reduce_to(const Tensor & grad, IntList sizes) { | ||||
| @ -300,6 +303,16 @@ Tensor glu_double_backward_grad_output(const Tensor & grad, const Tensor & input | ||||
|   return tmp.narrow(dim, 0, sizes[dim]) + tmp.narrow(dim, sizes[dim], sizes[dim]); | ||||
| } | ||||
|  | ||||
| Tensor kl_div_double_backward_grad_output(const Tensor & grad, const Tensor & input, const Tensor & target, bool size_average, bool reduce) { | ||||
|   auto result = kl_div_backward(grad, input, target, size_average, false); | ||||
|   if (reduce && size_average) { | ||||
|     return result.mean().toTensor(); | ||||
|   } else if (reduce) { | ||||
|     return result.sum().toTensor(); | ||||
|   } | ||||
|   return result; | ||||
| } | ||||
|  | ||||
| Tensor log_sigmoid_double_backward(const Tensor & grad, const Tensor & input) { | ||||
|   auto z = input.sigmoid(); | ||||
|   return grad * (z - 1) * z; | ||||
|  | ||||
| @ -25,7 +25,7 @@ RUN curl -o ~/miniconda.sh -O  https://repo.continuum.io/miniconda/Miniconda3-la | ||||
|      /opt/conda/bin/conda create -y --name pytorch-py$PYTHON_VERSION python=$PYTHON_VERSION numpy pyyaml scipy ipython mkl&& \ | ||||
|      /opt/conda/bin/conda clean -ya  | ||||
| ENV PATH /opt/conda/envs/pytorch-py$PYTHON_VERSION/bin:$PATH | ||||
| #RUN conda install --name pytorch-py$PYTHON_VERSION -c soumith magma-cuda80 | ||||
| RUN conda install --name pytorch-py$PYTHON_VERSION -c soumith magma-cuda90 | ||||
| # This must be done before pip so that requirements.txt is available | ||||
| WORKDIR /opt/pytorch | ||||
| COPY . . | ||||
|  | ||||
							
								
								
									
tools/pytorch.version (new file)
							| @ -0,0 +1,31 @@ | ||||
| { | ||||
|      global: | ||||
|          _TH*; | ||||
|          __TH*; | ||||
|          TH*; | ||||
|          *THP*; | ||||
|          *THCP*; | ||||
|          PyInit*; | ||||
|          init*; | ||||
|          state; | ||||
| 	 _ZGVZN2at*; | ||||
|          _ZN2at*; | ||||
| 	 _ZNK2at*Type*; | ||||
| 	 _ZNK2at*Tensor*; | ||||
| 	 _ZNK2at*Storage*; | ||||
| 	 _ZNK2at*Scalar*; | ||||
| 	 _ZNK2at*CUDA*; | ||||
| 	 *2at7Context*; | ||||
| 	 _ZTIN2at*; | ||||
| 	 _ZTIZN2at*; | ||||
| 	 _ZTSN2at*; | ||||
| 	 _ZTSPN2at*; | ||||
| 	 _ZTSZN2at*; | ||||
| 	 _ZTVN2at*; | ||||
| 	 _ZZN2at*; | ||||
| 	 _Z*torch*; | ||||
| 	 _Z*Tensor*; | ||||
| 	 _Z*tensor*; | ||||
|      local: | ||||
|          *; | ||||
|  }; | ||||
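This new file is a GNU ld version script: the `global:` patterns keep the TH*/THP* and ATen (`_ZN2at...`) symbols exported from the shared library, while `local: *` hides everything else. A build would typically consume it via a linker flag along the lines of `-Wl,--version-script=tools/pytorch.version`; exactly where the build wires this in is not shown in this diff, so treat that flag as an assumption.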
| @ -108,6 +108,11 @@ def set_default_tensor_type(t): | ||||
|     global Storage | ||||
|     Tensor = _import_dotted_name(t) | ||||
|     Storage = _import_dotted_name(t.replace('Tensor', 'Storage')) | ||||
|  | ||||
|     if 'cuda' in t: | ||||
|         import torch.cuda | ||||
|         torch.cuda.init() | ||||
|  | ||||
|     _C._set_default_tensor_type(Tensor) | ||||
|  | ||||
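A quick sketch of the behavior this hunk adds: selecting a CUDA tensor type as the default now initializes torch.cuda eagerly rather than lazily. The example below assumes a CUDA-capable build; the assertion only illustrates the effect:

    import torch

    torch.set_default_tensor_type('torch.cuda.FloatTensor')  # now also calls torch.cuda.init()
    x = torch.Tensor(3)    # allocated as a torch.cuda.FloatTensor
    assert x.is_cuda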
|  | ||||
|  | ||||
										
											
torch/_torch_docs.py (1653): file diff suppressed because it is too large
											
										
									
								
							| @ -12,7 +12,7 @@ def _type(self, new_type=None, async=False): | ||||
|  | ||||
|     Args: | ||||
|         new_type (type or string): The desired type | ||||
|         async (bool): If True, and the source is in pinned memory and | ||||
|         async (bool): If ``True``, and the source is in pinned memory and | ||||
|                       destination is on the GPU or vice versa, the copy is | ||||
|                       performed asynchronously with respect to the host. | ||||
|                       Otherwise, the argument has no effect. | ||||
| @ -46,7 +46,7 @@ def _cuda(self, device=None, async=False): | ||||
|  | ||||
|     Args: | ||||
|         device (int): The destination GPU id. Defaults to the current device. | ||||
|         async (bool): If True and the source is in pinned memory, the copy will | ||||
|         async (bool): If ``True`` and the source is in pinned memory, the copy will | ||||
|                       be asynchronous with respect to the host. Otherwise, the | ||||
|                       argument has no effect. | ||||
|     """ | ||||
|  | ||||
| @ -63,16 +63,16 @@ def backward(variables, grad_variables=None, retain_graph=None, create_graph=Non | ||||
|         grad_variables (sequence of (Tensor, Variable or None)): Gradients w.r.t. | ||||
|             each element of corresponding variables.  Any tensors will be | ||||
|             automatically converted to Variables that are volatile unless | ||||
|             ``create_graph`` is True.  None values can be specified for scalar | ||||
|             ``create_graph`` is ``True``.  None values can be specified for scalar | ||||
|             Variables or ones that don't require grad. If a None value would | ||||
|             be acceptable for all grad_variables, then this argument is optional. | ||||
|         retain_graph (bool, optional): If False, the graph used to compute the grad | ||||
|             will be freed. Note that in nearly all cases setting this option to True | ||||
|         retain_graph (bool, optional): If ``False``, the graph used to compute the grad | ||||
|             will be freed. Note that in nearly all cases setting this option to ``True`` | ||||
|             is not needed and often can be worked around in a much more efficient | ||||
|             way. Defaults to the value of ``create_graph``. | ||||
|         create_graph (bool, optional): If true, graph of the derivative will | ||||
|         create_graph (bool, optional): If ``True``, graph of the derivative will | ||||
|             be constructed, allowing to compute higher order derivative products. | ||||
|             Defaults to False, unless ``grad_variables`` contains at least one | ||||
|             Defaults to ``False``, unless ``grad_variables`` contains at least one | ||||
|             non-volatile Variable. | ||||
|     """ | ||||
|     variables = (variables,) if isinstance(variables, Variable) else tuple(variables) | ||||
| @ -109,8 +109,8 @@ def grad(outputs, inputs, grad_outputs=None, retain_graph=None, create_graph=Non | ||||
|     Gradients can be given as Tensors when one doesn't need the graph of the | ||||
|     derivative, or as Variables, in which case the graph will be created. | ||||
|  | ||||
|     If ``only_inputs`` is True, the function will only return a list of gradients | ||||
|     w.r.t the specified inputs. If it's False, then gradient w.r.t. all remaining | ||||
|     If ``only_inputs`` is ``True``, the function will only return a list of gradients | ||||
|     w.r.t the specified inputs. If it's ``False``, then gradient w.r.t. all remaining | ||||
|     leaves will still be computed, and will be accumulated into their ``.grad`` | ||||
|     attribute. | ||||
|  | ||||
| @ -120,24 +120,24 @@ def grad(outputs, inputs, grad_outputs=None, retain_graph=None, create_graph=Non | ||||
|             returned (and not accumulated into ``.grad``). | ||||
|         grad_outputs (sequence of Tensor or Variable): Gradients w.r.t. each output. | ||||
|             Any tensors will be automatically converted to Variables that are | ||||
|             volatile unless ``create_graph`` is True.  None values can be | ||||
|             volatile unless ``create_graph`` is ``True``. None values can be | ||||
|             specified for scalar Variables or ones that don't require grad. | ||||
|             If a None value would be acceptable for all grad_variables, then | ||||
|             this argument is optional. | ||||
|         retain_graph (bool, optional): If False, the graph used to compute the grad | ||||
|             will be freed. Note that in nearly all cases setting this option to True | ||||
|         retain_graph (bool, optional): If ``False``, the graph used to compute the grad | ||||
|             will be freed. Note that in nearly all cases setting this option to ``True`` | ||||
|             is not needed and often can be worked around in a much more efficient | ||||
|             way. Defaults to the value of ``create_graph``. | ||||
|         create_graph (bool, optional): If True, graph of the derivative will | ||||
|         create_graph (bool, optional): If ``True``, graph of the derivative will | ||||
|             be constructed, allowing to compute higher order derivative products. | ||||
|             Defaults to False, unless ``grad_variables`` contains at least one | ||||
|             Defaults to ``False``, unless ``grad_variables`` contains at least one | ||||
|             non-volatile Variable. | ||||
|         only_inputs (bool, optional): If True, gradient w.r.t. leaves that are | ||||
|         only_inputs (bool, optional): If ``True``, gradient w.r.t. leaves that are | ||||
|             part of the graph, but don't appear in ``inputs`` won't be computed | ||||
|             and accumulated. Defaults to True. | ||||
|         allow_unused (bool, optional): If False, specifying inputs that were not | ||||
|             and accumulated. Defaults to ``True``. | ||||
|         allow_unused (bool, optional): If ``False``, specifying inputs that were not | ||||
|             used when computing outputs (and therefore their grad is always zero) | ||||
|             is an error. Default: False. | ||||
|             is an error. Defaults to ``False``. | ||||
|     """ | ||||
|  | ||||
|     outputs = (outputs,) if isinstance(outputs, Variable) else tuple(outputs) | ||||
|  | ||||
| @ -2,7 +2,7 @@ import torch | ||||
| from ..function import Function | ||||
|  | ||||
|  | ||||
| class Multinomial(Function): | ||||
| class Categorical(Function): | ||||
|     @staticmethod | ||||
|     def forward(ctx, probs, num_samples, with_replacement): | ||||
|         samples = probs.multinomial(num_samples, with_replacement) | ||||
|  | ||||
| @ -57,15 +57,14 @@ def maybe_unexpand_or_view(variable, old_size): | ||||
| #          The order is dim_n_begin, dim_n_end, dim_n-1_begin, dim_n-1_end, ... | ||||
| def prepare_onnx_paddings(dim, pad): | ||||
|     assert isinstance(dim, int) | ||||
|     # The order of paddings is dim_0_begin, dim_0_end, dim_1_begin, ... , dim_n_end. | ||||
|     # The desired order of paddings is | ||||
|     # dim_0_begin, dim_1_begin, ... , dim_0_end, ..., dim_n_end. | ||||
|     # n is the dimension of input. | ||||
|     assert len(pad) <= dim * 2 | ||||
|     paddings = [] | ||||
|     # pad is guaranteed to have even elements. | ||||
|     for i, j in zip(pad[0::2], pad[1::2]): | ||||
|         paddings = [i, j] + paddings | ||||
|     while len(paddings) < 2 * dim: | ||||
|         paddings = [0, 0] + paddings | ||||
|     # assume zero-dimensions in the beginning | ||||
|     paddings = list(pad[:]) + [0] * (dim * 2 - len(pad)) | ||||
|     # reverse order and collate first beginnings and then ends | ||||
|     paddings = paddings[-2::-2] + paddings[-1::-2] | ||||
|     assert len(paddings) == dim * 2 | ||||
|     return paddings | ||||
|  | ||||
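Tracing the new implementation with the values from the updated TestONNXUtils expectation (dim=3, pad=[1, 2, 3, 4]) makes the reordering concrete:

    pad      = [1, 2, 3, 4]
    paddings = [1, 2, 3, 4, 0, 0]    # extended with zeros to 2 * dim entries
    begins   = paddings[-2::-2]      # [0, 3, 1]  (begin values for dims 0..2)
    ends     = paddings[-1::-2]      # [0, 4, 2]  (end values for dims 0..2)
    result   = begins + ends         # [0, 3, 1, 0, 4, 2]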
|  | ||||
| @ -203,7 +203,7 @@ def gradcheck(func, inputs, eps=1e-6, atol=1e-5, rtol=1e-3, raise_exception=True | ||||
|     return True | ||||
|  | ||||
|  | ||||
| def gradgradcheck(func, inputs, grad_outputs, eps=1e-6, atol=1e-5, rtol=1e-3): | ||||
| def gradgradcheck(func, inputs, grad_outputs=None, eps=1e-6, atol=1e-5, rtol=1e-3): | ||||
|     """Check gradients of gradients computed via small finite differences | ||||
|        against analytical gradients | ||||
|     This function checks that backpropagating through the gradients computed | ||||
| @ -216,17 +216,27 @@ def gradgradcheck(func, inputs, grad_outputs, eps=1e-6, atol=1e-5, rtol=1e-3): | ||||
|     is true for all elements of analytical gradient a and numerical gradient n. | ||||
|  | ||||
|     Args: | ||||
|         func: Python function that takes Variable inputs and returns | ||||
|         func (function): Python function that takes Variable inputs and returns | ||||
|             a tuple of Variables | ||||
|         inputs: tuple of Variables | ||||
|         grad_outputs: tuple of Variables | ||||
|         eps: perturbation for finite differences | ||||
|         atol: absolute tolerance | ||||
|         rtol: relative tolerance | ||||
|         inputs (tuple of Variable): inputs to the function | ||||
|         grad_outputs (tuple of Variable, optional): The gradients with respect to | ||||
|             the function's outputs. | ||||
|         eps (float, optional): perturbation for finite differences | ||||
|         atol (float, optional): absolute tolerance | ||||
|         rtol (float, optional): relative tolerance | ||||
|  | ||||
|     Returns: | ||||
|         True if all differences satisfy allclose condition | ||||
|         True if all differences satisfy allclose condition. Raises an exception | ||||
|         otherwise. | ||||
|     """ | ||||
|     if grad_outputs is None: | ||||
|         # If grad_outputs is not specified, create random variables of the same | ||||
|         # shape, type, and device as the outputs | ||||
|         def randn_like(x): | ||||
|             return Variable(x.data.new(x.size()).normal_(), requires_grad=True) | ||||
|         outputs = _as_tuple(func(*inputs)) | ||||
|         grad_outputs = [randn_like(x) for x in outputs] | ||||
|  | ||||
|     def new_func(*input_args): | ||||
|         input_args = input_args[:-len(grad_outputs)] | ||||
|         outputs = _differentiable_outputs(func(*input_args)) | ||||
|  | ||||
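With grad_outputs now optional, a second-order check can be run as in this sketch (double-precision inputs with requires_grad, as gradcheck expects; the function and names are illustrative):

    import torch
    from torch.autograd import Variable
    from torch.autograd.gradcheck import gradcheck, gradgradcheck

    x = Variable(torch.randn(4, 3).double(), requires_grad=True)

    def func(inp):
        return (inp * inp).sum(1)

    assert gradcheck(func, (x,))      # first-order finite-difference check
    assert gradgradcheck(func, (x,))  # random grad_outputs are generated internally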
| @ -1,12 +1,18 @@ | ||||
| import torch | ||||
| import subprocess | ||||
| import os | ||||
| import sys | ||||
| import copy | ||||
| import tempfile | ||||
| import re | ||||
| import itertools | ||||
| from collections import defaultdict, namedtuple | ||||
|  | ||||
| import torch | ||||
|  | ||||
| try: | ||||
|     FileNotFoundError | ||||
| except NameError: | ||||
|     # py2.7 | ||||
|     FileNotFoundError = IOError | ||||
|  | ||||
|  | ||||
| class EventList(list): | ||||
|     """A list of Events (for pretty printing)""" | ||||
| @ -17,6 +23,17 @@ class EventList(list): | ||||
|         return self.table() | ||||
|  | ||||
|     def table(self, sort_by=None): | ||||
|         """Prints an EventList as a nicely formatted table. | ||||
|  | ||||
|         Arguments: | ||||
|             sort_by (str, optional): Attribute used to sort entries. By default | ||||
|                 they are printed in the same order as they were registered. | ||||
|                 Valid keys include: ``cpu_time``, ``cuda_time``, ``cpu_time_total``, | ||||
|                 ``cuda_time_total``, ``count``. | ||||
|  | ||||
|         Returns: | ||||
|             A string containing the table. | ||||
|         """ | ||||
|         return build_table(self, sort_by) | ||||
|  | ||||
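A usage sketch for the table() method documented above (the profiled operations are arbitrary):

    import torch
    from torch.autograd import Variable, profiler

    x = Variable(torch.randn(64, 64))
    with profiler.profile() as prof:
        y = x.mm(x)
        z = y.sum()

    print(prof.table(sort_by='cpu_time_total'))  # any of the keys listed above
    # prof.export_chrome_trace('trace.json') writes a chrome://tracing-compatible file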
|     def export_chrome_trace(self, path): | ||||
| @ -72,7 +89,7 @@ class profile(object): | ||||
|  | ||||
|     Arguments: | ||||
|         enabled (bool, optional): Setting this to False makes this context manager a no-op. | ||||
|             Default: True. | ||||
|             Default: ``True``. | ||||
|  | ||||
|     .. warning: | ||||
|         This context manager should not be called recursively, i.e. at most one | ||||
| @ -131,21 +148,27 @@ class profile(object): | ||||
|             return '<unfinished torch.autograd.profile>' | ||||
|         return str(self.function_events) | ||||
|  | ||||
|     def export_chrome_trace(self, path): | ||||
|     def _check_finish(self): | ||||
|         if self.function_events is None: | ||||
|             raise RuntimeError("can't export a trace that didn't finish running") | ||||
|  | ||||
|     def table(self, sort_by=None): | ||||
|         self._check_finish() | ||||
|         return self.function_events.table(sort_by) | ||||
|     table.__doc__ = EventList.table.__doc__ | ||||
|  | ||||
|     def export_chrome_trace(self, path): | ||||
|         self._check_finish() | ||||
|         return self.function_events.export_chrome_trace(path) | ||||
|     export_chrome_trace.__doc__ = EventList.export_chrome_trace.__doc__ | ||||
|  | ||||
|     def key_averages(self): | ||||
|         if self.function_events is None: | ||||
|             raise RuntimeError("can't average a trace that didn't finish running") | ||||
|         self._check_finish() | ||||
|         return self.function_events.key_averages() | ||||
|     key_averages.__doc__ = EventList.key_averages.__doc__ | ||||
|  | ||||
|     def total_average(self): | ||||
|         if self.function_events is None: | ||||
|             raise RuntimeError("can't average a trace that didn't finish running") | ||||
|         self._check_finish() | ||||
|         return self.function_events.total_average() | ||||
|     total_average.__doc__ = EventList.total_average.__doc__ | ||||
|  | ||||
| @ -153,18 +176,24 @@ class profile(object): | ||||
| class emit_nvtx(object): | ||||
|     """Context manager that makes every autograd operation emit an NVTX range. | ||||
|  | ||||
|     It is useful when running the program under nvprof. Unfortunately, there's no | ||||
|     way to force nvprof to flush the data it collected to disk, so for CUDA profiling | ||||
|     one has to use this context manager to annotate nvprof traces, and then use | ||||
|     :func:`torch.autograd.profiler.open_nvtx` to analyze the checkpoint. | ||||
|     It is useful when running the program under nvprof:: | ||||
|  | ||||
|         nvprof --profile-from-start off -o trace_name.prof -- <regular command here> | ||||
|  | ||||
|     Unfortunately, there's no way to force nvprof to flush the data it collected | ||||
|     to disk, so for CUDA profiling one has to use this context manager to annotate | ||||
|     nvprof traces and wait for the process to exit before inspecting them. | ||||
|     Then, either NVIDIA Visual Profiler (nvvp) can be used to visualize the timeline, or | ||||
|     :func:`torch.autograd.profiler.load_nvprof` can load the results for inspection | ||||
|     e.g. in Python REPL. | ||||
|  | ||||
|     .. warning: | ||||
|         This context managers should not be called recursively, i.e. at most one | ||||
|         This context manager should not be called recursively, i.e. at most one | ||||
|         instance should be enabled at any given time. | ||||
|  | ||||
|     Arguments: | ||||
|         enabled (bool, optional): Setting this to False makes this context manager a no-op. | ||||
|             Default: True. | ||||
|             Default: ``True``. | ||||
|  | ||||
|     Example: | ||||
|         >>> with torch.cuda.profiler.profile(): | ||||
| @ -173,7 +202,7 @@ class emit_nvtx(object): | ||||
|         ...         model(x) | ||||
|     """ | ||||
|     def __init__(self, enabled=True): | ||||
|         self.enabled = True | ||||
|         self.enabled = enabled | ||||
|         self.entered = False | ||||
|  | ||||
|     def __enter__(self): | ||||
| @ -291,7 +320,7 @@ def demangle(name): | ||||
|     try: | ||||
|         with open(os.devnull, 'w') as devnull: | ||||
|             return subprocess.check_output(['c++filt', '-n', name], stderr=devnull).rstrip().decode("ascii") | ||||
|     except subprocess.CalledProcessError: | ||||
|     except (subprocess.CalledProcessError, OSError, FileNotFoundError) as e: | ||||
|         return name | ||||
|  | ||||
|  | ||||
|  | ||||
| @ -154,14 +154,14 @@ class Variable(_C._VariableBase): | ||||
|                 None values can be specified for scalar Variables or ones that | ||||
|                 don't require grad. If a None value would be acceptable then | ||||
|                 this argument is optional. | ||||
|             retain_graph (bool, optional): If False, the graph used to compute | ||||
|             retain_graph (bool, optional): If ``False``, the graph used to compute | ||||
|                 the grads will be freed. Note that in nearly all cases setting | ||||
|                 this option to True is not needed and often can be worked around | ||||
|                 in a much more efficient way. Defaults to the value of | ||||
|                 ``create_graph``. | ||||
|             create_graph (bool, optional): If true, graph of the derivative will | ||||
|             create_graph (bool, optional): If ``True``, graph of the derivative will | ||||
|                 be constructed, allowing to compute higher order derivative | ||||
|                 products. Defaults to False, unless ``gradient`` is a volatile | ||||
|                 products. Defaults to ``False``, unless ``gradient`` is a volatile | ||||
|                 Variable. | ||||
|         """ | ||||
|         torch.autograd.backward(self, gradient, retain_graph, create_graph, retain_variables) | ||||
| @ -205,20 +205,31 @@ class Variable(_C._VariableBase): | ||||
|         return handle | ||||
|  | ||||
|     def reinforce(self, reward): | ||||
|         """Registers a reward obtained as a result of a stochastic process. | ||||
|         def trim(str): | ||||
|             return '\n'.join([line.strip() for line in str.split('\n')]) | ||||
|  | ||||
|         Differentiating stochastic nodes requires providing them with reward | ||||
|         value. If your graph contains any stochastic operations, you should | ||||
|         call this function on their outputs. Otherwise an error will be raised. | ||||
|         raise RuntimeError(trim(r"""reinforce() was removed. | ||||
|             Use torch.distributions instead. | ||||
|             See http://pytorch.org/docs/master/distributions.html | ||||
|  | ||||
|         Parameters: | ||||
|             reward(Tensor): Tensor with per-element rewards. It has to match | ||||
|                 the device location and shape of Variable's data. | ||||
|         """ | ||||
|         if not isinstance(self.grad_fn, StochasticFunction): | ||||
|             raise RuntimeError("reinforce() can be only called on outputs " | ||||
|                                "of stochastic functions") | ||||
|         self.grad_fn._reinforce(reward) | ||||
|             Instead of: | ||||
|  | ||||
|             probs = policy_network(state) | ||||
|             action = probs.multinomial() | ||||
|             next_state, reward = env.step(action) | ||||
|             action.reinforce(reward) | ||||
|             action.backward() | ||||
|  | ||||
|             Use: | ||||
|  | ||||
|             probs = policy_network(state) | ||||
|             # NOTE: categorical is equivalent to what used to be called multinomial | ||||
|             m = torch.distributions.Categorical(probs) | ||||
|             action = m.sample() | ||||
|             next_state, reward = env.step(action) | ||||
|             loss = -m.log_prob(action) * reward | ||||
|             loss.backward() | ||||
|         """)) | ||||
|  | ||||
|     def detach(self): | ||||
|         """Returns a new Variable, detached from the current graph. | ||||
| @ -422,7 +433,7 @@ class Variable(_C._VariableBase): | ||||
|         return self.expand(tensor.size()) | ||||
|  | ||||
|     def multinomial(self, num_samples=1, replacement=False): | ||||
|         return Multinomial.apply(self, num_samples, replacement) | ||||
|         return Categorical.apply(self, num_samples, replacement) | ||||
|  | ||||
|     def bernoulli(self): | ||||
|         return Bernoulli.apply(self) | ||||
|  | ||||
| @ -257,10 +257,11 @@ class RNNDescriptor(object): | ||||
|                 CUDNN_RNN_ALGO_STANDARD, | ||||
|                 datatype | ||||
|             )) | ||||
|         if version() >= 7000 and int(cuda[0]) >= 9: | ||||
|             lib.cudnnSetRNNMatrixMathType(self, CUDNN_DEFAULT_MATH) | ||||
|             if datatype == CUDNN_DATA_HALF: | ||||
|                 lib.cudnnSetRNNMatrixMathType(self, CUDNN_TENSOR_OP_MATH) | ||||
|             if version() >= 7000 and int(cuda[0]) >= 9 and ( | ||||
|                     torch.cuda.get_device_capability(torch.cuda.current_device())[0] >= 7): | ||||
|                 lib.cudnnSetRNNMatrixMathType(self, CUDNN_DEFAULT_MATH) | ||||
|                 if datatype == CUDNN_DATA_HALF: | ||||
|                     lib.cudnnSetRNNMatrixMathType(self, CUDNN_TENSOR_OP_MATH) | ||||
|         else: | ||||
|             check_error(lib.cudnnSetRNNDescriptor( | ||||
|                 self, | ||||
|  | ||||
| @ -203,13 +203,6 @@ def forward(fn, input, hx, weight, output, hy): | ||||
|         if fn.batch_first and not is_input_packed: | ||||
|             input = input.transpose(0, 1) | ||||
|  | ||||
|         if (not is_input_packed and input.dim() != 3) or (is_input_packed and input.dim() != 2): | ||||
|             raise RuntimeError( | ||||
|                 'input must have 3 dimensions, got {}'.format(input.dim())) | ||||
|         if fn.input_size != input.size(-1): | ||||
|             raise RuntimeError('input.size(-1) must be equal to input_size. Expected {}, got {}'.format( | ||||
|                 fn.input_size, input.size(-1) | ||||
|             )) | ||||
|         if fn.dropout != 0 and cudnn.version() < 5103: | ||||
|             raise RuntimeError('dropout supported only in cudnn v5.1 and above') | ||||
|  | ||||
| @ -261,9 +254,6 @@ def forward(fn, input, hx, weight, output, hy): | ||||
|             fn.w_desc = init_weight_descriptor(fn, fn.weight_buf) | ||||
|             w = fn.weight_buf | ||||
|  | ||||
|         if tuple(hx.size()) != hidden_size: | ||||
|             raise RuntimeError('Expected hidden size {}, got {}'.format( | ||||
|                 hidden_size, tuple(hx.size()))) | ||||
|         if cx is not None and tuple(cx.size()) != hidden_size: | ||||
|             raise RuntimeError('Expected cell size {}, got {}'.format( | ||||
|                 hidden_size, tuple(cx.size()))) | ||||
|  | ||||
							
								
								
									
torch/csrc/DataLoader.cpp (new file)
							| @ -0,0 +1,203 @@ | ||||
| #include <sys/wait.h> | ||||
| #include <map> | ||||
| #include <set> | ||||
| #include <atomic> | ||||
| #include <signal.h> | ||||
| #include "THP.h" | ||||
|  | ||||
| // In cases like DataLoader, if a worker process dies due to a bus error or | ||||
| // segfault, or simply hangs, the main process, if implemented with | ||||
| // multiprocessing.queue.SimpleQueue, will hang waiting for data. This is | ||||
| // difficult to avoid on the PyTorch side, as it can be caused by limited shm or | ||||
| // by other libraries users call in the workers. The following methods are an | ||||
| // effort to do our best to provide some error message to users when such | ||||
| // unfortunate events happen. | ||||
|  | ||||
| // TODO: The following don't work on Windows. Specifically, the sigaction and | ||||
| // waitid calls, and the SIGCHLD handler. Currently, dummy implementations are | ||||
| // provided for Windows. | ||||
|  | ||||
| #ifndef _WIN32 | ||||
|  | ||||
| // Critical signal handlers should be registered on worker processes before | ||||
| // doing work. | ||||
| // Each handler re-raises the signal with the default handler, so that the kill | ||||
| // information can be retrieved by the main process. | ||||
| // The Python handle is _set_worker_signal_handlers(). | ||||
| #define SIGNAL_HANDLER(SIGNAL, HANDLER_NAME, ERROR_MSG)                       \ | ||||
| static void HANDLER_NAME(int sig, siginfo_t *info, void *ctx)                 \ | ||||
| {                                                                             \ | ||||
|   write(STDERR_FILENO, ERROR_MSG, sizeof(ERROR_MSG) / sizeof(char));          \ | ||||
|   struct sigaction sa;                                                        \ | ||||
|   sa.sa_handler = SIG_DFL;                                                    \ | ||||
|   sa.sa_flags = 0;                                                            \ | ||||
|   if (sigemptyset(&sa.sa_mask) != 0 || sigaction(SIGNAL, &sa, NULL) != 0) {   \ | ||||
|     _exit(EXIT_FAILURE);                                                      \ | ||||
|   } else {                                                                    \ | ||||
|     raise(SIGNAL);                                                            \ | ||||
|   }                                                                           \ | ||||
| } | ||||
|  | ||||
| // signal(2) is really not portable. So use sigaction. | ||||
| // http://man7.org/linux/man-pages/man2/signal.2.html | ||||
| static inline void setSignalHandler(int signal, void(*handler)(int, siginfo_t *, void *), struct sigaction *old_sa_ptr) | ||||
| { | ||||
|   struct sigaction sa; | ||||
|   sa.sa_sigaction = handler; | ||||
|   sa.sa_flags = SA_RESTART|SA_SIGINFO|SA_NOCLDSTOP|SA_NODEFER; | ||||
|   if (sigemptyset(&sa.sa_mask) != 0 || sigaction(signal, &sa, old_sa_ptr) != 0) { | ||||
|     std::ostringstream oss; | ||||
|     oss << "An error occurred while setting handler for " << strsignal(signal) << "."; | ||||
|     throw std::runtime_error(oss.str()); | ||||
|   } | ||||
| } | ||||
|  | ||||
| SIGNAL_HANDLER(SIGBUS, handler_SIGBUS, "ERROR: Unexpected bus error encountered in worker. " | ||||
|   "This might be caused by insufficient shared memory (shm).\n"); | ||||
| SIGNAL_HANDLER(SIGSEGV, handler_SIGSEGV, "ERROR: Unexpected segmentation fault encountered in worker.\n"); | ||||
|  | ||||
| // When an error happens in DataLoader methods and Python starts to exit, the | ||||
| // error trace will keep the loader alive, and Python may kill the child | ||||
| // processes before deleting the loader object. Then the clean-up methods in | ||||
| // DataLoader.__del__ are not yet called, and the SIGCHLD handler will print an | ||||
| // error saying a worker was killed by SIGTERM. So we suppress SIGTERM from the | ||||
| // main loader process here by calling _exit(EXIT_SUCCESS). Note that if we | ||||
| // exited with a nonzero code, the loader's SIGCHLD handler might report a | ||||
| // RuntimeError again, defeating the whole purpose. | ||||
| static void handler_SIGTERM(int sig, siginfo_t *info, void *ctx) | ||||
| { | ||||
|   if (info->si_pid == getppid()) { | ||||
|     _exit(EXIT_SUCCESS); | ||||
|   } | ||||
|   struct sigaction sa; | ||||
|   sa.sa_handler = SIG_DFL; | ||||
|   sa.sa_flags = 0; | ||||
|   if (sigemptyset(&sa.sa_mask) != 0 || sigaction(SIGTERM, &sa, NULL) != 0) { | ||||
|     _exit(EXIT_FAILURE); | ||||
|   } else { | ||||
|     raise(SIGTERM); | ||||
|   } | ||||
| } | ||||
|  | ||||
| PyObject *THPModule_setWorkerSignalHandlers(PyObject *module, PyObject *arg) { | ||||
|   HANDLE_TH_ERRORS | ||||
|   setSignalHandler(SIGBUS, &handler_SIGBUS, NULL); | ||||
|   setSignalHandler(SIGSEGV, &handler_SIGSEGV, NULL); | ||||
|   setSignalHandler(SIGTERM, &handler_SIGTERM, NULL); | ||||
|   Py_RETURN_TRUE; | ||||
|   END_HANDLE_TH_ERRORS | ||||
| } | ||||
|  | ||||
| static std::map<int64_t, std::set<pid_t>> worker_pids = {}; | ||||
|  | ||||
| PyObject *THPModule_errorIfAnyWorkerFails(PyObject *module) { | ||||
|   HANDLE_TH_ERRORS | ||||
|   int error; | ||||
|   std::set<pid_t> *pid_set; | ||||
|   pid_t worker_pid; | ||||
|   siginfo_t infop; | ||||
|  | ||||
|   // Only check the pids we care about | ||||
|   for (auto it = worker_pids.begin(); it != worker_pids.end(); ++it) { | ||||
|     pid_set = &(it->second); | ||||
|     for (auto pid_it = pid_set->begin(); pid_it != pid_set->end(); ++pid_it) { | ||||
|       worker_pid = *pid_it; | ||||
|       // Use waitid rather than waitpid so that we can set WNOWAIT, so that Python | ||||
|       // and other handlers can still get whatever info they want about the child. | ||||
|       infop.si_pid = 0; | ||||
|       error = waitid(P_PID, worker_pid, &infop, WEXITED|WNOHANG|WNOWAIT); | ||||
|       // ignore errors and case with no waitable child | ||||
|       if (error < 0 || infop.si_pid == 0) | ||||
|         continue; | ||||
|       if (infop.si_code == CLD_EXITED && infop.si_status != EXIT_SUCCESS) {  // exit with error | ||||
|         std::ostringstream oss; | ||||
|         oss << "DataLoader worker (pid " << worker_pid << ") exited " | ||||
|             << "unexpectedly with exit code " << infop.si_status << "."; | ||||
|         // This is necessary. Otherwise, the runtime error will kill the other | ||||
|         // workers, and trigger this again. | ||||
|         pid_set->clear(); | ||||
|         throw std::runtime_error(oss.str()); | ||||
|       }  else if (infop.si_code == CLD_KILLED || infop.si_code == CLD_DUMPED) {  // killed by signal | ||||
|         std::ostringstream oss; | ||||
|         oss << "DataLoader worker (pid " << worker_pid << ") is killed " | ||||
|             << "by signal: " << strsignal(infop.si_status) << "."; | ||||
|         // This is necessary. Otherwise, the runtime error will kill the other | ||||
|         // workers, and trigger this again. | ||||
|         pid_set->clear(); | ||||
|         throw std::runtime_error(oss.str()); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   Py_RETURN_NONE; | ||||
|   END_HANDLE_TH_ERRORS | ||||
| } | ||||
|  | ||||
| // We don't want to exit on any SIGCHLD from any child. child_pids is a tuple | ||||
| // of pids we are interested in. | ||||
| PyObject *THPModule_updateWorkerPIDs(PyObject *module, PyObject *args) { | ||||
|   HANDLE_TH_ERRORS | ||||
|   Py_ssize_t num_args = args ? (Py_ssize_t) PyTuple_Size(args) : 0; | ||||
|   THPUtils_assert(num_args == 2, "_update_worker_pids expects exactly 2 arguments."); | ||||
|   int64_t key = THPUtils_unpackLong(PyTuple_GET_ITEM(args, 0)); | ||||
|   THPUtils_assert(worker_pids.find(key) == worker_pids.end(), "_update_worker_pids " | ||||
|         "should be called only once for each DataLoader."); | ||||
|   PyObject *child_pids = PyTuple_GET_ITEM(args, 1); | ||||
|   THPUtils_assert(PyTuple_Check(child_pids), "_update_worker_pids " | ||||
|         "expects a tuple for child_pids, but got %s.", THPUtils_typename(child_pids)); | ||||
|  | ||||
|   std::set<pid_t> pids_set = {}; | ||||
|   auto size = PyTuple_GET_SIZE(child_pids); | ||||
|   for (int idx = 0; idx < size; idx++) { | ||||
|     PyObject* obj = PyTuple_GET_ITEM(child_pids, idx); | ||||
|     pids_set.insert((pid_t) THPUtils_unpackLong(obj)); | ||||
|   } | ||||
|  | ||||
|   worker_pids[key] = pids_set; | ||||
|  | ||||
|   Py_RETURN_NONE; | ||||
|   END_HANDLE_TH_ERRORS | ||||
| } | ||||
|  | ||||
| PyObject *THPModule_removeWorkerPIDs(PyObject *module, PyObject *loader_id) { | ||||
|   HANDLE_TH_ERRORS | ||||
|  | ||||
|   int64_t key = THPUtils_unpackLong(loader_id); | ||||
|   THPUtils_assert(worker_pids.find(key) != worker_pids.end(), "Cannot find worker " | ||||
|         "information for DataLoader with id %ld.", key); | ||||
|  | ||||
|   worker_pids.erase(key); | ||||
|  | ||||
|   Py_RETURN_NONE; | ||||
|   END_HANDLE_TH_ERRORS | ||||
| } | ||||
|  | ||||
| #undef SIGNAL_HANDLER | ||||
|  | ||||
| #else | ||||
| // dummy implementations for windows | ||||
|  | ||||
| PyObject *THPModule_setWorkerSignalHandlers(PyObject *module, PyObject *_ignored) { | ||||
|     Py_RETURN_TRUE; | ||||
| } | ||||
|  | ||||
| PyObject *THPModule_updateWorkerPIDs(PyObject *module, PyObject *_ignored) { | ||||
|     Py_RETURN_TRUE; | ||||
| } | ||||
|  | ||||
| PyObject *THPModule_removeWorkerPIDs(PyObject *module, PyObject *_ignored) { | ||||
|     Py_RETURN_NONE; | ||||
| } | ||||
|  | ||||
| PyObject *THPModule_exitIfAnyWorkerFails(PyObject *module, PyObject *_ignored) { | ||||
|     Py_RETURN_NONE; | ||||
| } | ||||
|  | ||||
| #endif | ||||
|  | ||||
| PyMethodDef DataLoaderMethods[] = { | ||||
|   {"_set_worker_signal_handlers",  (PyCFunction)THPModule_setWorkerSignalHandlers,  METH_NOARGS,   NULL}, | ||||
|   {"_update_worker_pids",          (PyCFunction)THPModule_updateWorkerPIDs,         METH_VARARGS,  NULL}, | ||||
|   {"_remove_worker_pids",          (PyCFunction)THPModule_removeWorkerPIDs,         METH_O,        NULL}, | ||||
|   {"_error_if_any_worker_fails",   (PyCFunction)THPModule_errorIfAnyWorkerFails,    METH_NOARGS,   NULL}, | ||||
|   {NULL, NULL, 0, NULL} | ||||
| }; | ||||
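The four methods registered above are private hooks consumed by the Python DataLoader. The real call sites live in torch/utils/data/dataloader.py, which is not part of this diff; the sketch below is purely illustrative of how a loader and its workers might drive these hooks:

    import torch

    def _worker_loop(dataset, index_queue, data_queue):
        # Install the SIGBUS/SIGSEGV/SIGTERM handlers before doing any work so
        # a crashed worker prints a useful message instead of hanging the parent.
        torch._C._set_worker_signal_handlers()
        # ... fetch indices, collate samples, put them on data_queue ...

    class _IllustrativeLoader(object):
        _next_id = 0

        def __init__(self, worker_pids):
            self.id = _IllustrativeLoader._next_id
            _IllustrativeLoader._next_id += 1
            # Register the worker pids once per loader instance.
            torch._C._update_worker_pids(self.id, tuple(worker_pids))

        def poll(self):
            # Raises RuntimeError if any registered worker exited abnormally
            # or was killed by a signal (see THPModule_errorIfAnyWorkerFails).
            torch._C._error_if_any_worker_fails()

        def __del__(self):
            torch._C._remove_worker_pids(self.id)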
| @ -1,5 +1,8 @@ | ||||
| #include <Python.h> | ||||
|  | ||||
| #include <utility> | ||||
| #include <vector> | ||||
|  | ||||
| #include "THP.h" | ||||
|  | ||||
| PyObject *THPException_FatalError; | ||||
| @ -11,3 +14,61 @@ bool THPException_init(PyObject *module) | ||||
|   ASSERT_TRUE(PyModule_AddObject(module, "FatalError", THPException_FatalError) == 0); | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| namespace torch { | ||||
|  | ||||
| void replaceAll(std::string & str, | ||||
|     const std::string & old_str, | ||||
|     const std::string & new_str) { | ||||
|   std::string::size_type pos = 0u; | ||||
|   while ((pos = str.find(old_str, pos)) != std::string::npos){ | ||||
|      str.replace(pos, old_str.length(), new_str); | ||||
|   } | ||||
| } | ||||
|  | ||||
| std::string processErrorMsg(std::string str) { | ||||
|  | ||||
|   // Translate Aten types to their respective pytorch ones | ||||
|   std::vector<std::pair<std::string, std::string>> changes { | ||||
|     {"SparseCUDAByteType", "torch.cuda.sparse.ByteTensor"}, | ||||
|     {"SparseCUDACharType", "torch.cuda.sparse.CharTensor"}, | ||||
|     {"SparseCUDADoubleType", "torch.cuda.sparse.DoubleTensor"}, | ||||
|     {"SparseCUDAFloatType", "torch.cuda.sparse.FloatTensor"}, | ||||
|     {"SparseCUDAIntType", "torch.cuda.sparse.IntTensor"}, | ||||
|     {"SparseCUDALongType", "torch.cuda.sparse.LongTensor"}, | ||||
|     {"SparseCUDAShortType", "torch.cuda.sparse.ShortTensor"}, | ||||
|     {"SparseCUDAHalfType", "torch.cuda.sparse.HalfTensor"}, | ||||
|     {"SparseCPUByteType", "torch.sparse.ByteTensor"}, | ||||
|     {"SparseCPUCharType", "torch.sparse.CharTensor"}, | ||||
|     {"SparseCPUDoubleType", "torch.sparse.DoubleTensor"}, | ||||
|     {"SparseCPUFloatType", "torch.sparse.FloatTensor"}, | ||||
|     {"SparseCPUIntType", "torch.sparse.IntTensor"}, | ||||
|     {"SparseCPULongType", "torch.sparse.LongTensor"}, | ||||
|     {"SparseCPUShortType", "torch.sparse.ShortTensor"}, | ||||
|     {"SparseCPUHalfType", "torch.sparse.HalfTensor"}, | ||||
|     {"CUDAByteType", "torch.cuda.ByteTensor"}, | ||||
|     {"CUDACharType", "torch.cuda.CharTensor"}, | ||||
|     {"CUDADoubleType", "torch.cuda.DoubleTensor"}, | ||||
|     {"CUDAFloatType", "torch.cuda.FloatTensor"}, | ||||
|     {"CUDAIntType", "torch.cuda.IntTensor"}, | ||||
|     {"CUDALongType", "torch.cuda.LongTensor"}, | ||||
|     {"CUDAShortType", "torch.cuda.ShortTensor"}, | ||||
|     {"CUDAHalfType", "torch.cuda.HalfTensor"}, | ||||
|     {"CPUByteType", "torch.ByteTensor"}, | ||||
|     {"CPUCharType", "torch.CharTensor"}, | ||||
|     {"CPUDoubleType", "torch.DoubleTensor"}, | ||||
|     {"CPUFloatType", "torch.FloatTensor"}, | ||||
|     {"CPUIntType", "torch.IntTensor"}, | ||||
|     {"CPULongType", "torch.LongTensor"}, | ||||
|     {"CPUShortType", "torch.ShortTensor"}, | ||||
|     {"CPUHalfType", "torch.HalfTensor"}, | ||||
|   }; | ||||
|  | ||||
|   for (const auto & it : changes) { | ||||
|     replaceAll(str, it.first, it.second); | ||||
|   } | ||||
|  | ||||
|   return str; | ||||
| } | ||||
| } | ||||
|  | ||||
|  | ||||
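The effect of processErrorMsg, combined with the exception-translation macro in the hunk just below, is that ATen backend names in C++ error messages reach Python spelled as the familiar tensor classes. A small Python sketch of the substitution (for illustration; the real implementation is the C++ above):

    # A few of the substitutions performed by torch::processErrorMsg.
    ATEN_TO_TORCH = [
        ("SparseCPUFloatType", "torch.sparse.FloatTensor"),
        ("CUDAHalfType", "torch.cuda.HalfTensor"),
        ("CPUFloatType", "torch.FloatTensor"),
    ]

    def process_error_msg(msg):
        # Same spirit as the replaceAll loop: the most specific names come first,
        # so "SparseCPUFloatType" is not clobbered by the "CPUFloatType" rule.
        for aten_name, torch_name in ATEN_TO_TORCH:
            msg = msg.replace(aten_name, torch_name)
        return msg

    print(process_error_msg("expected CPUFloatType but got CUDAHalfType"))
    # expected torch.FloatTensor but got torch.cuda.HalfTensor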
| @ -14,7 +14,8 @@ | ||||
|   } catch (python_error &e) {                                                  \ | ||||
|     return retval;                                                             \ | ||||
|   } catch (std::exception &e) {                                                \ | ||||
|     PyErr_SetString(PyExc_RuntimeError, e.what());                             \ | ||||
|     auto msg = torch::processErrorMsg(e.what());                               \ | ||||
|     PyErr_SetString(PyExc_RuntimeError, msg.c_str());                          \ | ||||
|     return retval;                                                             \ | ||||
|   } | ||||
|  | ||||
| @ -68,4 +69,8 @@ struct python_error : public std::exception { | ||||
| bool THPException_init(PyObject *module); | ||||
| #endif | ||||
|  | ||||
| namespace torch { | ||||
| std::string processErrorMsg(std::string str); | ||||
| } | ||||
|  | ||||
| #endif | ||||
|  | ||||
| @ -25,6 +25,7 @@ | ||||
| #include "THP.h" | ||||
|  | ||||
| #include "ModuleSparse.cpp" | ||||
| #include "DataLoader.cpp" | ||||
|  | ||||
| PyObject* module; | ||||
| PyObject* tensor_classes; | ||||
| @ -792,6 +793,7 @@ static PyObject* initModule() { | ||||
| #define ASSERT_TRUE(cmd) if (!(cmd)) return NULL | ||||
|  | ||||
|   THPUtils_addPyMethodDefs(methods, TorchMethods); | ||||
|   THPUtils_addPyMethodDefs(methods, DataLoaderMethods); | ||||
| #ifdef WITH_CUDA | ||||
|   THPUtils_addPyMethodDefs(methods, THCPModule_methods()); | ||||
| #endif | ||||
|  | ||||
| @ -1,3 +1,5 @@ | ||||
| #define __STDC_FORMAT_MACROS | ||||
|  | ||||
| #include <Python.h> | ||||
| #include <structmember.h> | ||||
|  | ||||
|  | ||||
| @ -1,3 +1,5 @@ | ||||
| #define __STDC_FORMAT_MACROS | ||||
|  | ||||
| #include <Python.h> | ||||
| #include <structmember.h> | ||||
|  | ||||
|  | ||||
| @ -55,7 +55,8 @@ auto BatchNormForward::apply(const variable_list& inputs) -> variable_list { | ||||
|   bool use_cudnn = false; | ||||
| #ifdef WITH_CUDNN | ||||
|   use_cudnn = (input.type().isCuda() | ||||
|                && input.type().scalarType() != at::kHalf | ||||
|                && (input.type().scalarType() != at::kHalf | ||||
|                || weight.type().scalarType() == at::kFloat) | ||||
|                && weight.defined() && bias.defined() | ||||
|                && input.size(0) <= 131070 | ||||
|                && cudnn_enabled && CUDNN_VERSION >= 5110L); | ||||
| @ -115,7 +116,8 @@ auto BatchNormBackward::apply(const variable_list& grad_outputs) -> variable_lis | ||||
|   bool use_cudnn = false; | ||||
| #ifdef WITH_CUDNN | ||||
|   use_cudnn = (input.type().backend() == at::kCUDA | ||||
|                && input.type().scalarType() != at::kHalf | ||||
|                && (input.type().scalarType() != at::kHalf | ||||
|                || weight.type().scalarType() == at::kFloat) | ||||
|                && weight.defined() && bias.defined() && training | ||||
|                && input.size(0) <= 131070 | ||||
|                && cudnn_enabled && CUDNN_VERSION >= 5110L); | ||||
| @ -164,7 +166,7 @@ auto BatchNormBackward::apply(const variable_list& grad_outputs) -> variable_lis | ||||
|   // Add saved variables used out of the pure autograd to inputs | ||||
|   variable_list all_inputs(grad_outputs); | ||||
|   all_inputs.push_back(input_var); | ||||
|   if (weight.get()) { | ||||
|   if (weight.defined()) { | ||||
|     all_inputs.push_back(weight_var); | ||||
|   } | ||||
|   auto outputs =  as_tensor_list(std::move(grad_input), | ||||
|  | ||||
| @ -365,7 +365,7 @@ auto ConvForward::apply(const variable_list& inputs) -> variable_list { | ||||
| // For Convolution strategies that don't implicitly handle grad_bias, we add a helper | ||||
| // function here to perform it using simple Tensor operators | ||||
| static at::Tensor compute_grad_bias(const at::Tensor& grad_output) { | ||||
|   // grad_output is in N, C, H, W, we re-shape and reduce over spatial dims and batches  | ||||
|   // grad_output is in N, C, H, W, we re-shape and reduce over spatial dims and batches | ||||
|   return grad_output.contiguous().view({grad_output.size(0), grad_output.size(1), -1}).sum(0).sum(1); | ||||
| } | ||||
|  | ||||
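The compute_grad_bias helper above reduces an (N, C, H, W) grad_output to a per-channel bias gradient. An equivalent sketch with the Python tensor API (illustrative only):

    import torch

    def compute_grad_bias(grad_output):
        # View to (N, C, -1), then sum over the batch dim and the flattened
        # spatial dim, leaving a (C,) tensor -- same as the C++ helper above.
        n, c = grad_output.size(0), grad_output.size(1)
        return grad_output.contiguous().view(n, c, -1).sum(0).sum(1)

    print(compute_grad_bias(torch.randn(8, 16, 4, 4)).size())  # torch.Size([16])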
| @ -727,7 +727,18 @@ auto ConvBackwardBackward::apply(const variable_list& grad_grad_inputs) -> varia | ||||
|       gI = apply_fn<Transpose>(0, 1)(gIt); | ||||
|     } | ||||
|   } | ||||
|   return {ggO, gI, gW}; | ||||
|  | ||||
|   if (should_compute_output(0) && !ggO.defined()) ggO = at::zeros_like(gO); | ||||
|   if (should_compute_output(1) && !gI.defined()) gI = at::zeros_like(input); | ||||
|   if (should_compute_output(2) && !gW.defined()) gW = at::zeros_like(weight); | ||||
|   bool is_volatile = std::any_of(grad_grad_inputs.begin(), grad_grad_inputs.end(), [](const Variable& v){ | ||||
|     return v.defined() && v.is_volatile(); | ||||
|   }); | ||||
|   auto results = variable_list({ggO, gI, gW}); | ||||
|   for (auto& result : results) { | ||||
|     result.is_volatile() |= is_volatile; | ||||
|   } | ||||
|   return results; | ||||
| } | ||||
|  | ||||
| auto ConvBackwardBackward::releaseVariables() -> void { | ||||
|  | ||||
| @ -9,7 +9,7 @@ namespace autograd { | ||||
| jit::node_list BatchNormForward::symbolic(SymbolicContext* ctx, jit::node_list inputs) { | ||||
|   auto & g = ctx->graph; | ||||
|   // X, Scale, Bias | ||||
|   auto bn = g->appendNode(g->create(jit::kSpatialBN,{inputs.at(0),inputs.at(1),inputs.at(2)})); | ||||
|   auto bn = g->appendNode(g->create(jit::kBatchNormalization, {inputs.at(0),inputs.at(1),inputs.at(2)})); | ||||
|   bn->addInput(jit::tracer::getBufferTrace(*ctx->buffer_map, running_mean)); | ||||
|   bn->addInput(jit::tracer::getBufferTrace(*ctx->buffer_map, running_var)); | ||||
|   bn->i_(jit::kis_test, !this->training); | ||||
|  | ||||
| @ -18,7 +18,7 @@ namespace torch { namespace autograd { | ||||
| jit::node_list ConvForward::symbolic(SymbolicContext* ctx, jit::node_list inputs) { | ||||
|   auto & g = ctx->graph; | ||||
|   // See Note [Caffe2ConvTranspose] | ||||
|   auto n = g->create(!transposed ? jit::kConv : jit::kCaffe2ConvTranspose, | ||||
|   auto n = g->create(!transposed ? jit::kConv : jit::kConvTranspose, | ||||
|                                    {inputs.at(0), inputs.at(1)}); | ||||
|  | ||||
|   // Irritatingly, Caffe2 requires us to specify kernels, | ||||
| @ -55,6 +55,8 @@ jit::node_list ConvForward::symbolic(SymbolicContext* ctx, jit::node_list inputs | ||||
|   n->i_(jit::kgroup,groups); | ||||
|  | ||||
|   // Not in ONNX? | ||||
|   // TODO: implement it once ConvTranspose in ONNX gets `adj` argument instead | ||||
|   // of providing `output_shape` | ||||
|   for (int p : output_padding) { | ||||
|     JIT_EXPECTM(p == 0, "output padding is not supported."); | ||||
|   } | ||||
|  | ||||
| @ -1,5 +1,6 @@ | ||||
| #include "torch/csrc/autograd/input_buffer.h" | ||||
|  | ||||
| #include "torch/csrc/assertions.h" | ||||
| #include "torch/csrc/autograd/functions/basic_ops.h" | ||||
| #include "torch/csrc/utils/auto_gpu.h" | ||||
|  | ||||
| @ -10,6 +11,7 @@ InputBuffer::InputBuffer(size_t size) | ||||
|   {} | ||||
|  | ||||
| void InputBuffer::add(size_t pos, Variable var) { | ||||
|   TORCH_ASSERT(pos >= 0 && pos < buffer.size()); | ||||
|   if (!var.defined()) { | ||||
|     return; | ||||
|   } | ||||
|  | ||||
| @ -43,6 +43,10 @@ PyObject * THPVariable_Wrap(Variable var) | ||||
|     Py_RETURN_NONE; | ||||
|   } | ||||
|  | ||||
|   if (var.dim() == 0) { | ||||
|     throw std::runtime_error("Variable API does not support Scalars"); | ||||
|   } | ||||
|  | ||||
|   if (auto obj = var.get()->pyobj) { | ||||
|     Py_INCREF(obj); | ||||
|     return obj; | ||||
| @ -96,26 +100,21 @@ static int THPVariable_traverse(THPVariable *self, visitproc visit, void *arg) | ||||
| { | ||||
|   Py_VISIT(self->data); | ||||
|   Py_VISIT(self->backward_hooks); | ||||
|   // We don't want to traverse the grad_fn, even if the Variable owns it and the | ||||
|   // shared pointer's use count is 1. This is because we would need to treat | ||||
|   // the grad_fn as part of the Python state and hold the GIL sometimes when | ||||
|   // grad_fn's shared_ptr is copied, otherwise a race condition with the Python | ||||
|   // GC could occur. Holding the GIL when the shared_ptr is copied adds | ||||
|   // undesirable complexity/overhead. | ||||
|   // | ||||
|   // When hooks, a Variable, and its grad_fn are involved in a Python reference | ||||
|   // cycle, because we're not traversing the grad_fn, the reference cycle will | ||||
|   // in fact leak. | ||||
|   // | ||||
|   // See https://gist.github.com/zou3519/7ac92b84dd7d206dcc6eae55fee8372c | ||||
|   // for more details about the race condition involving traversing the grad_fn | ||||
|   // and the python GC. | ||||
|   if (self->cdata.defined()) { | ||||
|     // Only visit this if we actually own it (no one else use the shared pointer) | ||||
|     auto& grad_fn = self->cdata.grad_fn(); | ||||
|     if (grad_fn.use_count() == 1) { | ||||
|       if (auto fn = dynamic_cast<PyFunction*>(grad_fn.get())) { | ||||
|         Py_VISIT(fn->obj); | ||||
|       } else { | ||||
|         // visit hooks in C++ implemented autograd functions | ||||
|         for (auto& hook : grad_fn->pre_hooks) { | ||||
|           if (auto pyhook = dynamic_cast<PyFunctionPreHook*>(hook.get())) { | ||||
|             Py_VISIT(pyhook->dict); | ||||
|           } | ||||
|         } | ||||
|         for (auto& hook : grad_fn->post_hooks) { | ||||
|           if (auto pyhook = dynamic_cast<PyFunctionPostHook*>(hook.get())) { | ||||
|             Py_VISIT(pyhook->dict); | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|     for (auto& hook : self->cdata.hooks()) { | ||||
|       if (auto pyhook = dynamic_cast<PyFunctionPreHook*>(hook.get())) { | ||||
|         Py_VISIT(pyhook->dict); | ||||
|  | ||||
| @ -13,6 +13,10 @@ | ||||
| namespace torch { namespace autograd { namespace utils { | ||||
|  | ||||
| inline PyObject* wrap(at::Tensor tensor) { | ||||
|   if (tensor.defined() && tensor.dim() == 0) { | ||||
|     // don't expose 0-dim tensors to Variable API. | ||||
|     Variable(tensor).data().as_strided_({1}, {1}); | ||||
|   } | ||||
|   return THPVariable_Wrap(Variable(std::move(tensor))); | ||||
| } | ||||
|  | ||||
| @ -54,6 +58,10 @@ inline PyObject* wrap(int64_t value) { | ||||
|   return THPUtils_packInt64(value); | ||||
| } | ||||
|  | ||||
| inline PyObject* wrap(void* value) { | ||||
|   return THPUtils_packInt64(reinterpret_cast<intptr_t>(value)); | ||||
| } | ||||
|  | ||||
| inline PyObject* wrap(at::Scalar scalar) { | ||||
|   return wrap(scalar.toTensor()); | ||||
| } | ||||
|  | ||||
| @ -133,6 +133,18 @@ PyObject * THCPModule_getDeviceName_wrap(PyObject *self, PyObject *arg) | ||||
|   END_HANDLE_TH_ERRORS | ||||
| } | ||||
|  | ||||
| PyObject * THCPModule_getDeviceCapability_wrap(PyObject *self, PyObject *arg) | ||||
| { | ||||
|   HANDLE_TH_ERRORS | ||||
|   THPUtils_assert(THPUtils_checkLong(arg), "invalid argument to getDeviceCapability"); | ||||
|   long device = THPUtils_unpackLong(arg); | ||||
|  | ||||
|   cudaDeviceProp prop; | ||||
|   THCudaCheck(cudaGetDeviceProperties(&prop, device)); | ||||
|   return Py_BuildValue("(ii)", prop.major, prop.minor); | ||||
|   END_HANDLE_TH_ERRORS | ||||
| } | ||||
|  | ||||
| PyObject * THCPModule_getCurrentStream_wrap(PyObject *self) | ||||
| { | ||||
|   HANDLE_TH_ERRORS | ||||
| @ -174,6 +186,11 @@ PyObject * THCPModule_getDriverVersion(PyObject *self) | ||||
|   return PyLong_FromLong((long) driverVersion); | ||||
| } | ||||
|  | ||||
| PyObject * THCPModule_getCompiledVersion(PyObject *self) | ||||
| { | ||||
|   return PyLong_FromLong((long) CUDA_VERSION); | ||||
| } | ||||
|  | ||||
| PyObject * THCPModule_getRNGState(PyObject *_unused) | ||||
| { | ||||
|   HANDLE_TH_ERRORS | ||||
| @ -297,6 +314,15 @@ PyObject * THCPModule_cudaUnlockMutex(PyObject *module) | ||||
|   Py_RETURN_NONE; | ||||
| } | ||||
|  | ||||
| PyObject * THCPModule_emptyCache(PyObject *_unused) | ||||
| { | ||||
|   HANDLE_TH_ERRORS | ||||
|   auto device_allocator = THCState_getDeviceAllocator(state); | ||||
|   THCudaCheck(device_allocator->emptyCache(device_allocator->state)); | ||||
|   END_HANDLE_TH_ERRORS | ||||
|   Py_RETURN_NONE; | ||||
| } | ||||
|  | ||||
| //////////////////////////////////////////////////////////////////////////////// | ||||
| // Cuda module initialization | ||||
| //////////////////////////////////////////////////////////////////////////////// | ||||
| @ -369,13 +395,16 @@ static struct PyMethodDef _THCPModule_methods[] = { | ||||
|   {"_cuda_getDevice",   (PyCFunction)THCPModule_getDevice_wrap,   METH_NOARGS,  NULL}, | ||||
|   {"_cuda_getDeviceCount", (PyCFunction)THCPModule_getDeviceCount_wrap, METH_NOARGS, NULL}, | ||||
|   {"_cuda_getDeviceName", (PyCFunction)THCPModule_getDeviceName_wrap, METH_O,   NULL}, | ||||
|   {"_cuda_getDeviceCapability", (PyCFunction)THCPModule_getDeviceCapability_wrap, METH_O,   NULL}, | ||||
|   {"_cuda_getCurrentStream", (PyCFunction)THCPModule_getCurrentStream_wrap, METH_NOARGS, NULL}, | ||||
|   {"_cuda_getCurrentBlasHandle", (PyCFunction)THCPModule_getCurrentBlasHandle_wrap, METH_NOARGS, NULL}, | ||||
|   {"_cuda_setStream",    (PyCFunction)THCPModule_setStream_wrap,  METH_O, NULL}, | ||||
|   {"_cuda_isDriverSufficient", (PyCFunction)THCPModule_isDriverSufficient, METH_NOARGS, NULL}, | ||||
|   {"_cuda_getDriverVersion", (PyCFunction)THCPModule_getDriverVersion, METH_NOARGS, NULL}, | ||||
|   {"_cuda_getCompiledVersion", (PyCFunction)THCPModule_getCompiledVersion, METH_NOARGS, NULL}, | ||||
|   {"_cuda_getRNGState", (PyCFunction)THCPModule_getRNGState,      METH_NOARGS,  NULL}, | ||||
|   {"_cuda_setRNGState", (PyCFunction)THCPModule_setRNGState,      METH_O,       NULL}, | ||||
|   {"_cuda_emptyCache", (PyCFunction) THCPModule_emptyCache,       METH_NOARGS,  NULL}, | ||||
|   {"_cuda_manualSeed",  (PyCFunction)THCPModule_manualSeed,       METH_O,       NULL}, | ||||
|   {"_cuda_manualSeedAll", (PyCFunction)THCPModule_manualSeedAll,  METH_O,       NULL}, | ||||
|   {"_cuda_seed",        (PyCFunction)THCPModule_seed,             METH_NOARGS,  NULL}, | ||||
|  | ||||
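Three new Python-visible CUDA utilities are registered above: device capability lookup, the CUDA toolkit version PyTorch was compiled against, and a way to release the caching allocator's unused blocks. A hedged usage sketch; torch.cuda.get_device_capability is the public wrapper used by the cuDNN RNN change earlier in this diff, while the other two are called through their private bindings here because their public wrappers are not shown in this diff:

    import torch

    if torch.cuda.is_available():
        # (major, minor) compute capability of the current device.
        capability = torch.cuda.get_device_capability(torch.cuda.current_device())

        # CUDA toolkit version PyTorch was built with, e.g. 9000 for CUDA 9.0.
        compiled_cuda = torch._C._cuda_getCompiledVersion()

        # Return cached, unused blocks held by the caching allocator.
        torch._C._cuda_emptyCache()

        print(capability, compiled_cuda)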
| @ -1,3 +1,5 @@ | ||||
| #define __STDC_FORMAT_MACROS | ||||
|  | ||||
| #include <Python.h> | ||||
| #include <structmember.h> | ||||
|  | ||||
|  | ||||
| @ -1,3 +1,5 @@ | ||||
| #define __STDC_FORMAT_MACROS | ||||
|  | ||||
| #include <Python.h> | ||||
| #include <structmember.h> | ||||
|  | ||||
|  | ||||
| @ -228,12 +228,12 @@ struct algorithm_search<cudnnConvolutionFwdAlgo_t> { | ||||
|         conv.cdesc.desc, | ||||
|         conv.odesc.desc, | ||||
|         out, | ||||
|         1, | ||||
|         n_algo, | ||||
|         &algoCount, | ||||
|         perfResults, | ||||
|         ws.data, | ||||
|         ws.size)); | ||||
|     return getBestAlgorithm<cudnnConvolutionFwdAlgoPerf_t>(perfResults, deterministic, n_algo); | ||||
|     return getBestAlgorithm<cudnnConvolutionFwdAlgoPerf_t>(perfResults, deterministic, algoCount); | ||||
|   } | ||||
|  | ||||
|   static void getAlgorithm( | ||||
| @ -302,12 +302,12 @@ struct algorithm_search<cudnnConvolutionBwdDataAlgo_t> { | ||||
|         conv.cdesc.desc, | ||||
|         conv.idesc.desc, | ||||
|         in, | ||||
|         1, | ||||
|         n_algo, | ||||
|         &algoCount, | ||||
|         perfResults, | ||||
|         ws.data, | ||||
|         ws.size)); | ||||
|     return getBestAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t>(perfResults, deterministic, n_algo); | ||||
|     return getBestAlgorithm<cudnnConvolutionBwdDataAlgoPerf_t>(perfResults, deterministic, algoCount); | ||||
|   } | ||||
|  | ||||
|   static void getAlgorithm(cudnnHandle_t handle, const Convolution& conv, cudnnConvolutionBwdDataAlgo_t* algo) { | ||||
| @ -376,12 +376,12 @@ struct algorithm_search<cudnnConvolutionBwdFilterAlgo_t> { | ||||
|         conv.cdesc.desc, | ||||
|         conv.wdesc.desc, | ||||
|         wght, | ||||
|         1, | ||||
|         n_algo, | ||||
|         &algoCount, | ||||
|         perfResults, | ||||
|         ws.data, | ||||
|         ws.size)); | ||||
|     return getBestAlgorithm<cudnnConvolutionBwdFilterAlgoPerf_t>(perfResults, deterministic, n_algo); | ||||
|     return getBestAlgorithm<cudnnConvolutionBwdFilterAlgoPerf_t>(perfResults, deterministic, algoCount); | ||||
|   } | ||||
|  | ||||
|   static void getAlgorithm( | ||||
|  | ||||
| @ -36,7 +36,7 @@ | ||||
| #define COPY_FROM_ARRAY_CUDA(ELTYPE, ARRAY, STORAGE, SIZE) \ | ||||
| { \ | ||||
|   ELTYPE *arrdata = (ELTYPE*)PyArray_DATA(ARRAY);              \ | ||||
|   std::unique_ptr<load_real> data_guard(new load_real[SIZE]);  \ | ||||
|   std::unique_ptr<load_real[]> data_guard(new load_real[SIZE]);  \ | ||||
|   load_real *data = data_guard.get();                          \ | ||||
|   for (size_t i=0; i<SIZE; i++) {                              \ | ||||
|     data[i] = arrdata[i];                                      \ | ||||
| @ -51,7 +51,7 @@ | ||||
| #define COPY_FROM_ARRAY_CUDA_HALF(ELTYPE, ARRAY, STORAGE, SIZE) \ | ||||
| { \ | ||||
|   ELTYPE *arrdata = (ELTYPE*)PyArray_DATA(ARRAY);                  \ | ||||
|   std::unique_ptr<load_real> data_guard(new load_real[SIZE]);      \ | ||||
|   std::unique_ptr<load_real[]> data_guard(new load_real[SIZE]);      \ | ||||
|   load_real *data = data_guard.get();                              \ | ||||
|   for (size_t i=0; i<SIZE; i++) {                                  \ | ||||
|     data[i] = arrdata[i];                                          \ | ||||
| @ -379,7 +379,7 @@ static PyObject * THPTensor_(pynew)(PyTypeObject *type, PyObject *args, PyObject | ||||
|     real *data = tensor->storage->data; | ||||
| #else | ||||
|     size_t numel = THTensor_(numel)(LIBRARY_STATE tensor); | ||||
|     std::unique_ptr<load_real> data_guard(new load_real[numel]); | ||||
|     std::unique_ptr<load_real[]> data_guard(new load_real[numel]); | ||||
|     load_real *data = data_guard.get(); | ||||
| #endif | ||||
|     THPObjectPtr final_sequence; | ||||
| @ -778,7 +778,7 @@ static bool THPTensor_(_convertToTensorIndexers)( | ||||
|   // store THPTensors rather than THTensors. | ||||
|  | ||||
|   std::vector<Py_ssize_t> indexingDims; | ||||
|   std::vector<THPIndexTensor*>indexers; | ||||
|   std::vector<THPPointer<THPIndexTensor>> indexers; | ||||
|  | ||||
|   if (THPTensor_(_checkSingleSequenceTriggersAdvancedIndexing)(index)) { | ||||
|     // Handle the special case where we only have a single indexer | ||||
| @ -791,7 +791,7 @@ static bool THPTensor_(_convertToTensorIndexers)( | ||||
|       return false; | ||||
|     } | ||||
|     indexingDims.push_back(0); | ||||
|     indexers.push_back(indexer); | ||||
|     indexers.push_back(THPPointer<THPIndexTensor>(indexer)); | ||||
|   } else { | ||||
|     // The top-level indexer should be a sequence, per the check above | ||||
|     THPObjectPtr fast(PySequence_Fast(index, NULL)); | ||||
| @ -827,15 +827,10 @@ static bool THPTensor_(_convertToTensorIndexers)( | ||||
|               "convertible to LongTensors. The indexing object at position %zd is of type %s " | ||||
|               "and cannot be converted", i, THPUtils_typename(obj)); | ||||
|  | ||||
|           // Clean up Indexers | ||||
|           for (auto& idx : indexers) { | ||||
|             THIndexTensor_(free)(LIBRARY_STATE idx->cdata); | ||||
|             Py_DECREF(idx); | ||||
|           } | ||||
|           return false; | ||||
|         } | ||||
|         indexingDims.push_back(i + ellipsisOffset); | ||||
|         indexers.push_back(indexer); | ||||
|         indexers.push_back(THPPointer<THPIndexTensor>(indexer)); | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| @ -849,7 +844,7 @@ static bool THPTensor_(_convertToTensorIndexers)( | ||||
|   for (const auto& indexer : indexers) { | ||||
|     maybeBroadcasted.emplace_back(THIndexTensor_(new)(LIBRARY_STATE_NOARGS)); | ||||
|     // borrow the underlying Tensor from the indexer map | ||||
|     candidates.emplace_back(indexer->cdata); | ||||
|     candidates.emplace_back(indexer.get()->cdata); | ||||
|   } | ||||
|  | ||||
|   // Broadcast/Expand indexing Tensors as necessary | ||||
| @ -888,11 +883,6 @@ static bool THPTensor_(_convertToTensorIndexers)( | ||||
|               "for dimension %lld (of size %lld)", | ||||
|               (long long)indexAtDim, (long long)dim, (long long)sizeAtDim); | ||||
|  | ||||
|           // Clean up Indexers | ||||
|           for (auto& idx : indexers) { | ||||
|             THIndexTensor_(free)(LIBRARY_STATE idx->cdata); | ||||
|             Py_DECREF(idx); | ||||
|           } | ||||
|  | ||||
|           return false; | ||||
|         } | ||||
| @ -907,19 +897,9 @@ static bool THPTensor_(_convertToTensorIndexers)( | ||||
|     } | ||||
|     PyErr_Format(PyExc_IndexError, "The advanced indexing objects could not be broadcast"); | ||||
|  | ||||
|     // Clean up Indexers | ||||
|     for (auto& idx : indexers) { | ||||
|       THIndexTensor_(free)(LIBRARY_STATE idx->cdata); | ||||
|       Py_DECREF(idx); | ||||
|     } | ||||
|     return false; | ||||
|   } | ||||
|  | ||||
|   // Clean up Indexers | ||||
|   for (auto& idx : indexers) { | ||||
|     THIndexTensor_(free)(LIBRARY_STATE idx->cdata); | ||||
|     Py_DECREF(idx); | ||||
|   } | ||||
|   return true; | ||||
| } | ||||
|  | ||||
|  | ||||
| @ -761,6 +761,12 @@ PyObject * THPTensor_(stride)(PyObject *self, PyObject *args, PyObject *kwargs) | ||||
|         - accreal start | ||||
|         - accreal end | ||||
|         - CONSTANT 1 | ||||
|       - arguments: | ||||
|         - arg: THTensor* result | ||||
|           output: True | ||||
|         - CONSTANT 0 | ||||
|         - accreal end | ||||
|         - CONSTANT 1 | ||||
| ]] | ||||
|  | ||||
| [[ | ||||
|  | ||||
| @ -78,7 +78,7 @@ using GraphsAttr = VectorAttributeValue<std::shared_ptr<Graph>,AttributeKind::gs | ||||
|  | ||||
| // CRTP so that Node which inherits Attributes can be return for | ||||
| // method chaining e.g: | ||||
| // Node * n = g->create(kSelect)->set_i(kOffset,3)->set_f(kValue,3.5); | ||||
| // Node * n = g->create(kSelect)->i_(kOffset,3)->f_(kValue,3.5); | ||||
| // we return Derived* pointers because Nodes are normally held as pointers. | ||||
| template<typename Derived> | ||||
| struct Attributes { | ||||
|  | ||||
| @ -69,7 +69,8 @@ void encodeTensor(onnx::TensorProto * p, const at::Tensor & tensor) { | ||||
|       break; | ||||
|   } | ||||
|   p->set_data_type(onnx_type); | ||||
|   at::Tensor cont = tensor.toType(at::CPU(at_type)).contiguous(); | ||||
|   // CPU HalfTensor doesn't support contiguous(), so call contiguous() before converting the type | ||||
|   at::Tensor cont = tensor.contiguous().toType(at::CPU(at_type)); | ||||
|   p->set_raw_data(cont); | ||||
| } | ||||
|  | ||||
| @ -79,40 +80,50 @@ void addAttribute(onnx::NodeProto * n_p, jit::Node * n, jit::Symbol name) { | ||||
|   switch(n->kindOf(name)) { | ||||
|     case AttributeKind::f: | ||||
|       attr->set_f(n->f(name)); | ||||
|       attr->set_type(onnx::aFLOAT); | ||||
|       break; | ||||
|     case AttributeKind::fs: | ||||
|       attr->set_type(onnx::aFLOATS); | ||||
|       for(auto & v : n->fs(name)) | ||||
|         attr->add_floats(v); | ||||
|       break; | ||||
|     case AttributeKind::i: | ||||
|       attr->set_type(onnx::aINT); | ||||
|       attr->set_i(n->i(name)); | ||||
|       break; | ||||
|     case AttributeKind::is: | ||||
|       attr->set_type(onnx::aINTS); | ||||
|       for(auto & v : n->is(name)) | ||||
|         attr->add_ints(v); | ||||
|       break; | ||||
|     case AttributeKind::s: | ||||
|       attr->set_type(onnx::aSTRING); | ||||
|       attr->set_s(n->s(name)); | ||||
|       break; | ||||
|     case AttributeKind::ss: | ||||
|       attr->set_type(onnx::aSTRINGS); | ||||
|       for(auto & v : n->ss(name)) | ||||
|         attr->add_strings(v); | ||||
|       break; | ||||
|     case AttributeKind::t: { | ||||
|       attr->set_type(onnx::aTENSOR); | ||||
|       auto t = attr->mutable_t(); | ||||
|       encodeTensor(t, n->t(name)); | ||||
|     } break; | ||||
|     case AttributeKind::ts: | ||||
|       attr->set_type(onnx::aTENSORS); | ||||
|       for(auto & v : n->ts(name)) { | ||||
|         auto t = attr->add_tensors(); | ||||
|         encodeTensor(t, v); | ||||
|       } | ||||
|       break; | ||||
|     case AttributeKind::g: { | ||||
|       attr->set_type(onnx::aGRAPH); | ||||
|       auto g = attr->mutable_g(); | ||||
|       encodeGraph(g, n->g(name), {}); | ||||
|     } break; | ||||
|     case AttributeKind::gs: | ||||
|       attr->set_type(onnx::aGRAPHS); | ||||
|       for(auto & v : n->gs(name)) { | ||||
|         auto g = attr->add_graphs(); | ||||
|         encodeGraph(g, v, {}); | ||||
| @ -191,6 +202,9 @@ void encodeGraph(onnx::GraphProto * p_g, const std::shared_ptr<Graph> & g, const | ||||
|       continue; | ||||
|     } | ||||
|     auto p_n = p_g->add_node(); | ||||
|     if (node->getSourceLocation()) { | ||||
|       p_n->set_doc_string(node->getSourceLocation()->python_traceback); | ||||
|     } | ||||
|     for(auto input : node->inputs()) { | ||||
|       p_n->add_input(node_name(input)); | ||||
|     } | ||||
| @ -256,11 +270,18 @@ void validateGraph(const std::shared_ptr<Graph>& graph) { | ||||
| } | ||||
|  | ||||
| std::string ExportGraph(const std::shared_ptr<Graph>& graph, | ||||
|                         const std::vector<at::Tensor> & initializers) { | ||||
|                         const std::vector<at::Tensor> & initializers, | ||||
|                         int64_t onnx_opset_version) { | ||||
|  | ||||
|   validateGraph(graph); | ||||
|  | ||||
|   onnx::ModelProto model_proto; | ||||
|   model_proto.set_producer_name("pytorch"); | ||||
|   model_proto.set_producer_version("0.3"); | ||||
|   auto* imp = model_proto.add_opset_import(); | ||||
|   // This is the version of ONNX operator set we are targeting | ||||
|   imp->set_version(onnx_opset_version); | ||||
|  | ||||
|   // Set up nanopb callbacks and compute the amount of space needed to store | ||||
|   // the resulting protobuf | ||||
|   encodeModel(&model_proto, graph, initializers); | ||||
|  | ||||
| @ -5,6 +5,7 @@ | ||||
| namespace torch { namespace jit { | ||||
|  | ||||
| std::string ExportGraph(const std::shared_ptr<Graph>& graph, | ||||
|                         const std::vector<at::Tensor> & initializers); | ||||
|                         const std::vector<at::Tensor> & initializers, | ||||
|                         int64_t onnx_opset_version); | ||||
|  | ||||
| }} | ||||
|  | ||||
| @ -261,6 +261,14 @@ CompiledFusionFunction::CompiledFusionFunction(const std::string & name, Annotat | ||||
|   , output_desc(agraph.output_desc) { | ||||
|   JIT_CUDA_CHECK(cudaGetDevice(&device)); | ||||
|   JIT_CUDA_CHECK(cudaGetDeviceProperties(&prop, device)); | ||||
|   if ((prop.major >= 6 && CUDA_VERSION < 8000) || | ||||
|       (prop.major >= 7 && CUDA_VERSION < 9000)) { | ||||
|     std::stringstream err_string; | ||||
|     err_string << "PyTorch compiled with insufficient CUDA version: "  | ||||
| 	       << CUDA_VERSION << " for the current GPU device " << prop.name  | ||||
| 	       << " with device capability " << prop.major << "." << prop.minor; | ||||
|     throw std::runtime_error(err_string.str()); | ||||
|   } | ||||
|  | ||||
|   std::stringstream cu; | ||||
|   concat_desc = codegen::emitCompilationUnit(cu, name, agraph); | ||||
|  | ||||
| @ -43,9 +43,8 @@ _(split) \ | ||||
| _(Offset) \ | ||||
| _(value) \ | ||||
| _(Subgraph) \ | ||||
| _(SpatialBN) \ | ||||
| _(BatchNormalization) \ | ||||
| _(Conv) \ | ||||
| _(Caffe2ConvTranspose) \ | ||||
| _(ConvTranspose) \ | ||||
| _(is_test) \ | ||||
| _(epsilon) \ | ||||
| @ -75,6 +74,8 @@ _(shape) \ | ||||
| _(axes) \ | ||||
| _(group) \ | ||||
| _(inplace) \ | ||||
| _(transA) \ | ||||
| _(transB) \ | ||||
| _(other) | ||||
|  | ||||
| enum BuiltinSymbol { | ||||
|  | ||||
| @ -41,6 +41,7 @@ void printNodeRef(std::ostream & out, const Node * n) { | ||||
| template <typename T> | ||||
| std::ostream& operator<<(std::ostream & out, const std::vector<T> & nodes) { | ||||
|   out << at::ArrayRef<T>{nodes}; | ||||
|   return out; | ||||
| } | ||||
|  | ||||
| template <typename T> | ||||
| @ -262,7 +263,15 @@ std::ostream& printNode(std::ostream & out, const Node * n, std::vector<const No | ||||
|   } else { | ||||
|     emitUses(out,n); | ||||
|   } | ||||
|   out << "];\n"; | ||||
|   out << "]"; | ||||
|   std::string scopeName = n->scopeName(); | ||||
|   if (scopeName.empty()) { | ||||
|     out << ";\n"; | ||||
|   } | ||||
|   else { | ||||
|     out << ", "; | ||||
|     out << "scope: " << scopeName << ";\n"; | ||||
|   } | ||||
|   return out; | ||||
| } | ||||
|  | ||||
|  | ||||
| @ -60,6 +60,73 @@ static inline bool operator==(const Use & a, const Use & b) { | ||||
| // Graph holds a list of parameters. | ||||
| struct Param; | ||||
|  | ||||
| // SourceLocation represents source code-level debug information for a node. | ||||
| // It contains a Python stack trace that represents the provenance of a given | ||||
| // node in the trace. | ||||
| struct SourceLocation { | ||||
|   SourceLocation(std::string python_traceback) | ||||
|   : python_traceback(std::move(python_traceback)) {} | ||||
|   std::string python_traceback; | ||||
| }; | ||||
|  | ||||
| // Scope is a node of a trie that represents the tree of nested scopes. | ||||
| // Individual scopes are pushed and popped from Graph, which holds a | ||||
| // pointer to the current scope. Each Node in Graph holds a pointer | ||||
| // to the scope that was current when the node was created. | ||||
| // The trie never needs to shrink, it only grows until it is disposed | ||||
| // of when Graph is deallocated. Hence, pointers to scopes held by nodes | ||||
| // will always be valid as long as Graph is alive. | ||||
| struct Scope { | ||||
| private: | ||||
|   Scope* parent_; | ||||
|   Symbol name_; | ||||
|   std::vector<std::unique_ptr<Scope> > children_; | ||||
| public: | ||||
|   Scope() { | ||||
|     name_ = stringToSymbol(""); | ||||
|     parent_ = NULL; | ||||
|   } | ||||
|   Scope(Scope* parent, Symbol name) { | ||||
|     name_ = name; | ||||
|     parent_ = parent; | ||||
|   } | ||||
|   Scope* push(Symbol name) { | ||||
|     children_.push_back(std::unique_ptr<Scope>(new Scope(this, name))); | ||||
|     return children_.back().get(); | ||||
|   } | ||||
|   Scope* parent() { | ||||
|     if (parent_ == NULL) { | ||||
|       throw std::runtime_error("Cannot get parent from Scope with no parent"); | ||||
|     } | ||||
|     return parent_; | ||||
|   } | ||||
|   bool isRoot() { | ||||
|     return parent_ == NULL; | ||||
|   } | ||||
|   Scope* getRoot() { | ||||
|     Scope* current = this; | ||||
|     while (current->parent_) { | ||||
|       current = current->parent_; | ||||
|     } | ||||
|     return current; | ||||
|   } | ||||
|   Symbol name() { | ||||
|     return name_; | ||||
|   } | ||||
|   std::string namesFromRoot(const std::string& separator="/") { | ||||
|     std::string out = std::string(symbolToString(this->name_)); | ||||
|     if (this->isRoot()) { | ||||
|       return out; | ||||
|     } | ||||
|     Scope* parent = this->parent_; | ||||
|     while (!parent->isRoot()) { | ||||
|       out = std::string(symbolToString(parent->name_)) + separator + out; | ||||
|       parent = parent->parent_; | ||||
|     } | ||||
|     return out; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| // the list types are intentionally simple, but we type-def | ||||
| // them here so if we need to change them, refactoring will be easier | ||||
| using node_list = std::vector<Node*>; | ||||
| @ -113,6 +180,8 @@ private: | ||||
|   size_t unique_ = 0;          // unique id | ||||
|   size_t stage_ = 0;           // 0-forward, 1-backward, 2-double-backward,... | ||||
|   std::string debug_name_; | ||||
|   std::shared_ptr<SourceLocation> source_location_; | ||||
|   Scope* scope_; | ||||
| protected: | ||||
|   TypePtr type_; | ||||
|   Node(Graph * graph_, NodeKind kind_); //defined after graph | ||||
| @ -150,6 +219,13 @@ public: | ||||
|   const std::string & debugName() const { | ||||
|     return debug_name_; | ||||
|   } | ||||
|   Node* setSourceLocation(std::shared_ptr<SourceLocation> sl) { | ||||
|     source_location_ = sl; | ||||
|     return this; | ||||
|   } | ||||
|   std::shared_ptr<SourceLocation> getSourceLocation() const { | ||||
|     return source_location_; | ||||
|   } | ||||
|   Graph * owningGraph() { | ||||
|     return graph_; | ||||
|   } | ||||
| @ -171,6 +247,18 @@ public: | ||||
|   size_t stage() const { | ||||
|     return stage_; | ||||
|   } | ||||
|   Scope* scope() { | ||||
|     return scope_; | ||||
|   } | ||||
|   void setScope(Scope* scope) { | ||||
|     scope_ = scope; | ||||
|   } | ||||
|   std::string scopeName() const { | ||||
|     if (scope_ == NULL) { | ||||
|       return ""; | ||||
|     } | ||||
|     return scope_->namesFromRoot(); | ||||
|   } | ||||
|   // NB: This returns an ArrayRef; that means that it will | ||||
|   // get invalidated if you resize inputs (e.g., using addInput) | ||||
|   // We can't return a std::vector<Node*>& because there's no | ||||
| @ -511,11 +599,7 @@ protected: | ||||
|   // | ||||
|   // NB: This does NOT clone stages.  You're expected to set the stage correctly | ||||
|   // if you are going to preserve it. | ||||
|   virtual void cloneFrom(Node * s) { | ||||
|     if (s->hasType()) setType(s->type()); | ||||
|     setDebugName(s->debugName()); | ||||
|     copyAttributes(*s); | ||||
|   } | ||||
|   virtual void cloneFrom(Node * s); | ||||
| }; | ||||
|  | ||||
| struct Graph { | ||||
| @ -533,6 +617,9 @@ private: | ||||
|  | ||||
|   size_t new_node_stage_; | ||||
|  | ||||
|   std::shared_ptr<Scope> scope_root_; | ||||
|   Scope * current_scope_; | ||||
|  | ||||
|   // holds outputs in a way that can be reflected | ||||
|   // as a Use object | ||||
|   // also used as the beginning/end of the circular node list to avoid | ||||
| @ -540,11 +627,17 @@ private: | ||||
|   Node * const output_; | ||||
|  | ||||
| public: | ||||
|   Graph() | ||||
|  | ||||
|   Graph(std::shared_ptr<Scope> scope_root) | ||||
|   : next_unique_(0) | ||||
|   , new_node_stage_(0) | ||||
|   , scope_root_(scope_root) | ||||
|   , current_scope_(scope_root_.get()) | ||||
|   , output_(initOutput(create(kReturn))) {} | ||||
|  | ||||
|   Graph() | ||||
|   : Graph( std::make_shared<Scope>()) {} | ||||
|  | ||||
|   at::ArrayRef<Node*> inputs() { | ||||
|     return inputs_; | ||||
|   } | ||||
| @ -600,6 +693,29 @@ public: | ||||
|   Node * addInput() { | ||||
|     return addInput(create(kParam)); | ||||
|   } | ||||
|   void push_scope(const std::string& scope_name) { | ||||
|     current_scope_ = current_scope_->push(stringToSymbol(scope_name)); | ||||
|   } | ||||
|   void pop_scope() { | ||||
|     current_scope_ = current_scope_->parent(); | ||||
|   } | ||||
|   Scope * current_scope() { | ||||
|     return current_scope_; | ||||
|   } | ||||
|   void set_current_scope(Scope* scope) { | ||||
|     if (scope->getRoot() != scope_root_.get()) { | ||||
|       throw std::runtime_error("trying to set a scope as current that does not belong to the Graph's scope trie"); | ||||
|     } | ||||
|     current_scope_ = scope; | ||||
|   } | ||||
|   ResourceGuard set_current_scope_temporary(Scope* scope) { | ||||
|     auto prev_scope = current_scope_; | ||||
|     this->set_current_scope(scope); | ||||
|     return ResourceGuard([prev_scope, this]() { this->current_scope_ = prev_scope; }); | ||||
|   } | ||||
|   std::shared_ptr<Scope> scope_root() { | ||||
|     return scope_root_; | ||||
|   } | ||||
|  | ||||
|   Node * addInput(Node* n) { | ||||
|     JIT_ASSERT(n->kind() == kParam); | ||||
| @ -676,7 +792,8 @@ public: | ||||
|   } | ||||
|   Node * createFusionGroup() { | ||||
|     auto n = create(kFusionGroup); | ||||
|     n->g_(kSubgraph,std::make_shared<Graph>()); | ||||
|     auto subgraph = std::make_shared<Graph>(scope_root_); | ||||
|     n->g_(kSubgraph, subgraph); | ||||
|     return n; | ||||
|   } | ||||
|   Node * createPythonOp(THPObjectPtr&& pyobj, const std::string & cconv, bool is_legacy, pyobj_list&& scalar_args); | ||||
| @ -746,9 +863,10 @@ inline Node::Node(Graph * graph_, NodeKind kind_) : | ||||
|   graph_(graph_), | ||||
|   unique_(graph_->next_unique_++), | ||||
|   stage_(graph_->new_node_stage_), | ||||
|   scope_(graph_->current_scope_) , | ||||
|   type_(getInitialType(kind_)) { | ||||
|   graph_->all_nodes.emplace(this); | ||||
| } | ||||
|     graph_->all_nodes.emplace(this); | ||||
|   } | ||||
|  | ||||
| inline void Node::destroy() { | ||||
|   JIT_ASSERT(inGraphList()); | ||||
| @ -770,6 +888,16 @@ inline Node* Node::makeMultireturn() { | ||||
|   return select; | ||||
| } | ||||
|  | ||||
| inline void Node::cloneFrom(Node * s) { | ||||
|   if (s->hasType()) setType(s->type()); | ||||
|   setDebugName(s->debugName()); | ||||
|   setSourceLocation(s->getSourceLocation()); | ||||
| 	if (s->owningGraph()->scope_root_ == owningGraph()->scope_root_) { | ||||
|     scope_ = s->scope_; | ||||
|   } | ||||
|   copyAttributes(*s); | ||||
| } | ||||
|  | ||||
| // Helper macros for constructing switch statements over Node types | ||||
| // instead of heavy-weight visitors | ||||
| // read 'between' these defines to see how they turn into a big switch | ||||
|  | ||||
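To summarize the new pieces of ir.h above: every Node now records a SourceLocation (a Python traceback string) and a Scope, and Graph keeps a trie of scopes driven by push_scope/pop_scope. The naming behavior of that trie can be sketched in Python as follows (illustrative only; the real structure is the C++ Scope struct above):

    class Scope(object):
        def __init__(self, parent=None, name=""):
            self.parent = parent    # None only for the root scope
            self.name = name
            self.children = []      # the trie only grows, never shrinks

        def push(self, name):
            child = Scope(self, name)
            self.children.append(child)
            return child

        def names_from_root(self, separator="/"):
            # Walk up to (but not including) the unnamed root, like namesFromRoot.
            out, scope = self.name, self.parent
            while scope is not None and scope.parent is not None:
                out = scope.name + separator + out
                scope = scope.parent
            return out

    root = Scope()
    inner = root.push("Net").push("Sequential[features]")
    print(inner.names_from_root())  # Net/Sequential[features]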
| @ -33,7 +33,7 @@ void ToONNX(std::shared_ptr<tracer::TracingState>& state) { | ||||
|     throw std::logic_error("ToONNX: tracing state is expired"); | ||||
|   } | ||||
|  | ||||
|   auto new_graph = std::make_shared<Graph>(); | ||||
|   auto new_graph = std::make_shared<Graph>(state->graph->scope_root()); | ||||
|   std::unordered_map<void*, Node*> new_buffer_map; | ||||
|  | ||||
|   torch::autograd::SymbolicContext ctx; | ||||
| @ -86,6 +86,9 @@ void ToONNX(std::shared_ptr<tracer::TracingState>& state) { | ||||
|         if (!outputs[i]->hasType()) { | ||||
|           outputs[i]->setType(old->typeOption()); | ||||
|         } | ||||
|         // Copy over source location information to all nodes created by | ||||
|         // the symbolic | ||||
|         outputs[i]->setSourceLocation(node->getSourceLocation()); | ||||
|         env[old] = outputs[i]; | ||||
|       } else { | ||||
|         // Null output means that the ONNX op doesn't have outputs corresponding | ||||
| @ -121,6 +124,31 @@ void ToONNX(std::shared_ptr<tracer::TracingState>& state) { | ||||
|     } | ||||
|   }; | ||||
|  | ||||
|   // Cast output of symbolic() python implementation | ||||
|   auto processSymbolicOutput = [&](const std::string& op_name, Node* n, const py::object& raw_output) { | ||||
|     if (raw_output.ptr() == Py_None) { | ||||
|       cloneNode(n); | ||||
|       return; | ||||
|     } | ||||
|     // Cast the outputs back to C++ and put them in the new graph | ||||
|     std::vector<Node*> outputs; | ||||
|     try { | ||||
|       if (py::isinstance<Node>(raw_output)) { | ||||
|         outputs = node_list{py::cast<Node*>(raw_output)}; | ||||
|       } else { | ||||
|         outputs = py::cast<std::vector<Node*>>(raw_output); | ||||
|       } | ||||
|     } catch (const std::exception& ex) { | ||||
|       std::ostringstream ss; | ||||
|       ss << "Error casting results of symbolic for " << op_name | ||||
|          << ": expected to return list of op nodes, instead received type '" | ||||
|          << py::str(raw_output.get_type()) << "': " << py::str(raw_output); | ||||
|       throw std::runtime_error(ss.str()); | ||||
|     } | ||||
|  | ||||
|     setOutputs(op_name, n, outputs); | ||||
|   }; | ||||
|  | ||||
|   auto callPySymbolicFunction = [&](Node* n) { | ||||
|     // The idea is delegate as much of the actual argument massaging to | ||||
|     // Python as possible | ||||
| @ -131,21 +159,11 @@ void ToONNX(std::shared_ptr<tracer::TracingState>& state) { | ||||
|         py_inputs[input_nr++] = py::cast(envFn(input)); | ||||
|     } | ||||
|  | ||||
|     auto scope_guard = ctx.graph->set_current_scope_temporary(n->scope()); | ||||
|  | ||||
|     py::object raw_output = onnx.attr("_run_symbolic_function")(ctx.graph, n, py_inputs); | ||||
|  | ||||
|     if (raw_output.ptr() == Py_None) { | ||||
|       cloneNode(n); | ||||
|     } else { | ||||
|       // Cast the outputs back to C++ and put them in the new graph | ||||
|       node_list outputs; | ||||
|       if (py::isinstance<Node>(raw_output)) { | ||||
|         outputs = node_list{py::cast<Node*>(raw_output)}; | ||||
|       } else { | ||||
|         outputs = py::cast<std::vector<Node*>>(raw_output); | ||||
|       } | ||||
|  | ||||
|       setOutputs(symbolToString(n->kind()), n, outputs); | ||||
|     } | ||||
|     processSymbolicOutput(symbolToString(n->kind()), n, raw_output); | ||||
|   }; | ||||
|  | ||||
|   auto callPySymbolicMethod = [&](PythonOp* op) { | ||||
| @ -179,25 +197,14 @@ void ToONNX(std::shared_ptr<tracer::TracingState>& state) { | ||||
|       py_symbolic_args[input_nr++] = obj; | ||||
|     } | ||||
|  | ||||
|     auto scope_guard = ctx.graph->set_current_scope_temporary(op->scope()); | ||||
|  | ||||
|     // Call the symbolic function | ||||
|     // Use a little trampoline function so we can give good error messages | ||||
|     // upon argument mismatch | ||||
|     py::object raw_output = onnx.attr("_run_symbolic_method")(op->name(), pyobj.attr("symbolic"), py_symbolic_args); | ||||
|  | ||||
|     if (raw_output.ptr() == Py_None) { | ||||
|       cloneNode(op); | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     // Cast the outputs back to C++ and put them in the new graph | ||||
|     std::vector<Node*> outputs; | ||||
|     if (py::isinstance<Node>(raw_output)) { | ||||
|       outputs = node_list{py::cast<Node*>(raw_output)}; | ||||
|     } else { | ||||
|       outputs = py::cast<std::vector<Node*>>(raw_output); | ||||
|     } | ||||
|  | ||||
|     setOutputs(op->name(), op, outputs); | ||||
|     processSymbolicOutput(op->name(), op, raw_output); | ||||
|   }; | ||||
|  | ||||
|   // Finally, visit all nodes in the graph | ||||
| @ -215,6 +222,7 @@ void ToONNX(std::shared_ptr<tracer::TracingState>& state) { | ||||
|       // Selects are translated by multi-return nodes. | ||||
|       JIT_ASSERT(env.count(value) > 0); | ||||
|     IR_ELSEIFM(CppOp) | ||||
|       auto scope_guard = new_graph->set_current_scope_temporary(node->scope()); | ||||
|       if (auto fn = std::dynamic_pointer_cast<autograd::HasSymbolic>(value->fn)) { | ||||
|         auto outputs = fn->symbolic(&ctx, fmap(node->inputs(), envFn)); | ||||
|         setOutputs(value->name(), node, outputs); | ||||
|  | ||||
| @ -15,24 +15,62 @@ std::unordered_set<NodeKind> broadcasting = { | ||||
|   kGemm, | ||||
| }; | ||||
|  | ||||
| bool isNopTranspose(const std::vector<int64_t> & perm) { | ||||
|   for (size_t i = 0; i < perm.size(); i++) | ||||
|     if (perm[i] != i) | ||||
|       return false; | ||||
|   return true; | ||||
| } | ||||
|  | ||||
| // returns a vector `ret` such that transposing by `ret` is equivalent | ||||
| // to transposing by `t1` and then by `t2` | ||||
| std::vector<int64_t> composeTransposes(const std::vector<int64_t> & t1, | ||||
|                                        const std::vector<int64_t> & t2) { | ||||
|   JIT_ASSERT(t1.size() == t2.size()); | ||||
|   std::vector<int64_t> ret; | ||||
|   for (size_t i = 0; i < t1.size(); i++) { | ||||
|     JIT_ASSERT(   t1[i]  < t2.size()); | ||||
|     JIT_ASSERT(t2[t1[i]] < t2.size()); | ||||
|     ret.push_back(t2[t1[i]]); | ||||
|   } | ||||
|   return ret; | ||||
| } | ||||
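| A rough illustration (an illustrative Python sketch of the same bookkeeping, not the C++ above): composing a transpose permutation with itself, when it is its own inverse, yields the identity permutation, which isNopTranspose detects and the NOP-transpose elimination further down can then remove entirely. | ||||
|  | ||||
|     def compose_transposes(t1, t2): | ||||
|         return [t2[i] for i in t1]              # mirrors ret[i] = t2[t1[i]] | ||||
|  | ||||
|     def is_nop_transpose(perm): | ||||
|         return all(p == i for i, p in enumerate(perm)) | ||||
|  | ||||
|     assert compose_transposes([1, 0], [1, 0]) == [0, 1] | ||||
|     assert is_nop_transpose([0, 1]) | ||||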
|  | ||||
| bool isBroadcasting(Node *node) { | ||||
|   return broadcasting.count(node->kind()); | ||||
| } | ||||
|  | ||||
| // When iterating over the dimension sizes, starting at the trailing dimension, | ||||
| // the dimension sizes must either be equal, or one of them does not exist. | ||||
| // First iterate over the 'from' tensor sizes. Ignore all leading and trailing | ||||
| // dimensions that are simply one, since they can be trivially broadcasted. | ||||
| // When iterating over the dimension sizes (with reduced 'from' tensor), | ||||
| // starting at the trailing dimension, the dimension sizes must either be equal, | ||||
| // or one of them does not exist. | ||||
| // | ||||
| //  equivalently: | ||||
| // | ||||
| // Test that 'from' is a suffix of 'to'. | ||||
| // Note that this is NOT equivalent to numpy broadcasting semantics, and does | ||||
| // not represent the generalized broadcasting that PyTorch implements in | ||||
| // general. Rather, this is Caffe2-style broadcasting. | ||||
| bool fusibleExpandTo(at::IntList from, at::IntList to) { | ||||
|   auto f = from.rbegin(); | ||||
|   auto t = to.rbegin(); | ||||
|   for (; f != from.rend() && t != to.rend(); f++, t++) { | ||||
|     // TODO: if 1->n expansion is supported, adjust this conditional. | ||||
|     if (*f != *t) return false; | ||||
|   if (from.size() > to.size()) { | ||||
|     return false; | ||||
|   } | ||||
|   return f == from.rend(); | ||||
|   ssize_t from_dim_start = 0, from_dim_end = from.size() - 1; | ||||
|   while (from_dim_start < from.size() && from[from_dim_start] == 1) { | ||||
|     from_dim_start++; | ||||
|   } | ||||
|   while (from_dim_end > from_dim_start && from[from_dim_end] == 1) { | ||||
|     from_dim_end--; | ||||
|   } | ||||
|  | ||||
|   ssize_t f = from_dim_end; | ||||
|   ssize_t t = to.size() - 1; | ||||
|   for (; f >= from_dim_start && t >= 0; --f, --t) { | ||||
|     if (from[f] != to[t]) return false; | ||||
|   } | ||||
|  | ||||
|   // In the case that the 'to' tensor has leading ones in the same place that | ||||
|   // the 'from' tensor does, f will be less than from_dim_start rather than | ||||
| //   strictly equal. E.g.: to := [5, 1, 768] and from := [1, 1, 768] | ||||
|   return f <= from_dim_start; | ||||
| } | ||||
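| A minimal Python mirror of the suffix check above (a sketch for illustration, assuming plain lists as inputs), exercised on the example from the comment: | ||||
|  | ||||
|     def fusible_expand_to(from_sizes, to_sizes): | ||||
|         if len(from_sizes) > len(to_sizes): | ||||
|             return False | ||||
|         start, end = 0, len(from_sizes) - 1 | ||||
|         while start < len(from_sizes) and from_sizes[start] == 1: | ||||
|             start += 1                           # drop leading ones | ||||
|         while end > start and from_sizes[end] == 1: | ||||
|             end -= 1                             # drop trailing ones | ||||
|         f, t = end, len(to_sizes) - 1 | ||||
|         while f >= start and t >= 0:             # compare trailing dimensions | ||||
|             if from_sizes[f] != to_sizes[t]: | ||||
|                 return False | ||||
|             f, t = f - 1, t - 1 | ||||
|         return f <= start | ||||
|  | ||||
|     assert fusible_expand_to([1, 1, 768], [5, 1, 768])   # example from the comment | ||||
|     assert fusible_expand_to([3], [2, 3])                # 'from' is a suffix of 'to' | ||||
|     assert not fusible_expand_to([2], [2, 3])            # trailing sizes differ | ||||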
|  | ||||
| void fuseBroadcast(std::shared_ptr<Graph>& graph) { | ||||
| @ -76,6 +114,58 @@ void fuseBroadcast(std::shared_ptr<Graph>& graph) { | ||||
|   } | ||||
| } | ||||
|  | ||||
| void fuseConsecutiveTransposes(std::shared_ptr<Graph>& graph) { | ||||
|   for (auto it = graph->begin(); it != graph->end(); ++it) { | ||||
|     auto* n = *it; | ||||
|  | ||||
|     if (n->kind() == kTranspose && n->input()->kind() == kTranspose) { | ||||
|       auto origInput = n->input(); | ||||
|       n->is_(kperm, composeTransposes(origInput->is(kperm), n->is(kperm))); | ||||
|       n->replaceInput(0, origInput->input()); | ||||
|       if (origInput->uses().size() == 0) { | ||||
|         origInput->destroy(); | ||||
|       } | ||||
|       continue; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| void eliminateNopTranspose(std::shared_ptr<Graph>& graph) { | ||||
|   for (auto it = graph->begin(); it != graph->end(); ++it) { | ||||
|     auto* n = *it; | ||||
|  | ||||
|     if (n->kind() == kTranspose) { | ||||
|       if (isNopTranspose(n->is(kperm))) { | ||||
|         n->replaceAllUsesWith(n->input()); | ||||
|         it.destroyCurrent(); | ||||
|         continue; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| void fuseTransposeIntoGemm(std::shared_ptr<Graph>& graph) { | ||||
|   static const std::vector<int64_t> simpleTransPerm({1,0}); | ||||
|  | ||||
|   for (auto it = graph->begin(); it != graph->end(); ++it) { | ||||
|     auto* n = *it; | ||||
|  | ||||
|     if (n->kind() == kGemm) { | ||||
|       for (size_t i : {0,1}) { | ||||
|         auto inp = n->inputs()[i]; | ||||
|         auto trans = i == 0 ? ktransA : ktransB; | ||||
|         if (inp->kind() == kTranspose && inp->is(kperm) == simpleTransPerm) { | ||||
|           n->replaceInput(i, inp->input()); | ||||
|           n->i_(trans, n->hasAttribute(trans) ? !n->i(trans) : 1); | ||||
|           if (inp->uses().size() == 0) { | ||||
|             inp->destroy(); | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
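| The Gemm rewrite above relies on Gemm(Transpose(A), B) computing the same thing as Gemm(A, B, transA=1); a quick numpy check of that identity (an illustration with alpha=1, beta=0 and no C term, not part of the pass): | ||||
|  | ||||
|     import numpy as np | ||||
|  | ||||
|     def gemm(a, b, trans_a=False, trans_b=False): | ||||
|         return (a.T if trans_a else a).dot(b.T if trans_b else b) | ||||
|  | ||||
|     A = np.random.randn(4, 3) | ||||
|     B = np.random.randn(4, 5) | ||||
|     # transposing the input up front and flipping the transA attribute agree | ||||
|     assert np.allclose(gemm(A.T, B), gemm(A, B, trans_a=True)) | ||||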
|  | ||||
| // This optimization does ONNX-specific peephole optimizations. | ||||
| // | ||||
| // At the moment, here are the optimizations it does: | ||||
| @ -83,6 +173,9 @@ void fuseBroadcast(std::shared_ptr<Graph>& graph) { | ||||
| //    easier for non-strided backends to more efficiently do broadcasts if this is | ||||
| //    local information.  This optimization is not useful for PyTorch as 'expand' | ||||
| //    is free. | ||||
| //  - Fusing of consecutive transposes | ||||
| //  - Elimination of NOP transposes | ||||
| //  - Fusing of transposes into Gemm | ||||
| // | ||||
| // Before you write an optimization here, ask yourself, "Could I do this | ||||
| // optimization on ATen operators"?  If so, you should seriously consider | ||||
| @ -94,6 +187,9 @@ void PeepholeOptimizeONNX(std::shared_ptr<Graph>& graph) { | ||||
|   // TODO: make it easier not to do O(k) iterations over the graph, where | ||||
|   // k is the number of distinct peephole optimizations | ||||
|   fuseBroadcast(graph); | ||||
|   fuseConsecutiveTransposes(graph); | ||||
|   eliminateNopTranspose(graph); | ||||
|   fuseTransposeIntoGemm(graph); | ||||
| } | ||||
|  | ||||
| }} | ||||
|  | ||||
| @ -13,6 +13,7 @@ void PeepholeOptimize(std::shared_ptr<Graph>& graph) { | ||||
|   for (auto it = graph->begin(); it != graph->end(); ++it) { | ||||
|     auto* n = *it; | ||||
|  | ||||
|     // eliminate redundant expand | ||||
|     if (n->kind() == kexpand) { | ||||
|       if (n->is(ksize) == n->input()->type()->expect<TensorType>()->sizes()) { | ||||
|         n->replaceAllUsesWith(n->input()); | ||||
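| For instance (sizes are illustrative), expanding a tensor to its own size is a no-op, so every use of the expand's output can simply be rewired to its input: | ||||
|  | ||||
|     import torch | ||||
|     x = torch.randn(3, 4) | ||||
|     y = x.expand(3, 4)   # same size as x; the recorded 'expand' node is redundant | ||||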
|  | ||||
| @ -105,6 +105,7 @@ void initPythonIRBindings(PyObject * module_) { | ||||
|       node->setType(other->typeOption()); | ||||
|       return node; | ||||
|     }) | ||||
|     .NS(scopeName) | ||||
| #define AS(name) def(#name,&Attributes<Node> :: name) | ||||
|     // methods from Attributes | ||||
|     .AS(copyAttributes) | ||||
|  | ||||
| @ -19,7 +19,7 @@ namespace torch { namespace jit { | ||||
|  | ||||
| void initPythonTracerBindings(PyObject* module_) { | ||||
|   auto m = py::handle(module_).cast<py::module>(); | ||||
|   py::class_<TracingState,std::shared_ptr<TracingState>>(m, "TracingState") | ||||
|   py::class_<TracingState,std::shared_ptr<TracingState>>(m, "TracingState", py::dynamic_attr()) | ||||
|     // NB: no constructor; you have to get it from C++ code | ||||
|     .def("__repr__", [](const TracingState& s) { | ||||
|       std::ostringstream ss; | ||||
| @ -32,13 +32,17 @@ void initPythonTracerBindings(PyObject* module_) { | ||||
|       ss << *s.graph; | ||||
|       return ss.str(); | ||||
|     }) | ||||
|     .def("export", [](TracingState& s) { | ||||
|       ASSERT_UNEXPIRED("export"); | ||||
|       return py::bytes(ExportGraph(s.graph, {})); | ||||
|     .def("push_scope", [](TracingState& s, const std::string& scope_name) { | ||||
|       ASSERT_UNEXPIRED("push_scope"); | ||||
|       s.push_scope(scope_name); | ||||
|     }) | ||||
|     .def("export", [](TracingState& s, const std::vector<at::Tensor>& initializers) { | ||||
|     .def("pop_scope", [](TracingState& s) { | ||||
|       ASSERT_UNEXPIRED("pop_scope"); | ||||
|       s.pop_scope(); | ||||
|     }) | ||||
|     .def("export", [](TracingState& s, const std::vector<at::Tensor>& initializers, int64_t onnx_opset_version) { | ||||
|       ASSERT_UNEXPIRED("export"); | ||||
|       return py::bytes(ExportGraph(s.graph, initializers)); | ||||
|       return py::bytes(ExportGraph(s.graph, initializers, onnx_opset_version)); | ||||
|     }) | ||||
|     .def("graph", [](TracingState& s) { | ||||
|       return s.graph; | ||||
| @ -56,6 +60,12 @@ void initPythonTracerBindings(PyObject* module_) { | ||||
|   m.def("_tracer_exit", [](variable_list var_outputs) { | ||||
|     tracer::exit(var_outputs); | ||||
|   }); | ||||
|   m.def("_get_tracing_state", [](const variable_list& vars) { | ||||
|     return getTracingState(vars); | ||||
|   }); | ||||
|   m.def("_is_tracing", [](const variable_list& vars) { | ||||
|     return isTracing(vars); | ||||
|   }); | ||||
| } | ||||
|  | ||||
| }} // namespace torch::jit | ||||
|  | ||||
| @ -4,6 +4,11 @@ | ||||
| #include "torch/csrc/autograd/function.h" | ||||
| #include "torch/csrc/autograd/python_engine.h" | ||||
| #include "torch/csrc/autograd/functions/special.h" | ||||
| #include "torch/csrc/utils/auto_gil.h" | ||||
| #include "torch/csrc/utils/python_strings.h" | ||||
|  | ||||
| #include <frameobject.h> | ||||
| #include <patchlevel.h> | ||||
|  | ||||
| namespace torch { namespace jit { namespace tracer { | ||||
|  | ||||
| @ -89,6 +94,28 @@ void nontraceableBackwardSubgraph(const variable_list& inputs, const variable_li | ||||
|   std::make_shared<autograd::Eval>()->replaceSubgraph(inputs, outputs); | ||||
| } | ||||
|  | ||||
| namespace { | ||||
| // Python interpreter retrieval routine adapted from | ||||
| // https://stackoverflow.com/a/8706144 | ||||
| std::string getPythonInterpreterStackTrace() { | ||||
|   std::stringstream stack_trace; | ||||
|   AutoGIL gil; | ||||
|   PyThreadState *tstate = PyThreadState_GET(); | ||||
|   if (NULL != tstate && NULL != tstate->frame) { | ||||
|     PyFrameObject *frame = tstate->frame; | ||||
|  | ||||
|     while (NULL != frame) { | ||||
|       int line = PyCode_Addr2Line(frame->f_code, frame->f_lasti); | ||||
|       std::string filename = THPUtils_unpackString(frame->f_code->co_filename); | ||||
|       std::string funcname = THPUtils_unpackString(frame->f_code->co_name); | ||||
|       stack_trace << filename << "(" << line << "): " << funcname << "\n"; | ||||
|       frame = frame->f_back; | ||||
|     } | ||||
|   } | ||||
|   return stack_trace.str(); | ||||
| } | ||||
| }  // namespace | ||||
|  | ||||
| Node* recordTrace(std::string op, // TODO: make this a Symbol | ||||
|                   at::ArrayRef<Variable> inputs, | ||||
|                   at::ArrayRef<Variable> outputs) { | ||||
| @ -99,6 +126,9 @@ Node* recordTrace(std::string op, // TODO: make this a Symbol | ||||
|   auto state_lock = state->lock(); | ||||
|  | ||||
|   Node *n = graph->create(stringToSymbol(op)); | ||||
|   auto sl = std::make_shared<SourceLocation>(getPythonInterpreterStackTrace()); | ||||
|   n->setSourceLocation(sl); | ||||
|  | ||||
|   for (Variable input : inputs) { | ||||
|     n->addInput(getValueTrace(state, input)); | ||||
|   } | ||||
|  | ||||
| @ -80,6 +80,14 @@ struct TracingState : public std::enable_shared_from_this<TracingState> { | ||||
|   bool is_complete() const { | ||||
|     return !is_expired() && graph->stage() == num_stages - 1; | ||||
|   } | ||||
|  | ||||
|   void push_scope(const std::string& scope_name) { | ||||
|     graph->push_scope(scope_name); | ||||
|   } | ||||
|  | ||||
|   void pop_scope() { | ||||
|     graph->pop_scope(); | ||||
|   } | ||||
| }; | ||||
|  | ||||
| struct ValueTracingStateElem { | ||||
|  | ||||
| @ -168,6 +168,21 @@ DEFINE_CONST(UINT64) | ||||
| DEFINE_CONST(COMPLEX64) | ||||
| DEFINE_CONST(COMPLEX128) | ||||
| #undef DEFINE_CONST | ||||
|  | ||||
| #define DEFINE_CONST(C) \ | ||||
| const auto a##C = onnx_AttributeProto_AttributeType_##C; | ||||
| DEFINE_CONST(FLOAT) | ||||
| DEFINE_CONST(INT) | ||||
| DEFINE_CONST(STRING) | ||||
| DEFINE_CONST(TENSOR) | ||||
| DEFINE_CONST(GRAPH) | ||||
| DEFINE_CONST(FLOATS) | ||||
| DEFINE_CONST(INTS) | ||||
| DEFINE_CONST(STRINGS) | ||||
| DEFINE_CONST(TENSORS) | ||||
| DEFINE_CONST(GRAPHS) | ||||
| #undef DEFINE_CONST | ||||
|  | ||||
| // C++ wrappers which simulate the Google C++ Protobuf API | ||||
| // | ||||
| // These are NOT COMPLETE wrappers. If you find something is missing, add it! | ||||
| @ -270,6 +285,7 @@ public: | ||||
|     proto.graphs  = list<GraphProto, onnx_GraphProto_fields>(&graphs); | ||||
|   } | ||||
|   void set_name(const std::string& s) { proto.name = string(&name, s); } | ||||
|   void set_type(onnx_AttributeProto_AttributeType t) { proto.has_type = true; proto.type = t; } | ||||
|   void set_f(float f) { proto.has_f = true; proto.f = f; } | ||||
|   void set_i(int64_t i) { proto.has_i = true; proto.i = i; } | ||||
|   void set_s(std::string s_) { proto.s = string(&s, s_); } | ||||
| @ -290,6 +306,7 @@ public: | ||||
| class NodeProto : public MicroProto<onnx_NodeProto> { | ||||
| private: | ||||
|   std::string op_type; | ||||
|   std::string doc_string; | ||||
|   unique_vector<std::string> inputs; | ||||
|   unique_vector<std::string> outputs; | ||||
|   unique_vector<AttributeProto> attributes; | ||||
| @ -309,6 +326,7 @@ public: | ||||
|     return ptr; | ||||
|   } | ||||
|   void set_op_type(const std::string& s) { proto.op_type= string(&op_type, s); } | ||||
|   void set_doc_string(const std::string& s) { proto.doc_string = string(&doc_string, s); } | ||||
| }; | ||||
|  | ||||
| class GraphProto : public MicroProto<onnx_GraphProto> { | ||||
| @ -349,6 +367,15 @@ public: | ||||
|   } | ||||
| }; | ||||
|  | ||||
| class OperatorSetIdProto : public MicroProto<onnx_OperatorSetIdProto> { | ||||
| private: | ||||
|   std::string domain; | ||||
| public: | ||||
|   OperatorSetIdProto() : MicroProto(onnx_OperatorSetIdProto_init_default) {} | ||||
|   void set_domain(const std::string& s) { proto.domain = string(&domain, s); } | ||||
|   void set_version(int64_t v) { proto.has_version = true; proto.version = v; } | ||||
| }; | ||||
|  | ||||
| class ModelProto : public MicroProto<onnx_ModelProto> { | ||||
| private: | ||||
|   std::string producer_name; | ||||
| @ -356,21 +383,26 @@ private: | ||||
|   std::string domain; | ||||
|   std::string doc_string; | ||||
|   std::unique_ptr<GraphProto> graph; | ||||
|   unique_vector<OperatorSetIdProto> opset_import; | ||||
| public: | ||||
|   ModelProto() : MicroProto(onnx_ModelProto_init_default) { | ||||
|     proto.has_ir_version = true; | ||||
|     proto.ir_version = onnx_Version_IR_VERSION; | ||||
|     proto.producer_name = string(&producer_name, "pytorch"); | ||||
|     // TODO: stop hard-coding this | ||||
|     proto.producer_version = string(&producer_version, "0.2"); | ||||
|     proto.domain = string(&domain, "com.facebook"); | ||||
|     proto.opset_import = list<OperatorSetIdProto, onnx_OperatorSetIdProto_fields>(&opset_import); | ||||
|   } | ||||
|   void set_model_version(int64_t i) { proto.has_model_version = true; proto.model_version = i; } | ||||
|   void set_doc_string(const std::string& s) { proto.doc_string = string(&doc_string, s); } | ||||
|   void set_producer_name(const std::string& s) { proto.producer_name = string(&producer_name, s); } | ||||
|   void set_producer_version(const std::string& s) { proto.producer_version = string(&producer_version, s); } | ||||
|   GraphProto* mutable_graph() { | ||||
|     proto.graph = msg<GraphProto, onnx_GraphProto_fields>(&graph); | ||||
|     return graph.get(); | ||||
|   } | ||||
|   OperatorSetIdProto* add_opset_import() { | ||||
|     auto ptr = new OperatorSetIdProto(); | ||||
|     opset_import.emplace_back(ptr); | ||||
|     return ptr; | ||||
|   } | ||||
| }; | ||||
|  | ||||
| }} // namespace torch::onnx | ||||
|  | ||||
| @ -10,7 +10,7 @@ | ||||
|  | ||||
|  | ||||
|  | ||||
| const pb_field_t onnx_AttributeProto_fields[12] = { | ||||
| const pb_field_t onnx_AttributeProto_fields[13] = { | ||||
|     PB_FIELD(  1, STRING  , OPTIONAL, CALLBACK, FIRST, onnx_AttributeProto, name, name, 0), | ||||
|     PB_FIELD(  2, FLOAT   , OPTIONAL, STATIC  , OTHER, onnx_AttributeProto, f, name, 0), | ||||
|     PB_FIELD(  3, INT64   , OPTIONAL, STATIC  , OTHER, onnx_AttributeProto, i, f, 0), | ||||
| @ -22,6 +22,7 @@ const pb_field_t onnx_AttributeProto_fields[12] = { | ||||
|     PB_FIELD(  9, BYTES   , REPEATED, CALLBACK, OTHER, onnx_AttributeProto, strings, ints, 0), | ||||
|     PB_FIELD( 10, MESSAGE , REPEATED, CALLBACK, OTHER, onnx_AttributeProto, tensors, strings, &onnx_TensorProto_fields), | ||||
|     PB_FIELD( 11, MESSAGE , REPEATED, CALLBACK, OTHER, onnx_AttributeProto, graphs, tensors, &onnx_GraphProto_fields), | ||||
|     PB_FIELD( 20, UENUM   , OPTIONAL, STATIC  , OTHER, onnx_AttributeProto, type, graphs, 0), | ||||
|     PB_LAST_FIELD | ||||
| }; | ||||
|  | ||||
| @ -31,17 +32,18 @@ const pb_field_t onnx_ValueInfoProto_fields[3] = { | ||||
|     PB_LAST_FIELD | ||||
| }; | ||||
|  | ||||
| const pb_field_t onnx_NodeProto_fields[7] = { | ||||
| const pb_field_t onnx_NodeProto_fields[8] = { | ||||
|     PB_FIELD(  1, STRING  , REPEATED, CALLBACK, FIRST, onnx_NodeProto, input, input, 0), | ||||
|     PB_FIELD(  2, STRING  , REPEATED, CALLBACK, OTHER, onnx_NodeProto, output, input, 0), | ||||
|     PB_FIELD(  3, STRING  , OPTIONAL, CALLBACK, OTHER, onnx_NodeProto, name, output, 0), | ||||
|     PB_FIELD(  4, STRING  , OPTIONAL, CALLBACK, OTHER, onnx_NodeProto, op_type, name, 0), | ||||
|     PB_FIELD(  5, MESSAGE , REPEATED, CALLBACK, OTHER, onnx_NodeProto, attribute, op_type, &onnx_AttributeProto_fields), | ||||
|     PB_FIELD(  6, STRING  , OPTIONAL, CALLBACK, OTHER, onnx_NodeProto, doc_string, attribute, 0), | ||||
|     PB_FIELD(  7, STRING  , OPTIONAL, CALLBACK, OTHER, onnx_NodeProto, domain, doc_string, 0), | ||||
|     PB_LAST_FIELD | ||||
| }; | ||||
|  | ||||
| const pb_field_t onnx_ModelProto_fields[8] = { | ||||
| const pb_field_t onnx_ModelProto_fields[9] = { | ||||
|     PB_FIELD(  1, INT64   , OPTIONAL, STATIC  , FIRST, onnx_ModelProto, ir_version, ir_version, 0), | ||||
|     PB_FIELD(  2, STRING  , OPTIONAL, CALLBACK, OTHER, onnx_ModelProto, producer_name, ir_version, 0), | ||||
|     PB_FIELD(  3, STRING  , OPTIONAL, CALLBACK, OTHER, onnx_ModelProto, producer_version, producer_name, 0), | ||||
| @ -49,6 +51,7 @@ const pb_field_t onnx_ModelProto_fields[8] = { | ||||
|     PB_FIELD(  5, INT64   , OPTIONAL, STATIC  , OTHER, onnx_ModelProto, model_version, domain, 0), | ||||
|     PB_FIELD(  6, STRING  , OPTIONAL, CALLBACK, OTHER, onnx_ModelProto, doc_string, model_version, 0), | ||||
|     PB_FIELD(  7, MESSAGE , OPTIONAL, CALLBACK, OTHER, onnx_ModelProto, graph, doc_string, &onnx_GraphProto_fields), | ||||
|     PB_FIELD(  8, MESSAGE , REPEATED, CALLBACK, OTHER, onnx_ModelProto, opset_import, graph, &onnx_OperatorSetIdProto_fields), | ||||
|     PB_LAST_FIELD | ||||
| }; | ||||
|  | ||||
| @ -120,6 +123,13 @@ const pb_field_t onnx_TypeProto_SparseTensorTypeProto_fields[3] = { | ||||
|     PB_LAST_FIELD | ||||
| }; | ||||
|  | ||||
| const pb_field_t onnx_OperatorSetIdProto_fields[3] = { | ||||
|     PB_FIELD(  1, STRING  , OPTIONAL, CALLBACK, FIRST, onnx_OperatorSetIdProto, domain, domain, 0), | ||||
|     PB_FIELD(  2, INT64   , OPTIONAL, STATIC  , OTHER, onnx_OperatorSetIdProto, version, domain, 0), | ||||
|     PB_LAST_FIELD | ||||
| }; | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| @ -132,7 +142,7 @@ const pb_field_t onnx_TypeProto_SparseTensorTypeProto_fields[3] = { | ||||
|  * numbers or field sizes that are larger than what can fit in 8 or 16 bit | ||||
|  * field descriptors. | ||||
|  */ | ||||
| PB_STATIC_ASSERT((pb_membersize(onnx_TensorProto, segment) < 65536 && pb_membersize(onnx_SparseTensorProto, indices) < 65536 && pb_membersize(onnx_SparseTensorProto, values) < 65536 && pb_membersize(onnx_TypeProto, sparse_tensor_type) < 65536 && pb_membersize(onnx_TypeProto_SparseTensorTypeProto, shape) < 65536), YOU_MUST_DEFINE_PB_FIELD_32BIT_FOR_MESSAGES_onnx_AttributeProto_onnx_ValueInfoProto_onnx_NodeProto_onnx_ModelProto_onnx_GraphProto_onnx_TensorProto_onnx_TensorProto_Segment_onnx_SparseTensorProto_onnx_TypeProto_onnx_TypeProto_TensorShapeProto_onnx_TypeProto_TensorShapeProto_Dimension_onnx_TypeProto_TensorTypeProto_onnx_TypeProto_SparseTensorTypeProto) | ||||
| PB_STATIC_ASSERT((pb_membersize(onnx_TensorProto, segment) < 65536 && pb_membersize(onnx_SparseTensorProto, indices) < 65536 && pb_membersize(onnx_SparseTensorProto, values) < 65536 && pb_membersize(onnx_TypeProto, sparse_tensor_type) < 65536 && pb_membersize(onnx_TypeProto_SparseTensorTypeProto, shape) < 65536), YOU_MUST_DEFINE_PB_FIELD_32BIT_FOR_MESSAGES_onnx_AttributeProto_onnx_ValueInfoProto_onnx_NodeProto_onnx_ModelProto_onnx_GraphProto_onnx_TensorProto_onnx_TensorProto_Segment_onnx_SparseTensorProto_onnx_TypeProto_onnx_TypeProto_TensorShapeProto_onnx_TypeProto_TensorShapeProto_Dimension_onnx_TypeProto_TensorTypeProto_onnx_TypeProto_SparseTensorTypeProto_onnx_OperatorSetIdProto) | ||||
| #endif | ||||
|  | ||||
| #if !defined(PB_FIELD_16BIT) && !defined(PB_FIELD_32BIT) | ||||
| @ -143,7 +153,7 @@ PB_STATIC_ASSERT((pb_membersize(onnx_TensorProto, segment) < 65536 && pb_members | ||||
|  * numbers or field sizes that are larger than what can fit in the default | ||||
|  * 8 bit descriptors. | ||||
|  */ | ||||
| PB_STATIC_ASSERT((pb_membersize(onnx_TensorProto, segment) < 256 && pb_membersize(onnx_SparseTensorProto, indices) < 256 && pb_membersize(onnx_SparseTensorProto, values) < 256 && pb_membersize(onnx_TypeProto, sparse_tensor_type) < 256 && pb_membersize(onnx_TypeProto_SparseTensorTypeProto, shape) < 256), YOU_MUST_DEFINE_PB_FIELD_16BIT_FOR_MESSAGES_onnx_AttributeProto_onnx_ValueInfoProto_onnx_NodeProto_onnx_ModelProto_onnx_GraphProto_onnx_TensorProto_onnx_TensorProto_Segment_onnx_SparseTensorProto_onnx_TypeProto_onnx_TypeProto_TensorShapeProto_onnx_TypeProto_TensorShapeProto_Dimension_onnx_TypeProto_TensorTypeProto_onnx_TypeProto_SparseTensorTypeProto) | ||||
| PB_STATIC_ASSERT((pb_membersize(onnx_TensorProto, segment) < 256 && pb_membersize(onnx_SparseTensorProto, indices) < 256 && pb_membersize(onnx_SparseTensorProto, values) < 256 && pb_membersize(onnx_TypeProto, sparse_tensor_type) < 256 && pb_membersize(onnx_TypeProto_SparseTensorTypeProto, shape) < 256), YOU_MUST_DEFINE_PB_FIELD_16BIT_FOR_MESSAGES_onnx_AttributeProto_onnx_ValueInfoProto_onnx_NodeProto_onnx_ModelProto_onnx_GraphProto_onnx_TensorProto_onnx_TensorProto_Segment_onnx_SparseTensorProto_onnx_TypeProto_onnx_TypeProto_TensorShapeProto_onnx_TypeProto_TensorShapeProto_Dimension_onnx_TypeProto_TensorTypeProto_onnx_TypeProto_SparseTensorTypeProto_onnx_OperatorSetIdProto) | ||||
| #endif | ||||
|  | ||||
|  | ||||
|  | ||||
| @ -16,12 +16,31 @@ extern "C" { | ||||
|  | ||||
| /* Enum definitions */ | ||||
| typedef enum _onnx_Version { | ||||
|     onnx_Version_IR_VERSION = 1 | ||||
|     onnx_Version__START_VERSION = 0, | ||||
|     onnx_Version_IR_VERSION_2017_10_10 = 1, | ||||
|     onnx_Version_IR_VERSION = 2 | ||||
| } onnx_Version; | ||||
| #define _onnx_Version_MIN onnx_Version_IR_VERSION | ||||
| #define _onnx_Version_MIN onnx_Version__START_VERSION | ||||
| #define _onnx_Version_MAX onnx_Version_IR_VERSION | ||||
| #define _onnx_Version_ARRAYSIZE ((onnx_Version)(onnx_Version_IR_VERSION+1)) | ||||
|  | ||||
| typedef enum _onnx_AttributeProto_AttributeType { | ||||
|     onnx_AttributeProto_AttributeType_UNDEFINED = 0, | ||||
|     onnx_AttributeProto_AttributeType_FLOAT = 1, | ||||
|     onnx_AttributeProto_AttributeType_INT = 2, | ||||
|     onnx_AttributeProto_AttributeType_STRING = 3, | ||||
|     onnx_AttributeProto_AttributeType_TENSOR = 4, | ||||
|     onnx_AttributeProto_AttributeType_GRAPH = 5, | ||||
|     onnx_AttributeProto_AttributeType_FLOATS = 6, | ||||
|     onnx_AttributeProto_AttributeType_INTS = 7, | ||||
|     onnx_AttributeProto_AttributeType_STRINGS = 8, | ||||
|     onnx_AttributeProto_AttributeType_TENSORS = 9, | ||||
|     onnx_AttributeProto_AttributeType_GRAPHS = 10 | ||||
| } onnx_AttributeProto_AttributeType; | ||||
| #define _onnx_AttributeProto_AttributeType_MIN onnx_AttributeProto_AttributeType_UNDEFINED | ||||
| #define _onnx_AttributeProto_AttributeType_MAX onnx_AttributeProto_AttributeType_GRAPHS | ||||
| #define _onnx_AttributeProto_AttributeType_ARRAYSIZE ((onnx_AttributeProto_AttributeType)(onnx_AttributeProto_AttributeType_GRAPHS+1)) | ||||
|  | ||||
| typedef enum _onnx_TensorProto_DataType { | ||||
|     onnx_TensorProto_DataType_UNDEFINED = 0, | ||||
|     onnx_TensorProto_DataType_FLOAT = 1, | ||||
| @ -63,6 +82,7 @@ typedef struct _onnx_NodeProto { | ||||
|     pb_callback_t op_type; | ||||
|     pb_callback_t attribute; | ||||
|     pb_callback_t doc_string; | ||||
|     pb_callback_t domain; | ||||
| /* @@protoc_insertion_point(struct:onnx_NodeProto) */ | ||||
| } onnx_NodeProto; | ||||
|  | ||||
| @ -91,6 +111,8 @@ typedef struct _onnx_AttributeProto { | ||||
|     pb_callback_t strings; | ||||
|     pb_callback_t tensors; | ||||
|     pb_callback_t graphs; | ||||
|     bool has_type; | ||||
|     onnx_AttributeProto_AttributeType type; | ||||
| /* @@protoc_insertion_point(struct:onnx_AttributeProto) */ | ||||
| } onnx_AttributeProto; | ||||
|  | ||||
| @ -104,9 +126,17 @@ typedef struct _onnx_ModelProto { | ||||
|     int64_t model_version; | ||||
|     pb_callback_t doc_string; | ||||
|     pb_callback_t graph; | ||||
|     pb_callback_t opset_import; | ||||
| /* @@protoc_insertion_point(struct:onnx_ModelProto) */ | ||||
| } onnx_ModelProto; | ||||
|  | ||||
| typedef struct _onnx_OperatorSetIdProto { | ||||
|     pb_callback_t domain; | ||||
|     bool has_version; | ||||
|     int64_t version; | ||||
| /* @@protoc_insertion_point(struct:onnx_OperatorSetIdProto) */ | ||||
| } onnx_OperatorSetIdProto; | ||||
|  | ||||
| typedef struct _onnx_TensorProto_Segment { | ||||
|     bool has_begin; | ||||
|     int64_t begin; | ||||
| @ -173,10 +203,10 @@ typedef struct _onnx_SparseTensorProto { | ||||
| /* Default values for struct fields */ | ||||
|  | ||||
| /* Initializer values for message structs */ | ||||
| #define onnx_AttributeProto_init_default         {{{NULL}, NULL}, false, 0, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}} | ||||
| #define onnx_AttributeProto_init_default         {{{NULL}, NULL}, false, 0, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, (onnx_AttributeProto_AttributeType)0} | ||||
| #define onnx_ValueInfoProto_init_default         {{{NULL}, NULL}, {{NULL}, NULL}} | ||||
| #define onnx_NodeProto_init_default              {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}} | ||||
| #define onnx_ModelProto_init_default             {false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, 0, {{NULL}, NULL}, {{NULL}, NULL}} | ||||
| #define onnx_NodeProto_init_default              {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}} | ||||
| #define onnx_ModelProto_init_default             {false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}} | ||||
| #define onnx_GraphProto_init_default             {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}} | ||||
| #define onnx_TensorProto_init_default            {{{NULL}, NULL}, false, (onnx_TensorProto_DataType)0, false, onnx_TensorProto_Segment_init_default, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}} | ||||
| #define onnx_TensorProto_Segment_init_default    {false, 0, false, 0} | ||||
| @ -186,10 +216,11 @@ typedef struct _onnx_SparseTensorProto { | ||||
| #define onnx_TypeProto_TensorShapeProto_Dimension_init_default {false, 0, {{NULL}, NULL}} | ||||
| #define onnx_TypeProto_TensorTypeProto_init_default {false, (onnx_TensorProto_DataType)0, {{NULL}, NULL}} | ||||
| #define onnx_TypeProto_SparseTensorTypeProto_init_default {false, (onnx_TensorProto_DataType)0, false, onnx_TypeProto_TensorShapeProto_init_default} | ||||
| #define onnx_AttributeProto_init_zero            {{{NULL}, NULL}, false, 0, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}} | ||||
| #define onnx_OperatorSetIdProto_init_default     {{{NULL}, NULL}, false, 0} | ||||
| #define onnx_AttributeProto_init_zero            {{{NULL}, NULL}, false, 0, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, (onnx_AttributeProto_AttributeType)0} | ||||
| #define onnx_ValueInfoProto_init_zero            {{{NULL}, NULL}, {{NULL}, NULL}} | ||||
| #define onnx_NodeProto_init_zero                 {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}} | ||||
| #define onnx_ModelProto_init_zero                {false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, 0, {{NULL}, NULL}, {{NULL}, NULL}} | ||||
| #define onnx_NodeProto_init_zero                 {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}} | ||||
| #define onnx_ModelProto_init_zero                {false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, false, 0, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}} | ||||
| #define onnx_GraphProto_init_zero                {{{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}} | ||||
| #define onnx_TensorProto_init_zero               {{{NULL}, NULL}, false, (onnx_TensorProto_DataType)0, false, onnx_TensorProto_Segment_init_zero, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}, {{NULL}, NULL}} | ||||
| #define onnx_TensorProto_Segment_init_zero       {false, 0, false, 0} | ||||
| @ -199,6 +230,7 @@ typedef struct _onnx_SparseTensorProto { | ||||
| #define onnx_TypeProto_TensorShapeProto_Dimension_init_zero {false, 0, {{NULL}, NULL}} | ||||
| #define onnx_TypeProto_TensorTypeProto_init_zero {false, (onnx_TensorProto_DataType)0, {{NULL}, NULL}} | ||||
| #define onnx_TypeProto_SparseTensorTypeProto_init_zero {false, (onnx_TensorProto_DataType)0, false, onnx_TypeProto_TensorShapeProto_init_zero} | ||||
| #define onnx_OperatorSetIdProto_init_zero        {{{NULL}, NULL}, false, 0} | ||||
|  | ||||
| /* Field tags (for use in manual encoding/decoding) */ | ||||
| #define onnx_GraphProto_node_tag                 1 | ||||
| @ -212,12 +244,14 @@ typedef struct _onnx_SparseTensorProto { | ||||
| #define onnx_NodeProto_output_tag                2 | ||||
| #define onnx_NodeProto_name_tag                  3 | ||||
| #define onnx_NodeProto_op_type_tag               4 | ||||
| #define onnx_NodeProto_domain_tag                7 | ||||
| #define onnx_NodeProto_attribute_tag             5 | ||||
| #define onnx_NodeProto_doc_string_tag            6 | ||||
| #define onnx_TypeProto_TensorShapeProto_dim_tag  1 | ||||
| #define onnx_ValueInfoProto_name_tag             1 | ||||
| #define onnx_ValueInfoProto_type_tag             2 | ||||
| #define onnx_AttributeProto_name_tag             1 | ||||
| #define onnx_AttributeProto_type_tag             20 | ||||
| #define onnx_AttributeProto_f_tag                2 | ||||
| #define onnx_AttributeProto_i_tag                3 | ||||
| #define onnx_AttributeProto_s_tag                4 | ||||
| @ -229,12 +263,15 @@ typedef struct _onnx_SparseTensorProto { | ||||
| #define onnx_AttributeProto_tensors_tag          10 | ||||
| #define onnx_AttributeProto_graphs_tag           11 | ||||
| #define onnx_ModelProto_ir_version_tag           1 | ||||
| #define onnx_ModelProto_opset_import_tag         8 | ||||
| #define onnx_ModelProto_producer_name_tag        2 | ||||
| #define onnx_ModelProto_producer_version_tag     3 | ||||
| #define onnx_ModelProto_domain_tag               4 | ||||
| #define onnx_ModelProto_model_version_tag        5 | ||||
| #define onnx_ModelProto_doc_string_tag           6 | ||||
| #define onnx_ModelProto_graph_tag                7 | ||||
| #define onnx_OperatorSetIdProto_domain_tag       1 | ||||
| #define onnx_OperatorSetIdProto_version_tag      2 | ||||
| #define onnx_TensorProto_Segment_begin_tag       1 | ||||
| #define onnx_TensorProto_Segment_end_tag         2 | ||||
| #define onnx_TypeProto_SparseTensorTypeProto_elem_type_tag 1 | ||||
| @ -261,10 +298,10 @@ typedef struct _onnx_SparseTensorProto { | ||||
| #define onnx_SparseTensorProto_values_tag        3 | ||||
|  | ||||
| /* Struct field encoding specification for nanopb */ | ||||
| extern const pb_field_t onnx_AttributeProto_fields[12]; | ||||
| extern const pb_field_t onnx_AttributeProto_fields[13]; | ||||
| extern const pb_field_t onnx_ValueInfoProto_fields[3]; | ||||
| extern const pb_field_t onnx_NodeProto_fields[7]; | ||||
| extern const pb_field_t onnx_ModelProto_fields[8]; | ||||
| extern const pb_field_t onnx_NodeProto_fields[8]; | ||||
| extern const pb_field_t onnx_ModelProto_fields[9]; | ||||
| extern const pb_field_t onnx_GraphProto_fields[8]; | ||||
| extern const pb_field_t onnx_TensorProto_fields[12]; | ||||
| extern const pb_field_t onnx_TensorProto_Segment_fields[3]; | ||||
| @ -274,6 +311,7 @@ extern const pb_field_t onnx_TypeProto_TensorShapeProto_fields[2]; | ||||
| extern const pb_field_t onnx_TypeProto_TensorShapeProto_Dimension_fields[3]; | ||||
| extern const pb_field_t onnx_TypeProto_TensorTypeProto_fields[3]; | ||||
| extern const pb_field_t onnx_TypeProto_SparseTensorTypeProto_fields[3]; | ||||
| extern const pb_field_t onnx_OperatorSetIdProto_fields[3]; | ||||
|  | ||||
| /* Maximum encoded size of messages (where known) */ | ||||
| /* onnx_AttributeProto_size depends on runtime parameters */ | ||||
| @ -289,6 +327,7 @@ extern const pb_field_t onnx_TypeProto_SparseTensorTypeProto_fields[3]; | ||||
| /* onnx_TypeProto_TensorShapeProto_Dimension_size depends on runtime parameters */ | ||||
| /* onnx_TypeProto_TensorTypeProto_size depends on runtime parameters */ | ||||
| #define onnx_TypeProto_SparseTensorTypeProto_size (8 + onnx_TypeProto_TensorShapeProto_size) | ||||
| /* onnx_OperatorSetIdProto_size depends on runtime parameters */ | ||||
|  | ||||
| /* Message IDs (where set with "msgid" option) */ | ||||
| #ifdef PB_MSGID | ||||
|  | ||||
| @ -14,6 +14,7 @@ import ctypes | ||||
| import os | ||||
| import torch | ||||
| import traceback | ||||
| import warnings | ||||
| from torch._six import raise_from | ||||
| from multiprocessing.util import register_after_fork as _register_after_fork | ||||
|  | ||||
| @ -65,11 +66,37 @@ http://www.nvidia.com/Download/index.aspx""") | ||||
| The NVIDIA driver on your system is too old (found version {}). | ||||
| Please update your GPU driver by downloading and installing a new | ||||
| version from the URL: http://www.nvidia.com/Download/index.aspx | ||||
| Alternatively, go to: https://pytorch.org/binaries to install | ||||
| Alternatively, go to: http://pytorch.org to install | ||||
| a PyTorch version that has been compiled with your version | ||||
| of the CUDA driver.""".format(str(torch._C._cuda_getDriverVersion()))) | ||||
|  | ||||
|  | ||||
| def _check_capability(): | ||||
|     incorrect_binary_warn = """ | ||||
|     Found GPU%d %s which requires CUDA_VERSION >= %d for | ||||
|      optimal performance and fast startup time, but your PyTorch was compiled | ||||
|      with CUDA_VERSION %d. Please install the correct PyTorch binary | ||||
|      using instructions from http://pytorch.org | ||||
|     """ | ||||
|  | ||||
|     old_gpu_warn = """ | ||||
|     Found GPU%d %s which is of cuda capability %d.%d. | ||||
|     PyTorch no longer supports this GPU because it is too old. | ||||
|     """ | ||||
|  | ||||
|     CUDA_VERSION = torch._C._cuda_getCompiledVersion() | ||||
|     for d in range(device_count()): | ||||
|         capability = get_device_capability(d) | ||||
|         major = capability[0] | ||||
|         name = get_device_name(d) | ||||
|         if CUDA_VERSION < 8000 and major >= 6: | ||||
|             warnings.warn(incorrect_binary_warn % (d, name, 8000, CUDA_VERSION)) | ||||
|         elif CUDA_VERSION < 9000 and major >= 7: | ||||
|             warnings.warn(incorrect_binary_warn % (d, name, 9000, CUDA_VERSION)) | ||||
|         elif capability == (3, 0) or capability == (5, 0) or major < 3: | ||||
|             warnings.warn(old_gpu_warn % (d, name, major, capability[1])) | ||||
|  | ||||
|  | ||||
| def _lazy_call(callable): | ||||
|     if _initialized: | ||||
|         callable() | ||||
| @ -77,11 +104,26 @@ def _lazy_call(callable): | ||||
|         # Don't store the actual traceback to avoid memory cycle | ||||
|         _queued_calls.append((callable, traceback.format_stack())) | ||||
|  | ||||
| _lazy_call(_check_capability) | ||||
|  | ||||
|  | ||||
| class DeferredCudaCallError(Exception): | ||||
|     pass | ||||
|  | ||||
|  | ||||
| def init(): | ||||
|     """Initialize PyTorch's CUDA state.  You may need to call | ||||
|     this explicitly if you are interacting with PyTorch via | ||||
|     its C API, as Python bindings for CUDA functionality will not | ||||
|     be available until this initialization takes place.  Ordinary users | ||||
|     should not need this, as all of PyTorch's CUDA methods | ||||
|     automatically initialize CUDA state on-demand. | ||||
|  | ||||
|     Does nothing if the CUDA state is already initialized. | ||||
|     """ | ||||
|     _lazy_init() | ||||
|  | ||||
|  | ||||
| def _lazy_init(): | ||||
|     global _initialized, _cudart, _original_pid, _queued_calls | ||||
|     if _initialized: | ||||
| @ -162,10 +204,10 @@ class device(object): | ||||
|     def __enter__(self): | ||||
|         if self.idx is -1: | ||||
|             return | ||||
|         _lazy_init() | ||||
|         self.prev_idx = torch._C._cuda_getDevice() | ||||
|         if self.prev_idx != self.idx: | ||||
|             torch._C._cuda_setDevice(self.idx) | ||||
|         _lazy_init() | ||||
|  | ||||
|     def __exit__(self, *args): | ||||
|         if self.prev_idx != self.idx: | ||||
| @ -213,6 +255,19 @@ def get_device_name(device): | ||||
|         return torch._C._cuda_getDeviceName(device) | ||||
|  | ||||
|  | ||||
| def get_device_capability(device): | ||||
|     """Gets the cuda capability of a device. | ||||
|  | ||||
|     Arguments: | ||||
|         device (int): device for which to return the capability. This function is a | ||||
|             no-op if this argument is negative. | ||||
|     Returns: | ||||
|         tuple(int, int): the major and minor cuda capability of the device | ||||
|     """ | ||||
|     if device >= 0: | ||||
|         return torch._C._cuda_getDeviceCapability(device) | ||||
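| For example (the returned capability is illustrative; it depends on the installed GPU): | ||||
|  | ||||
|     import torch | ||||
|     print(torch.cuda.get_device_capability(0))   # e.g. (6, 1) on a Pascal-class card | ||||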
|  | ||||
|  | ||||
| @contextlib.contextmanager | ||||
| def stream(stream): | ||||
|     """Context-manager that selects a given stream. | ||||
| @ -223,6 +278,10 @@ def stream(stream): | ||||
|     Arguments: | ||||
|         stream (Stream): selected stream. This manager is a no-op if it's | ||||
|             ``None``. | ||||
|  | ||||
|     .. note:: Streams are per-device, and this function changes the "current | ||||
|        stream" only for the currently selected device.  It is illegal to select | ||||
|        a stream that belongs to a different device. | ||||
|     """ | ||||
|     if stream is None: | ||||
|         yield | ||||
| @ -238,7 +297,6 @@ def stream(stream): | ||||
| def device_count(): | ||||
|     """Returns the number of GPUs available.""" | ||||
|     if is_available(): | ||||
|         _lazy_init() | ||||
|         return torch._C._cuda_getDeviceCount() | ||||
|     else: | ||||
|         return 0 | ||||
| @ -264,9 +322,18 @@ def current_stream(): | ||||
|  | ||||
| def current_blas_handle(): | ||||
|     """Returns cublasHandle_t pointer to current cuBLAS handle""" | ||||
|     _lazy_init() | ||||
|     return torch._C._cuda_getCurrentBlasHandle() | ||||
|  | ||||
|  | ||||
| def empty_cache(): | ||||
|     """Releases all unoccupied cached memory currently held by the caching | ||||
|     allocator so that it can be used by other GPU applications and becomes | ||||
|     visible in `nvidia-smi`.""" | ||||
|     if _initialized: | ||||
|         return torch._C._cuda_emptyCache() | ||||
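| A small usage sketch (the tensor size is arbitrary): freed tensors first return to the caching allocator, and empty_cache() then hands the unused cached blocks back to the driver. | ||||
|  | ||||
|     import torch | ||||
|     x = torch.cuda.FloatTensor(1024, 1024).fill_(1)   # allocates ~4 MB on the GPU | ||||
|     del x                        # memory goes back to PyTorch's caching allocator | ||||
|     torch.cuda.empty_cache()     # cached blocks are released; nvidia-smi reflects it | ||||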
|  | ||||
|  | ||||
| def _host_allocator(): | ||||
|     _lazy_init() | ||||
|     return torch._C._cuda_cudaHostAllocator() | ||||
|  | ||||
| @ -6,6 +6,10 @@ from . import cudart, check_error, cudaStatus | ||||
| class Stream(torch._C._CudaStreamBase): | ||||
|     """Wrapper around a CUDA stream. | ||||
|  | ||||
|     A CUDA stream is a linear sequence of execution that belongs to a specific | ||||
|     device, independent from other streams.  See :ref:`cuda-semantics` for | ||||
|     details. | ||||
|  | ||||
|     Arguments: | ||||
|         device(int, optional): a device on which to allocate the Stream. | ||||
|         priority(int, optional): priority of the stream. Lower numbers | ||||
| @ -21,6 +25,15 @@ class Stream(torch._C._CudaStreamBase): | ||||
|  | ||||
|         Arguments: | ||||
|             event (Event): an event to wait for. | ||||
|  | ||||
|         .. note:: This is a wrapper around ``cudaStreamWaitEvent()``: see `CUDA | ||||
|            documentation`_ for more info. | ||||
|  | ||||
|            This function returns without waiting for :attr:`event`: only future | ||||
|            operations are affected. | ||||
|  | ||||
|         .. _CUDA documentation: | ||||
|            http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html | ||||
|         """ | ||||
|         check_error(cudart().cudaStreamWaitEvent(self, event, ctypes.c_int(0))) | ||||
|  | ||||
| @ -32,6 +45,9 @@ class Stream(torch._C._CudaStreamBase): | ||||
|  | ||||
|         Arguments: | ||||
|             stream (Stream): a stream to synchronize. | ||||
|  | ||||
|         .. note:: This function returns without waiting for currently enqueued | ||||
|            kernels in :attr:`stream`: only future operations are affected. | ||||
|         """ | ||||
|         self.wait_event(stream.record_event()) | ||||
|  | ||||
| @ -63,7 +79,14 @@ class Stream(torch._C._CudaStreamBase): | ||||
|         return True | ||||
|  | ||||
|     def synchronize(self): | ||||
|         """Wait for all the kernels in this stream to complete.""" | ||||
|         """Wait for all the kernels in this stream to complete. | ||||
|  | ||||
|         .. note:: This is a wrapper around ``cudaStreamSynchronize()``: see | ||||
|            `CUDA documentation`_ for more info. | ||||
|  | ||||
|         .. _CUDA documentation: | ||||
|            http://docs.nvidia.com/cuda/cuda-runtime-api/group__CUDART__STREAM.html | ||||
|         """ | ||||
|         check_error(cudart().cudaStreamSynchronize(self)) | ||||
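| A sketch of how these primitives fit together (the tensor sizes and the work itself are placeholders): | ||||
|  | ||||
|     import torch | ||||
|     s1 = torch.cuda.Stream() | ||||
|     s2 = torch.cuda.Stream() | ||||
|     with torch.cuda.stream(s1): | ||||
|         a = torch.cuda.FloatTensor(1000).normal_() | ||||
|     s2.wait_stream(s1)     # returns at once; future work on s2 waits for s1's kernels | ||||
|     with torch.cuda.stream(s2): | ||||
|         b = a * 2 | ||||
|     s2.synchronize()       # blocks the host until everything queued on s2 has finished | ||||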
|  | ||||
|     @staticmethod | ||||
| @ -107,10 +130,10 @@ class Event(object): | ||||
|  | ||||
|     Arguments: | ||||
|         enable_timing (bool): indicates if the event should measure time | ||||
|             (default: False) | ||||
|         blocking (bool): if true, :meth:`wait` will be blocking (default: False) | ||||
|         interprocess (bool): if true, the event can be shared between processes | ||||
|             (default: False) | ||||
|             (default: ``False``) | ||||
|         blocking (bool): if ``True``, :meth:`wait` will be blocking (default: ``False``) | ||||
|         interprocess (bool): if ``True``, the event can be shared between processes | ||||
|             (default: ``False``) | ||||
|     """ | ||||
|  | ||||
|     DEFAULT = 0x0 | ||||
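| A typical timing pattern with these events (the matrix size is arbitrary): | ||||
|  | ||||
|     import torch | ||||
|     x = torch.randn(1000, 1000).cuda() | ||||
|     start = torch.cuda.Event(enable_timing=True) | ||||
|     end = torch.cuda.Event(enable_timing=True) | ||||
|     start.record() | ||||
|     y = x.mm(x) | ||||
|     end.record() | ||||
|     torch.cuda.synchronize()           # make sure both events have completed | ||||
|     print(start.elapsed_time(end))     # time between the events, in milliseconds | ||||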
|  | ||||
| @ -1,17 +1,32 @@ | ||||
| """ | ||||
| r""" | ||||
| The ``distributions`` package contains parameterizable probability distributions | ||||
| and sampling functions. | ||||
|  | ||||
| The :meth:`log_prob` method is useful for policy gradient based methods. If the | ||||
| parameters of the distribution are differentiable, then the result of ``log_prob`` | ||||
| is also differentiable. | ||||
| Policy gradient methods can be implemented using the | ||||
| :meth:`~torch.distributions.Distribution.log_prob` method, when the probability | ||||
| density function is differentiable with respect to its parameters. A basic | ||||
| method is the REINFORCE rule: | ||||
|  | ||||
| Example:: | ||||
| .. math:: | ||||
|  | ||||
|     probs = network(input) | ||||
|     m = Multinomial(probs) | ||||
|     \Delta\theta  = \alpha r \frac{\partial\log p(a|\pi^\theta(s))}{\partial\theta} | ||||
|  | ||||
| where :math:`\theta` are the parameters, :math:`\alpha` is the learning rate, | ||||
| :math:`r` is the reward and :math:`p(a|\pi^\theta(s))` is the probability of | ||||
| taking action :math:`a` in state :math:`s` given policy :math:`\pi^\theta`. | ||||
|  | ||||
| In practice we would sample an action from the output of a network, apply this | ||||
| action in an environment, and then use ``log_prob`` to construct an equivalent | ||||
| loss function. Note that we use a negative because optimisers use gradient | ||||
| descent, whilst the rule above assumes gradient ascent. With a categorical | ||||
| policy, the code for implementing REINFORCE would be as follows:: | ||||
|  | ||||
|     probs = policy_network(state) | ||||
|     # NOTE: this is equivalent to what used to be called multinomial | ||||
|     m = Categorical(probs) | ||||
|     action = m.sample() | ||||
|     loss = -m.log_prob(action) * get_reward(env, action) | ||||
|     next_state, reward = env.step(action) | ||||
|     loss = -m.log_prob(action) * reward | ||||
|     loss.backward() | ||||
| """ | ||||
| import math | ||||
| @ -19,7 +34,7 @@ from numbers import Number | ||||
| import torch | ||||
|  | ||||
|  | ||||
| __all__ = ['Distribution', 'Bernoulli', 'Multinomial', 'Normal'] | ||||
| __all__ = ['Distribution', 'Bernoulli', 'Categorical', 'Normal'] | ||||
|  | ||||
|  | ||||
| class Distribution(object): | ||||
| @ -87,9 +102,12 @@ class Bernoulli(Distribution): | ||||
|         return log_pmf.gather(0, value.unsqueeze(0).long()).squeeze(0) | ||||
|  | ||||
|  | ||||
| class Multinomial(Distribution): | ||||
| class Categorical(Distribution): | ||||
|     r""" | ||||
|     Creates a multinomial distribution parameterized by `probs`. | ||||
|     Creates a categorical distribution parameterized by `probs`. | ||||
|  | ||||
|     .. note:: | ||||
|         It is equivalent to the distribution that ``multinomial()`` samples from. | ||||
|  | ||||
|     Samples are integers from `0 ... K-1` where `K` is probs.size(-1). | ||||
|  | ||||
| @ -102,7 +120,7 @@ class Multinomial(Distribution): | ||||
|  | ||||
|     Example:: | ||||
|  | ||||
|         >>> m = Multinomial(torch.Tensor([ 0.25, 0.25, 0.25, 0.25 ])) | ||||
|         >>> m = Categorical(torch.Tensor([ 0.25, 0.25, 0.25, 0.25 ])) | ||||
|         >>> m.sample()  # equal probability of 0, 1, 2, 3 | ||||
|          3 | ||||
|         [torch.LongTensor of size 1] | ||||
|  | ||||
| @ -9,15 +9,15 @@ __all__ = [ | ||||
|  | ||||
|  | ||||
| def split(tensor, split_size, dim=0): | ||||
|     """Splits the tensor into equally sized chunks (if possible). | ||||
|     """Splits the tensor into chunks all of size :attr:`split_size` (if possible). | ||||
|  | ||||
|     Last chunk will be smaller if the tensor size along a given dimension | ||||
|     is not divisible by ``split_size``. | ||||
|     is not divisible by :attr:`split_size`. | ||||
|  | ||||
|     Arguments: | ||||
|         tensor (Tensor): tensor to split. | ||||
|         split_size (int): size of a single chunk. | ||||
|         dim (int): dimension along which to split the tensor. | ||||
|         tensor (Tensor): the tensor to split | ||||
|         split_size (int): size of a single chunk | ||||
|         dim (int): dimension along which to split the tensor | ||||
|     """ | ||||
|     if dim < 0: | ||||
|         dim += tensor.dim() | ||||
| @ -32,12 +32,12 @@ def split(tensor, split_size, dim=0): | ||||
|  | ||||
|  | ||||
| def chunk(tensor, chunks, dim=0): | ||||
|     """Splits a tensor into a number of chunks along a given dimension. | ||||
|     """Splits a tensor into a specific number of chunks. | ||||
|  | ||||
|     Arguments: | ||||
|         tensor (Tensor): tensor to split. | ||||
|         chunks (int): number of chunks to return. | ||||
|         dim (int): dimension along which to split the tensor. | ||||
|         tensor (Tensor): the tensor to split | ||||
|         chunks (int): number of chunks to return | ||||
|         dim (int): dimension along which to split the tensor | ||||
|     """ | ||||
|     if dim < 0: | ||||
|         dim += tensor.dim() | ||||
| @ -51,9 +51,9 @@ def stack(sequence, dim=0, out=None): | ||||
|     All tensors need to be of the same size. | ||||
|  | ||||
|     Arguments: | ||||
|         sequence (Sequence): sequence of tensors to concatenate. | ||||
|         sequence (Sequence): sequence of tensors to concatenate | ||||
|         dim (int): dimension to insert. Has to be between 0 and the number | ||||
|             of dimensions of concatenated tensors (inclusive). | ||||
|             of dimensions of concatenated tensors (inclusive) | ||||
|     """ | ||||
|     if len(sequence) == 0: | ||||
|         raise ValueError("stack expects a non-empty sequence of tensors") | ||||
| @ -72,8 +72,8 @@ def unbind(tensor, dim=0): | ||||
|     Returns a tuple of all slices along a given dimension, already without it. | ||||
|  | ||||
|     Arguments: | ||||
|         tensor (Tensor): tensor to unbind. | ||||
|         dim (int): dimension to remove. | ||||
|         tensor (Tensor): the tensor to unbind | ||||
|         dim (int): dimension to remove | ||||
|     """ | ||||
|     return tuple(tensor.select(dim, i) for i in _range(tensor.size(dim))) | ||||
|  | ||||
| @ -87,10 +87,10 @@ def btriunpack(LU_data, LU_pivots, unpack_data=True, unpack_pivots=True): | ||||
|       2: The U tensor. | ||||
|  | ||||
|     Arguments: | ||||
|         LU_data (Tensor): The packed LU factorization data. | ||||
|         LU_pivots (Tensor): The packed LU factorization pivots. | ||||
|         unpack_data (bool): Flag indicating if the data should be unpacked. | ||||
|         unpack_pivots (bool): Flag indicating if the pivots should be unpacked. | ||||
|         LU_data (Tensor): the packed LU factorization data | ||||
|         LU_pivots (Tensor): the packed LU factorization pivots | ||||
|         unpack_data (bool): flag indicating if the data should be unpacked | ||||
|         unpack_pivots (bool): flag indicating if the pivots should be unpacked | ||||
|     """ | ||||
|  | ||||
|     nBatch, sz, _ = LU_data.size() | ||||
| @ -122,7 +122,7 @@ def btriunpack(LU_data, LU_pivots, unpack_data=True, unpack_pivots=True): | ||||
|  | ||||
|  | ||||
| def matmul(tensor1, tensor2, out=None): | ||||
|     """Matrix product of two tensors. | ||||
|     r"""Matrix product of two tensors. | ||||
|  | ||||
|     The behavior depends on the dimensionality of the tensors as follows: | ||||
|  | ||||
| @ -139,17 +139,18 @@ def matmul(tensor1, tensor2, out=None): | ||||
|       batched matrix multiply and removed after.  If the second argument is 1-dimensional, a | ||||
|       1 is appended to its dimension for the purpose of the batched matrix multiple and removed after. | ||||
|       The non-matrix (i.e. batch) dimensions are :ref:`broadcasted <broadcasting-semantics>` (and thus | ||||
|       must be broadcastable).  For example, if :attr:`tensor1` is a `j x 1 x n x m` Tensor | ||||
|       and :attr:`tensor2` is a `k x m x p` Tensor, :attr:`out` will be an `j x k x n x p` Tensor. | ||||
|       must be broadcastable).  For example, if :attr:`tensor1` is a | ||||
|       :math:`(j \times 1 \times n \times m)` tensor and :attr:`tensor2` is a :math:`(k \times m \times p)` | ||||
|       tensor, :attr:`out` will be a :math:`(j \times k \times n \times p)` tensor. | ||||
|  | ||||
|     .. note:: | ||||
|  | ||||
|         The 1-dimensional dot product version of this function does not support an :attr:`out` parameter. | ||||
|  | ||||
|     Arguments: | ||||
|         tensor1 (Tensor): First tensor to be multiplied | ||||
|         tensor2 (Tensor): Second tensor to be multiplied | ||||
|         out (Tensor, optional): Output tensor | ||||
|         tensor1 (Tensor): the first tensor to be multiplied | ||||
|         tensor2 (Tensor): the second tensor to be multiplied | ||||
|         out (Tensor, optional): the output tensor | ||||
|     """ | ||||
|     dim_tensor1 = tensor1.dim() | ||||
|     dim_tensor2 = tensor2.dim() | ||||
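| The batched case described in the docstring, restated as a concrete shape example (sizes are illustrative): | ||||
|  | ||||
|     import torch | ||||
|     a = torch.randn(10, 1, 3, 4)       # (j=10, 1, n=3, m=4) | ||||
|     b = torch.randn(5, 4, 2)           # (k=5, m=4, p=2) | ||||
|     print(torch.matmul(a, b).size())   # torch.Size([10, 5, 3, 2]); batch dims broadcast | ||||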
|  | ||||
| @ -31,6 +31,30 @@ HOLE = Placeholder("HOLE") | ||||
| VOLATILE = Placeholder("VOLATILE") | ||||
|  | ||||
|  | ||||
| # This global variable is set when we are tracing a *forwards* computation. | ||||
| # It is intended to be a cheap way to test if tracing has occurred, before | ||||
| # doing the slower path using `get_tracing_state` (below.) | ||||
| _tracing = False | ||||
|  | ||||
|  | ||||
| def get_tracing_state(args): | ||||
|     if not torch._C._is_tracing(args): | ||||
|         return None | ||||
|     return torch._C._get_tracing_state(args) | ||||
|  | ||||
|  | ||||
| @contextlib.contextmanager | ||||
| def scope(scope_name, *vars): | ||||
|     tracing_state = get_tracing_state(vars) | ||||
|     if tracing_state: | ||||
|         tracing_state.push_scope(scope_name) | ||||
|     try: | ||||
|         yield | ||||
|     finally: | ||||
|         if tracing_state: | ||||
|             tracing_state.pop_scope() | ||||
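| A hypothetical use of this context manager inside a traced module (assuming it is exposed as torch.jit.scope; the module and the name 'block1' are made up). The pushed scope only takes effect while the given variables are actually being traced: | ||||
|  | ||||
|     import torch | ||||
|     import torch.nn as nn | ||||
|  | ||||
|     class Net(nn.Module): | ||||
|         def __init__(self): | ||||
|             super(Net, self).__init__() | ||||
|             self.block1 = nn.Linear(4, 4)            # placeholder submodule | ||||
|  | ||||
|         def forward(self, x): | ||||
|             with torch.jit.scope('block1', x):       # pushes/pops the scope during tracing | ||||
|                 x = self.block1(x) | ||||
|             return x | ||||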
|  | ||||
|  | ||||
| def compile(arg=None, **kwargs): | ||||
|     """ | ||||
|     Decorator which marks a function or module class as eligible for | ||||
| @ -69,10 +93,10 @@ def compile(arg=None, **kwargs): | ||||
|             (as we always wait to see all derivatives before compiling.) | ||||
|             Default: 1 (i.e., we will compile forwards and backwards, but not | ||||
|             double-backwards). | ||||
|         optimize (bool, optional): whether or not to apply optimizations.  Default: True. | ||||
|         optimize (bool, optional): whether or not to apply optimizations.  Default: ``True``. | ||||
|  | ||||
|     Debug arguments: | ||||
|         time (bool, optional): if True, whenever we execute the model in question, we | ||||
|         time (bool, optional): if ``True``, whenever we execute the model in question, we | ||||
|             will also print out some timing information for how long the model | ||||
|             took to execute.  At the moment, there are three types of timings we | ||||
|             emit: | ||||
| @ -87,10 +111,10 @@ def compile(arg=None, **kwargs): | ||||
|                 - optimized: the time it took to execute the optimized model. | ||||
|  | ||||
|             At the moment, all of these timings are for the forward pass only. | ||||
|             Default: False. | ||||
|         enabled (bool, optional): if False, compilation is disabled and you | ||||
|             Default: ``False``. | ||||
|         enabled (bool, optional): if ``False``, compilation is disabled and you | ||||
|             will get back your original model.  This is a convenient way to | ||||
|             disable tracing without having to delete the annotation. Default: True. | ||||
|             disable tracing without having to delete the annotation. Default: ``True``. | ||||
|  | ||||
|     Example: Compile as class decorator. | ||||
|  | ||||
| @ -227,6 +251,8 @@ class TracedModule(Module): | ||||
|         self.nderivs = nderivs | ||||
|  | ||||
|     def forward(self, *args, **kwargs): | ||||
|         global _tracing | ||||
|  | ||||
|         # TODO: Possible optimization: use the unflattened | ||||
|         # output so we don't unflatten it when we get out | ||||
|         # NB: Not a method because _raw_trace can't deal | ||||
| @ -238,7 +264,9 @@ class TracedModule(Module): | ||||
|         kw_items = list(kwargs.items()) | ||||
|         kw_items.sort() | ||||
|         in_vars, in_struct = _flatten((args, tuple(kw_items)), self.state_dict(keep_vars=True).values()) | ||||
|         _tracing = True | ||||
|         trace, (out_vars, out_struct) = traced_inner(in_vars, in_struct) | ||||
|         _tracing = False | ||||
|         out, unmatched = _unflatten(out_vars, out_struct) | ||||
|         assert len(unmatched) == 0 | ||||
|         return trace, out | ||||
| @ -396,6 +424,10 @@ class _CompiledMixin(object): | ||||
|         # TODO: Figure out how to call parent destructor, if there is one. | ||||
|         # Apparently, this is buggy: | ||||
|         #     https://stackoverflow.com/questions/22972720/python-cant-invoke-parent-class-destructor-with-super | ||||
|         # NB: Have to mangle this by hand! | ||||
|         if not (hasattr(self, '_CompiledMixin__misses') and hasattr(self, '_CompiledMixin__hits')): | ||||
|             # Probably died during construction | ||||
|             return | ||||
|         if self.__misses != 0 and self.__hits == 0: | ||||
|             warnings.warn("{} was marked with JIT and invoked {} times, " | ||||
|                           "but we never successfully used compiled code." | ||||
|  | ||||
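Putting the `compile` docstring arguments above into context, here is a hedged usage sketch of the class-decorator form it documents; the `MLP` module and its layers are made up for illustration:

```python
import torch
import torch.jit
import torch.nn as nn
import torch.nn.functional as F


# nderivs=1 compiles forwards and backwards (but not double-backwards);
# enabled=False would hand back the original, untraced module, and
# time=True would print the timing breakdown described in the docstring.
@torch.jit.compile(nderivs=1, optimize=True, enabled=True)
class MLP(nn.Module):
    def __init__(self):
        super(MLP, self).__init__()
        self.fc = nn.Linear(10, 10)

    def forward(self, x):
        return F.relu(self.fc(x))
```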
| @ -18,18 +18,22 @@ class DistKLDivCriterion(Criterion): | ||||
|             input, | ||||
|             target, | ||||
|             self.output_tensor, | ||||
|             self.sizeAverage | ||||
|             self.sizeAverage, | ||||
|             True,  # reduce | ||||
|         ) | ||||
|         self.output = self.output_tensor[0] | ||||
|         return self.output | ||||
|  | ||||
|     def updateGradInput(self, input, target): | ||||
|         assert input.is_same_size(target) | ||||
|         implicit_gradOutput = torch.ones(1).type_as(input) | ||||
|         self._backend.DistKLDivCriterion_updateGradInput( | ||||
|             self._backend.library_state, | ||||
|             input, | ||||
|             target, | ||||
|             implicit_gradOutput, | ||||
|             self.gradInput, | ||||
|             self.sizeAverage | ||||
|             self.sizeAverage, | ||||
|             True,  # reduce | ||||
|         ) | ||||
|         return self.gradInput | ||||
|  | ||||
| @ -29,7 +29,6 @@ class ELU(Module): | ||||
|     def updateGradInput(self, input, gradOutput): | ||||
|         self._backend.ELU_updateGradInput( | ||||
|             self._backend.library_state, | ||||
|             input, | ||||
|             gradOutput, | ||||
|             self.gradInput, | ||||
|             self.output, | ||||
|  | ||||
| @ -20,14 +20,14 @@ class Padding(Module): | ||||
|         super(Padding, self).__init__() | ||||
|  | ||||
|     def updateOutput(self, input): | ||||
|         outputSize = list(input.size()) | ||||
|         outputSize[self.dim] += abs(self.pad) | ||||
|         self.outputSize = torch.Size(outputSize) | ||||
|         dim = self.dim | ||||
|  | ||||
|         if hasattr(self, "nInputDim") and self.nInputDim > 0 and input.dim() != self.nInputDim: | ||||
|             dim = dim + 1 | ||||
|  | ||||
|         outputSize = list(input.size()) | ||||
|         outputSize[dim] += abs(self.pad) | ||||
|         self.outputSize = torch.Size(outputSize) | ||||
|  | ||||
|         self.output.resize_(self.outputSize) | ||||
|         self.output.fill_(self.value) | ||||
|         index = self.index | ||||
|  | ||||
| @ -66,6 +66,7 @@ IF ($ENV{TH_BINARY_BUILD}) | ||||
|   IF (UNIX AND NOT APPLE) | ||||
|     # hiding statically linked library symbols, this flag is not available for the linker under MACOSX | ||||
|     SET(CMAKE_CXX_FLAGS "-Wl,--exclude-libs,libstdc++.a ${CMAKE_CXX_FLAGS}") | ||||
|     set (CMAKE_SHARED_LINKER_FLAGS "-Wl,--version-script=${CMAKE_CURRENT_SOURCE_DIR}/../../../tools/pytorch.version") | ||||
|   ENDIF(UNIX AND NOT APPLE) | ||||
| ENDIF() | ||||
|  | ||||
|  | ||||
| @ -17,8 +17,15 @@ public: | ||||
|   Type & getType(Backend p, ScalarType s) { | ||||
|     initCUDAIfNeeded(p); | ||||
|     auto & type = type_registry[static_cast<int>(p)][static_cast<int>(s)]; | ||||
|     if(!type) | ||||
|  | ||||
|     if(!type) { | ||||
|       // there is only a single Undefined Type. | ||||
|       if (p == Backend::Undefined || s == ScalarType::Undefined) { | ||||
|         auto & undef = type_registry[static_cast<int>(Backend::Undefined)][static_cast<int>(ScalarType::Undefined)]; | ||||
|         if (undef) return *undef; | ||||
|       } | ||||
|       runtime_error("%s%sType is not enabled.",toString(p),toString(s)); | ||||
|     } | ||||
|     return *type; | ||||
|   } | ||||
|   Generator & defaultGenerator(Backend p) { | ||||
|  | ||||
| @ -13,28 +13,28 @@ static DLDataType getDLDataType(const Type& type) { | ||||
|   dtype.bits = type.elementSizeInBytes() * 8; | ||||
|   switch (type.scalarType()) { | ||||
|     case ScalarType::Byte: | ||||
|       dtype.code = DLDataTypeCode::kUInt; | ||||
|       dtype.code = DLDataTypeCode::kDLUInt; | ||||
|       break; | ||||
|     case ScalarType::Char: | ||||
|       dtype.code = DLDataTypeCode::kInt; | ||||
|       dtype.code = DLDataTypeCode::kDLInt; | ||||
|       break; | ||||
|     case ScalarType::Double: | ||||
|       dtype.code = DLDataTypeCode::kFloat; | ||||
|       dtype.code = DLDataTypeCode::kDLFloat; | ||||
|       break; | ||||
|     case ScalarType::Float: | ||||
|       dtype.code = DLDataTypeCode::kFloat; | ||||
|       dtype.code = DLDataTypeCode::kDLFloat; | ||||
|       break; | ||||
|     case ScalarType::Int: | ||||
|       dtype.code = DLDataTypeCode::kInt; | ||||
|       dtype.code = DLDataTypeCode::kDLInt; | ||||
|       break; | ||||
|     case ScalarType::Long: | ||||
|       dtype.code = DLDataTypeCode::kInt; | ||||
|       dtype.code = DLDataTypeCode::kDLInt; | ||||
|       break; | ||||
|     case ScalarType::Short: | ||||
|       dtype.code = DLDataTypeCode::kInt; | ||||
|       dtype.code = DLDataTypeCode::kDLInt; | ||||
|       break; | ||||
|     case ScalarType::Half: | ||||
|       dtype.code = DLDataTypeCode::kFloat; | ||||
|       dtype.code = DLDataTypeCode::kDLFloat; | ||||
|       break; | ||||
|     case ScalarType::NumOptions: | ||||
|       throw std::logic_error("NumOptions is not a valid ScalarType"); | ||||
| @ -47,9 +47,9 @@ static DLContext getDLContext(const Type& type, const int64_t& device_id) { | ||||
|   DLContext ctx; | ||||
|   ctx.device_id = device_id; | ||||
|   if (type.isCuda()) { | ||||
|     ctx.device_type = DLDeviceType::kGPU; | ||||
|     ctx.device_type = DLDeviceType::kDLGPU; | ||||
|   } else { | ||||
|     ctx.device_type = DLDeviceType::kCPU; | ||||
|     ctx.device_type = DLDeviceType::kDLCPU; | ||||
|   } | ||||
|   return ctx; | ||||
| } | ||||
| @ -58,10 +58,10 @@ static DLContext getDLContext(const Type& type, const int64_t& device_id) { | ||||
| static Backend getATenBackend(const DLContext& ctx) { | ||||
|   Backend backend; | ||||
|   switch (ctx.device_type) { | ||||
|     case DLDeviceType::kCPU: | ||||
|     case DLDeviceType::kDLCPU: | ||||
|       backend = Backend::CPU; | ||||
|       break; | ||||
|     case DLDeviceType::kGPU: | ||||
|     case DLDeviceType::kDLGPU: | ||||
|       backend = Backend::CUDA; | ||||
|       break; | ||||
|     default: | ||||
| @ -75,7 +75,7 @@ ScalarType toScalarType(const DLDataType& dtype) { | ||||
|   ScalarType stype; | ||||
|   if (dtype.lanes != 1) throw std::logic_error("ATen does not support lanes != 1"); | ||||
|   switch (dtype.code) { | ||||
|     case DLDataTypeCode::kUInt: | ||||
|     case DLDataTypeCode::kDLUInt: | ||||
|       switch (dtype.bits) { | ||||
|         case 8: | ||||
|           stype = ScalarType::Byte; | ||||
| @ -84,7 +84,7 @@ ScalarType toScalarType(const DLDataType& dtype) { | ||||
|           throw std::logic_error("Unsupported kUInt bits " + std::to_string(dtype.bits)); | ||||
|       } | ||||
|       break; | ||||
|     case DLDataTypeCode::kInt: | ||||
|     case DLDataTypeCode::kDLInt: | ||||
|       switch (dtype.bits) { | ||||
|         case 8: | ||||
|           stype = ScalarType::Char; | ||||
| @ -102,7 +102,7 @@ ScalarType toScalarType(const DLDataType& dtype) { | ||||
|           throw std::logic_error("Unsupported kInt bits " + std::to_string(dtype.bits)); | ||||
|       } | ||||
|       break; | ||||
|     case DLDataTypeCode::kFloat: | ||||
|     case DLDataTypeCode::kDLFloat: | ||||
|       switch (dtype.bits) { | ||||
|         case 16: | ||||
|           stype = ScalarType::Half; | ||||
| @ -128,8 +128,8 @@ struct ATenDLMTensor { | ||||
|   DLManagedTensor tensor; | ||||
| }; | ||||
|  | ||||
| void destructor(DLManagedTensor * arg) { | ||||
|   delete static_cast<ATenDLMTensor*>(arg->ctx); | ||||
| void deleter(DLManagedTensor * arg) { | ||||
|   delete static_cast<ATenDLMTensor*>(arg->manager_ctx); | ||||
| } | ||||
|  | ||||
|  | ||||
| @ -138,33 +138,33 @@ void destructor(DLManagedTensor * arg) { | ||||
| DLManagedTensor* toDLPack(const Tensor& src) { | ||||
|   ATenDLMTensor * atDLMTensor(new ATenDLMTensor); | ||||
|   atDLMTensor->handle = src; | ||||
|   atDLMTensor->tensor.ctx = atDLMTensor; | ||||
|   atDLMTensor->tensor.destructor = &destructor; | ||||
|   atDLMTensor->tensor.dlTensor.data = src.data_ptr(); | ||||
|   atDLMTensor->tensor.manager_ctx = atDLMTensor; | ||||
|   atDLMTensor->tensor.deleter = &deleter; | ||||
|   atDLMTensor->tensor.dl_tensor.data = src.data_ptr(); | ||||
|   int64_t device_id = 0; | ||||
|   if (src.type().isCuda()) { | ||||
|     device_id = src.get_device(); | ||||
|   } | ||||
|   atDLMTensor->tensor.dlTensor.ctx = getDLContext(src.type(), device_id); | ||||
|   atDLMTensor->tensor.dlTensor.ndim = src.dim(); | ||||
|   atDLMTensor->tensor.dlTensor.dtype = getDLDataType(src.type()); | ||||
|   atDLMTensor->tensor.dlTensor.shape = const_cast<int64_t*>(src.sizes().data()); | ||||
|   atDLMTensor->tensor.dlTensor.strides = const_cast<int64_t*>(src.strides().data()); | ||||
|   atDLMTensor->tensor.dlTensor.byte_offset = 0; | ||||
|   atDLMTensor->tensor.dl_tensor.ctx = getDLContext(src.type(), device_id); | ||||
|   atDLMTensor->tensor.dl_tensor.ndim = src.dim(); | ||||
|   atDLMTensor->tensor.dl_tensor.dtype = getDLDataType(src.type()); | ||||
|   atDLMTensor->tensor.dl_tensor.shape = const_cast<int64_t*>(src.sizes().data()); | ||||
|   atDLMTensor->tensor.dl_tensor.strides = const_cast<int64_t*>(src.strides().data()); | ||||
|   atDLMTensor->tensor.dl_tensor.byte_offset = 0; | ||||
|   return &(atDLMTensor->tensor); | ||||
| } | ||||
|  | ||||
|  | ||||
| Tensor fromDLPack(const DLManagedTensor* src) { | ||||
|   Backend backend = getATenBackend(src->dlTensor.ctx); | ||||
|   ScalarType stype = toScalarType(src->dlTensor.dtype); | ||||
|   Backend backend = getATenBackend(src->dl_tensor.ctx); | ||||
|   ScalarType stype = toScalarType(src->dl_tensor.dtype); | ||||
|   auto deleter = [src](void * self) { | ||||
|     src->destructor(const_cast<DLManagedTensor*>(src)); | ||||
|     src->deleter(const_cast<DLManagedTensor*>(src)); | ||||
|   }; | ||||
|   return getType(backend, stype).tensorFromBlob( | ||||
|       src->dlTensor.data, | ||||
|       IntList(src->dlTensor.shape, src->dlTensor.ndim), | ||||
|       IntList(src->dlTensor.strides, src->dlTensor.ndim), | ||||
|       src->dl_tensor.data, | ||||
|       IntList(src->dl_tensor.shape, src->dl_tensor.ndim), | ||||
|       IntList(src->dl_tensor.strides, src->dl_tensor.ndim), | ||||
|       deleter); | ||||
| } | ||||
| } //namespace at | ||||
|  | ||||
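The field renames above (`ctx` → `manager_ctx`, `destructor` → `deleter`, `dlTensor` → `dl_tensor`) align ATen with the upstream DLPack struct layout. At the Python level the same exchange goes through a converter module; a short sketch, assuming `torch.utils.dlpack` is available in this release:

```python
import torch
from torch.utils import dlpack

t = torch.randn(2, 3)
capsule = dlpack.to_dlpack(t)    # wraps the DLManagedTensor built by at::toDLPack
u = dlpack.from_dlpack(capsule)  # zero-copy: shares storage with `t`

u[0, 0] = 42
print(t[0, 0])                   # 42.0, since both tensors view the same memory
```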
| @ -579,13 +579,22 @@ | ||||
|     - CPU | ||||
|     - CUDA | ||||
|   return: argument 0 | ||||
|   arguments: | ||||
|     - arg: THTensor* result | ||||
|       output: True | ||||
|     - accreal start | ||||
|     - accreal end | ||||
|     - arg: accreal step | ||||
|       default: 1 | ||||
|   options: | ||||
|     - cname: arange | ||||
|       arguments: | ||||
|         - arg: THTensor* result | ||||
|           output: True | ||||
|         - accreal start | ||||
|         - accreal end | ||||
|         - arg: accreal step | ||||
|           default: 1 | ||||
|     - cname: arange | ||||
|       arguments: | ||||
|         - arg: THTensor* result | ||||
|           output: True | ||||
|         - CONSTANT 0 | ||||
|         - accreal end | ||||
|         - CONSTANT 1 | ||||
| ]] | ||||
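The second `arange` option above binds `start` to `CONSTANT 0` and `step` to `CONSTANT 1`, which is what makes an end-only call legal. A quick sketch of the two equivalent spellings this enables:

```python
import torch

a = torch.arange(5)        # implicit start=0, step=1
b = torch.arange(0, 5, 1)  # fully spelled out
print(torch.equal(a, b))   # True
```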
| [[ | ||||
|   name: scatter_ | ||||
|  | ||||
| @ -1,10 +1,20 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ATen/Tensor.h" | ||||
| #include <functional> | ||||
| #include <sstream> | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
| // avoid copy-construction of Tensor by using a reference_wrapper. | ||||
| inline void check_defined(std::initializer_list<std::reference_wrapper<const Tensor>> tensors, const char *api_name) { | ||||
|   for (auto& t : tensors) { | ||||
|     if (!t.get().defined()) { | ||||
|       runtime_error("%s(...) called with an undefined Tensor", api_name); | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_expand) { | ||||
|   if (tensor.sizes().equals(to_expand.sizes())) { | ||||
|     return std::make_tuple(to_expand); | ||||
| @ -13,6 +23,11 @@ inline std::tuple<Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_ | ||||
|   return std::make_tuple(to_expand.expand(tensor.sizes())); | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_expand, const char *api_name) { | ||||
|   check_defined({tensor, to_expand}, api_name); | ||||
|   return expand_inplace(tensor, to_expand); | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor, Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_expand1, const Tensor &to_expand2) { | ||||
|   if (tensor.sizes().equals(to_expand1.sizes()) && tensor.sizes().equals((to_expand2.sizes()))) { | ||||
|     return std::make_tuple(to_expand1, to_expand2); | ||||
| @ -21,6 +36,12 @@ inline std::tuple<Tensor, Tensor> expand_inplace(const Tensor &tensor, const Ten | ||||
|   return std::make_tuple(to_expand1.expand(tensor.sizes()), to_expand2.expand(tensor.sizes())); | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor, Tensor> expand_inplace(const Tensor &tensor, const Tensor &to_expand1, const Tensor &to_expand2, | ||||
|                                                  const char *api_name) { | ||||
|   check_defined({tensor, to_expand1, to_expand2}, api_name); | ||||
|   return expand_inplace(tensor, to_expand1, to_expand2); | ||||
| } | ||||
|  | ||||
| inline std::vector<int64_t> infer_size2(IntList a, IntList b) { | ||||
|   auto dimsA = a.size(); | ||||
|   auto dimsB = b.size(); | ||||
| @ -55,9 +76,14 @@ inline std::tuple<Tensor, Tensor> expand_outplace(const Tensor &to_expand1, cons | ||||
|   return std::make_tuple(to_expand1.expand(expanded_size), to_expand2.expand(expanded_size)); | ||||
| } | ||||
|  | ||||
| std::tuple<Tensor, Tensor, Tensor> expand_outplace(const Tensor &to_expand1, | ||||
|                                                    const Tensor &to_expand2, | ||||
|                                                    const Tensor &to_expand3) { | ||||
| inline std::tuple<Tensor, Tensor> expand_outplace(const Tensor &to_expand1, const Tensor &to_expand2, const char *api_name) { | ||||
|   check_defined({to_expand1, to_expand2}, api_name); | ||||
|   return expand_outplace(to_expand1, to_expand2); | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor, Tensor, Tensor> expand_outplace(const Tensor &to_expand1, | ||||
|                                                           const Tensor &to_expand2, | ||||
|                                                           const Tensor &to_expand3) { | ||||
|   if (to_expand1.sizes().equals(to_expand2.sizes()) && to_expand1.sizes().equals(to_expand3.sizes())) { | ||||
|     return std::make_tuple(to_expand1, to_expand2, to_expand3); | ||||
|   } | ||||
| @ -67,6 +93,14 @@ std::tuple<Tensor, Tensor, Tensor> expand_outplace(const Tensor &to_expand1, | ||||
|   return std::make_tuple(to_expand1.expand(expanded_size), to_expand2.expand(expanded_size), to_expand3.expand(expanded_size)); | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor, Tensor, Tensor> expand_outplace(const Tensor &to_expand1, | ||||
|                                                           const Tensor &to_expand2, | ||||
|                                                           const Tensor &to_expand3, | ||||
|                                                           const char *api_name) { | ||||
|   check_defined({to_expand1, to_expand2, to_expand3}, api_name); | ||||
|   return expand_outplace(to_expand1, to_expand2, to_expand3); | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor> expand_size(const Tensor &to_expand, IntList sizes) { | ||||
|   if(to_expand.sizes().equals(sizes)) { | ||||
|     return std::make_tuple(to_expand); | ||||
| @ -75,4 +109,9 @@ inline std::tuple<Tensor> expand_size(const Tensor &to_expand, IntList sizes) { | ||||
|   return std::make_tuple(to_expand.expand(sizes)); | ||||
| } | ||||
|  | ||||
| inline std::tuple<Tensor> expand_size(const Tensor &to_expand, IntList sizes, const char *api_name) { | ||||
|   check_defined({to_expand}, api_name); | ||||
|   return expand_size(to_expand, sizes); | ||||
| } | ||||
|  | ||||
| } | ||||
|  | ||||
| @ -128,6 +128,24 @@ | ||||
|     ${THTensor}_setStorage(${state,}result_->tensor, self_->tensor->storage, self_->tensor->storageOffset, size_, stride_); | ||||
| ]] | ||||
|  | ||||
| [[ | ||||
|   name: as_strided_ | ||||
|   variants: [method,function] | ||||
|   return: argument 0 | ||||
|   arguments: | ||||
|     - THTensor* self | ||||
|     - THSize* size | ||||
|     - THStride* stride | ||||
|     - arg: int64_t storage_offset | ||||
|       default: -1 | ||||
|   aten_custom_call: | | ||||
|     if (storage_offset == -1) { | ||||
|       storage_offset = self_->tensor->storageOffset; | ||||
|     } | ||||
|     ${THTensor}_setStorage(${state,}self_->tensor, self_->tensor->storage, storage_offset, size_, stride_); | ||||
|     self_->maybeScalar(size.size() == 0); | ||||
| ]] | ||||
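The new `as_strided_` declaration above rewrites a tensor's size/stride metadata in place over its existing storage; the `-1` default for `storage_offset` means 'keep the current offset', per the `aten_custom_call` block. A minimal sketch of the method variant it generates:

```python
import torch

x = torch.arange(12)           # 12 contiguous elements in one storage
x.as_strided_((3, 4), (4, 1))  # reinterpret the same storage as a 3x4 view, in place
print(x.size())                # torch.Size([3, 4])
```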
|  | ||||
| [[ | ||||
|   name: cat | ||||
|   cname: catArray | ||||
|  | ||||
| @ -23,7 +23,7 @@ public: | ||||
|  | ||||
|   explicit Scalar(const detail::TensorBase & t) | ||||
|   : tag(Tag::HAS_t), t(t) { | ||||
|     AT_ASSERT(t.pImpl, "Attempting to create a Scalar from an undefined tensor"); | ||||
|     AT_ASSERT(t.defined(), "Attempting to create a Scalar from an undefined tensor"); | ||||
|     AT_ASSERT(t.dim() == 0, "Attempting to create a Scalar from a %d dim tensor", t.dim()); | ||||
|   } | ||||
|  | ||||
|  | ||||
| @ -23,6 +23,7 @@ enum class ScalarType { | ||||
|   n, | ||||
|   AT_FORALL_SCALAR_TYPES(DEFINE_ENUM) | ||||
| #undef DEFINE_ENUM | ||||
|   Undefined, | ||||
|   NumOptions | ||||
| }; | ||||
|  | ||||
| @ -31,6 +32,7 @@ enum class Backend { | ||||
|   CUDA, | ||||
|   SparseCPU, | ||||
|   SparseCUDA, | ||||
|   Undefined, | ||||
|   NumOptions | ||||
| }; | ||||
|  | ||||
| @ -62,7 +64,7 @@ static inline const char * toString(ScalarType t) { | ||||
|   switch(t) { | ||||
|     AT_FORALL_SCALAR_TYPES(DEFINE_CASE) | ||||
|     default: | ||||
|       return "UNKNOWN_SCALAR_TYPE"; | ||||
|       return "UNKNOWN_SCALAR"; | ||||
|   } | ||||
| #undef DEFINE_CASE | ||||
| } | ||||
|  | ||||
| @ -1,29 +1,32 @@ | ||||
| #pragma once | ||||
|  | ||||
| #include "ATen/TensorImpl.h" | ||||
| #include "ATen/UndefinedTensor.h" | ||||
|  | ||||
| namespace at { namespace detail { | ||||
|  | ||||
| // TensorBase is the base class for Tensor which handles the reference counting | ||||
| struct TensorBase { | ||||
|   TensorBase() | ||||
|   : pImpl(nullptr) {} | ||||
|   TensorBase(): TensorBase(UndefinedTensor::singleton(), false) {} | ||||
|   TensorBase(TensorImpl * self, bool retain) | ||||
|   : pImpl(self) { | ||||
|     if(pImpl != nullptr && retain) | ||||
|     if (pImpl == nullptr) { | ||||
|       throw std::runtime_error("TensorBase with nullptr not supported"); | ||||
|     } | ||||
|     if(retain && pImpl != UndefinedTensor::singleton()) | ||||
|       pImpl->retain(); | ||||
|   } | ||||
|   TensorBase(const TensorBase & rhs) | ||||
|   : pImpl(rhs.pImpl) { | ||||
|     if(pImpl != nullptr) | ||||
|     if (pImpl != UndefinedTensor::singleton()) | ||||
|       pImpl->retain(); | ||||
|   } | ||||
|   TensorBase(TensorBase && rhs) noexcept | ||||
|   : pImpl(rhs.pImpl) { | ||||
|     rhs.pImpl = nullptr; | ||||
|     rhs.pImpl = UndefinedTensor::singleton(); | ||||
|   } | ||||
|   ~TensorBase() { | ||||
|     if(pImpl != nullptr) | ||||
|     if (pImpl != UndefinedTensor::singleton()) | ||||
|       pImpl->release(); | ||||
|   } | ||||
|   TensorBase & operator=(TensorBase && rhs) & { | ||||
| @ -48,6 +51,9 @@ struct TensorBase { | ||||
|   TensorImpl * get() const { | ||||
|     return pImpl; | ||||
|   } | ||||
|   bool defined() const { | ||||
|     return pImpl != UndefinedTensor::singleton(); | ||||
|   } | ||||
|  | ||||
|   friend struct Type; | ||||
|  | ||||
|  | ||||
| @ -11,6 +11,7 @@ inline Tensor & Tensor::operator=(Scalar v) && { | ||||
|   return assign_(v); | ||||
| } | ||||
| inline Tensor & Tensor::assign_(Scalar v) { | ||||
|   AT_ASSERT(defined(), "attempting to assign a scalar to an undefined tensor"); | ||||
|   AT_ASSERT(dim() == 0, "attempting to assign a scalar to %d dim tensor", dim()); | ||||
|   pImpl->assign_(v); | ||||
|   return *this; | ||||
|  | ||||
							
								
								
									
torch/lib/ATen/UndefinedTensor.cpp (new file, 42 lines)
							| @ -0,0 +1,42 @@ | ||||
| #include "ATen/UndefinedTensor.h" | ||||
| #include "ATen/Context.h" | ||||
|  | ||||
| namespace at { | ||||
|  | ||||
| // should this use the globalContext?  Can it get a context passed in somehow? | ||||
| UndefinedTensor::UndefinedTensor() | ||||
| : TensorImpl(&(globalContext().getType(Backend::Undefined,ScalarType::Undefined))) { | ||||
| } | ||||
|  | ||||
| const char * UndefinedTensor::toString() const { | ||||
|   return "UndefinedTensor"; | ||||
| } | ||||
|  | ||||
| IntList UndefinedTensor::sizes() const { | ||||
|   runtime_error("sizes() called on undefined Tensor"); | ||||
| } | ||||
|  | ||||
| int64_t UndefinedTensor::dim() const { | ||||
|   runtime_error("dim() called on undefined Tensor"); | ||||
| } | ||||
|  | ||||
| const char * UndefinedTensor::typeString() { | ||||
|   return "UndefinedType"; | ||||
| } | ||||
| void * UndefinedTensor::unsafeGetTH(bool retain) { | ||||
|   runtime_error("unsafeGetTH(bool retain) called on undefined Tensor"); | ||||
| } | ||||
|  | ||||
| IntList UndefinedTensor::strides() const { | ||||
|   runtime_error("strides() called on undefined Tensor"); | ||||
| } | ||||
| Scalar UndefinedTensor::localScalar() { | ||||
|   runtime_error("localScalar() called on undefined Tensor"); | ||||
| } | ||||
| void UndefinedTensor::assign_(Scalar s) { | ||||
|   runtime_error("assign_() called on undefined Tensor"); | ||||
| } | ||||
|  | ||||
| UndefinedTensor UndefinedTensor::_singleton; | ||||
|  | ||||
| } | ||||
Some files were not shown because too many files have changed in this diff.