diff --git a/.circleci/docker/requirements-ci.txt b/.circleci/docker/requirements-ci.txt
index 8b18a1745808..5662eadc4f66 100644
--- a/.circleci/docker/requirements-ci.txt
+++ b/.circleci/docker/requirements-ci.txt
@@ -164,11 +164,16 @@ pytest-rerunfailures
 #Pinned versions:
 #test that import:
 
-#xdoctest
+xdoctest==1.0.2
 #Description: runs doctests in pytest
-#Pinned versions:
+#Pinned versions: 1.0.2
 #test that import:
 
+pygments==2.12.0
+#Description: support doctest highlighting
+#Pinned versions: 2.12.0
+#test that import: the doctests
+
 #PyYAML
 #Description: data serialization format
 #Pinned versions:
diff --git a/.jenkins/pytorch/macos-test.sh b/.jenkins/pytorch/macos-test.sh
index 68f7f2619209..323ce3965a86 100755
--- a/.jenkins/pytorch/macos-test.sh
+++ b/.jenkins/pytorch/macos-test.sh
@@ -16,9 +16,9 @@ fi
 pip install "unittest-xml-reporting<=3.2.0,>=2.0.0" \
   pytest \
   pytest-xdist \
-  pytest-rerunfailures
-  # TODO: enable xdoctest later
-  # xdoctest
+  pytest-rerunfailures \
+  "xdoctest==1.0.2" \
+  "pygments==2.12.0"
 
 if [ -z "${CI}" ]; then
   rm -rf "${WORKSPACE_DIR}"/miniconda3/lib/python3.6/site-packages/torch*
diff --git a/.jenkins/pytorch/win-test-helpers/setup_pytorch_env.bat b/.jenkins/pytorch/win-test-helpers/setup_pytorch_env.bat
index c598a04e0f97..79e8aedfab75 100644
--- a/.jenkins/pytorch/win-test-helpers/setup_pytorch_env.bat
+++ b/.jenkins/pytorch/win-test-helpers/setup_pytorch_env.bat
@@ -36,8 +36,7 @@ popd
 =======
 :: Pin unittest-xml-reporting to freeze printing test summary logic, related: https://github.com/pytorch/pytorch/issues/69014
-pip install "ninja==1.10.0.post1" future "hypothesis==5.35.1" "expecttest==0.1.3" "librosa>=0.6.2" "scipy==1.6.3" psutil pillow "unittest-xml-reporting<=3.2.0,>=2.0.0" pytest pytest-xdist pytest-rerunfailures
-:: # TODO: enable xdoctest later
+pip install "ninja==1.10.0.post1" future "hypothesis==5.35.1" "expecttest==0.1.3" "librosa>=0.6.2" "scipy==1.6.3" psutil pillow "unittest-xml-reporting<=3.2.0,>=2.0.0" pytest pytest-xdist pytest-rerunfailures "xdoctest==1.0.2" "pygments==2.12.0"
 if errorlevel 1 exit /b
 if not errorlevel 0 exit /b
diff --git a/pytest.ini b/pytest.ini
index 69185dd94ee9..53b5ad643ebf 100644
--- a/pytest.ini
+++ b/pytest.ini
@@ -7,11 +7,6 @@ addopts =
     # capture only Python print and C++ py::print, but not C output (low-level Python errors)
    --capture=sys
    --disable-warnings
-    # TODO: enable xdoctest later
-    #--xdoctest
-    #--xdoctest-style=google
-    #--xdoctest-global-exec="from torch import nn\nimport torch.nn.functional as F\nimport torch"
-    #--xdoctest-options=+IGNORE_WHITESPACE
 testpaths =
     test
 junit_logging_reruns = all
diff --git a/test/run_test.py b/test/run_test.py
old mode 100644
new mode 100755
index 6cd6ebde31f6..a0506b2c9978
--- a/test/run_test.py
+++ b/test/run_test.py
@@ -128,6 +128,10 @@ TESTS = discover_tests(
     ]
 )
 
+# The doctests are a special case: they don't correspond to a file that
+# discover_tests can find.
+TESTS = TESTS + ['doctests']
+
 FSDP_TEST = [test for test in TESTS if test.startswith("distributed/fsdp")]
 
 # Tests need to be run with pytest.
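For context: the `doctests` entry registered above has no backing test file; run_test.py dispatches it through the `CUSTOM_HANDLERS` table to the `run_doctests` handler added below. A minimal standalone sketch of what that handler boils down to, assuming `xdoctest==1.0.2` as pinned above (illustrative only, not part of the patch):

```python
# Minimal sketch (not part of the patch) of what the new "doctests" test
# module does: run the xdoctest runner over the installed torch package,
# assuming xdoctest==1.0.2 as pinned in requirements-ci.txt.
import os
import pathlib

import torch
import xdoctest

pkgpath = pathlib.Path(torch.__file__).parent

run_summary = xdoctest.runner.doctest_module(
    os.fspath(pkgpath),
    config={
        # Many doctests assume these names exist in the global namespace.
        'global_exec': r'\n'.join([
            'from torch import nn',
            'import torch.nn.functional as F',
            'import torch',
        ]),
        'style': 'google',
        'options': '+IGNORE_WHITESPACE',
    },
    command='list',  # 'list' only parses and checks syntax; 'all' executes
    argv=[],
    verbose=1,
)
print('n_failed:', run_summary.get('n_failed', 0))
```

With the patch applied, the equivalent CI entry point would be `python test/run_test.py --include doctests --xdoctest-command=all` (assuming run_test.py's existing `--include` selector).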
@@ -348,20 +352,6 @@ def get_executable_command(options, allow_pytest, disable_coverage=False):
     if options.pytest:
         if allow_pytest:
             executable += ["-m", "pytest"]
-            # Enable xdoctest
-            # TODO: enable xdoctest later
-            # Many doctests assume the existence of these variables
-            # xdoctest_global_exec_lines = r'\n'.join([
-            #     'from torch import nn',
-            #     'import torch.nn.functional as F',
-            #     'import torch',
-            # ])
-            # executable += [
-            #     "--xdoctest",
-            #     "--xdoctest-style=google",
-            #     f"--xdoctest-global-exec='{xdoctest_global_exec_lines}'",
-            #     "--xdoctest-options=+IGNORE_WHITESPACE"
-            # ]
         else:
             print_to_stderr(
                 "Pytest cannot be used for this test. Falling back to unittest."
@@ -565,6 +555,79 @@ def test_distributed(test_module, test_directory, options):
     return 0
 
 
+def run_doctests(test_module, test_directory, options):
+    """
+    Assumes the incoming test module is called doctests, and simply executes the
+    xdoctest runner on the torch library itself.
+    """
+    import xdoctest
+    import pathlib
+    pkgpath = pathlib.Path(torch.__file__).parent
+
+    enabled = {
+        # TODO: expose these options to the user
+        # Temporarily disable all feature-conditional tests
+        # 'lapack': 'auto',
+        # 'cuda': 'auto',
+        # 'cuda1': 'auto',
+        # 'qengine': 'auto',
+        'lapack': 0,
+        'cuda': 0,
+        'cuda1': 0,
+        'qengine': 0,
+    }
+
+    # Resolve "auto" based on a test to determine if the feature is available.
+    if enabled['cuda'] == 'auto' and torch.cuda.is_available():
+        enabled['cuda'] = True
+
+    if enabled['cuda1'] == 'auto' and torch.cuda.is_available() and torch.cuda.device_count() > 1:
+        enabled['cuda1'] = True
+
+    if enabled['lapack'] == 'auto' and torch._C.has_lapack:
+        enabled['lapack'] = True
+
+    if enabled['qengine'] == 'auto':
+        try:
+            # Is there a better check if quantization is enabled?
+            import torch.nn.quantized as nnq  # NOQA
+            torch.backends.quantized.engine = 'qnnpack'
+            torch.backends.quantized.engine = 'fbgemm'
+        except (ImportError, RuntimeError):
+            ...
+        else:
+            enabled['qengine'] = True
+
+    # Set doctest environment variables
+    if enabled['cuda']:
+        os.environ['TORCH_DOCTEST_CUDA'] = '1'
+
+    if enabled['cuda1']:
+        os.environ['TORCH_DOCTEST_CUDA1'] = '1'
+
+    if enabled['lapack']:
+        os.environ['TORCH_DOCTEST_LAPACK'] = '1'
+
+    if enabled['qengine']:
+        os.environ['TORCH_DOCTEST_QENGINE'] = '1'
+
+    xdoctest_config = {
+        'global_exec': r'\n'.join([
+            'from torch import nn',
+            'import torch.nn.functional as F',
+            'import torch',
+        ]),
+        'style': 'google',
+        'options': '+IGNORE_WHITESPACE',
+    }
+    xdoctest_verbose = max(1, options.verbose)
+    run_summary = xdoctest.runner.doctest_module(
+        os.fspath(pkgpath), config=xdoctest_config, verbose=xdoctest_verbose,
+        command=options.xdoctest_command, argv=[])
+    result = 1 if run_summary.get('n_failed', 0) else 0
+    return result
+
+
 CUSTOM_HANDLERS = {
     "test_cuda_primary_ctx": test_cuda_primary_ctx,
     "test_cuda_trace": get_run_test_with_subprocess_fn(),
@@ -583,6 +646,7 @@ CUSTOM_HANDLERS = {
     "distributed/rpc/test_tensorpipe_agent": get_run_test_with_subprocess_fn(),
     "distributed/rpc/test_share_memory": get_run_test_with_subprocess_fn(),
     "distributed/rpc/cuda/test_tensorpipe_agent": get_run_test_with_subprocess_fn(),
+    "doctests": run_doctests,
 }
 
 
@@ -739,6 +803,15 @@ def parse_args():
         action="store_true",
         help="Only list the test that will run.",
     )
+    parser.add_argument(
+        "--xdoctest-command",
+        default='list',
+        help=(
+            "Control the specific doctest action. "
+            "Use 'list' to simply parse doctests and check syntax. 
" + "Use 'all' to execute all doctests or specify a specific " + "doctest to run") + ) return parser.parse_args() diff --git a/torch/_prims/context.py b/torch/_prims/context.py index 75143bceaff3..a404d44b6f2d 100644 --- a/torch/_prims/context.py +++ b/torch/_prims/context.py @@ -77,7 +77,8 @@ class NvfuserPrimsMode(torch.overrides.TorchFunctionMode): Switches the interpretation of torch.ops.prims.* functions to use nvFuser's prims in torch.ops.nvprims.* - >>> with NvfuserPrimMode(): + >>> # xdoctest: +SKIP("undefined vars") + >>> with NvfuserPrimsMode(): ... torch.ops.prims.add(x, y) # calls torch.ops.nvprims.add(x, y) By default, this context manager will fall back on the torch.ops.prims* if the diff --git a/torch/_tensor_str.py b/torch/_tensor_str.py index 493f17637a1b..8ec35ab8f0d3 100644 --- a/torch/_tensor_str.py +++ b/torch/_tensor_str.py @@ -46,12 +46,20 @@ def set_printoptions( Example:: + >>> # Limit the precision of elements >>> torch.set_printoptions(precision=2) >>> torch.tensor([1.12345]) tensor([1.12]) + >>> # Limit the number of elements shown >>> torch.set_printoptions(threshold=5) >>> torch.arange(10) tensor([0, 1, 2, ..., 7, 8, 9]) + >>> # Restore defaults + >>> torch.set_printoptions(profile='default') + >>> torch.tensor([1.12345]) + tensor([1.1235]) + >>> torch.arange(10) + tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) """ if profile is not None: @@ -206,7 +214,7 @@ def _vector_str(self, indent, summarize, formatter1, formatter2=None): elements_per_line = max( 1, int(math.floor((PRINT_OPTS.linewidth - indent) / (element_length))) ) - char_per_line = element_length * elements_per_line + # char_per_line = element_length * elements_per_line # unused def _val_formatter(val, formatter1=formatter1, formatter2=formatter2): if formatter2 is not None: diff --git a/torch/_torch_docs.py b/torch/_torch_docs.py index b699ea67738e..97a6fd29c6bc 100644 --- a/torch/_torch_docs.py +++ b/torch/_torch_docs.py @@ -13523,6 +13523,7 @@ Returns: Example:: + >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA) >>> g_cpu = torch.Generator() >>> g_cuda = torch.Generator(device='cuda') """, diff --git a/torch/autograd/graph.py b/torch/autograd/graph.py index 022515bf1e97..05c0d51a61cc 100644 --- a/torch/autograd/graph.py +++ b/torch/autograd/graph.py @@ -1,6 +1,7 @@ import torch from typing import Callable, Any + class saved_tensors_hooks(): """Context-manager that sets a pair of pack / unpack hooks for saved tensors. @@ -93,7 +94,7 @@ class save_on_cpu(saved_tensors_hooks): Example:: - >>> # xdoctest: +REQUIRES(env:CUDAHOME) + >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA) >>> a = torch.randn(5, requires_grad=True, device="cuda") >>> b = torch.randn(5, requires_grad=True, device="cuda") >>> c = torch.randn(5, requires_grad=True, device="cuda") diff --git a/torch/backends/xeon/run_cpu.py b/torch/backends/xeon/run_cpu.py index c056af964478..69632cb20862 100644 --- a/torch/backends/xeon/run_cpu.py +++ b/torch/backends/xeon/run_cpu.py @@ -60,13 +60,13 @@ Single instance inference :: - >>> python -m torch.backends.xeon.run_cpu --throughput_mode script.py args + python -m torch.backends.xeon.run_cpu --throughput_mode script.py args 2. Run single-instance inference on a single CPU node. 
 ::
 
-    >>> python -m torch.backends.xeon.run_cpu --node_id 1 script.py args
+    python -m torch.backends.xeon.run_cpu --node_id 1 script.py args
 
 Multi-instance inference
 ------------------------
@@ -77,13 +77,13 @@ Multi-instance inference
 
 ::
 
-   >>> python -m torch.backends.xeon.run_cpu -- python_script args
+   python -m torch.backends.xeon.run_cpu -- python_script args
 
 eg: on an Intel(R) Xeon(R) Scalable Processor with 14 instance, 4 cores per instance
 
 ::
 
-   >>> python -m torch.backends.xeon.run_cpu --ninstances 14 --ncores_per_instance 4 python_script args
+   python -m torch.backends.xeon.run_cpu --ninstances 14 --ncores_per_instance 4 python_script args
 
 2. Run single-instance inference among multiple instances.
    By default, runs all ninstances. If you want to independently run a single instance among ninstances, specify rank.
@@ -92,27 +92,27 @@ Multi-instance inference
 
 ::
 
-   >>> python -m torch.backends.xeon.run_cpu --ninstances 2 --rank 0 python_script args
+   python -m torch.backends.xeon.run_cpu --ninstances 2 --rank 0 python_script args
 
 eg: run 1st instance on an Intel(R) Xeon(R) Scalable Processor with 2 instance (i.e., numactl -C 28-55)
 
 ::
 
-   >>> python -m torch.backends.xeon.run_cpu --ninstances 2 --rank 1 python_script args
+   python -m torch.backends.xeon.run_cpu --ninstances 2 --rank 1 python_script args
 
 eg: run 0th instance on an Intel(R) Xeon(R) Scalable Processor with 2 instance, 2 cores per instance,
 first four cores (i.e., numactl -C 0-1)
 
 ::
 
-   >>> python -m torch.backends.xeon.run_cpu --core_list "0, 1, 2, 3" --ninstances 2 --ncores_per_instance 2
+   python -m torch.backends.xeon.run_cpu --core_list "0, 1, 2, 3" --ninstances 2 --ncores_per_instance 2
    --rank 0 python_script args
 
 3. To look up what optional arguments this module offers:
 
 ::
 
-   >>> python -m torch.backends.xeon.run_cpu --help
+   python -m torch.backends.xeon.run_cpu --help
 
 Memory allocator
 ----------------
diff --git a/torch/csrc/Exceptions.cpp b/torch/csrc/Exceptions.cpp
index 6342826f5daf..5210d6f713db 100644
--- a/torch/csrc/Exceptions.cpp
+++ b/torch/csrc/Exceptions.cpp
@@ -35,7 +35,7 @@ For example, you can the torch.linalg.inv function will raise torch.linalg.LinAlgError when\n \
 a matrix is not invertible.\n \
 \n\
 Example:\n \
->>> # xdoctest: +REQUIRES(--lapac)\n \
+>>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK)\n \
 >>> matrix = torch.eye(3, 3)\n \
 >>> matrix[-1, -1] = 0\n \
 >>> matrix\n \
diff --git a/torch/distributed/_shard/checkpoint/planner.py b/torch/distributed/_shard/checkpoint/planner.py
index d37de1e76383..f3692cc11395 100644
--- a/torch/distributed/_shard/checkpoint/planner.py
+++ b/torch/distributed/_shard/checkpoint/planner.py
@@ -102,13 +102,15 @@ class SavePlanner(abc.ABC):
     Rewriting state_dict. This is the simplest way to extend the save process as it
     doesn't requite understanding the intrincacies of how SavePlan works:
 
-    >>> class RenamePlanner(DefaultSavePlanner)
+    >>> # xdoctest: +SKIP("undefined vars")
+    >>> class RenamePlanner(DefaultSavePlanner):
     >>>     def init(self, state_dict, is_coordinator):
     >>>         # prefix all keys with `foo_``
-    >>>         super().init(self, {"foo_" + k, v for k, v in state_dict.items()}, is_coordinator)
+    >>>         super().init(self, {"foo_" + k: v for k, v in state_dict.items()}, is_coordinator)
 
     Modifying local plan and lookup in tandem.
This is useful when fine control of how data is persisted + >>> # xdoctest: +SKIP("undefined vars") >>> class FP16Planner(DefaultSavePlanner): >>> def create_local_plan(self): >>> plan = super().create_local_plan() @@ -122,6 +124,7 @@ class SavePlanner(abc.ABC): Using the global planning step to make central decisions that can't be made individually by each rank + >>> # xdoctest: +SKIP("undefined vars") >>> from itertools import islice >>> from dataclasses import replace >>> class DDPLoadBalancingPlanner(DefaultSavePlanner): @@ -141,6 +144,7 @@ class SavePlanner(abc.ABC): accomplished by having each rank contribute their data items in the local plan and the global planner aggregate them: + >>> # xdoctest: +SKIP("undefined vars") >>> class SaveExtraDataPlanner(DefaultSavePlanner): >>> def create_local_plan(self) -> SavePlan: >>> plan = super().create_local_plan() @@ -247,18 +251,20 @@ class LoadPlanner: to keep a reference to the original state_dict as load happens in place so we need to be able to perform it in place - >>> class RenamePlanner(DefaultLoadPlanner) + >>> # xdoctest: +SKIP("undefined vars") + >>> class RenamePlanner(DefaultLoadPlanner): >>> def init(self, state_dict, metadata, is_coordinator): >>> self.original_state_dict = state_dict - >>> super().init(self, {"foo_" + k, v for k, v in state_dict.items()}, is_coordinator) + >>> super().init(self, {"foo_" + k: v for k, v in state_dict.items()}, is_coordinator) >>> - >>> def load_bytes(self, read_item, value); + >>> def load_bytes(self, read_item, value): >>> # Remove the "foo_" prefix >>> self.original_state_dict[read_item.dest_index.fqn[4:]] = torch.load(value) Modifying resolve_tensor and commit_tensor to handle load time transformation. + >>> # xdoctest: +SKIP("undefined vars") >>> class MetaModelMaterialize(DefaultSavePlanner): >>> def resolve_tensor(self, read_item): >>> tensor = super().resolve_tensor(read_item) diff --git a/torch/distributed/nn/api/remote_module.py b/torch/distributed/nn/api/remote_module.py index 9a8d01c40d70..72a213b1d0f7 100644 --- a/torch/distributed/nn/api/remote_module.py +++ b/torch/distributed/nn/api/remote_module.py @@ -72,6 +72,7 @@ _REMOTE_MODULE_ATTRIBUTES_IGNORE_FOR_PICKLING = ( "forward", ) + # RPC handler. 
def _instantiate_template(module_interface_cls, enable_moving_cpu_tensors_to_cuda): instantiator.instantiate_scriptable_remote_module_template( @@ -193,13 +194,13 @@ class _RemoteModule(nn.Module): Example:: Run the following code in two different processes: + >>> # xdoctest: +SKIP("distributed") >>> # On worker 0: >>> import torch >>> import torch.distributed.rpc as rpc >>> from torch import nn, Tensor >>> from torch.distributed.nn.api.remote_module import RemoteModule >>> - >>> # xdoctest: +SKIP >>> rpc.init_rpc("worker0", rank=0, world_size=2) >>> remote_linear_module = RemoteModule( >>> "worker1/cpu", nn.Linear, args=(20, 30), @@ -500,13 +501,13 @@ class _RemoteModule(nn.Module): Example:: Run the following code in two different processes: + >>> # xdoctest: +SKIP("distributed") >>> # On worker 0: >>> import torch >>> import torch.distributed.rpc as rpc >>> from torch import nn, Tensor >>> from torch.distributed.nn.api.remote_module import RemoteModule >>> - >>> # xdoctest: +SKIP >>> rpc.init_rpc("worker0", rank=0, world_size=2) >>> remote_module = RemoteModule( >>> "worker1/cpu", nn.Linear, args=(20, 30), @@ -622,13 +623,13 @@ class RemoteModule(_RemoteModule): Example:: Run the following code in two different processes: + >>> # xdoctest: +SKIP("distributed") >>> # On worker 0: >>> import torch >>> import torch.distributed.rpc as rpc >>> from torch import nn, Tensor >>> from torch.distributed.nn.api.remote_module import RemoteModule >>> - >>> # xdoctest: +SKIP >>> rpc.init_rpc("worker0", rank=0, world_size=2) >>> remote_linear_module = RemoteModule( >>> "worker1/cpu", nn.Linear, args=(20, 30), diff --git a/torch/distributed/optim/optimizer.py b/torch/distributed/optim/optimizer.py index caf5ab293bc4..535104beb9f4 100644 --- a/torch/distributed/optim/optimizer.py +++ b/torch/distributed/optim/optimizer.py @@ -18,6 +18,7 @@ __all__ = ['DistributedOptimizer'] logger = logging.getLogger(__name__) + # XXX: we define a _ScriptModuleOptimizer here to explicitly # compile the FunctionalOptimizer class into TorchScript # This is because ScriptClass instance still lives in @@ -33,6 +34,7 @@ class _ScriptLocalOptimizerInterface(object): def step(self, autograd_ctx_id: int) -> None: pass + class _ScriptLocalOptimizer(nn.Module): # TorchScript does not support multithread concurrent compiling. # request_callback might invoke concurrent compiling, so we @@ -106,6 +108,7 @@ def _new_script_local_optimizer(optim_cls, local_params_rref, *args, **kwargs): return rpc.RRef( script_optim, _ScriptLocalOptimizerInterface) + @jit.script def _script_local_optimizer_step( local_optim_rref: RRef[_ScriptLocalOptimizerInterface], @@ -114,6 +117,7 @@ def _script_local_optimizer_step( local_optim = local_optim_rref.local_value() local_optim.step(autograd_ctx_id) + def _wait_for_all(rpc_futs): # TODO: improve error propagation exception = None @@ -163,12 +167,12 @@ class DistributedOptimizer: kwargs: arguments to pass to the optimizer constructor on each worker. Example:: + >>> # xdoctest: +SKIP("distributed") >>> import torch.distributed.autograd as dist_autograd >>> import torch.distributed.rpc as rpc >>> from torch import optim >>> from torch.distributed.optim import DistributedOptimizer >>> - >>> # xdoctest: +SKIP >>> with dist_autograd.context() as context_id: >>> # Forward pass. 
>>> rref1 = rpc.remote("worker1", torch.add, args=(torch.ones(2), 3)) diff --git a/torch/distributed/pipeline/sync/pipe.py b/torch/distributed/pipeline/sync/pipe.py index 81d1a7bc7793..96bc51989f62 100644 --- a/torch/distributed/pipeline/sync/pipe.py +++ b/torch/distributed/pipeline/sync/pipe.py @@ -149,14 +149,16 @@ class WithDevice(nn.Module): device(:class:`torch.device`): The device to run the module on. Example:: + >>> # xdoctest: +SKIP("distributed") >>> fc1 = nn.Linear(16, 8).cuda(0) >>> fc2 = nn.Linear(8, 4).cuda(1) >>> dropout = nn.Dropout() >>> + >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA1) >>> # Dropout does not have any parameters/buffers, but we want to >>> # run it on cuda:1 to avoid any GPU to CPU transfers. >>> model = nn.Sequential(fc1, fc2, WithDevice(dropout, 'cuda:1')) - >>> # xdoctest: +SKIP + >>> # xdoctest: +SKIP("Needs RPC framework init") >>> model = Pipe(model, chunks=8) """ def __init__(self, module: nn.Module, device: torch.device): @@ -185,6 +187,7 @@ def _assemble_partition(modules: List[nn.Module]): modules_list.append(module) return PipeSequential(*modules_list) + def _split_module(modules: nn.Sequential) -> Tuple[List[nn.Sequential], List[torch.device]]: partitions = [] devices = [] diff --git a/torch/distributed/rpc/api.py b/torch/distributed/rpc/api.py index cc2f235a441e..8416fc7c93a3 100644 --- a/torch/distributed/rpc/api.py +++ b/torch/distributed/rpc/api.py @@ -148,6 +148,7 @@ def _broadcast_to_followers(sequence_id, objects_map): _thread_local_var = threading.local() + @contextlib.contextmanager def _wait_all(): r""" @@ -157,10 +158,10 @@ def _wait_all(): Example:: + >>> # xdoctest: +SKIP("distributed") >>> # On worker 0: >>> import torch >>> import torch.distributed.rpc as rpc - >>> # xdoctest: +SKIP >>> rpc.init_rpc("worker0", rank=0, world_size=2) >>> with rpc._wait_all(): >>> fut_1 = rpc.rpc_async(dst, torch.add, (torch.ones(2, 2), 1)) @@ -176,6 +177,7 @@ def _wait_all(): finally: del _thread_local_var.future_list + @_require_initialized def _all_gather(obj, worker_names=None, timeout=UNSET_RPC_TIMEOUT): r""" @@ -285,6 +287,7 @@ def _barrier(worker_names): f"Failed to complete barrier, got error {ex}" ) + @_require_initialized def _wait_all_workers(timeout=DEFAULT_SHUTDOWN_TIMEOUT): r""" @@ -376,6 +379,7 @@ def shutdown(graceful=True, timeout=DEFAULT_SHUTDOWN_TIMEOUT): else: _finalize_shutdown() + def _finalize_shutdown(): try: # This raises a `TORCH_CHECK()` exception on RRef leak detected. @@ -396,6 +400,7 @@ def _finalize_shutdown(): _cleanup_python_rpc_handler() _reset_current_rpc_agent() + @_require_initialized def get_worker_info(worker_name=None): r""" @@ -453,7 +458,6 @@ def _rref_typeof_on_user(rref, timeout=UNSET_RPC_TIMEOUT, blocking=True): return fut - T = TypeVar("T") GenericWithOneTypeVar = Generic[T] @@ -669,6 +673,7 @@ def remote(to, func, args=None, kwargs=None, timeout=UNSET_RPC_TIMEOUT): return rref + def _invoke_rpc(to, func, rpc_type, args=None, kwargs=None, rpc_timeout=UNSET_RPC_TIMEOUT): if not callable(func): raise TypeError("function should be callable.") @@ -900,6 +905,7 @@ def rpc_async(to, func, args=None, kwargs=None, timeout=UNSET_RPC_TIMEOUT): _thread_local_var.future_list.append(fut) return fut + def _get_should_profile(): # Legacy profiler should be enabled. RPC profiling is not supported with # Kineto profiler. 
@@ -909,6 +915,7 @@ def _get_should_profile():
         torch._C._autograd._profiler_type() == ActiveProfilerType.LEGACY  # type: ignore[attr-defined]
     )
 
+
 def _enable_rpc_profiler(should_profile, qualified_name, func, rpc_type, dst_worker_info):
     ctx_manager = contextlib.suppress()
 
diff --git a/torch/distributed/rpc/options.py b/torch/distributed/rpc/options.py
index bb67ac032e6d..a995184bc823 100644
--- a/torch/distributed/rpc/options.py
+++ b/torch/distributed/rpc/options.py
@@ -113,6 +113,7 @@ class TensorPipeRpcBackendOptions(_TensorPipeRpcBackendOptionsBase):
             invertible.
 
         Example::
+            >>> # xdoctest: +SKIP("distributed")
             >>> # both workers
             >>> def add(x, y):
             >>>     print(x)  # tensor([1., 1.], device='cuda:1')
@@ -127,7 +128,6 @@ class TensorPipeRpcBackendOptions(_TensorPipeRpcBackendOptionsBase):
             >>> options.set_device_map("worker1", {1: 2})
             >>> # maps worker0's cuda:1 to worker1's cuda:2
             >>>
-            >>> # xdoctest: +SKIP
             >>> rpc.init_rpc(
             >>>     "worker0",
             >>>     rank=0,
diff --git a/torch/distributions/lowrank_multivariate_normal.py b/torch/distributions/lowrank_multivariate_normal.py
index 5c83dcc9e7de..921477ac99a4 100644
--- a/torch/distributions/lowrank_multivariate_normal.py
+++ b/torch/distributions/lowrank_multivariate_normal.py
@@ -8,6 +8,7 @@ from torch.distributions.utils import _standard_normal, lazy_property
 
 __all__ = ['LowRankMultivariateNormal']
 
+
 def _batch_capacitance_tril(W, D):
     r"""
     Computes Cholesky of :math:`I + W.T @ inv(D) @ W` for a batch of matrices :math:`W`
@@ -52,7 +53,8 @@ class LowRankMultivariateNormal(Distribution):
         covariance_matrix = cov_factor @ cov_factor.T + cov_diag
 
     Example:
-        >>> # xdoctest: +REQUIRES(--lapack)
+        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK)
+        >>> # xdoctest: +IGNORE_WANT("non-deterministic")
         >>> m = LowRankMultivariateNormal(torch.zeros(2), torch.tensor([[1.], [0.]]), torch.ones(2))
         >>> m.sample()  # normally distributed with mean=`[0,0]`, cov_factor=`[[1],[0]]`, cov_diag=`[1,1]`
         tensor([-0.2102, -0.5429])
diff --git a/torch/distributions/multivariate_normal.py b/torch/distributions/multivariate_normal.py
index 55a5dd3a228a..e8c15c32d985 100644
--- a/torch/distributions/multivariate_normal.py
+++ b/torch/distributions/multivariate_normal.py
@@ -7,6 +7,7 @@ from torch.distributions.utils import _standard_normal, lazy_property
 
 __all__ = ['MultivariateNormal']
 
+
 def _batch_mv(bmat, bvec):
     r"""
     Performs a batched matrix-vector product, with compatible but different batch shapes.
@@ -91,7 +92,8 @@ class MultivariateNormal(Distribution):
 
     Example:
 
-        >>> # xdoctest: +REQUIRES(--lapack)
+        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK)
+        >>> # xdoctest: +IGNORE_WANT("non-deterministic")
         >>> m = MultivariateNormal(torch.zeros(2), torch.eye(2))
         >>> m.sample()  # normally distributed with mean=`[0,0]` and covariance_matrix=`I`
         tensor([-0.2102, -0.5429])
diff --git a/torch/functional.py b/torch/functional.py
index abe3fef5eade..12c6f1b143e1 100644
--- a/torch/functional.py
+++ b/torch/functional.py
@@ -136,7 +136,6 @@ def broadcast_shapes(*shapes):
     return tensors[0].shape
 
 
-
 def split(
     tensor: Tensor, split_size_or_sections: Union[int, List[int]], dim: int = 0
 ) -> List[Tensor]:
@@ -451,6 +450,7 @@ else:
 
         `torch.meshgrid` is commonly used to produce a grid for plotting.
+ >>> # xdoctest: +REQUIRES(module:matplotlib) >>> import matplotlib.pyplot as plt >>> xs = torch.linspace(-5, 5, steps=100) >>> ys = torch.linspace(-5, 5, steps=100) @@ -458,8 +458,6 @@ else: >>> z = torch.sin(torch.sqrt(x * x + y * y)) >>> ax = plt.axes(projection='3d') >>> ax.plot_surface(x.numpy(), y.numpy(), z.numpy()) - >>> # xdoctest: +SKIP - >>> plt.show() .. image:: ../_static/img/meshgrid.png @@ -736,23 +734,22 @@ def _unique_impl(input: Tensor, sorted: bool = True, >>> output = torch.unique(torch.tensor([1, 3, 2, 3], dtype=torch.long)) >>> output - >>> # xdoctest: +SKIP - tensor([ 2, 3, 1]) + tensor([1, 2, 3]) >>> output, inverse_indices = torch.unique( ... torch.tensor([1, 3, 2, 3], dtype=torch.long), sorted=True, return_inverse=True) >>> output - tensor([ 1, 2, 3]) + tensor([1, 2, 3]) >>> inverse_indices - tensor([ 0, 2, 1, 2]) + tensor([0, 2, 1, 2]) >>> output, inverse_indices = torch.unique( ... torch.tensor([[1, 3], [2, 3]], dtype=torch.long), sorted=True, return_inverse=True) >>> output - tensor([ 1, 2, 3]) + tensor([1, 2, 3]) >>> inverse_indices - tensor([[ 0, 2], - [ 1, 2]]) + tensor([[0, 2], + [1, 2]]) """ if has_torch_function_unary(input): @@ -983,6 +980,7 @@ else: def tensordot(a, b, dims: torch.Tensor, out: Optional[torch.Tensor] = None): # noqa: F811 pass + def tensordot(a, b, dims=2, out: Optional[torch.Tensor] = None): # noqa: F811 r"""Returns a contraction of a and b over multiple dimensions. @@ -1019,9 +1017,9 @@ def tensordot(a, b, dims=2, out: Optional[torch.Tensor] = None): # noqa: F811 [4796., 5162.], [4928., 5306.]]) + >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA) >>> a = torch.randn(3, 4, 5, device='cuda') >>> b = torch.randn(4, 5, 6, device='cuda') - >>> # xdoctest: +SKIP >>> c = torch.tensordot(a, b, dims=2).cpu() tensor([[ 8.3504, -2.5436, 6.2922, 2.7556, -1.0732, 3.2741], [ 3.3161, 0.0704, 5.0187, -0.4079, -4.3126, 4.8744], @@ -1073,6 +1071,7 @@ def tensordot(a, b, dims=2, out: Optional[torch.Tensor] = None): # noqa: F811 else: return _VF.tensordot(a, b, dims_a, dims_b, out=out) # type: ignore[attr-defined] + def cartesian_prod(*tensors: Tensor) -> Tensor: """Do cartesian product of the given sequence of tensors. The behavior is similar to python's `itertools.product`. @@ -1087,9 +1086,9 @@ def cartesian_prod(*tensors: Tensor) -> Tensor: Example:: + >>> import itertools >>> a = [1, 2, 3] >>> b = [4, 5] - >>> # xdoctest: +SKIP >>> list(itertools.product(a, b)) [(1, 4), (1, 5), (2, 4), (2, 5), (3, 4), (3, 5)] >>> tensor_a = torch.tensor(a) @@ -1107,6 +1106,7 @@ def cartesian_prod(*tensors: Tensor) -> Tensor: return handle_torch_function(cartesian_prod, tensors, *tensors) return _VF.cartesian_prod(tensors) # type: ignore[attr-defined] + def block_diag(*tensors): """Create a block diagonal matrix from provided tensors. @@ -1197,6 +1197,7 @@ def cdist(x1, x2, p=2., compute_mode='use_mm_for_euclid_dist_if_necessary'): else: raise ValueError(f"{compute_mode} is not a valid value for compute_mode") + def atleast_1d(*tensors): r""" Returns a 1-dimensional view of each input tensor with zero dimensions. @@ -1210,12 +1211,11 @@ def atleast_1d(*tensors): Example:: - >>> x = torch.randn(2) + >>> x = torch.arange(2) >>> x - >>> # xdoctest: +SKIP - tensor([1.4584, 0.7583]) + tensor([0, 1]) >>> torch.atleast_1d(x) - tensor([1.4584, 0.7583]) + tensor([0, 1]) >>> x = torch.tensor(1.) >>> x tensor(1.) 
@@ -1233,6 +1233,7 @@ def atleast_1d(*tensors): tensors = tensors[0] return _VF.atleast_1d(tensors) # type: ignore[attr-defined] + def atleast_2d(*tensors): r""" Returns a 2-dimensional view of each input tensor with zero dimensions. @@ -1251,14 +1252,13 @@ def atleast_2d(*tensors): tensor(1.) >>> torch.atleast_2d(x) tensor([[1.]]) - >>> x = torch.randn(2,2) + >>> x = torch.arange(4).view(2,2) >>> x - >>> # xdoctest: +SKIP - tensor([[2.2086, 2.5165], - [0.1757, 0.5194]]) + tensor([[0, 1], + [2, 3]]) >>> torch.atleast_2d(x) - tensor([[2.2086, 2.5165], - [0.1757, 0.5194]]) + tensor([[0, 1], + [2, 3]]) >>> x = torch.tensor(0.5) >>> y = torch.tensor(1.) >>> torch.atleast_2d((x,y)) @@ -1271,6 +1271,7 @@ def atleast_2d(*tensors): tensors = tensors[0] return _VF.atleast_2d(tensors) # type: ignore[attr-defined] + def atleast_3d(*tensors): r""" Returns a 3-dimensional view of each input tensor with zero dimensions. @@ -1289,22 +1290,21 @@ def atleast_3d(*tensors): tensor(0.5000) >>> torch.atleast_3d(x) tensor([[[0.5000]]]) - >>> y = torch.randn(2,2) + >>> y = torch.arange(4).view(2,2) >>> y - >>> # xdoctest: +SKIP - tensor([[-0.8079, 0.7460], - [-1.1647, 1.4734]]) + tensor([[0, 1], + [2, 3]]) >>> torch.atleast_3d(y) - tensor([[[-0.8079], - [ 0.7460]], + tensor([[[0], + [1]], - [[-1.1647], - [ 1.4734]]]) - >>> x = torch.randn(1,1,1) + [[2], + [3]]]) + >>> x = torch.tensor(1).view(1, 1, 1) >>> x - tensor([[[-1.5689]]]) + tensor([[[1]]]) >>> torch.atleast_3d(x) - tensor([[[-1.5689]]]) + tensor([[[1]]]) >>> x = torch.tensor(0.5) >>> y = torch.tensor(1.) >>> torch.atleast_3d((x,y)) @@ -1426,7 +1426,6 @@ def norm(input, p="fro", dim=None, keepdim=False, out=None, dtype=None): # noqa >>> a = torch.arange(9, dtype= torch.float) - 4 >>> b = a.reshape((3, 3)) >>> torch.norm(a) - >>> # xdoctest: +SKIP tensor(7.7460) >>> torch.norm(b) tensor(7.7460) @@ -1514,6 +1513,7 @@ def norm(input, p="fro", dim=None, keepdim=False, out=None, dtype=None): # noqa else: return _VF.norm(input, p, _dim, keepdim=keepdim, dtype=dtype, out=out) # type: ignore[attr-defined] + def chain_matmul(*matrices, out=None): r"""Returns the matrix product of the :math:`N` 2-D tensors. 
    This product is efficiently computed using the matrix chain order algorithm which selects the order in which
    incurs the lowest cost in terms
@@ -1537,12 +1537,13 @@
 
     Example::
 
+        >>> # xdoctest: +IGNORE_WANT("non-deterministic")
         >>> a = torch.randn(3, 4)
         >>> b = torch.randn(4, 5)
         >>> c = torch.randn(5, 6)
         >>> d = torch.randn(6, 7)
+        >>> # will raise a deprecation warning
         >>> torch.chain_matmul(a, b, c, d)
-        >>> # xdoctest: +SKIP
         tensor([[ -2.3375,  -3.9790,  -4.1119,  -6.6577,   9.5609, -11.5095,  -3.2614],
                 [ 21.4038,   3.3378,  -8.4982,  -5.2457, -10.2561,  -2.4684,   2.7163],
                 [ -0.9647,  -5.8917,  -2.3213,  -5.2284,  12.8615, -12.2816,  -2.5095]])
@@ -1635,7 +1636,8 @@ def _lu_impl(A, pivot=True, get_infos=False, out=None):
 
     Example::
 
-        >>> # xdoctest: +REQUIRES(--lapack)
+        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK)
+        >>> # xdoctest: +IGNORE_WANT("non-deterministic")
         >>> A = torch.randn(2, 3, 3)
        >>> A_LU, pivots = torch.lu(A)
         >>> A_LU
@@ -1662,6 +1664,7 @@ if TYPE_CHECKING:
 else:
     _ListOrSeq = List[Tensor]
 
+
 def _check_list_size(out_len: int, get_infos: bool, out: _ListOrSeq) -> None:
     get_infos_int = 1 if get_infos else 0
     if out_len - get_infos_int != 2:
@@ -1669,6 +1672,7 @@ def _check_list_size(out_len: int, get_infos: bool, out: _ListOrSeq) -> None:
     if not isinstance(out, (tuple, list)):
         raise TypeError(f"argument 'out' must be tuple of Tensors, not {type(out).__name__}")
 
+
 def _lu_with_infos(A, pivot=True, get_infos=False, out=None):
     # type: (Tensor, bool, bool, Optional[Tuple[Tensor, Tensor, Tensor]]) -> Tuple[Tensor, Tensor, Tensor]
     if has_torch_function_unary(A):
@@ -1683,6 +1687,7 @@ def _lu_with_infos(A, pivot=True, get_infos=False, out=None):
     else:
         return result  # A_LU, pivots, infos
 
+
 def _lu_no_infos(A, pivot=True, get_infos=False, out=None):
     # type: (Tensor, bool, bool, Optional[Tuple[Tensor, Tensor]]) -> Tuple[Tensor, Tensor]
     # need to check for torch_function here so that we exit if
@@ -1710,5 +1715,6 @@ lu = boolean_dispatch(
     func_name='lu')
 lu.__doc__ = _lu_impl.__doc__
 
+
 def align_tensors(*tensors):
     raise RuntimeError('`align_tensors` not yet implemented.')
diff --git a/torch/futures/__init__.py b/torch/futures/__init__.py
index 1795983b3f30..f2ba35f1e80b 100644
--- a/torch/futures/__init__.py
+++ b/torch/futures/__init__.py
@@ -9,9 +9,11 @@ __all__ = ['Future', 'collect_all', 'wait_all']
 T = TypeVar("T")
 S = TypeVar("S")
 
+
 class _PyFutureMeta(type(torch._C.Future), type(Generic)):  # type: ignore[misc, no-redef]
     pass
 
+
 class Future(torch._C.Future, Generic[T], metaclass=_PyFutureMeta):
     r"""
     Wrapper around a ``torch._C.Future`` which encapsulates an asynchronous
diff --git a/torch/hub.py b/torch/hub.py
index cc27b15930bb..66532d928dd1 100644
--- a/torch/hub.py
+++ b/torch/hub.py
@@ -170,7 +170,6 @@ def _validate_not_a_forked_repo(repo_owner, repo_name, ref):
                 'If it\'s a commit from a forked repo, please call hub.load() with forked repo directly.')
 
 
-
 def _get_cache_or_reload(github, force_reload, trust_repo, calling_fn, verbose=True, skip_validation=False):
     # Setup hub_dir to save downloaded files
     hub_dir = get_dir()
@@ -240,6 +239,7 @@ def _get_cache_or_reload(github, force_reload, trust_repo, calling_fn, verbose=True, skip_validation=False):
 
     return repo_dir
 
+
 def _check_repo_is_trusted(repo_owner, repo_name, owner_name_branch, trust_repo, calling_fn="load"):
     hub_dir = get_dir()
     filepath = os.path.join(hub_dir, "trusted_list")
@@ -522,11 +522,11 @@ def load(repo_or_dir, model, *args, source='github', trust_repo=None, force_reload=False,
 
     Example:
         >>> # from a github repo
        >>> repo = 
'pytorch/vision' - >>> model = torch.hub.load(repo, 'resnet50', pretrained=True) + >>> model = torch.hub.load(repo, 'resnet50', weights='ResNet50_Weights.IMAGENET1K_V1') >>> # from a local directory >>> path = '/some/local/path/pytorch/vision' >>> # xdoctest: +SKIP - >>> model = torch.hub.load(path, 'resnet50', pretrained=True) + >>> model = torch.hub.load(path, 'resnet50', weights='ResNet50_Weights.DEFAULT') """ source = source.lower() @@ -558,9 +558,9 @@ def _load_local(hubconf_dir, model, *args, **kwargs): a single model with corresponding pretrained weights. Example: + >>> # xdoctest: +SKIP("stub local path") >>> path = '/some/local/path/pytorch/vision' - >>> # xdoctest: +SKIP - >>> model = _load_local(path, 'resnet50', pretrained=True) + >>> model = _load_local(path, 'resnet50', weights='ResNet50_Weights.IMAGENET1K_V1') """ sys.path.insert(0, hubconf_dir) @@ -587,6 +587,7 @@ def download_url_to_file(url, dst, hash_prefix=None, progress=True): Default: True Example: + >>> # xdoctest: +REQUIRES(POSIX) >>> torch.hub.download_url_to_file('https://s3.amazonaws.com/pytorch/models/resnet18-5c106cde.pth', '/tmp/temporary_file') """ diff --git a/torch/masked/maskedtensor/core.py b/torch/masked/maskedtensor/core.py index 851587c5f919..277aa396f625 100644 --- a/torch/masked/maskedtensor/core.py +++ b/torch/masked/maskedtensor/core.py @@ -21,6 +21,8 @@ def is_masked_tensor(a): Examples: + >>> # xdoctest: +SKIP + >>> from torch.masked.maskedtensor.creation import masked_tensor >>> data = torch.arange(6).reshape(2,3) >>> mask = torch.tensor([[True, False, False], [True, True, False]]) >>> mt = masked_tensor(data, mask) diff --git a/torch/masked/maskedtensor/creation.py b/torch/masked/maskedtensor/creation.py index 7919e2df863e..15acc20efcd9 100644 --- a/torch/masked/maskedtensor/creation.py +++ b/torch/masked/maskedtensor/creation.py @@ -19,6 +19,7 @@ def masked_tensor(data, mask, requires_grad=False): Examples:: + >>> # xdoctest: +SKIP >>> data = torch.arange(6).reshape(2,3) >>> mask = torch.tensor([[True, False, False], [True, True, False]]) >>> mt = masked_tensor(data, mask) diff --git a/torch/monitor/__init__.py b/torch/monitor/__init__.py index 723936c8382a..b8589bb00087 100644 --- a/torch/monitor/__init__.py +++ b/torch/monitor/__init__.py @@ -16,6 +16,7 @@ class TensorboardEventHandler: This currently only supports ``torch.monitor.Stat`` events which are logged as scalars. 
+ >>> # xdoctest: +REQUIRES(module:tensorboard) >>> from torch.utils.tensorboard import SummaryWriter >>> from torch.monitor import TensorboardEventHandler, register_event_handler >>> writer = SummaryWriter("log_dir") diff --git a/torch/nn/functional.py b/torch/nn/functional.py index 640428de0bb6..496d2b1b5ee6 100644 --- a/torch/nn/functional.py +++ b/torch/nn/functional.py @@ -2677,7 +2677,7 @@ def nll_loss( >>> input = torch.randn(3, 5, requires_grad=True) >>> # each element in target has to have 0 <= value < C >>> target = torch.tensor([1, 0, 4]) - >>> output = F.nll_loss(F.log_softmax(input), target) + >>> output = F.nll_loss(F.log_softmax(input, dim=1), target) >>> output.backward() """ if has_torch_function_variadic(input, target, weight): diff --git a/torch/nn/init.py b/torch/nn/init.py index 6ea582d6189b..b70e7f5e390c 100644 --- a/torch/nn/init.py +++ b/torch/nn/init.py @@ -463,7 +463,7 @@ def orthogonal_(tensor, gain=1): gain: optional scaling factor Examples: - >>> # xdoctest: +REQUIRES(--lapack) + >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK) >>> w = torch.empty(3, 5) >>> nn.init.orthogonal_(w) """ diff --git a/torch/nn/modules/activation.py b/torch/nn/modules/activation.py index 88c3322879b7..c816437abdbf 100644 --- a/torch/nn/modules/activation.py +++ b/torch/nn/modules/activation.py @@ -1323,7 +1323,7 @@ class Softmin(Module): Examples:: - >>> m = nn.Softmin() + >>> m = nn.Softmin(dim=1) >>> input = torch.randn(2, 3) >>> output = m(input) """ @@ -1450,7 +1450,7 @@ class LogSoftmax(Module): Examples:: - >>> m = nn.LogSoftmax() + >>> m = nn.LogSoftmax(dim=1) >>> input = torch.randn(2, 3) >>> output = m(input) """ diff --git a/torch/nn/modules/batchnorm.py b/torch/nn/modules/batchnorm.py index 094e91b2e695..382accfef560 100644 --- a/torch/nn/modules/batchnorm.py +++ b/torch/nn/modules/batchnorm.py @@ -13,6 +13,7 @@ from .module import Module __all__ = ['BatchNorm1d', 'LazyBatchNorm1d', 'BatchNorm2d', 'LazyBatchNorm2d', 'BatchNorm3d', 'LazyBatchNorm3d', 'SyncBatchNorm'] + class _NormBase(Module): """Common base of _InstanceNorm and _BatchNorm""" @@ -779,6 +780,7 @@ class SyncBatchNorm(_BatchNorm): Example:: >>> # Network with nn.BatchNorm layer + >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA) >>> module = torch.nn.Sequential( >>> torch.nn.Linear(20, 100), >>> torch.nn.BatchNorm1d(100), @@ -790,7 +792,7 @@ class SyncBatchNorm(_BatchNorm): >>> # Note: every rank calls into new_group for every >>> # process group created, even if that rank is not >>> # part of the group. - >>> # xdoctest: +SKIP + >>> # xdoctest: +SKIP("distributed") >>> process_groups = [torch.distributed.new_group(pids) for pids in [r1, r2]] >>> process_group = process_groups[0 if dist.get_rank() <= 3 else 1] >>> sync_bn_module = torch.nn.SyncBatchNorm.convert_sync_batchnorm(module, process_group) diff --git a/torch/nn/modules/loss.py b/torch/nn/modules/loss.py index 1d466fbf2c62..85de8c549edb 100644 --- a/torch/nn/modules/loss.py +++ b/torch/nn/modules/loss.py @@ -449,15 +449,16 @@ class KLDivLoss(_Loss): Examples:: + >>> import torch.nn.functional as F >>> kl_loss = nn.KLDivLoss(reduction="batchmean") >>> # input should be a distribution in the log space - >>> input = F.log_softmax(torch.randn(3, 5, requires_grad=True)) + >>> input = F.log_softmax(torch.randn(3, 5, requires_grad=True), dim=1) >>> # Sample a batch of distributions. 
Usually this would come from the dataset - >>> target = F.softmax(torch.rand(3, 5)) + >>> target = F.softmax(torch.rand(3, 5), dim=1) >>> output = kl_loss(input, target) >>> kl_loss = nn.KLDivLoss(reduction="batchmean", log_target=True) - >>> log_target = F.log_softmax(torch.rand(3, 5)) + >>> log_target = F.log_softmax(torch.rand(3, 5), dim=1) >>> output = kl_loss(input, log_target) """ __constants__ = ['reduction'] diff --git a/torch/nn/modules/module.py b/torch/nn/modules/module.py index 131b45f73938..50f82d50cf82 100644 --- a/torch/nn/modules/module.py +++ b/torch/nn/modules/module.py @@ -21,6 +21,7 @@ _grad_t = Union[Tuple[Tensor, ...], Tensor] # the type of the subclass, not the looser type of `Module`. T = TypeVar('T', bound='Module') + class _IncompatibleKeys(namedtuple('IncompatibleKeys', ['missing_keys', 'unexpected_keys'])): def __repr__(self): if not self.missing_keys and not self.unexpected_keys: @@ -41,6 +42,7 @@ def _addindent(s_, numSpaces): s = first + '\n' + s return s + class _WrappedHook: def __init__(self, hook: Callable, module: Optional["Module"] = None): self.hook: Callable = hook @@ -151,6 +153,7 @@ def register_module_forward_hook(hook: Callable[..., None]) -> RemovableHandle: _global_forward_hooks[handle.id] = hook return handle + def register_module_backward_hook( hook: Callable[['Module', _grad_t, _grad_t], Union[None, Tensor]] ) -> RemovableHandle: @@ -177,6 +180,7 @@ def register_module_backward_hook( _global_backward_hooks[handle.id] = hook return handle + def register_module_full_backward_hook( hook: Callable[['Module', _grad_t, _grad_t], Union[None, Tensor]] ) -> RemovableHandle: @@ -933,6 +937,7 @@ class Module: Parameter containing: tensor([[ 0.1913, -0.3420], [-0.5113, -0.2325]], dtype=torch.float64) + >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_CUDA1) >>> gpu1 = torch.device("cuda:1") >>> linear.to(gpu1, dtype=torch.half, non_blocking=True) Linear(in_features=2, out_features=2, bias=True) diff --git a/torch/nn/modules/pooling.py b/torch/nn/modules/pooling.py index eb45e20db56d..93a43e10c962 100644 --- a/torch/nn/modules/pooling.py +++ b/torch/nn/modules/pooling.py @@ -525,7 +525,7 @@ class AvgPool1d(_AvgPoolNd): >>> # pool with window of size=3, stride=2 >>> m = nn.AvgPool1d(3, stride=2) >>> m(torch.tensor([[[1.,2,3,4,5,6,7]]])) - tensor([[[ 2., 4., 6.]]]) + tensor([[[2., 4., 6.]]]) """ kernel_size: _size_1_t diff --git a/torch/nn/modules/upsampling.py b/torch/nn/modules/upsampling.py index 56711f295414..4f13c84c2e90 100644 --- a/torch/nn/modules/upsampling.py +++ b/torch/nn/modules/upsampling.py @@ -7,6 +7,7 @@ from ..common_types import _size_2_t, _ratio_2_t, _size_any_t, _ratio_any_t __all__ = ['Upsample', 'UpsamplingNearest2d', 'UpsamplingBilinear2d'] + class Upsample(Module): r"""Upsamples a given multi-channel 1D (temporal), 2D (spatial) or 3D (volumetric) data. 
@@ -73,62 +74,61 @@ class Upsample(Module): >>> input = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2) >>> input - tensor([[[[ 1., 2.], - [ 3., 4.]]]]) + tensor([[[[1., 2.], + [3., 4.]]]]) >>> m = nn.Upsample(scale_factor=2, mode='nearest') >>> m(input) - tensor([[[[ 1., 1., 2., 2.], - [ 1., 1., 2., 2.], - [ 3., 3., 4., 4.], - [ 3., 3., 4., 4.]]]]) + tensor([[[[1., 1., 2., 2.], + [1., 1., 2., 2.], + [3., 3., 4., 4.], + [3., 3., 4., 4.]]]]) >>> # xdoctest: +IGNORE_WANT("other tests seem to modify printing styles") >>> m = nn.Upsample(scale_factor=2, mode='bilinear') # align_corners=False >>> m(input) - tensor([[[[ 1.0000, 1.2500, 1.7500, 2.0000], - [ 1.5000, 1.7500, 2.2500, 2.5000], - [ 2.5000, 2.7500, 3.2500, 3.5000], - [ 3.0000, 3.2500, 3.7500, 4.0000]]]]) + tensor([[[[1.0000, 1.2500, 1.7500, 2.0000], + [1.5000, 1.7500, 2.2500, 2.5000], + [2.5000, 2.7500, 3.2500, 3.5000], + [3.0000, 3.2500, 3.7500, 4.0000]]]]) >>> m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) >>> m(input) - tensor([[[[ 1.0000, 1.3333, 1.6667, 2.0000], - [ 1.6667, 2.0000, 2.3333, 2.6667], - [ 2.3333, 2.6667, 3.0000, 3.3333], - [ 3.0000, 3.3333, 3.6667, 4.0000]]]]) + tensor([[[[1.0000, 1.3333, 1.6667, 2.0000], + [1.6667, 2.0000, 2.3333, 2.6667], + [2.3333, 2.6667, 3.0000, 3.3333], + [3.0000, 3.3333, 3.6667, 4.0000]]]]) >>> # Try scaling the same data in a larger tensor - >>> >>> input_3x3 = torch.zeros(3, 3).view(1, 1, 3, 3) >>> input_3x3[:, :, :2, :2].copy_(input) - tensor([[[[ 1., 2.], - [ 3., 4.]]]]) + tensor([[[[1., 2.], + [3., 4.]]]]) >>> input_3x3 - tensor([[[[ 1., 2., 0.], - [ 3., 4., 0.], - [ 0., 0., 0.]]]]) + tensor([[[[1., 2., 0.], + [3., 4., 0.], + [0., 0., 0.]]]]) >>> # xdoctest: +IGNORE_WANT("seems to fail when other tests are run in the same session") >>> m = nn.Upsample(scale_factor=2, mode='bilinear') # align_corners=False >>> # Notice that values in top left corner are the same with the small input (except at boundary) >>> m(input_3x3) - tensor([[[[ 1.0000, 1.2500, 1.7500, 1.5000, 0.5000, 0.0000], - [ 1.5000, 1.7500, 2.2500, 1.8750, 0.6250, 0.0000], - [ 2.5000, 2.7500, 3.2500, 2.6250, 0.8750, 0.0000], - [ 2.2500, 2.4375, 2.8125, 2.2500, 0.7500, 0.0000], - [ 0.7500, 0.8125, 0.9375, 0.7500, 0.2500, 0.0000], - [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]]]) + tensor([[[[1.0000, 1.2500, 1.7500, 1.5000, 0.5000, 0.0000], + [1.5000, 1.7500, 2.2500, 1.8750, 0.6250, 0.0000], + [2.5000, 2.7500, 3.2500, 2.6250, 0.8750, 0.0000], + [2.2500, 2.4375, 2.8125, 2.2500, 0.7500, 0.0000], + [0.7500, 0.8125, 0.9375, 0.7500, 0.2500, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]]]) >>> m = nn.Upsample(scale_factor=2, mode='bilinear', align_corners=True) >>> # Notice that values in top left corner are now changed >>> m(input_3x3) - tensor([[[[ 1.0000, 1.4000, 1.8000, 1.6000, 0.8000, 0.0000], - [ 1.8000, 2.2000, 2.6000, 2.2400, 1.1200, 0.0000], - [ 2.6000, 3.0000, 3.4000, 2.8800, 1.4400, 0.0000], - [ 2.4000, 2.7200, 3.0400, 2.5600, 1.2800, 0.0000], - [ 1.2000, 1.3600, 1.5200, 1.2800, 0.6400, 0.0000], - [ 0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]]]) + tensor([[[[1.0000, 1.4000, 1.8000, 1.6000, 0.8000, 0.0000], + [1.8000, 2.2000, 2.6000, 2.2400, 1.1200, 0.0000], + [2.6000, 3.0000, 3.4000, 2.8800, 1.4400, 0.0000], + [2.4000, 2.7200, 3.0400, 2.5600, 1.2800, 0.0000], + [1.2000, 1.3600, 1.5200, 1.2800, 0.6400, 0.0000], + [0.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.0000]]]]) """ __constants__ = ['size', 'scale_factor', 'mode', 'align_corners', 'name', 'recompute_scale_factor'] 
name: str @@ -196,15 +196,15 @@ class UpsamplingNearest2d(Upsample): >>> input = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2) >>> input - tensor([[[[ 1., 2.], - [ 3., 4.]]]]) + tensor([[[[1., 2.], + [3., 4.]]]]) >>> m = nn.UpsamplingNearest2d(scale_factor=2) >>> m(input) - tensor([[[[ 1., 1., 2., 2.], - [ 1., 1., 2., 2.], - [ 3., 3., 4., 4.], - [ 3., 3., 4., 4.]]]]) + tensor([[[[1., 1., 2., 2.], + [1., 1., 2., 2.], + [3., 3., 4., 4.], + [3., 3., 4., 4.]]]]) """ def __init__(self, size: Optional[_size_2_t] = None, scale_factor: Optional[_ratio_2_t] = None) -> None: super(UpsamplingNearest2d, self).__init__(size, scale_factor, mode='nearest') @@ -242,16 +242,16 @@ class UpsamplingBilinear2d(Upsample): >>> input = torch.arange(1, 5, dtype=torch.float32).view(1, 1, 2, 2) >>> input - tensor([[[[ 1., 2.], - [ 3., 4.]]]]) + tensor([[[[1., 2.], + [3., 4.]]]]) >>> # xdoctest: +IGNORE_WANT("do other tests modify the global state?") >>> m = nn.UpsamplingBilinear2d(scale_factor=2) >>> m(input) - tensor([[[[ 1.0000, 1.3333, 1.6667, 2.0000], - [ 1.6667, 2.0000, 2.3333, 2.6667], - [ 2.3333, 2.6667, 3.0000, 3.3333], - [ 3.0000, 3.3333, 3.6667, 4.0000]]]]) + tensor([[[[1.0000, 1.3333, 1.6667, 2.0000], + [1.6667, 2.0000, 2.3333, 2.6667], + [2.3333, 2.6667, 3.0000, 3.3333], + [3.0000, 3.3333, 3.6667, 4.0000]]]]) """ def __init__(self, size: Optional[_size_2_t] = None, scale_factor: Optional[_ratio_2_t] = None) -> None: super(UpsamplingBilinear2d, self).__init__(size, scale_factor, mode='bilinear', align_corners=True) diff --git a/torch/nn/quantized/modules/conv.py b/torch/nn/quantized/modules/conv.py index 7c726f7b114f..46acab481140 100644 --- a/torch/nn/quantized/modules/conv.py +++ b/torch/nn/quantized/modules/conv.py @@ -31,6 +31,7 @@ def _reverse_repeat_padding(padding: List[int]) -> List[int]: _reversed_padding_repeated_twice.append(padding[N - idx - 1]) return _reversed_padding_repeated_twice + class _ConvNd(WeightedQuantizedModule): def __init__(self, in_channels, out_channels, kernel_size, stride=1, padding=0, dilation=1, groups=1, bias=True, @@ -266,6 +267,7 @@ class _ConvNd(WeightedQuantizedModule): qconv.zero_point = int(output_zero_point) return qconv + class Conv1d(_ConvNd): r"""Applies a 1D convolution over a quantized input signal composed of several quantized input planes. @@ -295,7 +297,7 @@ class Conv1d(_ConvNd): >>> # quantize input to quint8 >>> # xdoctest: +SKIP >>> q_input = torch.quantize_per_tensor(input, scale=1.0, zero_point=0, - dtype=torch.quint8) + ... dtype=torch.quint8) >>> output = m(q_input) """ @@ -572,6 +574,7 @@ class Conv3d(_ConvNd): # === Transposed Convolutions === MOD = TypeVar('MOD', bound=nn.modules.conv._ConvNd) + class _ConvTransposeNd(_ConvNd): _FLOAT_MODULE = MOD @@ -655,6 +658,7 @@ class _ConvTransposeNd(_ConvNd): qconv.zero_point = int(output_zero_point) return qconv + class ConvTranspose1d(_ConvTransposeNd): r"""Applies a 1D transposed convolution operator over an input image composed of several input planes. 
@@ -675,9 +679,10 @@ class ConvTranspose1d(_ConvTransposeNd):
 
     Examples::
 
+        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
         >>> torch.backends.quantized.engine = 'qnnpack'
+        >>> from torch.nn import quantized as nnq
         >>> # With square kernels and equal stride
-        >>> # xdoctest: +SKIP
         >>> m = nnq.ConvTranspose1d(16, 33, 3, stride=2)
         >>> # non-square kernels and unequal stride and with padding
         >>> m = nnq.ConvTranspose1d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
@@ -692,6 +697,7 @@ class ConvTranspose1d(_ConvTransposeNd):
         >>> h = downsample(q_input)
         >>> h.size()
         torch.Size([1, 16, 6])
+        >>> # xdoctest: +SKIP("FIXME: output_size is not a parameter")
         >>> output = upsample(h, output_size=input.size())
         >>> output.size()
         torch.Size([1, 16, 12])
@@ -763,10 +769,11 @@ class ConvTranspose2d(_ConvTransposeNd):
 
     Examples::
 
+        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
         >>> # QNNPACK or FBGEMM as backend
         >>> torch.backends.quantized.engine = 'qnnpack'
         >>> # With square kernels and equal stride
-        >>> # xdoctest: +SKIP
+        >>> import torch.nn.quantized as nnq
         >>> m = nnq.ConvTranspose2d(16, 33, 3, stride=2)
         >>> # non-square kernels and unequal stride and with padding
         >>> m = nnq.ConvTranspose2d(16, 33, (3, 5), stride=(2, 1), padding=(4, 2))
@@ -781,6 +788,7 @@ class ConvTranspose2d(_ConvTransposeNd):
         >>> h = downsample(q_input)
         >>> h.size()
         torch.Size([1, 16, 6, 6])
+        >>> # xdoctest: +SKIP("FIXME: output_size is not a parameter")
         >>> output = upsample(h, output_size=input.size())
         >>> output.size()
         torch.Size([1, 16, 12, 12])
@@ -834,6 +842,7 @@ class ConvTranspose2d(_ConvTransposeNd):
     def from_reference(cls, ref_qconvt, output_scale, output_zero_point):
         return _ConvTransposeNd.from_reference(cls, ref_qconvt, output_scale, output_zero_point)
 
+
 class ConvTranspose3d(_ConvTransposeNd):
     r"""Applies a 3D transposed convolution operator over an input image
     composed of several input planes.
@@ -854,9 +863,10 @@ class ConvTranspose3d(_ConvTransposeNd):
 
     Examples::
 
+        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_QENGINE)
         >>> torch.backends.quantized.engine = 'fbgemm'
+        >>> from torch.nn import quantized as nnq
         >>> # With cubic kernels and equal stride
-        >>> # xdoctest: +SKIP
         >>> m = nnq.ConvTranspose3d(16, 33, 3, stride=2)
         >>> # non-cubic kernels and unequal stride and with padding
         >>> m = nnq.ConvTranspose3d(16, 33, (3, 3, 5), stride=(2, 1, 1), padding=(4, 2, 2))
@@ -871,6 +881,7 @@ class ConvTranspose3d(_ConvTransposeNd):
         >>> h = downsample(q_input)
         >>> h.size()
         torch.Size([1, 16, 6, 6, 6])
+        >>> # xdoctest: +SKIP("FIXME: output_size is not a parameter")
         >>> output = upsample(h, output_size=input.size())
         >>> output.size()
         torch.Size([1, 16, 12, 12, 12])
diff --git a/torch/nn/utils/parametrizations.py b/torch/nn/utils/parametrizations.py
index 3dd5192c1062..7b097f667671 100644
--- a/torch/nn/utils/parametrizations.py
+++ b/torch/nn/utils/parametrizations.py
@@ -10,6 +10,7 @@ from typing import Optional
 
 __all__ = ['orthogonal', 'spectral_norm']
 
+
 def _is_orthogonal(Q, eps=None):
     n, k = Q.size(-2), Q.size(-1)
     Id = torch.eye(k, dtype=Q.dtype, device=Q.device)
@@ -242,7 +243,7 @@ def orthogonal(module: Module,
 
     Example::
 
-        >>> # xdoctest: +REQUIRES(--lapack)
+        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK)
         >>> orth_linear = orthogonal(nn.Linear(20, 40))
         >>> orth_linear
         ParametrizedLinear(
@@ -459,19 +460,20 @@ def spectral_norm(module: Module,
 
     Example::
 
-        >>> # xdoctest: +REQUIRES(--lapack)
+        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK)
+        >>> # xdoctest: +IGNORE_WANT("non-deterministic")
         >>> snm = spectral_norm(nn.Linear(20, 40))
         >>> snm
         ParametrizedLinear(
-        in_features=20, out_features=40, bias=True
-        (parametrizations): ModuleDict(
+          in_features=20, out_features=40, bias=True
+          (parametrizations): ModuleDict(
             (weight): ParametrizationList(
-            (0): _SpectralNorm()
+              (0): _SpectralNorm()
             )
-        )
+          )
         )
         >>> torch.linalg.matrix_norm(snm.weight, 2)
-        tensor(1.0000, grad_fn=<AmaxBackward0>)
+        tensor(1.0081, grad_fn=<AmaxBackward0>)
diff --git a/torch/nn/utils/parametrize.py b/torch/nn/utils/parametrize.py
index 32d71b42f9ca..b8f8d439c1b7 100644
--- a/torch/nn/utils/parametrize.py
+++ b/torch/nn/utils/parametrize.py
@@ -460,7 +460,7 @@ def register_parametrization(
         ValueError: if the module does not have a parameter or a buffer named :attr:`tensor_name`
 
     Examples:
-        >>> # xdoctest: +REQUIRES(--lapack)
+        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK)
         >>> import torch
         >>> import torch.nn as nn
         >>> import torch.nn.utils.parametrize as P
diff --git a/torch/onnx/_type_utils.py b/torch/onnx/_type_utils.py
index e565c6e9d664..70b1b3f868ec 100644
--- a/torch/onnx/_type_utils.py
+++ b/torch/onnx/_type_utils.py
@@ -55,6 +55,7 @@ class JitScalarType(enum.IntEnum):
 
     Use ``JitScalarType`` to convert from torch and JIT scalar types to ONNX scalar types.
Examples:: + >>> # xdoctest: +IGNORE_WANT("win32 has different output") >>> JitScalarType.from_name("Float").onnx_type() TensorProtoDataType.FLOAT """ diff --git a/torch/optim/lr_scheduler.py b/torch/optim/lr_scheduler.py index 2431d889d1a8..c3c40d1ef0f0 100644 --- a/torch/optim/lr_scheduler.py +++ b/torch/optim/lr_scheduler.py @@ -725,6 +725,7 @@ class PolynomialLR(_LRScheduler): >>> # lr = 0.00050 if epoch == 2 >>> # lr = 0.00025 if epoch == 3 >>> # lr = 0.0 if epoch >= 4 + >>> # xdoctest: +SKIP("undefined vars") >>> scheduler = PolynomialLR(self.opt, total_iters=4, power=1.0) >>> for epoch in range(100): >>> train(...) diff --git a/torch/utils/data/dataset.py b/torch/utils/data/dataset.py index 0b59c43736d3..4cf957034cbd 100644 --- a/torch/utils/data/dataset.py +++ b/torch/utils/data/dataset.py @@ -108,6 +108,7 @@ class IterableDataset(Dataset[T_co]): >>> print(list(torch.utils.data.DataLoader(ds, num_workers=0))) [tensor([3]), tensor([4]), tensor([5]), tensor([6])] + >>> # xdoctest: +REQUIRES(POSIX) >>> # Mult-process loading with two worker processes >>> # Worker 0 fetched [3, 4]. Worker 1 fetched [5, 6]. >>> # xdoctest: +IGNORE_WANT("non deterministic") @@ -116,7 +117,7 @@ class IterableDataset(Dataset[T_co]): >>> # With even more workers >>> # xdoctest: +IGNORE_WANT("non deterministic") - >>> print(list(torch.utils.data.DataLoader(ds, num_workers=20))) + >>> print(list(torch.utils.data.DataLoader(ds, num_workers=12))) [tensor([3]), tensor([5]), tensor([4]), tensor([6])] Example 2: splitting workload across all workers using :attr:`worker_init_fn`:: @@ -161,7 +162,7 @@ class IterableDataset(Dataset[T_co]): [3, 5, 4, 6] >>> # With even more workers - >>> print(list(torch.utils.data.DataLoader(ds, num_workers=20, worker_init_fn=worker_init_fn))) + >>> print(list(torch.utils.data.DataLoader(ds, num_workers=12, worker_init_fn=worker_init_fn))) [3, 4, 5, 6] """ def __iter__(self) -> Iterator[T_co]: diff --git a/torch/utils/dlpack.py b/torch/utils/dlpack.py index 2fe5a5c1d2c0..ae0aafceb178 100644 --- a/torch/utils/dlpack.py +++ b/torch/utils/dlpack.py @@ -42,6 +42,7 @@ Args: The DLPack capsule shares the tensor's memory. """) + # TODO: add a typing.Protocol to be able to tell Mypy that only objects with # __dlpack__ and __dlpack_device__ methods are accepted. def from_dlpack(ext_tensor: Any) -> torch.Tensor: diff --git a/torch/utils/throughput_benchmark.py b/torch/utils/throughput_benchmark.py index 7068f74d0906..1dae4b937783 100644 --- a/torch/utils/throughput_benchmark.py +++ b/torch/utils/throughput_benchmark.py @@ -1,6 +1,7 @@ import torch._C + def format_time(time_us=None, time_ms=None, time_s=None): '''Defines how to format time''' assert sum([time_us is not None, time_ms is not None, time_s is not None]) == 1 @@ -48,7 +49,6 @@ class ExecutionStats(object): return self.num_iters * ( self.latency_avg_ms / 1000.0) / self.benchmark_config.num_calling_threads - def __str__(self): return '\n'.join([ "Average latency per example: " + format_time(time_ms=self.latency_avg_ms),
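Taken together, the patch standardizes four xdoctest directive patterns: `+REQUIRES(env:...)` for feature-gated examples (the matching environment variables are exported by `run_doctests` when the feature is detected), `+REQUIRES(module:...)` for optional dependencies, `+IGNORE_WANT("reason")` for non-deterministic output, and `+SKIP("reason")` for examples that cannot run standalone. A minimal self-contained illustration of these conventions, using a hypothetical `scaled_eye` helper that is not part of the patch:

```python
# Hypothetical illustration (not part of the patch) of the xdoctest
# directive conventions applied throughout this diff.
import torch


def scaled_eye(n: int) -> torch.Tensor:
    """Return an identity matrix scaled by 2.

    Example::

        >>> # xdoctest: +REQUIRES(env:TORCH_DOCTEST_LAPACK)
        >>> # Gated: only runs when run_test.py detects LAPACK and
        >>> # exports TORCH_DOCTEST_LAPACK=1.
        >>> torch.linalg.inv(scaled_eye(2))
        tensor([[0.5000, 0.0000],
                [0.0000, 0.5000]])

        >>> # xdoctest: +IGNORE_WANT("non-deterministic")
        >>> torch.randn(2) + scaled_eye(2)[0]
        tensor([ 2.1320,  0.1345])

        >>> # xdoctest: +SKIP("illustration only, never executed")
        >>> scaled_eye(-1)
    """
    return 2 * torch.eye(n)
```

Under the google style configured in `run_doctests`, xdoctest collects these blocks from every docstring in the torch package, so a directive comment placed on the line before a statement governs only that example.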