Doc test non packages (#110568)

Add non-package Python modules to the public API checks.
The core change is to remove the `ispkg` check on this line:
https://github.com/pytorch/pytorch/blob/main/docs/source/conf.py#L518
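For reference, that check sits inside a `pkgutil.walk_packages` loop over the `torch` package tree; here is a minimal sketch of the relevant logic (variable names are illustrative, not the verbatim conf.py code):

```python
import pkgutil

import torch

public_modules = []
for _, modname, ispkg in pkgutil.walk_packages(
    path=torch.__path__, prefix=torch.__name__ + "."
):
    # Before this PR the walk skipped plain modules (`if not ispkg: continue`);
    # dropping that check pulls every importable submodule into the audit.
    public_modules.append(modname)
```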

Everything else is in support of that change:
- add the appropriate modules to the rst files;
- make sure every module we ship can be imported (fixed by either making optional dependencies actually optional or deleting files that have been un-importable for 3 years);
- make APIs that are both a module and a function (like `torch.autograd.gradcheck`) render properly on the docs website without confusion;
- add every non-documented API to the allow list (~3k of them).
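The importability requirement can be verified with a short walk over the package tree; a hedged sketch (not the actual check PyTorch runs):

```python
import importlib
import pkgutil

import torch

failures = []
for _, modname, _ in pkgutil.walk_packages(torch.__path__, prefix="torch."):
    try:
        importlib.import_module(modname)
    except Exception as exc:  # e.g. an optional dependency that is not installed
        failures.append((modname, exc))

for modname, exc in failures:
    print(f"{modname}: {exc!r}")
```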

Next steps will be to try to fix these missing docs.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/110568
Approved by: https://github.com/zou3519
Author: albanD
Date: 2023-10-06 14:16:01 +00:00
Committed by: PyTorch MergeBot
Parent: a3e5ec453a
Commit: c4db607607
26 changed files with 3820 additions and 662 deletions

View File

@@ -384,3 +384,12 @@ Some ops not listed here (e.g., binary ops like ``add``) natively promote
inputs without autocasting's intervention. If inputs are a mixture of ``bfloat16``
and ``float32``, these ops run in ``float32`` and produce ``float32`` output,
regardless of whether autocast is enabled.
.. This module needs to be documented. Adding here in the meantime
.. for tracking purposes
.. py:module:: torch.amp.autocast_mode
.. py:module:: torch.cpu.amp.autocast_mode
.. py:module:: torch.cuda.amp.autocast_mode
.. py:module:: torch.cuda.amp.common
.. py:module:: torch.cuda.amp.grad_scaler

View File

@@ -214,6 +214,10 @@ When creating a new :class:`Function`, the following methods are available to `c
Numerical gradient checking
^^^^^^^^^^^^^^^^^^^^^^^^^^^
.. automodule:: torch.autograd.gradcheck
.. currentmodule:: torch.autograd.gradcheck
.. autosummary::
:toctree: generated
:nosignatures:
@@ -221,6 +225,9 @@ Numerical gradient checking
gradcheck
gradgradcheck
.. Just to reset the base path for the rest of this file
.. currentmodule:: torch.autograd
Profiler
^^^^^^^^
@@ -309,3 +316,17 @@ Also see :ref:`saved-tensors-hooks-doc`.
.. autoclass:: torch.autograd.graph.register_multi_grad_hook
.. autoclass:: torch.autograd.graph.allow_mutation_on_saved_tensors
.. This module needs to be documented. Adding here in the meantime
.. for tracking purposes
.. py:module:: torch.autograd.anomaly_mode
.. py:module:: torch.autograd.forward_ad
.. py:module:: torch.autograd.function
.. py:module:: torch.autograd.functional
.. py:module:: torch.autograd.grad_mode
.. py:module:: torch.autograd.graph
.. py:module:: torch.autograd.profiler
.. py:module:: torch.autograd.profiler_legacy
.. py:module:: torch.autograd.profiler_util
.. py:module:: torch.autograd.variable
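For context on the module-vs-function ambiguity mentioned in the description: `torch.autograd.gradcheck` names both a submodule and a function re-exported from it, which is why the rst above registers the module with `automodule` and then documents the callables through `autosummary`. A small demonstration:

```python
import importlib

import torch.autograd

fn = torch.autograd.gradcheck  # attribute access yields the re-exported function
mod = importlib.import_module("torch.autograd.gradcheck")  # the submodule itself
print(callable(fn), fn is mod.gradcheck)  # True True
```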

View File

@@ -117,6 +117,7 @@ torch.backends.cudnn
available algorithm. Note that this setting only affects convolutions dispatched via the
cuDNN v8 API.
.. py:module:: torch.backends.cudnn.rnn
torch.backends.mps
^^^^^^^^^^^^^^^^^^
@@ -187,3 +188,4 @@ torch.backends.opt_einsum
torch.backends.xeon
^^^^^^^^^^^^^^^^^^^
.. automodule:: torch.backends.xeon
.. py:module:: torch.backends.xeon.run_cpu

File diff suppressed because it is too large

View File

@@ -153,3 +153,18 @@ See the :doc:`documentation <cuda._sanitizer>` for information on how to use it.
:hidden:
cuda._sanitizer
.. This module needs to be documented. Adding here in the meantime
.. for tracking purposes
.. py:module:: torch.cuda.comm
.. py:module:: torch.cuda.error
.. py:module:: torch.cuda.graphs
.. py:module:: torch.cuda.jiterator
.. py:module:: torch.cuda.memory
.. py:module:: torch.cuda.nccl
.. py:module:: torch.cuda.nvtx
.. py:module:: torch.cuda.profiler
.. py:module:: torch.cuda.random
.. py:module:: torch.cuda.sparse
.. py:module:: torch.cuda.streams

View File

@@ -874,3 +874,118 @@ Distributed components raise custom Exception types derived from `RuntimeError`:
.. py:module:: torch.distributed.pipeline.sync
.. py:module:: torch.distributed.pipeline.sync.skip
.. py:module:: torch.distributed.tensor
.. py:module:: torch.distributed.algorithms.ddp_comm_hooks.ddp_zero_hook
.. py:module:: torch.distributed.algorithms.ddp_comm_hooks.debugging_hooks
.. py:module:: torch.distributed.algorithms.ddp_comm_hooks.default_hooks
.. py:module:: torch.distributed.algorithms.ddp_comm_hooks.mixed_precision_hooks
.. py:module:: torch.distributed.algorithms.ddp_comm_hooks.optimizer_overlap_hooks
.. py:module:: torch.distributed.algorithms.ddp_comm_hooks.post_localSGD_hook
.. py:module:: torch.distributed.algorithms.ddp_comm_hooks.powerSGD_hook
.. py:module:: torch.distributed.algorithms.ddp_comm_hooks.quantization_hooks
.. py:module:: torch.distributed.algorithms.join
.. py:module:: torch.distributed.algorithms.model_averaging.averagers
.. py:module:: torch.distributed.algorithms.model_averaging.hierarchical_model_averager
.. py:module:: torch.distributed.algorithms.model_averaging.utils
.. py:module:: torch.distributed.argparse_util
.. py:module:: torch.distributed.c10d_logger
.. py:module:: torch.distributed.checkpoint.api
.. py:module:: torch.distributed.checkpoint.default_planner
.. py:module:: torch.distributed.checkpoint.filesystem
.. py:module:: torch.distributed.checkpoint.metadata
.. py:module:: torch.distributed.checkpoint.optimizer
.. py:module:: torch.distributed.checkpoint.planner
.. py:module:: torch.distributed.checkpoint.planner_helpers
.. py:module:: torch.distributed.checkpoint.resharding
.. py:module:: torch.distributed.checkpoint.state_dict_loader
.. py:module:: torch.distributed.checkpoint.state_dict_saver
.. py:module:: torch.distributed.checkpoint.storage
.. py:module:: torch.distributed.checkpoint.utils
.. py:module:: torch.distributed.collective_utils
.. py:module:: torch.distributed.constants
.. py:module:: torch.distributed.distributed_c10d
.. py:module:: torch.distributed.elastic.agent.server.api
.. py:module:: torch.distributed.elastic.agent.server.local_elastic_agent
.. py:module:: torch.distributed.elastic.events.api
.. py:module:: torch.distributed.elastic.events.handlers
.. py:module:: torch.distributed.elastic.metrics.api
.. py:module:: torch.distributed.elastic.multiprocessing.api
.. py:module:: torch.distributed.elastic.multiprocessing.errors.error_handler
.. py:module:: torch.distributed.elastic.multiprocessing.errors.handlers
.. py:module:: torch.distributed.elastic.multiprocessing.redirects
.. py:module:: torch.distributed.elastic.multiprocessing.tail_log
.. py:module:: torch.distributed.elastic.rendezvous.api
.. py:module:: torch.distributed.elastic.rendezvous.c10d_rendezvous_backend
.. py:module:: torch.distributed.elastic.rendezvous.dynamic_rendezvous
.. py:module:: torch.distributed.elastic.rendezvous.etcd_rendezvous
.. py:module:: torch.distributed.elastic.rendezvous.etcd_rendezvous_backend
.. py:module:: torch.distributed.elastic.rendezvous.etcd_server
.. py:module:: torch.distributed.elastic.rendezvous.etcd_store
.. py:module:: torch.distributed.elastic.rendezvous.static_tcp_rendezvous
.. py:module:: torch.distributed.elastic.rendezvous.utils
.. py:module:: torch.distributed.elastic.timer.api
.. py:module:: torch.distributed.elastic.timer.file_based_local_timer
.. py:module:: torch.distributed.elastic.timer.local_timer
.. py:module:: torch.distributed.elastic.utils.api
.. py:module:: torch.distributed.elastic.utils.data.cycling_iterator
.. py:module:: torch.distributed.elastic.utils.data.elastic_distributed_sampler
.. py:module:: torch.distributed.elastic.utils.distributed
.. py:module:: torch.distributed.elastic.utils.log_level
.. py:module:: torch.distributed.elastic.utils.logging
.. py:module:: torch.distributed.elastic.utils.store
.. py:module:: torch.distributed.fsdp.api
.. py:module:: torch.distributed.fsdp.fully_sharded_data_parallel
.. py:module:: torch.distributed.fsdp.sharded_grad_scaler
.. py:module:: torch.distributed.fsdp.wrap
.. py:module:: torch.distributed.launcher.api
.. py:module:: torch.distributed.logging_handlers
.. py:module:: torch.distributed.nn.api.remote_module
.. py:module:: torch.distributed.nn.functional
.. py:module:: torch.distributed.nn.jit.instantiator
.. py:module:: torch.distributed.nn.jit.templates.remote_module_template
.. py:module:: torch.distributed.optim.apply_optimizer_in_backward
.. py:module:: torch.distributed.optim.functional_adadelta
.. py:module:: torch.distributed.optim.functional_adagrad
.. py:module:: torch.distributed.optim.functional_adam
.. py:module:: torch.distributed.optim.functional_adamax
.. py:module:: torch.distributed.optim.functional_adamw
.. py:module:: torch.distributed.optim.functional_rmsprop
.. py:module:: torch.distributed.optim.functional_rprop
.. py:module:: torch.distributed.optim.functional_sgd
.. py:module:: torch.distributed.optim.named_optimizer
.. py:module:: torch.distributed.optim.optimizer
.. py:module:: torch.distributed.optim.post_localSGD_optimizer
.. py:module:: torch.distributed.optim.utils
.. py:module:: torch.distributed.optim.zero_redundancy_optimizer
.. py:module:: torch.distributed.pipeline.sync.batchnorm
.. py:module:: torch.distributed.pipeline.sync.checkpoint
.. py:module:: torch.distributed.pipeline.sync.copy
.. py:module:: torch.distributed.pipeline.sync.dependency
.. py:module:: torch.distributed.pipeline.sync.microbatch
.. py:module:: torch.distributed.pipeline.sync.phony
.. py:module:: torch.distributed.pipeline.sync.pipe
.. py:module:: torch.distributed.pipeline.sync.pipeline
.. py:module:: torch.distributed.pipeline.sync.skip.layout
.. py:module:: torch.distributed.pipeline.sync.skip.namespace
.. py:module:: torch.distributed.pipeline.sync.skip.portal
.. py:module:: torch.distributed.pipeline.sync.skip.skippable
.. py:module:: torch.distributed.pipeline.sync.skip.tracker
.. py:module:: torch.distributed.pipeline.sync.stream
.. py:module:: torch.distributed.pipeline.sync.utils
.. py:module:: torch.distributed.pipeline.sync.worker
.. py:module:: torch.distributed.remote_device
.. py:module:: torch.distributed.rendezvous
.. py:module:: torch.distributed.rpc.api
.. py:module:: torch.distributed.rpc.backend_registry
.. py:module:: torch.distributed.rpc.constants
.. py:module:: torch.distributed.rpc.functions
.. py:module:: torch.distributed.rpc.internal
.. py:module:: torch.distributed.rpc.options
.. py:module:: torch.distributed.rpc.rref_proxy
.. py:module:: torch.distributed.rpc.server_process_global_profiler
.. py:module:: torch.distributed.tensor.parallel.api
.. py:module:: torch.distributed.tensor.parallel.ddp
.. py:module:: torch.distributed.tensor.parallel.fsdp
.. py:module:: torch.distributed.tensor.parallel.input_reshard
.. py:module:: torch.distributed.tensor.parallel.style
.. py:module:: torch.distributed.utils
.. py:module:: torch.distributed.checkpoint.state_dict

View File

@@ -394,3 +394,47 @@ Probability distributions - torch.distributions
.. automodule:: torch.distributions.constraint_registry
:members:
:member-order: bysource
.. This module needs to be documented. Adding here in the meantime
.. for tracking purposes
.. py:module:: torch.distributions.bernoulli
.. py:module:: torch.distributions.beta
.. py:module:: torch.distributions.binomial
.. py:module:: torch.distributions.categorical
.. py:module:: torch.distributions.cauchy
.. py:module:: torch.distributions.chi2
.. py:module:: torch.distributions.continuous_bernoulli
.. py:module:: torch.distributions.dirichlet
.. py:module:: torch.distributions.distribution
.. py:module:: torch.distributions.exp_family
.. py:module:: torch.distributions.exponential
.. py:module:: torch.distributions.fishersnedecor
.. py:module:: torch.distributions.gamma
.. py:module:: torch.distributions.geometric
.. py:module:: torch.distributions.gumbel
.. py:module:: torch.distributions.half_cauchy
.. py:module:: torch.distributions.half_normal
.. py:module:: torch.distributions.independent
.. py:module:: torch.distributions.kumaraswamy
.. py:module:: torch.distributions.laplace
.. py:module:: torch.distributions.lkj_cholesky
.. py:module:: torch.distributions.log_normal
.. py:module:: torch.distributions.logistic_normal
.. py:module:: torch.distributions.lowrank_multivariate_normal
.. py:module:: torch.distributions.mixture_same_family
.. py:module:: torch.distributions.multinomial
.. py:module:: torch.distributions.multivariate_normal
.. py:module:: torch.distributions.negative_binomial
.. py:module:: torch.distributions.normal
.. py:module:: torch.distributions.one_hot_categorical
.. py:module:: torch.distributions.pareto
.. py:module:: torch.distributions.poisson
.. py:module:: torch.distributions.relaxed_bernoulli
.. py:module:: torch.distributions.relaxed_categorical
.. py:module:: torch.distributions.studentT
.. py:module:: torch.distributions.transformed_distribution
.. py:module:: torch.distributions.uniform
.. py:module:: torch.distributions.utils
.. py:module:: torch.distributions.von_mises
.. py:module:: torch.distributions.weibull
.. py:module:: torch.distributions.wishart

View File

@@ -577,3 +577,8 @@ API Reference
.. autoclass:: ExportGraphSignature
.. autoclass:: ModuleCallSignature
.. autoclass:: ModuleCallEntry
.. This module needs to be documented. Adding here in the meantime
.. for tracking purposes
.. py:module:: torch.export.exported_program

View File

@@ -1125,3 +1125,75 @@ API Reference
.. py:module:: torch.fx.experimental.migrate_gradual_types
.. py:module:: torch.fx.passes.dialect
.. py:module:: torch.fx.passes.dialect.common
.. py:module:: torch.fx.annotate
.. py:module:: torch.fx.config
.. py:module:: torch.fx.experimental.accelerator_partitioner
.. py:module:: torch.fx.experimental.const_fold
.. py:module:: torch.fx.experimental.debug
.. py:module:: torch.fx.experimental.graph_gradual_typechecker
.. py:module:: torch.fx.experimental.merge_matmul
.. py:module:: torch.fx.experimental.meta_tracer
.. py:module:: torch.fx.experimental.migrate_gradual_types.constraint
.. py:module:: torch.fx.experimental.migrate_gradual_types.constraint_generator
.. py:module:: torch.fx.experimental.migrate_gradual_types.constraint_transformation
.. py:module:: torch.fx.experimental.migrate_gradual_types.operation
.. py:module:: torch.fx.experimental.migrate_gradual_types.transform_to_z3
.. py:module:: torch.fx.experimental.migrate_gradual_types.util
.. py:module:: torch.fx.experimental.migrate_gradual_types.z3_types
.. py:module:: torch.fx.experimental.normalize
.. py:module:: torch.fx.experimental.optimization
.. py:module:: torch.fx.experimental.partitioner_utils
.. py:module:: torch.fx.experimental.proxy_tensor
.. py:module:: torch.fx.experimental.recording
.. py:module:: torch.fx.experimental.refinement_types
.. py:module:: torch.fx.experimental.rewriter
.. py:module:: torch.fx.experimental.schema_type_annotation
.. py:module:: torch.fx.experimental.symbolic_shapes
.. py:module:: torch.fx.experimental.unification.core
.. py:module:: torch.fx.experimental.unification.dispatch
.. py:module:: torch.fx.experimental.unification.match
.. py:module:: torch.fx.experimental.unification.more
.. py:module:: torch.fx.experimental.unification.multipledispatch.conflict
.. py:module:: torch.fx.experimental.unification.multipledispatch.core
.. py:module:: torch.fx.experimental.unification.multipledispatch.dispatcher
.. py:module:: torch.fx.experimental.unification.multipledispatch.utils
.. py:module:: torch.fx.experimental.unification.multipledispatch.variadic
.. py:module:: torch.fx.experimental.unification.unification_tools
.. py:module:: torch.fx.experimental.unification.utils
.. py:module:: torch.fx.experimental.unification.variable
.. py:module:: torch.fx.experimental.unify_refinements
.. py:module:: torch.fx.experimental.validator
.. py:module:: torch.fx.graph
.. py:module:: torch.fx.graph_module
.. py:module:: torch.fx.immutable_collections
.. py:module:: torch.fx.interpreter
.. py:module:: torch.fx.node
.. py:module:: torch.fx.operator_schemas
.. py:module:: torch.fx.passes.annotate_getitem_nodes
.. py:module:: torch.fx.passes.backends.cudagraphs
.. py:module:: torch.fx.passes.dialect.common.cse_pass
.. py:module:: torch.fx.passes.fake_tensor_prop
.. py:module:: torch.fx.passes.graph_drawer
.. py:module:: torch.fx.passes.graph_manipulation
.. py:module:: torch.fx.passes.infra.partitioner
.. py:module:: torch.fx.passes.infra.pass_base
.. py:module:: torch.fx.passes.infra.pass_manager
.. py:module:: torch.fx.passes.net_min_base
.. py:module:: torch.fx.passes.operator_support
.. py:module:: torch.fx.passes.param_fetch
.. py:module:: torch.fx.passes.pass_manager
.. py:module:: torch.fx.passes.reinplace
.. py:module:: torch.fx.passes.shape_prop
.. py:module:: torch.fx.passes.split_module
.. py:module:: torch.fx.passes.split_utils
.. py:module:: torch.fx.passes.splitter_base
.. py:module:: torch.fx.passes.tests.test_pass_manager
.. py:module:: torch.fx.passes.tools_common
.. py:module:: torch.fx.passes.utils.common
.. py:module:: torch.fx.passes.utils.fuser_utils
.. py:module:: torch.fx.passes.utils.matcher_utils
.. py:module:: torch.fx.passes.utils.source_matcher_utils
.. py:module:: torch.fx.proxy
.. py:module:: torch.fx.subgraph_rewriter
.. py:module:: torch.fx.tensor_type
.. py:module:: torch.fx.traceback

View File

@@ -886,3 +886,7 @@ References
.. This package is missing doc. Adding it here for coverage
.. This does not add anything to the rendered page.
.. py:module:: torch.jit.mobile
.. py:module:: torch.jit.annotations
.. py:module:: torch.jit.frontend
.. py:module:: torch.jit.generate_bytecode
.. py:module:: torch.jit.quantized

View File

@@ -1,5 +1,6 @@
torch.library
===================================
.. py:module:: torch.library
Python operator registration API provides capabilities for extending PyTorch's core library
of operators with user defined operators. Currently, this can be done in two ways:
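As a hedged illustration of the registration API this page documents (the namespace `mylib` and operator `double_it` are made up):

```python
import torch
from torch.library import Library

mylib = Library("mylib", "DEF")  # "DEF": define a brand-new operator library
mylib.define("double_it(Tensor x) -> Tensor")

def double_it_cpu(x):
    return 2 * x

mylib.impl("double_it", double_it_cpu, "CPU")  # register the CPU kernel

print(torch.ops.mylib.double_it(torch.ones(3)))  # tensor([2., 2., 2.])
```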

View File

@@ -295,3 +295,12 @@ The following ops are currently supported:
Tensor.reshape
Tensor.reshape_as
Tensor.view
.. This module needs to be documented. Adding here in the meantime
.. for tracking purposes
.. py:module:: torch.masked.maskedtensor.binary
.. py:module:: torch.masked.maskedtensor.core
.. py:module:: torch.masked.maskedtensor.creation
.. py:module:: torch.masked.maskedtensor.passthrough
.. py:module:: torch.masked.maskedtensor.reductions
.. py:module:: torch.masked.maskedtensor.unary

View File

@@ -34,3 +34,9 @@ MPS Event
:nosignatures:
event.Event
.. This module needs to be documented. Adding here in the meantime
.. for tracking purposes
.. py:module:: torch.mps.event
.. py:module:: torch.mps.profiler

View File

@@ -174,10 +174,23 @@ The ``spawn`` function below addresses these concerns and takes care
of error propagation, out of order termination, and will actively
terminate processes upon detecting an error in one of them.
.. automodule:: torch.multiprocessing.spawn
.. currentmodule:: torch.multiprocessing.spawn
.. autofunction:: spawn
.. currentmodule:: torch.multiprocessing
.. class:: SpawnContext
Returned by :func:`~spawn` when called with ``join=False``.
.. automethod:: join
.. This module needs to be documented. Adding here in the meantime
.. for tracking purposes
.. py:module:: torch.multiprocessing.pool
.. py:module:: torch.multiprocessing.queue
.. py:module:: torch.multiprocessing.reductions
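A hedged usage sketch of the ``spawn`` API described above (the ``worker`` function and its argument are illustrative):

```python
import torch.multiprocessing as mp

def worker(rank, msg):
    # An uncaught exception raised here propagates to the parent, which then
    # actively terminates the remaining worker processes.
    print(f"[rank {rank}] {msg}")

if __name__ == "__main__":
    mp.spawn(worker, args=("hello",), nprocs=4, join=True)
```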

View File

@@ -477,6 +477,55 @@ Lazy Modules Initialization
nn.modules.lazy.LazyModuleMixin
.. This module is kept only for backward compatibility
.. This module needs to be documented. Adding here in the meantime
.. for tracking purposes
.. py:module:: torch.nn.backends
.. py:module:: torch.nn.utils.stateless
.. py:module:: torch.nn.backends.thnn
.. py:module:: torch.nn.common_types
.. py:module:: torch.nn.cpp
.. py:module:: torch.nn.functional
.. py:module:: torch.nn.grad
.. py:module:: torch.nn.init
.. py:module:: torch.nn.modules.activation
.. py:module:: torch.nn.modules.adaptive
.. py:module:: torch.nn.modules.batchnorm
.. py:module:: torch.nn.modules.channelshuffle
.. py:module:: torch.nn.modules.container
.. py:module:: torch.nn.modules.conv
.. py:module:: torch.nn.modules.distance
.. py:module:: torch.nn.modules.dropout
.. py:module:: torch.nn.modules.flatten
.. py:module:: torch.nn.modules.fold
.. py:module:: torch.nn.modules.instancenorm
.. py:module:: torch.nn.modules.lazy
.. py:module:: torch.nn.modules.linear
.. py:module:: torch.nn.modules.loss
.. py:module:: torch.nn.modules.module
.. py:module:: torch.nn.modules.normalization
.. py:module:: torch.nn.modules.padding
.. py:module:: torch.nn.modules.pixelshuffle
.. py:module:: torch.nn.modules.pooling
.. py:module:: torch.nn.modules.rnn
.. py:module:: torch.nn.modules.sparse
.. py:module:: torch.nn.modules.transformer
.. py:module:: torch.nn.modules.upsampling
.. py:module:: torch.nn.modules.utils
.. py:module:: torch.nn.parallel.comm
.. py:module:: torch.nn.parallel.data_parallel
.. py:module:: torch.nn.parallel.distributed
.. py:module:: torch.nn.parallel.parallel_apply
.. py:module:: torch.nn.parallel.replicate
.. py:module:: torch.nn.parallel.scatter_gather
.. py:module:: torch.nn.parameter
.. py:module:: torch.nn.utils.clip_grad
.. py:module:: torch.nn.utils.convert_parameters
.. py:module:: torch.nn.utils.fusion
.. py:module:: torch.nn.utils.init
.. py:module:: torch.nn.utils.memory_format
.. py:module:: torch.nn.utils.parametrizations
.. py:module:: torch.nn.utils.parametrize
.. py:module:: torch.nn.utils.prune
.. py:module:: torch.nn.utils.rnn
.. py:module:: torch.nn.utils.spectral_norm
.. py:module:: torch.nn.utils.weight_norm

View File

@@ -62,3 +62,24 @@ also be interested in reading our `development wiki <https://github.com/pytorch/
onnx_dynamo
onnx_dynamo_onnxruntime_backend
onnx_torchscript
.. This module needs to be documented. Adding here in the meantime
.. for tracking purposes
.. py:module:: torch.onnx.errors
.. py:module:: torch.onnx.operators
.. py:module:: torch.onnx.symbolic_caffe2
.. py:module:: torch.onnx.symbolic_helper
.. py:module:: torch.onnx.symbolic_opset10
.. py:module:: torch.onnx.symbolic_opset11
.. py:module:: torch.onnx.symbolic_opset13
.. py:module:: torch.onnx.symbolic_opset14
.. py:module:: torch.onnx.symbolic_opset15
.. py:module:: torch.onnx.symbolic_opset16
.. py:module:: torch.onnx.symbolic_opset17
.. py:module:: torch.onnx.symbolic_opset18
.. py:module:: torch.onnx.symbolic_opset7
.. py:module:: torch.onnx.symbolic_opset8
.. py:module:: torch.onnx.symbolic_opset9
.. py:module:: torch.onnx.utils
.. py:module:: torch.onnx.verification
.. py:module:: torch.onnx.symbolic_opset12

View File

@@ -409,3 +409,23 @@ We train the model for a total of 300 epochs and start to collect EMA averages i
>>> torch.optim.swa_utils.update_bn(loader, ema_model)
>>> # Use ema_model to make predictions on test data
>>> preds = ema_model(test_input)
.. This module needs to be documented. Adding here in the meantime
.. for tracking purposes
.. py:module:: torch.optim.adadelta
.. py:module:: torch.optim.adagrad
.. py:module:: torch.optim.adam
.. py:module:: torch.optim.adamax
.. py:module:: torch.optim.adamw
.. py:module:: torch.optim.asgd
.. py:module:: torch.optim.lbfgs
.. py:module:: torch.optim.lr_scheduler
.. py:module:: torch.optim.nadam
.. py:module:: torch.optim.optimizer
.. py:module:: torch.optim.radam
.. py:module:: torch.optim.rmsprop
.. py:module:: torch.optim.rprop
.. py:module:: torch.optim.sgd
.. py:module:: torch.optim.sparse_adam
.. py:module:: torch.optim.swa_utils

View File

@@ -515,7 +515,7 @@ The ``torch.package`` format makes no guarantees about the contents of ``.data/``
Currently, the ``.data/`` directory contains the following items:
* ``version``: a version number for the serialized format, so that the ``torch.package`` import infrastructure knows how to load this package.
-* ``extern_modules``: a list of modules that are considered ``extern:class:`PackageImporter`. ``extern`` modules will be imported using the loading environments system importer.
+* ``extern_modules``: a list of modules that are considered ``extern``. ``extern`` modules will be imported using the loading environments system importer.
* ``*.storage``: serialized tensor data.
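A hedged sketch of how a module ends up in ``extern_modules`` (the file name ``example.pt`` is illustrative):

```python
import numpy as np
from torch.package import PackageExporter

with PackageExporter("example.pt") as exporter:
    # Modules matching "numpy.**" are recorded in .data/extern_modules and are
    # resolved by the loading environment's system importer at import time.
    exporter.extern("numpy.**")
    exporter.save_pickle("data", "array.pkl", np.ones(3))
```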
@@ -817,3 +817,16 @@ API Reference
.. autoclass:: torch.package.Directory
:members:
.. This module needs to be documented. Adding here in the meantime
.. for tracking purposes
.. py:module:: torch.package.analyze.find_first_use_of_broken_modules
.. py:module:: torch.package.analyze.is_from_package
.. py:module:: torch.package.analyze.trace_dependencies
.. py:module:: torch.package.file_structure_representation
.. py:module:: torch.package.find_file_dependencies
.. py:module:: torch.package.glob_group
.. py:module:: torch.package.importer
.. py:module:: torch.package.package_exporter
.. py:module:: torch.package.package_importer

View File

@@ -37,3 +37,9 @@ Intel Instrumentation and Tracing Technology APIs
.. autofunction:: torch.profiler.itt.range_push
.. autofunction:: torch.profiler.itt.range_pop
.. This module needs to be documented. Adding here in the meantime
.. for tracking purposes
.. py:module:: torch.profiler.itt
.. py:module:: torch.profiler.profiler
.. py:module:: torch.profiler.python_tracer

View File

@@ -31,7 +31,7 @@ Preparing model for quantization
:nosignatures:
:template: classtemplate.rst
-   fuse_modules
+   fuse_modules.fuse_modules
QuantStub
DeQuantStub
QuantWrapper

View File

@@ -427,7 +427,7 @@ to do the following in addition:
determine output quantization parameters.
3. Fuse modules: combine operations/modules into a single module to obtain
higher accuracy and performance. This is done using the
-:func:`~torch.ao.quantization.fuse_modules` API, which takes in lists of modules
+:func:`~torch.ao.quantization.fuse_modules.fuse_modules` API, which takes in lists of modules
to be fused. We currently support the following fusions:
[Conv, Relu], [Conv, BatchNorm], [Conv, BatchNorm, Relu], [Linear, Relu]
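A hedged sketch of one of the supported fusions (the toy ``Sequential`` below is illustrative):

```python
import torch
from torch.ao.quantization import fuse_modules

model = torch.nn.Sequential(
    torch.nn.Conv2d(3, 8, kernel_size=3),
    torch.nn.BatchNorm2d(8),
    torch.nn.ReLU(),
)
# Post-training fusion expects eval mode; ["0", "1", "2"] names the
# Conv/BatchNorm/ReLU children, matching the [Conv, BatchNorm, Relu] pattern.
fused = fuse_modules(model.eval(), [["0", "1", "2"]])
```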
@@ -1246,3 +1246,159 @@ Please take a look at `Limitations of Symbolic Tracing <https://pytorch.org/docs
.. py:module:: torch.ao.pruning
.. py:module:: torch.ao.pruning.scheduler
.. py:module:: torch.ao.pruning.sparsifier
.. py:module:: torch.ao.nn.intrinsic.modules.fused
.. py:module:: torch.ao.nn.intrinsic.qat.modules.conv_fused
.. py:module:: torch.ao.nn.intrinsic.qat.modules.linear_fused
.. py:module:: torch.ao.nn.intrinsic.qat.modules.linear_relu
.. py:module:: torch.ao.nn.intrinsic.quantized.dynamic.modules.linear_relu
.. py:module:: torch.ao.nn.intrinsic.quantized.modules.bn_relu
.. py:module:: torch.ao.nn.intrinsic.quantized.modules.conv_add
.. py:module:: torch.ao.nn.intrinsic.quantized.modules.conv_relu
.. py:module:: torch.ao.nn.intrinsic.quantized.modules.linear_relu
.. py:module:: torch.ao.nn.qat.dynamic.modules.linear
.. py:module:: torch.ao.nn.qat.modules.conv
.. py:module:: torch.ao.nn.qat.modules.embedding_ops
.. py:module:: torch.ao.nn.qat.modules.linear
.. py:module:: torch.ao.nn.quantizable.modules.activation
.. py:module:: torch.ao.nn.quantizable.modules.rnn
.. py:module:: torch.ao.nn.quantized.dynamic.modules.conv
.. py:module:: torch.ao.nn.quantized.dynamic.modules.linear
.. py:module:: torch.ao.nn.quantized.dynamic.modules.rnn
.. py:module:: torch.ao.nn.quantized.modules.activation
.. py:module:: torch.ao.nn.quantized.modules.batchnorm
.. py:module:: torch.ao.nn.quantized.modules.conv
.. py:module:: torch.ao.nn.quantized.modules.dropout
.. py:module:: torch.ao.nn.quantized.modules.embedding_ops
.. py:module:: torch.ao.nn.quantized.modules.functional_modules
.. py:module:: torch.ao.nn.quantized.modules.linear
.. py:module:: torch.ao.nn.quantized.modules.normalization
.. py:module:: torch.ao.nn.quantized.modules.rnn
.. py:module:: torch.ao.nn.quantized.modules.utils
.. py:module:: torch.ao.nn.quantized.reference.modules.conv
.. py:module:: torch.ao.nn.quantized.reference.modules.linear
.. py:module:: torch.ao.nn.quantized.reference.modules.rnn
.. py:module:: torch.ao.nn.quantized.reference.modules.sparse
.. py:module:: torch.ao.nn.quantized.reference.modules.utils
.. py:module:: torch.ao.nn.sparse.quantized.dynamic.linear
.. py:module:: torch.ao.nn.sparse.quantized.linear
.. py:module:: torch.ao.nn.sparse.quantized.utils
.. py:module:: torch.ao.ns.fx.graph_matcher
.. py:module:: torch.ao.ns.fx.graph_passes
.. py:module:: torch.ao.ns.fx.mappings
.. py:module:: torch.ao.ns.fx.n_shadows_utils
.. py:module:: torch.ao.ns.fx.ns_types
.. py:module:: torch.ao.ns.fx.pattern_utils
.. py:module:: torch.ao.ns.fx.qconfig_multi_mapping
.. py:module:: torch.ao.ns.fx.utils
.. py:module:: torch.ao.ns.fx.weight_utils
.. py:module:: torch.ao.pruning.scheduler.base_scheduler
.. py:module:: torch.ao.pruning.scheduler.cubic_scheduler
.. py:module:: torch.ao.pruning.scheduler.lambda_scheduler
.. py:module:: torch.ao.pruning.sparsifier.base_sparsifier
.. py:module:: torch.ao.pruning.sparsifier.nearly_diagonal_sparsifier
.. py:module:: torch.ao.pruning.sparsifier.utils
.. py:module:: torch.ao.pruning.sparsifier.weight_norm_sparsifier
.. py:module:: torch.ao.quantization.backend_config.backend_config
.. py:module:: torch.ao.quantization.backend_config.executorch
.. py:module:: torch.ao.quantization.backend_config.fbgemm
.. py:module:: torch.ao.quantization.backend_config.native
.. py:module:: torch.ao.quantization.backend_config.observation_type
.. py:module:: torch.ao.quantization.backend_config.onednn
.. py:module:: torch.ao.quantization.backend_config.qnnpack
.. py:module:: torch.ao.quantization.backend_config.tensorrt
.. py:module:: torch.ao.quantization.backend_config.utils
.. py:module:: torch.ao.quantization.backend_config.x86
.. py:module:: torch.ao.quantization.fake_quantize
.. py:module:: torch.ao.quantization.fuser_method_mappings
.. py:module:: torch.ao.quantization.fuse_modules
.. py:module:: torch.ao.quantization.fx.convert
.. py:module:: torch.ao.quantization.fx.custom_config
.. py:module:: torch.ao.quantization.fx.fuse
.. py:module:: torch.ao.quantization.fx.fuse_handler
.. py:module:: torch.ao.quantization.fx.graph_module
.. py:module:: torch.ao.quantization.fx.lower_to_fbgemm
.. py:module:: torch.ao.quantization.fx.lower_to_qnnpack
.. py:module:: torch.ao.quantization.fx.lstm_utils
.. py:module:: torch.ao.quantization.fx.match_utils
.. py:module:: torch.ao.quantization.fx.pattern_utils
.. py:module:: torch.ao.quantization.fx.prepare
.. py:module:: torch.ao.quantization.fx.qconfig_mapping_utils
.. py:module:: torch.ao.quantization.fx.quantize_handler
.. py:module:: torch.ao.quantization.fx.tracer
.. py:module:: torch.ao.quantization.fx.utils
.. py:module:: torch.ao.quantization.observer
.. py:module:: torch.ao.quantization.pt2e.duplicate_dq_pass
.. py:module:: torch.ao.quantization.pt2e.eval_utils
.. py:module:: torch.ao.quantization.pt2e.graph_utils
.. py:module:: torch.ao.quantization.pt2e.port_metadata_pass
.. py:module:: torch.ao.quantization.pt2e.prepare
.. py:module:: torch.ao.quantization.pt2e.qat_utils
.. py:module:: torch.ao.quantization.pt2e.representation.rewrite
.. py:module:: torch.ao.quantization.pt2e.utils
.. py:module:: torch.ao.quantization.qconfig
.. py:module:: torch.ao.quantization.qconfig_mapping
.. py:module:: torch.ao.quantization.quant_type
.. py:module:: torch.ao.quantization.quantization_mappings
.. py:module:: torch.ao.quantization.quantize
.. py:module:: torch.ao.quantization.quantize_fx
.. py:module:: torch.ao.quantization.quantize_jit
.. py:module:: torch.ao.quantization.quantize_pt2e
.. py:module:: torch.ao.quantization.quantizer.composable_quantizer
.. py:module:: torch.ao.quantization.quantizer.embedding_quantizer
.. py:module:: torch.ao.quantization.quantizer.quantizer
.. py:module:: torch.ao.quantization.quantizer.utils
.. py:module:: torch.ao.quantization.quantizer.x86_inductor_quantizer
.. py:module:: torch.ao.quantization.quantizer.xnnpack_quantizer
.. py:module:: torch.ao.quantization.quantizer.xnnpack_quantizer_utils
.. py:module:: torch.ao.quantization.stubs
.. py:module:: torch.ao.quantization.utils
.. py:module:: torch.nn.intrinsic.modules.fused
.. py:module:: torch.nn.intrinsic.qat.modules.conv_fused
.. py:module:: torch.nn.intrinsic.qat.modules.linear_fused
.. py:module:: torch.nn.intrinsic.qat.modules.linear_relu
.. py:module:: torch.nn.intrinsic.quantized.dynamic.modules.linear_relu
.. py:module:: torch.nn.intrinsic.quantized.modules.bn_relu
.. py:module:: torch.nn.intrinsic.quantized.modules.conv_relu
.. py:module:: torch.nn.intrinsic.quantized.modules.linear_relu
.. py:module:: torch.nn.qat.dynamic.modules.linear
.. py:module:: torch.nn.qat.modules.conv
.. py:module:: torch.nn.qat.modules.embedding_ops
.. py:module:: torch.nn.qat.modules.linear
.. py:module:: torch.nn.quantizable.modules.activation
.. py:module:: torch.nn.quantizable.modules.rnn
.. py:module:: torch.nn.quantized.dynamic.modules.conv
.. py:module:: torch.nn.quantized.dynamic.modules.linear
.. py:module:: torch.nn.quantized.dynamic.modules.rnn
.. py:module:: torch.nn.quantized.functional
.. py:module:: torch.nn.quantized.modules.activation
.. py:module:: torch.nn.quantized.modules.batchnorm
.. py:module:: torch.nn.quantized.modules.conv
.. py:module:: torch.nn.quantized.modules.dropout
.. py:module:: torch.nn.quantized.modules.embedding_ops
.. py:module:: torch.nn.quantized.modules.functional_modules
.. py:module:: torch.nn.quantized.modules.linear
.. py:module:: torch.nn.quantized.modules.normalization
.. py:module:: torch.nn.quantized.modules.rnn
.. py:module:: torch.nn.quantized.modules.utils
.. py:module:: torch.quantization.fake_quantize
.. py:module:: torch.quantization.fuse_modules
.. py:module:: torch.quantization.fuser_method_mappings
.. py:module:: torch.quantization.fx.convert
.. py:module:: torch.quantization.fx.fuse
.. py:module:: torch.quantization.fx.fusion_patterns
.. py:module:: torch.quantization.fx.graph_module
.. py:module:: torch.quantization.fx.match_utils
.. py:module:: torch.quantization.fx.pattern_utils
.. py:module:: torch.quantization.fx.prepare
.. py:module:: torch.quantization.fx.quantization_patterns
.. py:module:: torch.quantization.fx.quantization_types
.. py:module:: torch.quantization.fx.utils
.. py:module:: torch.quantization.observer
.. py:module:: torch.quantization.qconfig
.. py:module:: torch.quantization.quant_type
.. py:module:: torch.quantization.quantization_mappings
.. py:module:: torch.quantization.quantize
.. py:module:: torch.quantization.quantize_fx
.. py:module:: torch.quantization.quantize_jit
.. py:module:: torch.quantization.stubs
.. py:module:: torch.quantization.utils

View File

@@ -2,6 +2,7 @@
torch.overrides
---------------
.. py:module:: torch.overrides
This module exposes various helper functions for the ``__torch_function__``
protocol. See :ref:`extending-torch-python` for more details on the
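A minimal sketch of the protocol these helpers serve (the ``LoggingTensor`` class is illustrative):

```python
import torch

class LoggingTensor(torch.Tensor):
    # Log each intercepted op, then defer to the default Tensor behavior.
    @classmethod
    def __torch_function__(cls, func, types, args=(), kwargs=None):
        print("intercepted:", torch.overrides.resolve_name(func))
        return super().__torch_function__(func, types, args, kwargs or {})

t = LoggingTensor([1.0, 2.0])
t + t  # prints something like "intercepted: torch.Tensor.add"
```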

View File

@@ -744,3 +744,13 @@ Operator Tags
.. for tracking purposes
.. py:module:: torch.utils.model_dump
.. py:module:: torch.utils.viz
.. py:module:: torch.functional
.. py:module:: torch.quasirandom
.. py:module:: torch.return_types
.. py:module:: torch.serialization
.. py:module:: torch.signal.windows.windows
.. py:module:: torch.sparse.semi_structured
.. py:module:: torch.storage
.. py:module:: torch.torch_version
.. py:module:: torch.types
.. py:module:: torch.version

View File

@@ -11,3 +11,79 @@ torch.utils
generate_methods_for_privateuse1_backend
get_cpp_backtrace
set_module
.. This module needs to be documented. Adding here in the meantime
.. for tracking purposes
.. py:module:: torch.utils.backend_registration
.. py:module:: torch.utils.benchmark.examples.blas_compare_setup
.. py:module:: torch.utils.benchmark.examples.compare
.. py:module:: torch.utils.benchmark.examples.fuzzer
.. py:module:: torch.utils.benchmark.examples.op_benchmark
.. py:module:: torch.utils.benchmark.examples.simple_timeit
.. py:module:: torch.utils.benchmark.examples.spectral_ops_fuzz_test
.. py:module:: torch.utils.benchmark.op_fuzzers.binary
.. py:module:: torch.utils.benchmark.op_fuzzers.sparse_binary
.. py:module:: torch.utils.benchmark.op_fuzzers.sparse_unary
.. py:module:: torch.utils.benchmark.op_fuzzers.spectral
.. py:module:: torch.utils.benchmark.op_fuzzers.unary
.. py:module:: torch.utils.benchmark.utils.common
.. py:module:: torch.utils.benchmark.utils.compare
.. py:module:: torch.utils.benchmark.utils.compile
.. py:module:: torch.utils.benchmark.utils.cpp_jit
.. py:module:: torch.utils.benchmark.utils.fuzzer
.. py:module:: torch.utils.benchmark.utils.sparse_fuzzer
.. py:module:: torch.utils.benchmark.utils.timer
.. py:module:: torch.utils.benchmark.utils.valgrind_wrapper.timer_interface
.. py:module:: torch.utils.bundled_inputs
.. py:module:: torch.utils.checkpoint
.. py:module:: torch.utils.collect_env
.. py:module:: torch.utils.cpp_backtrace
.. py:module:: torch.utils.cpp_extension
.. py:module:: torch.utils.data.backward_compatibility
.. py:module:: torch.utils.data.dataloader
.. py:module:: torch.utils.data.datapipes.dataframe.dataframe_wrapper
.. py:module:: torch.utils.data.datapipes.dataframe.dataframes
.. py:module:: torch.utils.data.datapipes.dataframe.datapipes
.. py:module:: torch.utils.data.datapipes.dataframe.structures
.. py:module:: torch.utils.data.datapipes.datapipe
.. py:module:: torch.utils.data.datapipes.gen_pyi
.. py:module:: torch.utils.data.datapipes.iter.callable
.. py:module:: torch.utils.data.datapipes.iter.combinatorics
.. py:module:: torch.utils.data.datapipes.iter.combining
.. py:module:: torch.utils.data.datapipes.iter.filelister
.. py:module:: torch.utils.data.datapipes.iter.fileopener
.. py:module:: torch.utils.data.datapipes.iter.grouping
.. py:module:: torch.utils.data.datapipes.iter.routeddecoder
.. py:module:: torch.utils.data.datapipes.iter.selecting
.. py:module:: torch.utils.data.datapipes.iter.sharding
.. py:module:: torch.utils.data.datapipes.iter.streamreader
.. py:module:: torch.utils.data.datapipes.iter.utils
.. py:module:: torch.utils.data.datapipes.map.callable
.. py:module:: torch.utils.data.datapipes.map.combinatorics
.. py:module:: torch.utils.data.datapipes.map.combining
.. py:module:: torch.utils.data.datapipes.map.grouping
.. py:module:: torch.utils.data.datapipes.map.utils
.. py:module:: torch.utils.data.datapipes.utils.common
.. py:module:: torch.utils.data.datapipes.utils.decoder
.. py:module:: torch.utils.data.datapipes.utils.snapshot
.. py:module:: torch.utils.data.dataset
.. py:module:: torch.utils.data.distributed
.. py:module:: torch.utils.data.graph
.. py:module:: torch.utils.data.graph_settings
.. py:module:: torch.utils.data.sampler
.. py:module:: torch.utils.dlpack
.. py:module:: torch.utils.file_baton
.. py:module:: torch.utils.flop_counter
.. py:module:: torch.utils.hipify.constants
.. py:module:: torch.utils.hipify.cuda_to_hip_mappings
.. py:module:: torch.utils.hipify.hipify_python
.. py:module:: torch.utils.hipify.version
.. py:module:: torch.utils.hooks
.. py:module:: torch.utils.jit.log_extract
.. py:module:: torch.utils.mkldnn
.. py:module:: torch.utils.mobile_optimizer
.. py:module:: torch.utils.show_pickle
.. py:module:: torch.utils.tensorboard.summary
.. py:module:: torch.utils.tensorboard.writer
.. py:module:: torch.utils.throughput_benchmark
.. py:module:: torch.utils.weak

View File

@@ -1,231 +0,0 @@
import argparse
import datetime
import itertools as it
import multiprocessing
import multiprocessing.dummy
import os
import queue
import pickle
import shutil
import subprocess
import sys
import tempfile
import threading
import time
from typing import Tuple, Dict
from . import blas_compare_setup
MIN_RUN_TIME = 1
NUM_REPLICATES = 20
NUM_THREAD_SETTINGS = (1, 2, 4)
RESULT_FILE = os.path.join(blas_compare_setup.WORKING_ROOT, "blas_results.pkl")
SCRATCH_DIR = os.path.join(blas_compare_setup.WORKING_ROOT, "scratch")
BLAS_CONFIGS = (
("MKL (2020.3)", blas_compare_setup.MKL_2020_3, None),
("MKL (2020.0)", blas_compare_setup.MKL_2020_0, None),
("OpenBLAS", blas_compare_setup.OPEN_BLAS, None)
)
_RESULT_FILE_LOCK = threading.Lock()
_WORKER_POOL: queue.Queue[Tuple[str, str, int]] = queue.Queue()
def clear_worker_pool():
while not _WORKER_POOL.empty():
_, result_file, _ = _WORKER_POOL.get_nowait()
os.remove(result_file)
if os.path.exists(SCRATCH_DIR):
shutil.rmtree(SCRATCH_DIR)
def fill_core_pool(n: int):
clear_worker_pool()
os.makedirs(SCRATCH_DIR)
# Reserve two cores so that bookkeeping does not interfere with runs.
cpu_count = multiprocessing.cpu_count() - 2
# Adjacent cores sometimes share cache, so we space out single core runs.
step = max(n, 2)
for i in range(0, cpu_count, step):
core_str = f"{i}" if n == 1 else f"{i},{i + n - 1}"
_, result_file = tempfile.mkstemp(suffix=".pkl", prefix=SCRATCH_DIR)
_WORKER_POOL.put((core_str, result_file, n))
def _subprocess_main(seed=0, num_threads=1, sub_label="N/A", result_file=None, env=None):
import torch
from torch.utils.benchmark import Timer
conda_prefix = os.getenv("CONDA_PREFIX")
assert conda_prefix
if not torch.__file__.startswith(conda_prefix):
raise ValueError(
f"PyTorch mismatch: `import torch` resolved to `{torch.__file__}`, "
f"which is not in the correct conda env: {conda_prefix}"
)
torch.manual_seed(seed)
results = []
for n in [4, 8, 16, 32, 64, 128, 256, 512, 1024, 7, 96, 150, 225]:
dtypes = (("Single", torch.float32), ("Double", torch.float64))
shapes = (
# Square MatMul
((n, n), (n, n), "(n x n) x (n x n)", "Matrix-Matrix Product"),
# Matrix-Vector product
((n, n), (n, 1), "(n x n) x (n x 1)", "Matrix-Vector Product"),
)
for (dtype_name, dtype), (x_shape, y_shape, shape_str, blas_type) in it.product(dtypes, shapes):
t = Timer(
stmt="torch.mm(x, y)",
label=f"torch.mm {shape_str} {blas_type} ({dtype_name})",
sub_label=sub_label,
description=f"n = {n}",
env=os.path.split(env or "")[1] or None,
globals={
"x": torch.rand(x_shape, dtype=dtype),
"y": torch.rand(y_shape, dtype=dtype),
},
num_threads=num_threads,
).blocked_autorange(min_run_time=MIN_RUN_TIME)
results.append(t)
if result_file is not None:
with open(result_file, "wb") as f:
pickle.dump(results, f)
def run_subprocess(args):
seed, env, sub_label, extra_env_vars = args
core_str = None
try:
core_str, result_file, num_threads = _WORKER_POOL.get()
with open(result_file, "wb"):
pass
env_vars: Dict[str, str] = {
"PATH": os.getenv("PATH") or "",
"PYTHONPATH": os.getenv("PYTHONPATH") or "",
# NumPy
"OMP_NUM_THREADS": str(num_threads),
"MKL_NUM_THREADS": str(num_threads),
"NUMEXPR_NUM_THREADS": str(num_threads),
}
env_vars.update(extra_env_vars or {})
subprocess.run(
f"source activate {env} && "
f"taskset --cpu-list {core_str} "
f"python {os.path.abspath(__file__)} "
"--DETAIL-in-subprocess "
f"--DETAIL-seed {seed} "
f"--DETAIL-num-threads {num_threads} "
f"--DETAIL-sub-label '{sub_label}' "
f"--DETAIL-result-file {result_file} "
f"--DETAIL-env {env}",
env=env_vars,
stdout=subprocess.PIPE,
shell=True
)
with open(result_file, "rb") as f:
result_bytes = f.read()
with _RESULT_FILE_LOCK, \
open(RESULT_FILE, "ab") as f:
f.write(result_bytes)
except KeyboardInterrupt:
pass # Handle ctrl-c gracefully.
finally:
if core_str is not None:
_WORKER_POOL.put((core_str, result_file, num_threads))
def _compare_main():
results = []
with open(RESULT_FILE, "rb") as f:
while True:
try:
results.extend(pickle.load(f))
except EOFError:
break
from torch.utils.benchmark import Compare
comparison = Compare(results)
comparison.trim_significant_figures()
comparison.colorize()
comparison.print()
def main():
with open(RESULT_FILE, "wb"):
pass
for num_threads in NUM_THREAD_SETTINGS:
fill_core_pool(num_threads)
workers = _WORKER_POOL.qsize()
trials = []
for seed in range(NUM_REPLICATES):
for sub_label, env, extra_env_vars in BLAS_CONFIGS:
env_path = os.path.join(blas_compare_setup.WORKING_ROOT, env)
trials.append((seed, env_path, sub_label, extra_env_vars))
n = len(trials)
with multiprocessing.dummy.Pool(workers) as pool:
start_time = time.time()
for i, r in enumerate(pool.imap(run_subprocess, trials)):
n_trials_done = i + 1
time_per_result = (time.time() - start_time) / n_trials_done
eta = int((n - n_trials_done) * time_per_result)
print(f"\r{i + 1} / {n} ETA:{datetime.timedelta(seconds=eta)}".ljust(80), end="")
sys.stdout.flush()
print(f"\r{n} / {n} Total time: {datetime.timedelta(seconds=int(time.time() - start_time))}")
print()
# Any env will do, it just needs to have torch for benchmark utils.
env_path = os.path.join(blas_compare_setup.WORKING_ROOT, BLAS_CONFIGS[0][1])
subprocess.run(
f"source activate {env_path} && "
f"python {os.path.abspath(__file__)} "
"--DETAIL-in-compare",
shell=True
)
if __name__ == "__main__":
# These flags are for subprocess control, not controlling the main loop.
parser = argparse.ArgumentParser()
parser.add_argument("--DETAIL-in-subprocess", "--DETAIL_in_subprocess", action="store_true")
parser.add_argument("--DETAIL-in-compare", "--DETAIL_in_compare", action="store_true")
parser.add_argument("--DETAIL-seed", "--DETAIL_seed", type=int, default=None)
parser.add_argument("--DETAIL-num-threads", "--DETAIL_num_threads", type=int, default=None)
parser.add_argument("--DETAIL-sub-label", "--DETAIL_sub_label", type=str, default="N/A")
parser.add_argument("--DETAIL-result-file", "--DETAIL_result_file", type=str, default=None)
parser.add_argument("--DETAIL-env", "--DETAIL_env", type=str, default=None)
args = parser.parse_args()
if args.DETAIL_in_subprocess:
try:
_subprocess_main(
args.DETAIL_seed,
args.DETAIL_num_threads,
args.DETAIL_sub_label,
args.DETAIL_result_file,
args.DETAIL_env,
)
except KeyboardInterrupt:
pass # Handle ctrl-c gracefully.
elif args.DETAIL_in_compare:
_compare_main()
else:
main()

View File

@@ -1,426 +0,0 @@
"""End-to-end example to test a PR for regressions:
$ python -m examples.end_to_end --pr 39850
$ python -m examples.end_to_end --pr 39967
$ python -m examples.end_to_end --pr 39744
NOTE:
This example assumes that you have an environment prefixed with
`ref_`, and another prefixed with `pr_` for the PR
in question. (e.g. `ref_39850` and `pr_39850`).
A helper script (examples/prepare_e2e.sh) is provided to build
the required environments with the correct configuration.
"""
import argparse
import itertools as it
import multiprocessing
import multiprocessing.dummy
import os
import pickle
import queue
import subprocess
import tempfile
import textwrap
import numpy as np
import torch
from torch.utils.benchmark.op_fuzzers import unary
from torch.utils.benchmark import Timer, Measurement
from typing import Dict, Tuple, List
_MAIN, _SUBPROCESS = "main", "subprocess"
_PR_ENV_TEMPLATE = "pr_{pr}"
_REF_ENV_TEMPLATE = "ref_{pr}"
_PR_LIST = (
# Optimize topk performance for tensor with a large dimension size
"39850",
# Migrate `var` & `std` to ATen
"39967",
# Introducing (Const)StridedRandomAccessor + CompositeRandomAccessor + migrate `sort` to ATen (CPU)
"39744",
)
_CPU, _GPU = "cpu", "gpu"
_MIN_RUN_SEC = 1
_REPLICATES = {
_CPU: 5, # CPU has a higher variance.
_GPU: 1,
}
_RUNS_PER_LOOP = 3
_NUM_LOOPS = {
_CPU: 32,
_GPU: 64,
}
_DEVICES_TO_TEST = {
"39850": {_CPU: False, _GPU: True},
"39967": {_CPU: True, _GPU: True},
"39744": {_CPU: True, _GPU: True},
}
_AVAILABLE_GPUS = queue.Queue[int]()
_DTYPES_TO_TEST = {
"39850": ("int8", "float32", "float64"),
"39967": ("float32", "float64"),
"39744": ("int8", "float32", "float64"),
}
_DTYPE_STR_TO_DTYPE = {
"float64": torch.float64,
"float32": torch.float32,
"int8": torch.int8,
}
def parse_args():
parser = argparse.ArgumentParser()
parser.add_argument("--pr", type=str, default=_PR_LIST[0], choices=_PR_LIST)
parser.add_argument("--num-gpus", "--num_gpus", type=int, default=None)
parser.add_argument("--test-variance", "--test_variance", action="store_true")
# (Implementation details)
parser.add_argument("--DETAIL-context", "--DETAIL_context", type=str, choices=(_MAIN, _SUBPROCESS), default=_MAIN)
parser.add_argument("--DETAIL-device", "--DETAIL_device", type=str, choices=(_CPU, _GPU), default=None)
parser.add_argument("--DETAIL-env", "--DETAIL_env", type=str, default=None)
parser.add_argument("--DETAIL-result-file", "--DETAIL_result_file", type=str, default=None)
parser.add_argument("--DETAIL-seed", "--DETAIL_seed", type=int, default=None)
args = parser.parse_args()
if args.num_gpus is None:
args.num_gpus = torch.cuda.device_count()
return args
_SUBPROCESS_CMD_TEMPLATE = (
"source activate {source_env} && python -m examples.end_to_end "
"--pr {pr} "
"--DETAIL-context subprocess "
"--DETAIL-device {device} "
"--DETAIL-env {env} "
"--DETAIL-result-file {result_file} "
"--DETAIL-seed {seed}"
)
def construct_stmt_and_label(pr, params):
if pr == "39850":
k0, k1, k2, dim = (params[i] for i in ["k0", "k1", "k2", "dim"])
state = np.random.RandomState(params["random_value"])
topk_dim = state.randint(low=0, high=dim)
dim_size = [k0, k1, k2][topk_dim]
k = max(int(np.floor(2 ** state.uniform(low=0, high=np.log2(dim_size)))), 1)
return f"torch.topk(x, dim={topk_dim}, k={k})", "topk"
if pr == "39967":
return "torch.std(x)", "std"
if pr == "39744":
state = np.random.RandomState(params["random_value"])
sort_dim = state.randint(low=0, high=params["dim"])
return f"torch.sort(x, dim={sort_dim})", "sort"
raise ValueError("Unknown PR")
def subprocess_main(args):
seed = args.DETAIL_seed
cuda = (args.DETAIL_device == _GPU)
with open(args.DETAIL_result_file, "ab") as f:
for dtype_str in _DTYPES_TO_TEST[args.pr]:
dtype = _DTYPE_STR_TO_DTYPE[dtype_str]
iterator = unary.UnaryOpFuzzer(
seed=seed, dtype=dtype, cuda=cuda).take(_RUNS_PER_LOOP)
for i, (tensors, tensor_parameters, params) in enumerate(iterator):
params["dtype_str"] = dtype_str
stmt, label = construct_stmt_and_label(args.pr, params)
timer = Timer(
stmt=stmt,
globals=tensors,
label=label,
description=f"[{i}, seed={seed}] ({dtype_str}), stmt = {stmt}",
env=args.DETAIL_env,
)
measurement = timer.blocked_autorange(min_run_time=_MIN_RUN_SEC)
measurement.metadata = {
"tensor_parameters": tensor_parameters,
"params": params,
}
print(measurement)
pickle.dump(measurement, f)
def _main(args):
pools, map_iters, finished_counts = {}, {}, {}
pr = args.pr
envs = (_REF_ENV_TEMPLATE.format(pr=pr), _PR_ENV_TEMPLATE.format(pr=pr))
# We initialize both pools at the start so that they run simultaneously
# if applicable
if _DEVICES_TO_TEST[args.pr][_GPU]:
finished_counts[_GPU] = 0
for i in range(args.num_gpus):
_AVAILABLE_GPUS.put(i)
pools[_GPU] = multiprocessing.dummy.Pool(args.num_gpus)
trials = [
(seed, envs, pr, True, finished_counts, args.test_variance)
for seed in range(_NUM_LOOPS[_GPU])] * _REPLICATES[_GPU]
map_iters[_GPU] = pools[_GPU].imap(map_fn, trials)
if _DEVICES_TO_TEST[args.pr][_CPU]:
finished_counts[_CPU] = 0
cpu_workers = int(multiprocessing.cpu_count() / 3)
pools[_CPU] = multiprocessing.dummy.Pool(cpu_workers)
trials = [
(seed, envs, pr, False, finished_counts, args.test_variance)
for seed in range(_NUM_LOOPS[_CPU])] * _REPLICATES[_CPU]
map_iters[_CPU] = pools[_CPU].imap(map_fn, trials)
results = []
for map_iter in map_iters.values():
for r in map_iter:
results.append(r)
progress = [
f"{k}: {v} / {_NUM_LOOPS[k] * _REPLICATES[k]}"
for k, v in finished_counts.items()]
print(f"\r{(' ' * 10).join(progress)}", end="")
print()
for pool in pools.values():
pool.close()
process_results(results, args.test_variance)
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
# == Data processing and string formatting ====================================
# /////////////////////////////////////////////////////////////////////////////
def merge(measurements):
if not measurements:
return None
states = [m.__getstate__() for m in measurements]
for k in states[0].keys():
if k in ("number_per_run", "times", "metadata"):
continue
assert all(s[k] == states[0][k] for s in states)
numbers_per_run = {m.number_per_run for m in measurements}
n = numbers_per_run.pop() if len(numbers_per_run) == 1 else 1
merged_state = states[0]
times = [[t / m.number_per_run * n for t in m.times] for m in measurements]
merged_state["times"] = list(it.chain(*times))
merged_state["number_per_run"] = n
merged_state["metadata"] = states[0]["metadata"]
return Measurement(**merged_state)
def process_results(results, test_variance):
paired_results: Dict[Tuple[str, str, int, bool, int], List] = {}
for (seed, use_gpu), result_batch in results:
for r in result_batch:
key = (r.label, r.description, r.num_threads, use_gpu, seed)
paired_results.setdefault(key, [[], []])
index = 0 if r.env.startswith("ref") else 1
paired_results[key][index].append(r)
paired_results = {
key: [merge(r_ref_list), merge(r_pr_list)]
for key, (r_ref_list, r_pr_list) in paired_results.items()
}
flagged_for_removal = set()
for key, (r_ref, r_pr) in paired_results.items():
if any(r is None or r.has_warnings for r in (r_ref, r_pr)):
flagged_for_removal.add(key)
paired_results = {
k: v for k, v in paired_results.items()
if k not in flagged_for_removal
}
print(f"{len(flagged_for_removal)} samples were culled, {len(paired_results)} remain")
gpu_results = [(k, v) for k, v in paired_results.items() if k[3]]
cpu_results = [(k, v) for k, v in paired_results.items() if not k[3]]
if cpu_results:
construct_table(cpu_results, "CPU", test_variance)
if gpu_results:
construct_table(gpu_results, "GPU", test_variance)
def construct_table(results, device_str, test_variance):
device_str = f"== {device_str} {' (Variance Test)' if test_variance else ''} ".ljust(40, "=")
print(f"{'=' * 40}\n{device_str}\n{'=' * 40}\n")
results = sorted((
(key, (r_ref, r_pr), r_pr.median / r_ref.median - 1)
for key, (r_ref, r_pr) in results
), key=lambda i: i[2])
n = len(results)
n_regressed = len([i for i in results if i[2] > 0.05])
n_improved = len([i for i in results if i[2] < -0.05])
n_unchanged = n - n_improved - n_regressed
legends = ["Improved (>5%):", "Regressed (>5%):", "Within 5%:"]
for legend, count in zip(legends, [n_improved, n_regressed, n_unchanged]):
print(f"{legend:<17} {count:>6} ({count / len(results) * 100:>3.0f}%)")
keys_to_print = (
{i[0] for i in results[20:30]} |
{i[0] for i in results[int(n // 2 - 5):int(n // 2 + 5)]} |
{i[0] for i in results[-30:-20]}
)
ellipsis_after = {results[29][0], results[int(n // 2 + 4)][0]}
column_labels = (
f"Relative Δ Absolute Δ | numel{'':>8}dtype{'':>14}"
f"shape{'':>10}steps{'':>10}layout{'':>7}task specific\n{'=' * 126}"
)
_, result_log_file = tempfile.mkstemp(suffix=".log")
with open(result_log_file, "w") as f:
f.write(f"{device_str}\n\n{column_labels}\n")
print(f"\n{column_labels}\n[First twenty omitted (these tend to be noisy) ]")
for key, (r_ref, r_pr), rel_diff in results:
row = row_str(rel_diff, r_pr.median - r_ref.median, r_ref)
f.write(f"{row}\n")
if key in keys_to_print:
print(row)
if key in ellipsis_after:
print("...")
print("[Last twenty omitted (these tend to be noisy) ]")
print(textwrap.dedent("""
steps:
Indicates that `x` is sliced from a larger Tensor. For instance, if
shape is [12, 4] and steps are [2, 1], then a larger Tensor of size
[24, 4] was created, and then x = base_tensor[::2, ::1]. Omitted if
all elements are ones.
layout:
Indicates that `x` is not contiguous due to permutation. Invoking
`x.permute(layout)` (e.g. x.permute((2, 0, 1)) if layout = [2, 0, 1])
would produce a Tensor with physical memory layout matching logical
memory layout. (Though still not contiguous if `steps` contains
non-one elements.)
"""))
print(f"\nComplete results in: {result_log_file}")
def row_str(rel_diff, diff_seconds, measurement):
params = measurement.metadata["params"]
tensor_parameters = measurement.metadata["tensor_parameters"]
dim = params["dim"]
x_numel = tensor_parameters["x"]["numel"]
steps = [params[f"x_step_{i}"] for i in range(dim)]
order = tensor_parameters['x']["order"]
order = str("" if all(i == j for i, j in zip(order, range(dim))) else order)
task_specific = ""
if measurement.stmt.startswith("torch.topk"):
dim_str, k_str = measurement.stmt[:-1].replace("torch.topk(x, ", "").split(", ")
task_specific = f"{dim_str}, {k_str:<8}"
elif measurement.stmt.startswith("torch.std"):
pass
elif measurement.stmt.startswith("torch.sort"):
task_specific = measurement.stmt[:-1].replace("torch.sort(x, ", "")
return (
f"{rel_diff * 100:>5.0f}% {abs(diff_seconds) * 1e6:>11.1f} us{'':>6}|"
f"{x_numel:>12} {params['dtype_str']:>10} "
f"{str([params[f'k{i}'] for i in range(dim)]):>17} "
f"{str(steps) if not all(i == 1 for i in steps) else '':>12} {order:>12}"
f"{'':>8}{task_specific}"
)
# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
# == Subprocess and environment management ====================================
# /////////////////////////////////////////////////////////////////////////////
def read_results(result_file: str):
output = []
with open(result_file, "rb") as f:
while True:
try:
output.append(pickle.load(f))
except EOFError:
break
return output
def run(cmd, cuda_visible_devices=""):
return subprocess.run(
cmd,
env={
"CUDA_VISIBLE_DEVICES": str(cuda_visible_devices),
"PATH": os.getenv("PATH", ""),
},
stdout=subprocess.PIPE,
shell=True
)
def test_source(envs):
"""Ensure that subprocess"""
for env in envs:
result = run(f"source activate {env}")
if result.returncode != 0:
raise ValueError(f"Failed to source environment `{env}`")
def map_fn(args):
seed, envs, pr, use_gpu, finished_counts, test_variance = args
gpu = _AVAILABLE_GPUS.get() if use_gpu else None
try:
_, result_file = tempfile.mkstemp(suffix=".pkl")
for env in envs:
cmd = _SUBPROCESS_CMD_TEMPLATE.format(
source_env=envs[0] if test_variance else env,
env=env, pr=pr, device=_GPU if use_gpu else _CPU,
result_file=result_file, seed=seed,
)
run(cmd=cmd, cuda_visible_devices=gpu if use_gpu else "")
finished_counts[_GPU if use_gpu else _CPU] += 1
return (seed, use_gpu), read_results(result_file)
except KeyboardInterrupt:
pass # Handle ctrl-c gracefully.
finally:
if gpu is not None:
_AVAILABLE_GPUS.put(gpu)
if os.path.exists(result_file):
os.remove(result_file)
def main(args):
test_source([
_REF_ENV_TEMPLATE.format(pr=args.pr),
_PR_ENV_TEMPLATE.format(pr=args.pr),
])
_main(args)
if __name__ == "__main__":
args = parse_args()
if args.DETAIL_context == "main":
main(args)
if args.DETAIL_context == "subprocess":
try:
subprocess_main(args)
except KeyboardInterrupt:
pass # Handle ctrl-c gracefully.