diff --git a/docs/source/amp.rst b/docs/source/amp.rst
index 17a3867d4adf..d3ba6e3ba930 100644
--- a/docs/source/amp.rst
+++ b/docs/source/amp.rst
@@ -384,3 +384,12 @@ Some ops not listed here (e.g., binary ops like ``add``) natively
 promote inputs without autocasting's intervention. If inputs are a mixture of
 ``bfloat16`` and ``float32``, these ops run in ``float32`` and produce
 ``float32`` output, regardless of whether autocast is enabled.
+
+
+.. This module needs to be documented. Adding here in the meantime
+.. for tracking purposes
+.. py:module:: torch.amp.autocast_mode
+.. py:module:: torch.cpu.amp.autocast_mode
+.. py:module:: torch.cuda.amp.autocast_mode
+.. py:module:: torch.cuda.amp.common
+.. py:module:: torch.cuda.amp.grad_scaler
\ No newline at end of file
diff --git a/docs/source/autograd.rst b/docs/source/autograd.rst
index 7b31d8942179..fa4644847a6b 100644
--- a/docs/source/autograd.rst
+++ b/docs/source/autograd.rst
@@ -214,6 +214,10 @@ When creating a new :class:`Function`, the following methods are available to `c
 
 Numerical gradient checking
 ^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+.. automodule:: torch.autograd.gradcheck
+.. currentmodule:: torch.autograd.gradcheck
+
 .. autosummary::
     :toctree: generated
     :nosignatures:
@@ -221,6 +225,9 @@ Numerical gradient checking
     gradcheck
     gradgradcheck
 
+.. Just to reset the base path for the rest of this file
+.. currentmodule:: torch.autograd
+
 Profiler
 ^^^^^^^^
 
@@ -309,3 +316,17 @@ Also see :ref:`saved-tensors-hooks-doc`.
 
 .. autoclass:: torch.autograd.graph.register_multi_grad_hook
 .. autoclass:: torch.autograd.graph.allow_mutation_on_saved_tensors
+
+
+.. This module needs to be documented. Adding here in the meantime
+.. for tracking purposes
+.. py:module:: torch.autograd.anomaly_mode
+.. py:module:: torch.autograd.forward_ad
+.. py:module:: torch.autograd.function
+.. py:module:: torch.autograd.functional
+.. py:module:: torch.autograd.grad_mode
+.. py:module:: torch.autograd.graph
+.. py:module:: torch.autograd.profiler
+.. py:module:: torch.autograd.profiler_legacy
+.. py:module:: torch.autograd.profiler_util
+.. py:module:: torch.autograd.variable
diff --git a/docs/source/backends.rst b/docs/source/backends.rst
index f89f6132ed0f..bf6d4e5ae134 100644
--- a/docs/source/backends.rst
+++ b/docs/source/backends.rst
@@ -117,6 +117,7 @@ torch.backends.cudnn
     available algorithm. Note that this setting only affects convolutions dispatched
     via the cuDNN v8 API.
 
+.. py:module:: torch.backends.cudnn.rnn
 
 torch.backends.mps
 ^^^^^^^^^^^^^^^^^^
@@ -187,3 +188,4 @@ torch.backends.opt_einsum
 torch.backends.xeon
 ^^^^^^^^^^^^^^^^^^^
 .. automodule:: torch.backends.xeon
+.. py:module:: torch.backends.xeon.run_cpu
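
The amp.rst context above says that natively promoting ops such as ``add`` ignore autocast entirely. A minimal sketch of what that means in practice (the CPU autocast region below is illustrative only, not part of the patch):

```python
import torch

# Binary ops like ``add`` are not on the autocast op list; ordinary type
# promotion applies even inside an autocast region.
a = torch.randn(3, dtype=torch.bfloat16)
b = torch.randn(3, dtype=torch.float32)

with torch.autocast(device_type="cpu", dtype=torch.bfloat16):
    out = a + b

# Mixed bfloat16/float32 inputs promote to float32, as the doc text states.
assert out.dtype == torch.float32
```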
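The autograd.rst hunk pulls ``torch.autograd.gradcheck`` and ``gradgradcheck`` into the generated docs. For context, a minimal usage sketch (the cubic test function is made up for illustration):

```python
import torch
from torch.autograd import gradcheck, gradgradcheck

def f(x):
    # Any differentiable function of the inputs works here.
    return (x ** 3).sum()

# gradcheck compares analytical gradients against finite differences;
# double precision and requires_grad=True are needed for stable checks.
x = torch.randn(4, dtype=torch.double, requires_grad=True)
assert gradcheck(f, (x,), eps=1e-6, atol=1e-4)
assert gradgradcheck(f, (x,))
```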
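The backends.rst context around the new ``torch.backends.cudnn.rnn`` declaration is the description of cuDNN algorithm autotuning. A short sketch of how those knobs are typically toggled (the limit value is arbitrary; assumes a build where ``benchmark_limit`` is available):

```python
import torch

if torch.backends.cudnn.is_available():
    # Benchmark candidate convolution algorithms and cache the fastest one.
    torch.backends.cudnn.benchmark = True
    # Cap how many cuDNN v8 engine configs are tried; 0 means try every
    # available algorithm (this only affects the cuDNN v8 API path).
    torch.backends.cudnn.benchmark_limit = 10
```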
diff --git a/docs/source/conf.py b/docs/source/conf.py
index 6039a5b8265c..d9785ec7dd5d 100644
--- a/docs/source/conf.py
+++ b/docs/source/conf.py
@@ -103,6 +103,9 @@ coverage_ignore_functions = [
     "zip_by_key",
     # torch.distributed.autograd
     "is_available",
+    # torch.distributed.checkpoint.state_dict
+    "gc_context",
+    "state_dict",
     # torch.distributed.elastic.events
     "construct_and_record_rdzv_event",
     "record_rdzv_event",
@@ -157,6 +160,1950 @@ coverage_ignore_functions = [
     "get_model_info",
     "get_storage_info",
     "hierarchical_pickle",
+    # torch.amp.autocast_mode
+    "autocast_decorator",
+    # torch.ao.nn.quantized.dynamic.modules.rnn
+    "apply_permutation",
+    "pack_weight_bias",
+    # torch.ao.nn.quantized.reference.modules.rnn
+    "get_quantized_weight",
+    # torch.ao.ns.fx.graph_matcher
+    "get_matching_subgraph_pairs",
+    # torch.ao.ns.fx.graph_passes
+    "add_loggers_to_model",
+    "create_a_shadows_b",
+    # torch.ao.ns.fx.mappings
+    "add_op_to_sets_of_related_ops",
+    "get_base_name_for_op",
+    "get_base_name_to_sets_of_related_ops",
+    "get_node_type_to_io_type_map",
+    "get_unmatchable_types_map",
+    # torch.ao.ns.fx.n_shadows_utils
+    "create_add_loggers_graph",
+    "create_n_transformed_and_logged_copies_of_subgraph",
+    "create_one_transformed_and_logged_copy_of_subgraph",
+    "create_results_comparison",
+    "create_submodule_from_subgraph",
+    "extract_weight_comparison",
+    "group_results_by_subgraph",
+    "print_n_shadows_summary",
+    # torch.ao.ns.fx.pattern_utils
+    "end_node_matches_reversed_fusion",
+    "get_reversed_fusions",
+    "get_type_a_related_to_b",
+    # torch.ao.ns.fx.utils
+    "get_arg_indices_of_inputs_to_log",
+    "get_node_first_input_and_output_type",
+    "get_node_input_qparams",
+    "get_normalized_nth_input",
+    "get_number_of_non_param_args",
+    "get_target_type_str",
+    "maybe_add_missing_fqns",
+    "maybe_dequantize_first_two_tensor_args_and_handle_tuples",
+    "op_type_supports_shadowing",
+    "rekey_logger_info_on_node_name_of_model",
+    "return_first_non_observer_node",
+    # torch.ao.ns.fx.weight_utils
+    "extract_weight_from_node",
+    "get_conv_fun_weight",
+    "get_conv_mod_weight",
+    "get_linear_fun_weight",
+    "get_linear_mod_weight",
+    "get_lstm_mod_weights",
+    "get_lstm_weight",
+    "get_op_to_type_to_weight_extraction_fn",
+    "get_qconv_fun_weight",
+    "get_qlinear_fun_weight",
+    "get_qlstm_weight",
+    "mod_0_weight_detach",
+    "mod_weight_bias_0",
+    "mod_weight_detach",
+    # torch.ao.pruning.sparsifier.utils
+    "fqn_to_module",
+    "get_arg_info_from_tensor_fqn",
+    "module_contains_param",
+    "module_to_fqn",
+    "swap_module",
+    # torch.ao.quantization.backend_config.executorch
+    "get_executorch_backend_config",
+    # torch.ao.quantization.backend_config.fbgemm
+    "get_fbgemm_backend_config",
+    # torch.ao.quantization.backend_config.native
+    "get_native_backend_config",
+    "get_native_backend_config_dict",
+    "get_test_only_legacy_native_backend_config",
+    "get_test_only_legacy_native_backend_config_dict",
+    # torch.ao.quantization.backend_config.onednn
+    "get_onednn_backend_config",
+    # torch.ao.quantization.backend_config.qnnpack
+    "get_qnnpack_backend_config",
+    # torch.ao.quantization.backend_config.tensorrt
+    "get_tensorrt_backend_config",
+    "get_tensorrt_backend_config_dict",
+    # torch.ao.quantization.backend_config.utils
+    "entry_to_pretty_str",
+    "get_fused_module_classes",
+    "get_fuser_method_mapping",
+    "get_fusion_pattern_to_extra_inputs_getter",
+    "get_fusion_pattern_to_root_node_getter",
+    "get_module_to_qat_module",
+    "get_pattern_to_dtype_configs",
"get_pattern_to_input_type_to_index", + "get_qat_module_classes", + "get_root_module_to_quantized_reference_module", + "pattern_to_human_readable", + "remove_boolean_dispatch_from_name", + # torch.ao.quantization.backend_config.x86 + "get_x86_backend_config", + # torch.ao.quantization.fuse_modules + "fuse_known_modules", + "fuse_modules_qat", + # torch.ao.quantization.fuser_method_mappings + "fuse_conv_bn", + "fuse_conv_bn_relu", + "fuse_convtranspose_bn", + "fuse_linear_bn", + "get_fuser_method", + "get_fuser_method_new", + # torch.ao.quantization.fx.convert + "convert", + "convert_custom_module", + "convert_standalone_module", + "convert_weighted_module", + # torch.ao.quantization.fx.fuse + "fuse", + # torch.ao.quantization.fx.lower_to_fbgemm + "lower_to_fbgemm", + # torch.ao.quantization.fx.lower_to_qnnpack + "lower_to_qnnpack", + # torch.ao.quantization.fx.pattern_utils + "get_default_fusion_patterns", + "get_default_output_activation_post_process_map", + "get_default_quant_patterns", + # torch.ao.quantization.fx.prepare + "insert_observers_for_model", + "prepare", + "propagate_dtypes_for_known_nodes", + # torch.ao.quantization.fx.utils + "all_node_args_except_first", + "all_node_args_have_no_tensors", + "assert_and_get_unique_device", + "collect_producer_nodes", + "create_getattr_from_value", + "create_node_from_old_node_preserve_meta", + "get_custom_module_class_keys", + "get_linear_prepack_op_for_dtype", + "get_new_attr_name_with_prefix", + "get_non_observable_arg_indexes_and_types", + "get_qconv_prepack_op", + "get_skipped_module_name_and_classes", + "graph_module_from_producer_nodes", + "maybe_get_next_module", + "node_arg_is_bias", + "node_arg_is_weight", + "return_arg_list", + # torch.ao.quantization.pt2e.graph_utils + "find_sequential_partitions", + "get_equivalent_types", + "update_equivalent_types_dict", + # torch.ao.quantization.pt2e.prepare + "prepare", + # torch.ao.quantization.pt2e.representation.rewrite + "reference_representation_rewrite", + # torch.ao.quantization.pt2e.utils + "fold_bn_weights_into_conv_node", + "get_aten_graph_module", + "remove_tensor_overload_for_qdq_ops", + # torch.ao.quantization.qconfig + "get_default_qat_qconfig", + "get_default_qat_qconfig_dict", + "get_default_qconfig", + "get_default_qconfig_dict", + "qconfig_equals", + # torch.ao.quantization.quantization_mappings + "get_default_compare_output_module_list", + "get_default_dynamic_quant_module_mappings", + "get_default_dynamic_sparse_quant_module_mappings", + "get_default_float_to_quantized_operator_mappings", + "get_default_qat_module_mappings", + "get_default_qconfig_propagation_list", + "get_default_static_quant_module_mappings", + "get_default_static_quant_reference_module_mappings", + "get_default_static_sparse_quant_module_mappings", + "get_dynamic_quant_module_class", + "get_embedding_qat_module_mappings", + "get_embedding_static_quant_module_mappings", + "get_quantized_operator", + "get_static_quant_module_class", + "no_observer_set", + # torch.ao.quantization.quantize + "get_default_custom_config_dict", + # torch.ao.quantization.quantize_fx + "attach_preserved_attrs_to_model", + "convert_to_reference_fx", + # torch.ao.quantization.quantize_jit + "convert_dynamic_jit", + "convert_jit", + "fuse_conv_bn_jit", + "prepare_dynamic_jit", + "prepare_jit", + "quantize_dynamic_jit", + "quantize_jit", + "script_qconfig", + "script_qconfig_dict", + # torch.ao.quantization.quantize_pt2e + "convert_pt2e", + "prepare_pt2e", + "prepare_qat_pt2e", + # 
+    "get_embedding_operators_config",
+    # torch.ao.quantization.quantizer.xnnpack_quantizer_utils
+    "convert_scalars_to_attrs",
+    "get_bias_qspec",
+    "get_input_act_qspec",
+    "get_output_act_qspec",
+    "get_weight_qspec",
+    "propagate_annotation",
+    "register_annotator",
+    # torch.ao.quantization.utils
+    "activation_dtype",
+    "activation_is_dynamically_quantized",
+    "activation_is_int32_quantized",
+    "activation_is_int8_quantized",
+    "activation_is_statically_quantized",
+    "calculate_qmin_qmax",
+    "check_min_max_valid",
+    "check_node",
+    "determine_qparams",
+    "get_combined_dict",
+    "get_fqn_to_example_inputs",
+    "get_qconfig_dtypes",
+    "get_qparam_dict",
+    "get_quant_type",
+    "get_swapped_custom_module_class",
+    "getattr_from_fqn",
+    "has_no_children_ignoring_parametrizations",
+    "is_per_channel",
+    "is_per_tensor",
+    "op_is_int8_dynamically_quantized",
+    "to_underlying_dtype",
+    "validate_qmin_qmax",
+    "weight_dtype",
+    "weight_is_quantized",
+    "weight_is_statically_quantized",
+    # torch.autograd.forward_ad
+    "enter_dual_level",
+    "exit_dual_level",
+    # torch.autograd.function
+    "once_differentiable",
+    "traceable",
+    # torch.autograd.gradcheck
+    "get_analytical_jacobian",
+    "get_numerical_jacobian",
+    "get_numerical_jacobian_wrt_specific_input",
+    # torch.autograd.graph
+    "increment_version",
+    # torch.autograd.profiler
+    "parse_nvprof_trace",
+    # torch.backends.cudnn.rnn
+    "get_cudnn_mode",
+    "init_dropout_state",
+    # torch.backends.xeon.run_cpu
+    "create_args",
+    # torch.cuda.amp.autocast_mode
+    "custom_bwd",
+    "custom_fwd",
+    # torch.cuda.amp.common
+    "amp_definitely_not_available",
+    # torch.cuda.graphs
+    "graph_pool_handle",
+    "is_current_stream_capturing",
+    "make_graphed_callables",
+    # torch.cuda.memory
+    "caching_allocator_alloc",
+    "caching_allocator_delete",
+    "change_current_allocator",
+    "empty_cache",
+    "get_allocator_backend",
+    "list_gpu_processes",
+    "max_memory_allocated",
+    "max_memory_cached",
+    "max_memory_reserved",
+    "mem_get_info",
+    "memory_allocated",
+    "memory_cached",
+    "memory_reserved",
+    "memory_snapshot",
+    "memory_stats",
+    "memory_stats_as_nested_dict",
+    "memory_summary",
+    "reset_accumulated_memory_stats",
+    "reset_max_memory_allocated",
+    "reset_max_memory_cached",
+    "reset_peak_memory_stats",
+    "set_per_process_memory_fraction",
+    # torch.cuda.nccl
+    "all_gather",
+    "all_reduce",
+    "broadcast",
+    "init_rank",
+    "reduce",
+    "reduce_scatter",
+    "unique_id",
+    "version",
+    # torch.cuda.nvtx
+    "range",
+    "range_end",
+    "range_start",
+    # torch.cuda.profiler
+    "init",
+    "profile",
+    "start",
+    "stop",
+    # torch.cuda.random
+    "get_rng_state",
+    "get_rng_state_all",
+    "initial_seed",
+    "manual_seed",
+    "manual_seed_all",
+    "seed",
+    "seed_all",
+    "set_rng_state",
+    "set_rng_state_all",
+    # torch.distributed.algorithms.ddp_comm_hooks.ddp_zero_hook
+    "hook_with_zero_step",
+    "hook_with_zero_step_interleaved",
+    # torch.distributed.algorithms.ddp_comm_hooks.post_localSGD_hook
+    "post_localSGD_hook",
+    # torch.distributed.algorithms.ddp_comm_hooks.quantization_hooks
+    "quantization_perchannel_hook",
+    "quantization_pertensor_hook",
+    # torch.distributed.algorithms.model_averaging.utils
+    "average_parameters",
+    "average_parameters_or_parameter_groups",
+    "get_params_to_average",
+    # torch.distributed.checkpoint.default_planner
+    "create_default_global_load_plan",
+    "create_default_global_save_plan",
+    "create_default_local_load_plan",
+    "create_default_local_save_plan",
+    # torch.distributed.checkpoint.optimizer
+ "load_sharded_optimizer_state_dict", + # torch.distributed.checkpoint.planner_helpers + "create_read_items_for_chunk_list", + # torch.distributed.checkpoint.state_dict_loader + "load_state_dict", + # torch.distributed.checkpoint.state_dict_saver + "save_state_dict", + # torch.distributed.checkpoint.utils + "find_state_dict_object", + "find_tensor_shard", + # torch.distributed.collective_utils + "all_gather", + "all_gather_object_enforce_type", + "broadcast", + # torch.distributed.distributed_c10d + "all_gather", + "all_gather_coalesced", + "all_gather_into_tensor", + "all_gather_multigpu", + "all_gather_object", + "all_reduce", + "all_reduce_coalesced", + "all_reduce_multigpu", + "all_to_all", + "all_to_all_single", + "barrier", + "batch_isend_irecv", + "broadcast", + "broadcast_multigpu", + "broadcast_object_list", + "destroy_process_group", + "gather", + "gather_object", + "get_backend", + "get_backend_config", + "get_global_rank", + "get_group_rank", + "get_process_group_ranks", + "get_rank", + "get_world_size", + "init_process_group", + "irecv", + "is_backend_available", + "is_gloo_available", + "is_initialized", + "is_mpi_available", + "is_nccl_available", + "is_torchelastic_launched", + "is_ucc_available", + "isend", + "monitored_barrier", + "new_group", + "new_subgroups", + "new_subgroups_by_enumeration", + "recv", + "reduce", + "reduce_multigpu", + "reduce_scatter", + "reduce_scatter_multigpu", + "reduce_scatter_tensor", + "scatter", + "scatter_object_list", + "send", + "supports_complex", + # torch.distributed.elastic.events.handlers + "get_logging_handler", + # torch.distributed.elastic.metrics.api + "configure", + "getStream", + "get_elapsed_time_ms", + "prof", + "profile", + "publish_metric", + "put_metric", + # torch.distributed.elastic.multiprocessing.api + "get_std_cm", + "to_map", + # torch.distributed.elastic.multiprocessing.errors.handlers + "get_error_handler", + # torch.distributed.elastic.multiprocessing.redirects + "get_libc", + "redirect", + # torch.distributed.elastic.multiprocessing.tail_log + "tail_logfile", + # torch.distributed.elastic.rendezvous.dynamic_rendezvous + "get_method_name", + # torch.distributed.elastic.rendezvous.etcd_rendezvous + "create_rdzv_handler", + # torch.distributed.elastic.rendezvous.etcd_server + "find_free_port", + "stop_etcd", + # torch.distributed.elastic.rendezvous.etcd_store + "cas_delay", + # torch.distributed.elastic.rendezvous.static_tcp_rendezvous + "create_rdzv_handler", + # torch.distributed.elastic.rendezvous.utils + "parse_rendezvous_endpoint", + # torch.distributed.elastic.timer.api + "configure", + "expires", + # torch.distributed.elastic.utils.api + "get_env_variable_or_raise", + "get_socket_with_port", + # torch.distributed.elastic.utils.distributed + "create_c10d_store", + "get_free_port", + "get_socket_with_port", + # torch.distributed.elastic.utils.log_level + "get_log_level", + # torch.distributed.elastic.utils.logging + "get_logger", + # torch.distributed.elastic.utils.store + "barrier", + "get_all", + "synchronize", + # torch.distributed.fsdp.wrap + "always_wrap_policy", + "enable_wrap", + "lambda_auto_wrap_policy", + "size_based_auto_wrap_policy", + "transformer_auto_wrap_policy", + "wrap", + # torch.distributed.nn.functional + "all_gather", + "all_reduce", + "all_to_all", + "all_to_all_single", + "broadcast", + "gather", + "reduce", + "reduce_scatter", + "scatter", + # torch.distributed.nn.jit.instantiator + "get_arg_return_types_from_interface", + "instantiate_non_scriptable_remote_module_template", + 
"instantiate_scriptable_remote_module_template", + # torch.distributed.nn.jit.templates.remote_module_template + "get_remote_module_template", + # torch.distributed.optim.utils + "as_functional_optim", + "register_functional_optim", + # torch.distributed.pipeline.sync.checkpoint + "checkpoint", + "enable_checkpointing", + "enable_recomputing", + "is_checkpointing", + "is_recomputing", + "restore_rng_states", + "save_rng_states", + # torch.distributed.pipeline.sync.dependency + "fork", + "join", + # torch.distributed.pipeline.sync.microbatch + "check", + "gather", + "scatter", + # torch.distributed.pipeline.sync.phony + "get_phony", + # torch.distributed.pipeline.sync.skip.layout + "inspect_skip_layout", + # torch.distributed.pipeline.sync.skip.tracker + "current_skip_tracker", + "use_skip_tracker", + # torch.distributed.pipeline.sync.stream + "as_cuda", + "current_stream", + "default_stream", + "get_device", + "is_cuda", + "new_stream", + "record_stream", + "use_device", + "use_stream", + "wait_stream", + # torch.distributed.pipeline.sync.utils + "partition_model", + # torch.distributed.pipeline.sync.worker + "create_workers", + "spawn_workers", + "worker", + # torch.distributed.rendezvous + "register_rendezvous_handler", + "rendezvous", + # torch.distributed.rpc.api + "get_worker_info", + "method_factory", + "new_method", + "remote", + "rpc_async", + "rpc_sync", + "shutdown", + # torch.distributed.rpc.backend_registry + "backend_registered", + "construct_rpc_backend_options", + "init_backend", + "register_backend", + # torch.distributed.rpc.internal + "deserialize", + "serialize", + # torch.distributed.tensor.parallel.api + "parallelize_module", + # torch.distributed.tensor.parallel.input_reshard + "input_reshard", + # torch.distributed.tensor.parallel.style + "make_sharded_output_tensor", + # torch.distributions.utils + "broadcast_all", + "clamp_probs", + "logits_to_probs", + "probs_to_logits", + "tril_matrix_to_vec", + "vec_to_tril_matrix", + # torch.functional + "align_tensors", + "atleast_1d", + "atleast_2d", + "atleast_3d", + "block_diag", + "broadcast_shapes", + "broadcast_tensors", + "cartesian_prod", + "cdist", + "chain_matmul", + "einsum", + "lu", + "meshgrid", + "norm", + "split", + "stft", + "tensordot", + "unique", + "unique_consecutive", + # torch.fx.annotate + "annotate", + # torch.fx.experimental.accelerator_partitioner + "check_dependency", + "combine_two_partitions", + "get_bfs_level_partition", + "get_device_partition_stats", + "get_device_to_partitions_mapping", + "get_logical_id_to_device", + "get_node_to_partition_mapping", + "reorganize_partitions", + "reset_partition_device", + "set_parents_and_children", + # torch.fx.experimental.const_fold + "get_unique_attr_name_in_module", + "split_const_subgraphs", + # torch.fx.experimental.debug + "set_trace", + # torch.fx.experimental.graph_gradual_typechecker + "adaptiveavgpool2d_check", + "adaptiveavgpool2d_inference_rule", + "add_inference_rule", + "all_eq", + "bn2d_inference_rule", + "broadcast_types", + "calculate_out_dimension", + "conv2d_inference_rule", + "conv_refinement_rule", + "conv_rule", + "element_wise_eq", + "expand_to_tensor_dim", + "first_two_eq", + "flatten_check", + "flatten_inference_rule", + "flatten_refinement_rule", + "get_attr_inference_rule", + "get_greatest_upper_bound", + "get_parameter", + "linear_check", + "linear_inference_rule", + "linear_refinement_rule", + "maxpool2d_check", + "maxpool2d_inference_rule", + "register_algebraic_expressions_inference_rule", + "register_inference_rule", + 
"register_refinement_rule", + "relu_inference_rule", + "reshape_inference_rule", + "transpose_inference_rule", + # torch.fx.experimental.merge_matmul + "are_nodes_independent", + "may_depend_on", + "merge_matmul", + "split_result_tensors", + # torch.fx.experimental.meta_tracer + "embedding_override", + "functional_relu_override", + "gen_constructor_wrapper", + "nn_layernorm_override", + "proxys_to_metas", + "symbolic_trace", + "torch_abs_override", + "torch_nn_relu_override", + "torch_relu_override", + "torch_where_override", + # torch.fx.experimental.migrate_gradual_types.constraint + "is_algebraic_expression", + "is_bool_expr", + "is_dim", + # torch.fx.experimental.migrate_gradual_types.constraint_generator + "adaptive_inference_rule", + "add_layer_norm_constraints", + "add_linear_constraints", + "arange_inference_rule", + "assert_inference_rule", + "batchnorm_inference_rule", + "bmm_inference_rule", + "broadcasting_inference_rule", + "conv2d_inference_rule", + "cumsum_inference_rule", + "embedding_inference_rule", + "embedding_inference_rule_functional", + "eq_inference_rule", + "equality_inference_rule", + "expand_inference_rule", + "flatten_inference_rule", + "full_inference_rule", + "gen_broadcasting_constraints", + "gen_embedding_rules", + "gen_layer_norm_constraints", + "generate_flatten_constraints", + "get_attr_inference_rule", + "getitem_inference_rule", + "gt_inference_rule", + "index_select_inference_rule", + "layer_norm_functional", + "layer_norm_inference_rule", + "linear_constraints", + "linear_inference_rule", + "lt_inference_rule", + "masked_fill_inference_rule", + "maxpool_inference_rule", + "neq_inference_rule", + "range_check", + "register_inference_rule", + "relu_inference_rule", + "reshape_inference_rule", + "size_inference_rule", + "tensor_inference_rule", + "torch_dim_inference_rule", + "torch_linear_inference_rule", + "transpose_inference_rule", + "type_inference_rule", + "view_inference_rule", + # torch.fx.experimental.migrate_gradual_types.constraint_transformation + "apply_padding", + "broadcast_dim", + "calc_last_two_dims", + "create_equality_constraints_for_broadcasting", + "gen_all_reshape_possibilities", + "gen_broadcasting_constraints", + "gen_consistency_constraints", + "gen_greatest_upper_bound", + "gen_lists_of_dims", + "generate_all_broadcasting_possibilities_no_padding", + "generate_all_int_dyn_dim_possibilities", + "generate_binconstraint_d", + "generate_binconstraint_t", + "generate_broadcasting", + "generate_calc_conv", + "generate_calc_maxpool", + "generate_calc_product", + "generate_conj", + "generate_d_gub", + "generate_disj", + "generate_gub", + "generate_reshape", + "is_dim_div_by_target", + "is_target_div_by_dim", + "no_broadcast_dim_with_index", + "register_transformation_rule", + "transform_constraint", + "transform_get_item", + "transform_get_item_tensor", + "transform_index_select", + "transform_transpose", + "valid_index", + "valid_index_tensor", + # torch.fx.experimental.migrate_gradual_types.transform_to_z3 + "evaluate_conditional_with_constraints", + # torch.fx.experimental.migrate_gradual_types.util + "gen_bvar", + "gen_dvar", + "gen_nat_constraints", + "gen_tensor_dims", + "gen_tvar", + # torch.fx.experimental.optimization + "extract_subgraph", + "fuse", + "gen_mkl_autotuner", + "matches_module_pattern", + "modules_to_mkldnn", + "optimize_for_inference", + "remove_dropout", + "replace_node_module", + "reset_modules", + "use_mkl_length", + # torch.fx.experimental.partitioner_utils + "get_comm_latency_between", + "get_extra_size_of", 
+ "get_latency_of_one_partition", + "get_latency_of_partitioned_graph", + "get_partition_to_latency_mapping", + # torch.fx.experimental.proxy_tensor + "decompose", + "disable_autocast_cache", + "disable_proxy_modes_tracing", + "dispatch_trace", + "extract_val", + "fake_signature", + "fetch_sym_proxy", + "fetch_tensor_proxy", + "get_innermost_proxy_mode", + "get_isolated_graphmodule", + "get_proxy_slot", + "get_torch_dispatch_modes", + "has_proxy_slot", + "is_sym_node", + "make_fx", + "maybe_disable_fake_tensor_mode", + "maybe_handle_decomp", + "proxy_call", + "set_meta", + "set_original_aten_op", + "set_proxy_slot", + "snapshot_fake", + "thunkify", + "track_tensor", + "track_tensor_tree", + "wrap_key", + "wrapper_and_args_for_make_fx", + # torch.fx.experimental.recording + "record_shapeenv_event", + "replay_shape_env_events", + "shape_env_check_state_equal", + # torch.fx.experimental.symbolic_shapes + "bind_symbols", + "cast_symbool_to_symint_guardless", + "ceil_impl", + "constrain_range", + "constrain_unify", + "create_contiguous", + "definitely_false", + "definitely_true", + "error", + "eval_guards", + "eval_is_non_overlapping_and_dense", + "expect_true", + "find_symbol_binding_fx_nodes", + "floor_ceil_helper", + "floor_impl", + "free_symbols", + "free_unbacked_symbols", + "fx_placeholder_targets", + "fx_placeholder_vals", + "guard_bool", + "guard_float", + "guard_int", + "guard_scalar", + "has_hint", + "has_symbolic_sizes_strides", + "hint_int", + "is_channels_last_contiguous_2d", + "is_channels_last_contiguous_3d", + "is_channels_last_strides_2d", + "is_channels_last_strides_3d", + "is_concrete_bool", + "is_concrete_int", + "is_contiguous", + "is_non_overlapping_and_dense_indicator", + "is_symbol_binding_fx_node", + "is_symbolic", + "method_to_operator", + "parallel_and", + "parallel_or", + "sym_sqrt", + "sympy_is_channels_last_contiguous_2d", + "sympy_is_channels_last_contiguous_3d", + "sympy_is_channels_last_strides_2d", + "sympy_is_channels_last_strides_3d", + "sympy_is_channels_last_strides_generic", + "sympy_is_contiguous", + "sympy_is_contiguous_generic", + "tensor_has_hints", + "to_node", + "wrap_node", + # torch.fx.experimental.unification.core + "reify", + # torch.fx.experimental.unification.match + "edge", + "match", + "ordering", + "supercedes", + # torch.fx.experimental.unification.more + "reify_object", + "unifiable", + "unify_object", + # torch.fx.experimental.unification.multipledispatch.conflict + "ambiguities", + "ambiguous", + "consistent", + "edge", + "ordering", + "super_signature", + "supercedes", + # torch.fx.experimental.unification.multipledispatch.core + "dispatch", + "ismethod", + # torch.fx.experimental.unification.multipledispatch.dispatcher + "ambiguity_warn", + "halt_ordering", + "restart_ordering", + "source", + "str_signature", + "variadic_signature_matches", + "variadic_signature_matches_iter", + "warning_text", + # torch.fx.experimental.unification.multipledispatch.utils + "expand_tuples", + "groupby", + "raises", + "reverse_dict", + # torch.fx.experimental.unification.multipledispatch.variadic + "isvariadic", + # torch.fx.experimental.unification.unification_tools + "assoc", + "assoc_in", + "dissoc", + "first", + "get_in", + "getter", + "groupby", + "itemfilter", + "itemmap", + "keyfilter", + "keymap", + "merge", + "merge_with", + "update_in", + "valfilter", + "valmap", + # torch.fx.experimental.unification.utils + "freeze", + "hashable", + "raises", + "reverse_dict", + "transitive_get", + "xfail", + # torch.fx.experimental.unification.variable + 
"var", + "vars", + # torch.fx.experimental.unify_refinements + "check_for_type_equality", + "convert_eq", + "infer_symbolic_types", + "infer_symbolic_types_single_pass", + "substitute_all_types", + "substitute_solution_one_type", + "unify_eq", + # torch.fx.experimental.validator + "bisect", + "translation_validation_enabled", + "translation_validation_timeout", + "z3op", + "z3str", + # torch.fx.graph_module + "reduce_deploy_graph_module", + "reduce_graph_module", + "reduce_package_graph_module", + # torch.fx.node + "has_side_effect", + "map_aggregate", + "map_arg", + # torch.fx.operator_schemas + "check_for_mutable_operation", + "create_type_hint", + "get_signature_for_torch_op", + "normalize_function", + "normalize_module", + "type_matches", + # torch.fx.passes.annotate_getitem_nodes + "annotate_getitem_nodes", + # torch.fx.passes.backends.cudagraphs + "partition_cudagraphs", + # torch.fx.passes.dialect.common.cse_pass + "get_CSE_banned_ops", + # torch.fx.passes.graph_manipulation + "get_size_of_all_nodes", + "get_size_of_node", + "get_tensor_meta", + "replace_target_nodes_with", + # torch.fx.passes.infra.pass_manager + "pass_result_wrapper", + "this_before_that_pass_constraint", + # torch.fx.passes.operator_support + "any_chain", + "chain", + "create_op_support", + # torch.fx.passes.param_fetch + "default_matching", + "extract_attrs_for_lowering", + "lift_lowering_attrs_to_nodes", + # torch.fx.passes.pass_manager + "inplace_wrapper", + "log_hook", + "loop_pass", + "these_before_those_pass_constraint", + "this_before_that_pass_constraint", + # torch.fx.passes.reinplace + "reinplace", + # torch.fx.passes.split_module + "split_module", + # torch.fx.passes.split_utils + "getattr_recursive", + "setattr_recursive", + "split_by_tags", + # torch.fx.passes.splitter_base + "generate_inputs_for_submodules", + # torch.fx.passes.tools_common + "get_acc_ops_name", + "get_node_target", + "is_node_output_tensor", + "legalize_graph", + # torch.fx.passes.utils.common + "compare_graphs", + "lift_subgraph_as_module", + # torch.fx.passes.utils.fuser_utils + "erase_nodes", + "fuse_as_graphmodule", + "fuse_by_partitions", + "insert_subgm", + "topo_sort", + "validate_partition", + # torch.fx.passes.utils.source_matcher_utils + "check_subgraphs_connected", + "get_source_partitions", + # torch.fx.proxy + "assert_fn", + # torch.fx.subgraph_rewriter + "replace_pattern", + "replace_pattern_with_filters", + # torch.fx.tensor_type + "is_consistent", + "is_more_precise", + # torch.fx.traceback + "format_stack", + "get_current_meta", + "has_preserved_node_meta", + "preserve_node_meta", + "reset_grad_fn_seq_nr", + "set_current_meta", + "set_grad_fn_seq_nr", + "set_stack_trace", + # torch.jit.annotations + "ann_to_type", + "check_fn", + "get_enum_value_type", + "get_param_names", + "get_signature", + "get_type_line", + "is_function_or_method", + "is_tensor", + "is_vararg", + "parse_type_line", + "split_type_line", + "try_ann_to_type", + "try_real_annotations", + # torch.jit.frontend + "build_class_def", + "build_def", + "build_ignore_context_manager", + "build_param", + "build_param_list", + "build_stmts", + "build_withitems", + "find_before", + "get_class_assigns", + "get_class_properties", + "get_default_args", + "get_default_args_for_class", + "get_jit_class_def", + "get_jit_def", + "is_reserved_name", + "is_torch_jit_ignore_context_manager", + # torch.jit.generate_bytecode + "format_bytecode", + "generate_upgraders_bytecode", + # torch.jit.quantized + "apply_permutation", + "quantize_linear_modules", + 
"quantize_rnn_cell_modules", + "quantize_rnn_modules", + # torch.library + "define", + "get_ctx", + "impl", + "impl_abstract", + # torch.masked.maskedtensor.core + "is_masked_tensor", + # torch.masked.maskedtensor.creation + "as_masked_tensor", + "masked_tensor", + # torch.multiprocessing.pool + "clean_worker", + # torch.multiprocessing.reductions + "fd_id", + "init_reductions", + "rebuild_cuda_tensor", + "rebuild_event", + "rebuild_nested_tensor", + "rebuild_storage_empty", + "rebuild_storage_fd", + "rebuild_storage_filename", + "rebuild_tensor", + "rebuild_typed_storage", + "rebuild_typed_storage_child", + "reduce_event", + "reduce_storage", + "reduce_tensor", + "reduce_typed_storage", + "reduce_typed_storage_child", + "storage_from_cache", + # torch.multiprocessing.spawn + "start_processes", + # torch.nn.functional + "adaptive_max_pool1d_with_indices", + "adaptive_max_pool2d_with_indices", + "adaptive_max_pool3d_with_indices", + "assert_int_or_pair", + "fractional_max_pool2d_with_indices", + "fractional_max_pool3d_with_indices", + "max_pool1d_with_indices", + "max_pool2d_with_indices", + "max_pool3d_with_indices", + "multi_head_attention_forward", + # torch.nn.grad + "conv1d_input", + "conv1d_weight", + "conv2d_input", + "conv2d_weight", + "conv3d_input", + "conv3d_weight", + # torch.nn.init + "constant", + "dirac", + "eye", + "kaiming_normal", + "kaiming_uniform", + "normal", + "orthogonal", + "sparse", + "uniform", + "xavier_normal", + "xavier_uniform", + # torch.nn.modules.rnn + "apply_permutation", + # torch.nn.modules.utils + "consume_prefix_in_state_dict_if_present", + # torch.nn.parallel.comm + "broadcast", + "broadcast_coalesced", + "gather", + "reduce_add", + "reduce_add_coalesced", + "scatter", + # torch.nn.parallel.data_parallel + "data_parallel", + # torch.nn.parallel.parallel_apply + "get_a_var", + "parallel_apply", + # torch.nn.parallel.replicate + "replicate", + # torch.nn.parallel.scatter_gather + "gather", + "is_namedtuple", + "scatter", + "scatter_kwargs", + # torch.nn.parameter + "is_lazy", + # torch.nn.utils.clip_grad + "clip_grad_norm", + "clip_grad_norm_", + "clip_grad_value_", + # torch.nn.utils.convert_parameters + "parameters_to_vector", + "vector_to_parameters", + # torch.nn.utils.fusion + "fuse_conv_bn_eval", + "fuse_conv_bn_weights", + "fuse_linear_bn_eval", + "fuse_linear_bn_weights", + # torch.nn.utils.init + "skip_init", + # torch.nn.utils.memory_format + "convert_conv2d_weight_memory_format", + # torch.nn.utils.parametrizations + "weight_norm", + # torch.nn.utils.parametrize + "transfer_parametrizations_and_params", + "type_before_parametrizations", + # torch.nn.utils.rnn + "bind", + "invert_permutation", + # torch.nn.utils.spectral_norm + "remove_spectral_norm", + "spectral_norm", + # torch.nn.utils.weight_norm + "remove_weight_norm", + "weight_norm", + # torch.onnx.operators + "reshape_from_tensor_shape", + "shape_as_tensor", + # torch.onnx.symbolic_caffe2 + "add", + "avg_pool2d", + "cat", + "conv2d", + "conv2d_relu", + "conv_prepack", + "dequantize", + "linear", + "linear_prepack", + "max_pool2d", + "nchw2nhwc", + "nhwc2nchw", + "quantize_per_tensor", + "register_quantized_ops", + "relu", + "reshape", + "sigmoid", + "slice", + "upsample_nearest2d", + # torch.onnx.symbolic_helper + "args_have_same_dtype", + "check_training_mode", + "dequantize_helper", + "is_caffe2_aten_fallback", + "is_complex_value", + "quantize_helper", + "quantized_args", + "requantize_bias_helper", + # torch.onnx.symbolic_opset10 + "dequantize", + "div", + "embedding_bag", + 
"fake_quantize_per_tensor_affine", + "flip", + "fmod", + "isfinite", + "isinf", + "nan_to_num", + "quantize_per_tensor", + "quantized_add", + "quantized_add_relu", + "quantized_cat", + "quantized_conv1d", + "quantized_conv1d_relu", + "quantized_conv2d", + "quantized_conv2d_relu", + "quantized_conv3d", + "quantized_conv3d_relu", + "quantized_conv_transpose1d", + "quantized_conv_transpose2d", + "quantized_conv_transpose3d", + "quantized_group_norm", + "quantized_hardswish", + "quantized_instance_norm", + "quantized_layer_norm", + "quantized_leaky_relu", + "quantized_linear", + "quantized_linear_relu", + "quantized_mul", + "quantized_sigmoid", + "slice", + "sort", + "topk", + # torch.onnx.symbolic_opset11 + "Delete", + "add", + "append", + "arange", + "argsort", + "atleast_1d", + "atleast_2d", + "atleast_3d", + "cat", + "chunk", + "clamp", + "clamp_max", + "clamp_min", + "constant_pad_nd", + "cumsum", + "embedding_bag", + "embedding_renorm", + "flatten", + "gather", + "hardtanh", + "hstack", + "im2col", + "index", + "index_copy", + "index_fill", + "index_put", + "insert", + "linalg_det", + "linalg_vector_norm", + "logdet", + "masked_scatter", + "masked_select", + "mm", + "narrow", + "normal", + "pad", + "pixel_shuffle", + "pop", + "prim_constant_chunk", + "reflection_pad", + "relu6", + "remainder", + "replication_pad", + "round", + "scatter", + "select", + "size", + "sort", + "split", + "split_with_sizes", + "squeeze", + "stack", + "topk", + "unbind", + "unique_dim", + "unsqueeze", + "vstack", + # torch.onnx.symbolic_opset12 + "argmax", + "argmin", + "binary_cross_entropy_with_logits", + "celu", + "cross_entropy_loss", + "dropout", + "einsum", + "ge", + "le", + "native_dropout", + "nll_loss", + "nll_loss2d", + "nll_loss_nd", + "outer", + "pow", + "tensordot", + "unfold", + # torch.onnx.symbolic_opset13 + "diagonal", + "fake_quantize_per_channel_affine", + "fake_quantize_per_tensor_affine", + "frobenius_norm", + "log_softmax", + "nonzero_numpy", + "quantized_conv1d", + "quantized_conv1d_relu", + "quantized_conv2d", + "quantized_conv2d_relu", + "quantized_conv3d", + "quantized_conv3d_relu", + "quantized_conv_transpose1d", + "quantized_conv_transpose2d", + "quantized_conv_transpose3d", + "quantized_linear", + "quantized_linear_relu", + "repeat_interleave", + "softmax", + "split", + "split_with_sizes", + "tensor_split", + "tile", + "unbind", + "unflatten", + "unsafe_chunk", + "unsafe_split", + "unsafe_split_with_sizes", + "where", + # torch.onnx.symbolic_opset14 + "batch_norm", + "hardswish", + "quantized_hardswish", + "reshape", + "scaled_dot_product_attention", + "tril", + "triu", + # torch.onnx.symbolic_opset15 + "aten__is_", + "aten__isnot_", + "bernoulli", + "prim_unchecked_cast", + # torch.onnx.symbolic_opset16 + "grid_sampler", + "scatter_add", + "scatter_reduce", + # torch.onnx.symbolic_opset17 + "layer_norm", + "stft", + # torch.onnx.symbolic_opset18 + "col2im", + # torch.onnx.symbolic_opset7 + "max", + "min", + # torch.onnx.symbolic_opset8 + "addmm", + "bmm", + "empty", + "empty_like", + "flatten", + "full", + "full_like", + "gt", + "lt", + "matmul", + "mm", + "ones", + "ones_like", + "prelu", + "repeat", + "zeros", + "zeros_like", + # torch.onnx.symbolic_opset9 + "abs", + "acos", + "adaptive_avg_pool1d", + "adaptive_avg_pool2d", + "adaptive_avg_pool3d", + "adaptive_max_pool1d", + "adaptive_max_pool2d", + "adaptive_max_pool3d", + "add", + "addcmul", + "addmm", + "alias", + "amax", + "amin", + "aminmax", + "arange", + "argmax", + "argmin", + "as_strided", + "as_tensor", + "asin", + 
"atan", + "atan2", + "avg_pool1d", + "avg_pool2d", + "avg_pool3d", + "baddbmm", + "batch_norm", + "bernoulli", + "bitwise_not", + "bitwise_or", + "bmm", + "broadcast_tensors", + "broadcast_to", + "bucketize", + "cat", + "cdist", + "ceil", + "clamp", + "clamp_max", + "clamp_min", + "clone", + "constant_pad_nd", + "contiguous", + "conv1d", + "conv2d", + "conv3d", + "conv_tbc", + "conv_transpose1d", + "conv_transpose2d", + "conv_transpose3d", + "convert_element_type", + "convolution", + "cos", + "cosine_similarity", + "cross", + "cumsum", + "detach", + "dim", + "div", + "dot", + "dropout", + "elu", + "embedding", + "embedding_bag", + "empty", + "empty_like", + "eq", + "erf", + "exp", + "expand", + "expand_as", + "eye", + "fill", + "flatten", + "floor", + "floor_divide", + "floordiv", + "frobenius_norm", + "full", + "full_like", + "gather", + "ge", + "gelu", + "get_pool_ceil_padding", + "glu", + "group_norm", + "gru", + "gt", + "hann_window", + "hardshrink", + "hardsigmoid", + "hardswish", + "hardtanh", + "index", + "index_add", + "index_copy", + "index_fill", + "index_put", + "index_select", + "instance_norm", + "is_floating_point", + "is_pinned", + "isnan", + "item", + "kl_div", + "layer_norm", + "le", + "leaky_relu", + "lerp", + "lift", + "linalg_cross", + "linalg_matrix_norm", + "linalg_norm", + "linalg_vector_norm", + "linear", + "linspace", + "log", + "log10", + "log1p", + "log2", + "log_sigmoid", + "log_softmax", + "logical_and", + "logical_not", + "logical_or", + "logical_xor", + "logit", + "logsumexp", + "lstm", + "lstm_cell", + "lt", + "masked_fill", + "masked_fill_", + "matmul", + "max", + "max_pool1d", + "max_pool1d_with_indices", + "max_pool2d", + "max_pool2d_with_indices", + "max_pool3d", + "max_pool3d_with_indices", + "maximum", + "meshgrid", + "min", + "minimum", + "mish", + "mm", + "movedim", + "mse_loss", + "mul", + "multinomial", + "mv", + "narrow", + "native_layer_norm", + "ne", + "neg", + "new_empty", + "new_full", + "new_ones", + "new_zeros", + "nonzero", + "nonzero_numpy", + "noop_complex_operators", + "norm", + "numel", + "numpy_T", + "one_hot", + "ones", + "ones_like", + "onnx_placeholder", + "overload_by_arg_count", + "pad", + "pairwise_distance", + "permute", + "pixel_shuffle", + "pixel_unshuffle", + "pow", + "prelu", + "prim_constant", + "prim_constant_chunk", + "prim_constant_split", + "prim_data", + "prim_device", + "prim_dtype", + "prim_if", + "prim_layout", + "prim_list_construct", + "prim_list_unpack", + "prim_loop", + "prim_max", + "prim_min", + "prim_shape", + "prim_tolist", + "prim_tuple_construct", + "prim_type", + "prim_unchecked_cast", + "prim_uninitialized", + "rand", + "rand_like", + "randint", + "randint_like", + "randn", + "randn_like", + "reciprocal", + "reflection_pad", + "relu", + "relu6", + "remainder", + "repeat", + "repeat_interleave", + "replication_pad", + "reshape", + "reshape_as", + "rnn_relu", + "rnn_tanh", + "roll", + "rrelu", + "rsqrt", + "rsub", + "scalar_tensor", + "scatter", + "scatter_add", + "select", + "selu", + "sigmoid", + "sign", + "silu", + "sin", + "size", + "slice", + "softmax", + "softplus", + "softshrink", + "sort", + "split", + "split_with_sizes", + "sqrt", + "square", + "squeeze", + "stack", + "std", + "std_mean", + "sub", + "t", + "take", + "tan", + "tanh", + "tanhshrink", + "tensor", + "threshold", + "to", + "topk", + "transpose", + "true_divide", + "type_as", + "unbind", + "unfold", + "unsafe_chunk", + "unsafe_split", + "unsafe_split_with_sizes", + "unsqueeze", + "unsupported_complex_operators", + "unused", + 
"upsample_bilinear2d", + "upsample_linear1d", + "upsample_nearest1d", + "upsample_nearest2d", + "upsample_nearest3d", + "upsample_trilinear3d", + "var", + "var_mean", + "view", + "view_as", + "where", + "wrap_logical_op_with_cast_to", + "wrap_logical_op_with_negation", + "zero", + "zeros", + "zeros_like", + # torch.onnx.utils + "disable_apex_o2_state_dict_hook", + "export", + "export_to_pretty_string", + "exporter_context", + "is_in_onnx_export", + "model_signature", + "register_custom_op_symbolic", + "select_model_mode_for_export", + "setup_onnx_logging", + "unconvertible_ops", + "unpack_quantized_tensor", + "warn_on_static_input_change", + # torch.onnx.verification + "check_export_model_diff", + "verify", + "verify_aten_graph", + # torch.optim.adadelta + "adadelta", + # torch.optim.adagrad + "adagrad", + # torch.optim.adam + "adam", + # torch.optim.adamax + "adamax", + # torch.optim.adamw + "adamw", + # torch.optim.asgd + "asgd", + # torch.optim.nadam + "nadam", + # torch.optim.optimizer + "register_optimizer_step_post_hook", + "register_optimizer_step_pre_hook", + # torch.optim.radam + "radam", + # torch.optim.rmsprop + "rmsprop", + # torch.optim.rprop + "rprop", + # torch.optim.sgd + "sgd", + # torch.optim.swa_utils + "get_ema_avg_fn", + "get_ema_multi_avg_fn", + "get_swa_avg_fn", + "get_swa_multi_avg_fn", + "update_bn", + # torch.overrides + "enable_reentrant_dispatch", + # torch.package.analyze.find_first_use_of_broken_modules + "find_first_use_of_broken_modules", + # torch.package.analyze.is_from_package + "is_from_package", + # torch.package.analyze.trace_dependencies + "trace_dependencies", + # torch.profiler.itt + "range", + # torch.profiler.profiler + "schedule", + "supported_activities", + "tensorboard_trace_handler", + # torch.return_types + "pytree_register_structseq", + # torch.serialization + "check_module_version_greater_or_equal", + "default_restore_location", + "load", + "location_tag", + "mkdtemp", + "normalize_storage_type", + "save", + "storage_to_tensor_type", + "validate_cuda_device", + "validate_hpu_device", + # torch.signal.windows.windows + "bartlett", + "blackman", + "cosine", + "exponential", + "gaussian", + "general_cosine", + "general_hamming", + "hamming", + "hann", + "kaiser", + "nuttall", + # torch.sparse.semi_structured + "to_sparse_semi_structured", + # torch.utils.backend_registration + "generate_methods_for_privateuse1_backend", + "rename_privateuse1_backend", + # torch.utils.benchmark.examples.blas_compare_setup + "conda_run", + # torch.utils.benchmark.examples.op_benchmark + "assert_dicts_equal", + # torch.utils.benchmark.op_fuzzers.spectral + "power_range", + # torch.utils.benchmark.utils.common + "ordered_unique", + "select_unit", + "set_torch_threads", + "trim_sigfig", + "unit_to_english", + # torch.utils.benchmark.utils.compare + "optional_min", + # torch.utils.benchmark.utils.compile + "bench_all", + "bench_loop", + "benchmark_compile", + # torch.utils.benchmark.utils.cpp_jit + "compile_callgrind_template", + "compile_timeit_template", + "get_compat_bindings", + # torch.utils.benchmark.utils.fuzzer + "dtype_size", + "prod", + # torch.utils.benchmark.utils.timer + "timer", + # torch.utils.benchmark.utils.valgrind_wrapper.timer_interface + "wrapper_singleton", + # torch.utils.bundled_inputs + "augment_many_model_functions_with_bundled_inputs", + "augment_model_with_bundled_inputs", + "bundle_inputs", + "bundle_large_tensor", + "bundle_randn", + # torch.utils.checkpoint + "check_backward_validity", + "context_fn_gen", + "detach_variable", + 
"get_device_states", + "noop_context_fn", + "set_checkpoint_early_stop", + "set_device_states", + # torch.utils.collect_env + "check_release_file", + "get_cachingallocator_config", + "get_clang_version", + "get_cmake_version", + "get_conda_packages", + "get_cpu_info", + "get_cuda_module_loading_config", + "get_cudnn_version", + "get_env_info", + "get_gcc_version", + "get_gpu_info", + "get_libc_version", + "get_lsb_version", + "get_mac_version", + "get_nvidia_driver_version", + "get_nvidia_smi", + "get_os", + "get_pip_packages", + "get_platform", + "get_pretty_env_info", + "get_python_platform", + "get_running_cuda_version", + "get_windows_version", + "is_xnnpack_available", + "pretty_str", + # torch.utils.cpp_backtrace + "get_cpp_backtrace", + # torch.utils.cpp_extension + "check_compiler_is_gcc", + "check_compiler_ok_for_platform", + "get_cxx_compiler", + "get_default_build_root", + "library_paths", + "remove_extension_h_precompiler_headers", + # torch.utils.data.backward_compatibility + "worker_init_fn", + # torch.utils.data.datapipes.dataframe.dataframe_wrapper + "concat", + "create_dataframe", + "get_columns", + "get_df_wrapper", + "get_item", + "get_len", + "is_column", + "is_dataframe", + "iterate", + "set_df_wrapper", + # torch.utils.data.datapipes.dataframe.dataframes + "disable_capture", + "get_val", + # torch.utils.data.datapipes.gen_pyi + "extract_class_name", + "extract_method_name", + "find_file_paths", + "gen_from_template", + "get_method_definitions", + "materialize_lines", + "parse_datapipe_file", + "parse_datapipe_files", + "process_signature", + "split_outside_bracket", + # torch.utils.data.datapipes.map.callable + "default_fn", + # torch.utils.data.datapipes.utils.common + "get_file_binaries_from_pathnames", + "get_file_pathnames_from_root", + "match_masks", + "validate_input_col", + "validate_pathname_binary_tuple", + # torch.utils.data.datapipes.utils.decoder + "audiohandler", + "basichandlers", + "extension_extract_fn", + "handle_extension", + "imagehandler", + "mathandler", + "videohandler", + # torch.utils.data.dataset + "random_split", + # torch.utils.data.graph + "traverse", + "traverse_dps", + # torch.utils.data.graph_settings + "apply_random_seed", + "apply_sharding", + "apply_shuffle_seed", + "apply_shuffle_settings", + "get_all_graph_pipes", + # torch.utils.flop_counter + "addmm_flop", + "baddbmm_flop", + "bmm_flop", + "conv_backward_flop", + "conv_flop", + "conv_flop_count", + "convert_num_with_suffix", + "get_shape", + "get_suffix_str", + "mm_flop", + "normalize_tuple", + "register_flop_formula", + "sdpa_backward_flop", + "sdpa_backward_flop_count", + "sdpa_flop", + "sdpa_flop_count", + "shape_wrapper", + "transpose_shape", + # torch.utils.hipify.hipify_python + "add_dim3", + "compute_stats", + "extract_arguments", + "file_add_header", + "file_specific_replacement", + "find_bracket_group", + "find_closure_group", + "find_parentheses_group", + "fix_static_global_kernels", + "get_hip_file_path", + "hip_header_magic", + "hipify", + "is_caffe2_gpu_file", + "is_cusparse_file", + "is_out_of_place", + "is_pytorch_file", + "is_special_file", + "match_extensions", + "matched_files_iter", + "openf", + "preprocess_file_and_save_result", + "preprocessor", + "processKernelLaunches", + "replace_extern_shared", + "replace_math_functions", + "str2bool", + # torch.utils.hooks + "unserializable_hook", + "warn_if_has_hooks", + # torch.utils.jit.log_extract + "extract_ir", + "load_graph_and_inputs", + "make_tensor_from_type", + "no_fuser", + "time_cpu", + "time_cuda", + # 
+    "to_mkldnn",
+    # torch.utils.mobile_optimizer
+    "generate_mobile_module_lints",
+    # torch.utils.tensorboard.summary
+    "audio",
+    "compute_curve",
+    "custom_scalars",
+    "draw_boxes",
+    "half_to_int",
+    "histogram",
+    "histogram_raw",
+    "hparams",
+    "image",
+    "image_boxes",
+    "int_to_half",
+    "make_histogram",
+    "make_image",
+    "make_video",
+    "mesh",
+    "pr_curve",
+    "pr_curve_raw",
+    "scalar",
+    "tensor_proto",
+    "text",
+    "video",
+    # torch.utils.throughput_benchmark
+    "format_time",
 ]
 
 coverage_ignore_classes = [
@@ -339,6 +2286,1206 @@ coverage_ignore_classes = [
     "Quantize",
     # torch.utils.backcompat
     "Warning",
+    # torch.ao.nn.intrinsic.modules.fused
+    "ConvAdd2d",
+    "ConvAddReLU2d",
+    "LinearBn1d",
+    "LinearLeakyReLU",
+    "LinearTanh",
+    # torch.ao.nn.intrinsic.qat.modules.conv_fused
+    "ConvBnReLU1d",
+    "ConvBnReLU2d",
+    "ConvBnReLU3d",
+    "ConvReLU1d",
+    "ConvReLU2d",
+    "ConvReLU3d",
+    # torch.ao.nn.intrinsic.qat.modules.linear_fused
+    "LinearBn1d",
+    # torch.ao.nn.intrinsic.qat.modules.linear_relu
+    "LinearReLU",
+    # torch.ao.nn.intrinsic.quantized.dynamic.modules.linear_relu
+    "LinearReLU",
+    # torch.ao.nn.intrinsic.quantized.modules.bn_relu
+    "BNReLU2d",
+    "BNReLU3d",
+    # torch.ao.nn.intrinsic.quantized.modules.conv_add
+    "ConvAdd2d",
+    "ConvAddReLU2d",
+    # torch.ao.nn.intrinsic.quantized.modules.conv_relu
+    "ConvReLU1d",
+    "ConvReLU2d",
+    "ConvReLU3d",
+    # torch.ao.nn.intrinsic.quantized.modules.linear_relu
+    "LinearLeakyReLU",
+    "LinearReLU",
+    "LinearTanh",
+    # torch.ao.nn.qat.modules.conv
+    "Conv1d",
+    "Conv2d",
+    "Conv3d",
+    # torch.ao.nn.qat.modules.embedding_ops
+    "Embedding",
+    "EmbeddingBag",
+    # torch.ao.nn.qat.modules.linear
+    "Linear",
+    # torch.ao.nn.quantizable.modules.activation
+    "MultiheadAttention",
+    # torch.ao.nn.quantizable.modules.rnn
+    "LSTM",
+    "LSTMCell",
+    # torch.ao.nn.quantized.dynamic.modules.conv
+    "Conv1d",
+    "Conv2d",
+    "Conv3d",
+    "ConvTranspose1d",
+    "ConvTranspose2d",
+    "ConvTranspose3d",
+    # torch.ao.nn.quantized.dynamic.modules.linear
+    "Linear",
+    # torch.ao.nn.quantized.dynamic.modules.rnn
+    "GRU",
+    "GRUCell",
+    "LSTM",
+    "LSTMCell",
+    "PackedParameter",
+    "RNNBase",
+    "RNNCell",
+    "RNNCellBase",
+    # torch.ao.nn.quantized.modules.activation
+    "ELU",
+    "Hardswish",
+    "LeakyReLU",
+    "MultiheadAttention",
+    "PReLU",
+    "ReLU6",
+    "Sigmoid",
+    "Softmax",
+    # torch.ao.nn.quantized.modules.batchnorm
+    "BatchNorm2d",
+    "BatchNorm3d",
+    # torch.ao.nn.quantized.modules.conv
+    "Conv1d",
+    "Conv2d",
+    "Conv3d",
+    "ConvTranspose1d",
+    "ConvTranspose2d",
+    "ConvTranspose3d",
+    # torch.ao.nn.quantized.modules.dropout
+    "Dropout",
+    # torch.ao.nn.quantized.modules.embedding_ops
+    "Embedding",
+    "EmbeddingBag",
+    "EmbeddingPackedParams",
+    # torch.ao.nn.quantized.modules.functional_modules
+    "FXFloatFunctional",
+    "FloatFunctional",
+    "QFunctional",
+    # torch.ao.nn.quantized.modules.linear
+    "Linear",
+    "LinearPackedParams",
+    # torch.ao.nn.quantized.modules.normalization
+    "GroupNorm",
+    "InstanceNorm1d",
+    "InstanceNorm2d",
+    "InstanceNorm3d",
+    "LayerNorm",
+    # torch.ao.nn.quantized.modules.rnn
+    "LSTM",
+    # torch.ao.nn.quantized.modules.utils
+    "WeightedQuantizedModule",
+    # torch.ao.nn.quantized.reference.modules.conv
+    "Conv1d",
+    "Conv2d",
+    "Conv3d",
+    "ConvTranspose1d",
+    "ConvTranspose2d",
+    "ConvTranspose3d",
+    # torch.ao.nn.quantized.reference.modules.linear
+    "Linear",
+    # torch.ao.nn.quantized.reference.modules.rnn
+    "GRU",
+    "GRUCell",
+    "LSTM",
+    "LSTMCell",
+    "RNNBase",
+    "RNNCell",
+    "RNNCellBase",
+    # torch.ao.nn.quantized.reference.modules.sparse
+    "Embedding",
+    "EmbeddingBag",
+    # torch.ao.nn.quantized.reference.modules.utils
+    "ReferenceQuantizedModule",
+    # torch.ao.nn.sparse.quantized.dynamic.linear
+    "Linear",
+    # torch.ao.nn.sparse.quantized.linear
+    "Linear",
+    "LinearPackedParams",
+    # torch.ao.nn.sparse.quantized.utils
+    "LinearBlockSparsePattern",
+    # torch.ao.ns.fx.graph_matcher
+    "SubgraphTypeRelationship",
+    # torch.ao.ns.fx.n_shadows_utils
+    "OutputProp",
+    # torch.ao.ns.fx.ns_types
+    "NSSingleResultValuesType",
+    "NSSubgraph",
+    # torch.ao.ns.fx.qconfig_multi_mapping
+    "QConfigMultiMapping",
+    # torch.ao.pruning.scheduler.base_scheduler
+    "BaseScheduler",
+    # torch.ao.pruning.scheduler.cubic_scheduler
+    "CubicSL",
+    # torch.ao.pruning.scheduler.lambda_scheduler
+    "LambdaSL",
+    # torch.ao.pruning.sparsifier.base_sparsifier
+    "BaseSparsifier",
+    # torch.ao.pruning.sparsifier.nearly_diagonal_sparsifier
+    "NearlyDiagonalSparsifier",
+    # torch.ao.pruning.sparsifier.utils
+    "FakeSparsity",
+    # torch.ao.pruning.sparsifier.weight_norm_sparsifier
+    "WeightNormSparsifier",
+    # torch.ao.quantization.backend_config.backend_config
+    "BackendConfig",
+    "BackendPatternConfig",
+    "DTypeConfig",
+    # torch.ao.quantization.fake_quantize
+    "FakeQuantize",
+    "FakeQuantizeBase",
+    "FixedQParamsFakeQuantize",
+    "FusedMovingAvgObsFakeQuantize",
+    # torch.ao.quantization.fx.fuse_handler
+    "DefaultFuseHandler",
+    "FuseHandler",
+    # torch.ao.quantization.fx.graph_module
+    "FusedGraphModule",
+    "ObservedGraphModule",
+    "ObservedStandaloneGraphModule",
+    # torch.ao.quantization.fx.quantize_handler
+    "BatchNormQuantizeHandler",
+    "BinaryOpQuantizeHandler",
+    "CatQuantizeHandler",
+    "ConvReluQuantizeHandler",
+    "CopyNodeQuantizeHandler",
+    "CustomModuleQuantizeHandler",
+    "DefaultNodeQuantizeHandler",
+    "EmbeddingQuantizeHandler",
+    "FixedQParamsOpQuantizeHandler",
+    "GeneralTensorShapeOpQuantizeHandler",
+    "LinearReLUQuantizeHandler",
+    "RNNDynamicQuantizeHandler",
+    "StandaloneModuleQuantizeHandler",
+    # torch.ao.quantization.fx.tracer
+    "QuantizationTracer",
+    "ScopeContextManager",
+    # torch.ao.quantization.fx.utils
+    "ObservedGraphModuleAttrs",
+    # torch.ao.quantization.observer
+    "FixedQParamsObserver",
+    "HistogramObserver",
+    "MinMaxObserver",
+    "MovingAverageMinMaxObserver",
+    "MovingAveragePerChannelMinMaxObserver",
+    "NoopObserver",
+    "ObserverBase",
+    "PerChannelMinMaxObserver",
+    "PlaceholderObserver",
+    "RecordingObserver",
+    "ReuseInputObserver",
+    "UniformQuantizationObserverBase",
+    "default_debug_observer",
+    "default_placeholder_observer",
+    "default_reuse_input_observer",
+    # torch.ao.quantization.pt2e.duplicate_dq_pass
+    "DuplicateDQPass",
+    # torch.ao.quantization.pt2e.port_metadata_pass
+    "PortNodeMetaForQDQ",
+    # torch.ao.quantization.qconfig
+    "QConfigDynamic",
+    # torch.ao.quantization.quant_type
+    "QuantType",
+    # torch.ao.quantization.quantizer.composable_quantizer
+    "ComposableQuantizer",
+    # torch.ao.quantization.quantizer.embedding_quantizer
+    "EmbeddingQuantizer",
+    # torch.ao.quantization.quantizer.quantizer
+    "DerivedQuantizationSpec",
+    "FixedQParamsQuantizationSpec",
+    "QuantizationAnnotation",
+    "QuantizationSpec",
+    "QuantizationSpecBase",
+    "SharedQuantizationSpec",
+    # torch.ao.quantization.quantizer.x86_inductor_quantizer
+    "X86InductorQuantizer",
+    # torch.ao.quantization.quantizer.xnnpack_quantizer
+    "XNNPACKQuantizer",
+    # torch.ao.quantization.quantizer.xnnpack_quantizer_utils
+    "OperatorConfig",
+    "QuantizationConfig",
+    # torch.ao.quantization.stubs
"DeQuantStub", + "QuantStub", + "QuantWrapper", + # torch.ao.quantization.utils + "MatchAllNode", + # torch.autograd.forward_ad + "UnpackedDualTensor", + # torch.autograd.function + "BackwardCFunction", + "Function", + "FunctionCtx", + "FunctionMeta", + "InplaceFunction", + "NestedIOFunction", + # torch.autograd.grad_mode + "inference_mode", + "set_grad_enabled", + "set_multithreading_enabled", + # torch.autograd.gradcheck + "GradcheckError", + # torch.autograd.profiler + "EnforceUnique", + "KinetoStepTracker", + "profile", + "record_function", + # torch.autograd.profiler_legacy + "profile", + # torch.autograd.profiler_util + "EventList", + "FormattedTimesMixin", + "FunctionEvent", + "FunctionEventAvg", + "Interval", + "Kernel", + "MemRecordsAcc", + "StringTable", + # torch.autograd.variable + "Variable", + "VariableMeta", + # torch.backends.cudnn.rnn + "Unserializable", + # torch.cuda.amp.grad_scaler + "GradScaler", + "OptState", + # torch.cuda.graphs + "CUDAGraph", + # torch.cuda.streams + "Event", + # torch.distributed.algorithms.ddp_comm_hooks.post_localSGD_hook + "PostLocalSGDState", + # torch.distributed.algorithms.ddp_comm_hooks.powerSGD_hook + "PowerSGDState", + # torch.distributed.algorithms.join + "Join", + "JoinHook", + "Joinable", + # torch.distributed.algorithms.model_averaging.averagers + "ModelAverager", + "PeriodicModelAverager", + # torch.distributed.algorithms.model_averaging.hierarchical_model_averager + "HierarchicalModelAverager", + # torch.distributed.argparse_util + "check_env", + "env", + # torch.distributed.checkpoint.api + "CheckpointException", + # torch.distributed.checkpoint.default_planner + "DefaultLoadPlanner", + "DefaultSavePlanner", + # torch.distributed.checkpoint.filesystem + "FileSystemReader", + "FileSystemWriter", + # torch.distributed.checkpoint.metadata + "BytesStorageMetadata", + "ChunkStorageMetadata", + "Metadata", + "MetadataIndex", + # torch.distributed.checkpoint.planner + "LoadItemType", + "LoadPlanner", + "SavePlanner", + "WriteItemType", + # torch.distributed.checkpoint.state_dict + "DistributedStateDictOptions", + # torch.distributed.checkpoint.storage + "WriteResult", + # torch.distributed.collective_utils + "SyncPayload", + # torch.distributed.distributed_c10d + "AllToAllOptions", + "AllreduceCoalescedOptions", + "AllreduceOptions", + "Backend", + "BackendConfig", + "BarrierOptions", + "BroadcastOptions", + "DebugLevel", + "GatherOptions", + "GroupMember", + "ProcessGroup", + "ProcessGroupGloo", + "ProcessGroupNCCL", + "ReduceOptions", + "ReduceScatterOptions", + "ScatterOptions", + "Work", + "group", + # torch.distributed.elastic.agent.server.api + "ElasticAgent", + "RunResult", + "SimpleElasticAgent", + "WorkerSpec", + # torch.distributed.elastic.events.api + "Event", + "RdzvEvent", + # torch.distributed.elastic.metrics.api + "ConsoleMetricHandler", + "MetricData", + "MetricHandler", + "MetricStream", + "MetricsConfig", + "NullMetricHandler", + # torch.distributed.elastic.multiprocessing.api + "MultiprocessContext", + "PContext", + "RunProcsResult", + "SignalException", + "Std", + "SubprocessContext", + "SubprocessHandler", + # torch.distributed.elastic.multiprocessing.tail_log + "TailLog", + # torch.distributed.elastic.rendezvous.api + "RendezvousHandler", + "RendezvousHandlerRegistry", + "RendezvousParameters", + # torch.distributed.elastic.rendezvous.dynamic_rendezvous + "DynamicRendezvousHandler", + "RendezvousSettings", + # torch.distributed.elastic.rendezvous.etcd_rendezvous + "EtcdRendezvous", + "EtcdRendezvousHandler", + 
"EtcdRendezvousRetryImmediately", + "EtcdRendezvousRetryableFailure", + # torch.distributed.elastic.rendezvous.etcd_server + "EtcdServer", + # torch.distributed.elastic.rendezvous.static_tcp_rendezvous + "StaticTCPRendezvous", + # torch.distributed.elastic.timer.api + "RequestQueue", + "TimerClient", + "TimerServer", + # torch.distributed.elastic.timer.file_based_local_timer + "FileTimerClient", + "FileTimerRequest", + "FileTimerServer", + # torch.distributed.elastic.timer.local_timer + "LocalTimerClient", + "LocalTimerServer", + "MultiprocessingRequestQueue", + # torch.distributed.elastic.utils.api + "macros", + # torch.distributed.elastic.utils.data.cycling_iterator + "CyclingIterator", + # torch.distributed.elastic.utils.data.elastic_distributed_sampler + "ElasticDistributedSampler", + # torch.distributed.fsdp.api + "StateDictType", + # torch.distributed.fsdp.fully_sharded_data_parallel + "FullyShardedDataParallel", + "OptimStateKeyType", + # torch.distributed.fsdp.sharded_grad_scaler + "ShardedGradScaler", + # torch.distributed.fsdp.wrap + "CustomPolicy", + "ModuleWrapPolicy", + # torch.distributed.launcher.api + "LaunchConfig", + "elastic_launch", + # torch.distributed.optim.optimizer + "DistributedOptimizer", + # torch.distributed.optim.post_localSGD_optimizer + "PostLocalSGDOptimizer", + # torch.distributed.optim.zero_redundancy_optimizer + "ZeroRedundancyOptimizer", + # torch.distributed.pipeline.sync.batchnorm + "DeferredBatchNorm", + # torch.distributed.pipeline.sync.checkpoint + "Checkpoint", + "Checkpointing", + "Context", + "Function", + "Recompute", + "ThreadLocal", + # torch.distributed.pipeline.sync.copy + "Context", + "Copy", + "Wait", + # torch.distributed.pipeline.sync.dependency + "Fork", + "Join", + # torch.distributed.pipeline.sync.microbatch + "Batch", + "NoChunk", + # torch.distributed.pipeline.sync.pipe + "BalanceError", + "Pipe", + "PipeSequential", + "WithDevice", + # torch.distributed.pipeline.sync.pipeline + "Pipeline", + # torch.distributed.pipeline.sync.skip.layout + "SkipLayout", + # torch.distributed.pipeline.sync.skip.namespace + "Namespace", + # torch.distributed.pipeline.sync.skip.portal + "Context", + "Portal", + "PortalBlue", + "PortalCopy", + "PortalOrange", + # torch.distributed.pipeline.sync.skip.skippable + "Skippable", + # torch.distributed.pipeline.sync.skip.tracker + "SkipTracker", + "SkipTrackerThroughPotals", + "ThreadLocal", + # torch.distributed.pipeline.sync.stream + "CPUStreamType", + # torch.distributed.pipeline.sync.worker + "Task", + # torch.distributed.rpc.api + "AllGatherStates", + "RRef", + # torch.distributed.rpc.backend_registry + "BackendValue", + # torch.distributed.rpc.internal + "PythonUDF", + "RPCExecMode", + "RemoteException", + # torch.distributed.rpc.rref_proxy + "RRefProxy", + # torch.distributed.tensor.parallel.fsdp + "DTensorExtensions", + # torch.distributed.tensor.parallel.style + "ParallelStyle", + # torch.distributions.logistic_normal + "LogisticNormal", + # torch.distributions.one_hot_categorical + "OneHotCategoricalStraightThrough", + # torch.distributions.relaxed_categorical + "ExpRelaxedCategorical", + # torch.distributions.utils + "lazy_property", + # torch.export.exported_program + "ConstantArgument", + "ExportedProgram", + # torch.fx.experimental.accelerator_partitioner + "DAG", + "DAGNode", + "PartitionResult", + "Partitioner", + # torch.fx.experimental.const_fold + "FoldedGraphModule", + # torch.fx.experimental.graph_gradual_typechecker + "Refine", + # torch.fx.experimental.meta_tracer + "MetaAttribute", + 
"MetaDeviceAttribute", + "MetaProxy", + "MetaTracer", + # torch.fx.experimental.migrate_gradual_types.constraint + "ApplyBroadcasting", + "BVar", + "BinConstraintD", + "BinConstraintT", + "BinaryConstraint", + "CalcConv", + "CalcMaxPool", + "CalcProduct", + "CanReshape", + "Conj", + "Constraint", + "DGreatestUpperBound", + "DVar", + "Disj", + "F", + "GetItem", + "GetItemTensor", + "IndexSelect", + "Prod", + "T", + "TGreatestUpperBound", + "TVar", + "Transpose", + # torch.fx.experimental.migrate_gradual_types.constraint_generator + "ConstraintGenerator", + # torch.fx.experimental.normalize + "NormalizeArgs", + "NormalizeOperators", + # torch.fx.experimental.optimization + "MklSubgraph", + "UnionFind", + # torch.fx.experimental.partitioner_utils + "Device", + "Partition", + "PartitionLatency", + "PartitionMode", + "PartitionerConfig", + # torch.fx.experimental.proxy_tensor + "DecompositionInterpreter", + "PreDispatchTorchFunctionMode", + "ProxySymDispatchMode", + "ProxyTorchDispatchMode", + "PythonKeyTracer", + # torch.fx.experimental.recording + "FakeTensorMeta", + "NotEqualError", + "ShapeEnvEvent", + # torch.fx.experimental.refinement_types + "Equality", + # torch.fx.experimental.rewriter + "AST_Rewriter", + "RewritingTracer", + # torch.fx.experimental.schema_type_annotation + "AnnotateTypesWithSchema", + # torch.fx.experimental.symbolic_shapes + "Constraint", + "ConstraintViolationError", + "DimConstraints", + "DimDynamic", + "DynamicDimConstraintPrinter", + "EqualityConstraint", + "GuardOnDataDependentSymNode", + "IsNonOverlappingAndDenseIndicator", + "LoggingShapeGuardPrinter", + "Pow", + "RelaxedUnspecConstraint", + "RuntimeAssert", + "ShapeEnv", + "ShapeGuardPrinter", + "StrictMinMaxConstraint", + "SymDispatchMode", + "SymNode", + "TrueDiv", + # torch.fx.experimental.unification.match + "Dispatcher", + "VarDispatcher", + # torch.fx.experimental.unification.multipledispatch.conflict + "AmbiguityWarning", + # torch.fx.experimental.unification.multipledispatch.dispatcher + "Dispatcher", + "MDNotImplementedError", + "MethodDispatcher", + # torch.fx.experimental.unification.multipledispatch.variadic + "Variadic", + "VariadicSignatureMeta", + "VariadicSignatureType", + # torch.fx.experimental.unification.variable + "Var", + # torch.fx.experimental.validator + "BisectValidationException", + "PopulateValidator", + "SympyToZ3", + "ValidationException", + # torch.fx.graph + "PythonCode", + # torch.fx.immutable_collections + "immutable_dict", + "immutable_list", + # torch.fx.interpreter + "Interpreter", + # torch.fx.operator_schemas + "ArgsKwargsPair", + # torch.fx.passes.backends.cudagraphs + "CudaGraphsSupport", + # torch.fx.passes.dialect.common.cse_pass + "CSEPass", + # torch.fx.passes.fake_tensor_prop + "FakeTensorProp", + # torch.fx.passes.graph_drawer + "FxGraphDrawer", + # torch.fx.passes.graph_manipulation + "size_bytes", + # torch.fx.passes.infra.partitioner + "CapabilityBasedPartitioner", + "Partition", + # torch.fx.passes.infra.pass_base + "PassBase", + "PassResult", + # torch.fx.passes.infra.pass_manager + "PassManager", + # torch.fx.passes.net_min_base + "FxNetMinimizerBadModuleError", + "FxNetMinimizerResultMismatchError", + "FxNetMinimizerRunFuncError", + # torch.fx.passes.operator_support + "OpSupports", + "OperatorSupport", + "OperatorSupportBase", + # torch.fx.passes.pass_manager + "PassManager", + # torch.fx.passes.shape_prop + "ShapeProp", + # torch.fx.passes.split_module + "Partition", + # torch.fx.passes.split_utils + "Component", + # torch.fx.passes.splitter_base + 
"FxNetAccNodesFinder", + "FxNetSplitterInternalError", + "SplitResult", + "Subgraph", + # torch.fx.passes.tests.test_pass_manager + "TestPassManager", + # torch.fx.passes.tools_common + "FxNetAccFusionsFinder", + # torch.fx.passes.utils.common + "HolderModule", + # torch.fx.passes.utils.matcher_utils + "InternalMatch", + "SubgraphMatcher", + # torch.fx.passes.utils.source_matcher_utils + "SourcePartition", + # torch.fx.proxy + "Attribute", + "ParameterProxy", + "Proxy", + "Scope", + "ScopeContextManager", + "TraceError", + "TracerBase", + # torch.fx.subgraph_rewriter + "Match", + "ReplacedPatterns", + # torch.jit.annotations + "EvalEnv", + "Module", + # torch.jit.frontend + "Builder", + "ExprBuilder", + "FrontendError", + "FrontendTypeError", + "NotSupportedError", + "StmtBuilder", + "UnsupportedNodeError", + "WithItemBuilder", + # torch.masked.maskedtensor.core + "MaskedTensor", + # torch.multiprocessing.pool + "Pool", + # torch.multiprocessing.queue + "ConnectionWrapper", + "Queue", + "SimpleQueue", + # torch.multiprocessing.reductions + "SharedCache", + # torch.multiprocessing.spawn + "ProcessContext", + "ProcessException", + "ProcessExitedException", + "ProcessRaisedException", + "SpawnContext", + # torch.nn.cpp + "ModuleWrapper", + "OrderedDictWrapper", + # torch.nn.modules.activation + "CELU", + "ELU", + "GELU", + "GLU", + "Hardshrink", + "Hardsigmoid", + "Hardswish", + "Hardtanh", + "LeakyReLU", + "LogSigmoid", + "LogSoftmax", + "Mish", + "MultiheadAttention", + "PReLU", + "RReLU", + "ReLU", + "ReLU6", + "SELU", + "SiLU", + "Sigmoid", + "Softmax", + "Softmax2d", + "Softmin", + "Softplus", + "Softshrink", + "Softsign", + "Tanh", + "Tanhshrink", + "Threshold", + # torch.nn.modules.adaptive + "AdaptiveLogSoftmaxWithLoss", + # torch.nn.modules.batchnorm + "SyncBatchNorm", + # torch.nn.modules.channelshuffle + "ChannelShuffle", + # torch.nn.modules.container + "Container", + "ModuleList", + "ParameterList", + "Sequential", + # torch.nn.modules.conv + "Conv1d", + "Conv2d", + "Conv3d", + "ConvTranspose1d", + "ConvTranspose2d", + "ConvTranspose3d", + # torch.nn.modules.distance + "CosineSimilarity", + "PairwiseDistance", + # torch.nn.modules.dropout + "AlphaDropout", + "Dropout", + "Dropout1d", + "Dropout2d", + "Dropout3d", + "FeatureAlphaDropout", + # torch.nn.modules.flatten + "Flatten", + "Unflatten", + # torch.nn.modules.fold + "Fold", + "Unfold", + # torch.nn.modules.linear + "Bilinear", + "Identity", + "LazyLinear", + "Linear", + "NonDynamicallyQuantizableLinear", + # torch.nn.modules.loss + "BCELoss", + "BCEWithLogitsLoss", + "CTCLoss", + "CosineEmbeddingLoss", + "CrossEntropyLoss", + "GaussianNLLLoss", + "HingeEmbeddingLoss", + "HuberLoss", + "KLDivLoss", + "L1Loss", + "MSELoss", + "MarginRankingLoss", + "MultiLabelMarginLoss", + "MultiLabelSoftMarginLoss", + "MultiMarginLoss", + "NLLLoss", + "NLLLoss2d", + "PoissonNLLLoss", + "SmoothL1Loss", + "SoftMarginLoss", + "TripletMarginLoss", + "TripletMarginWithDistanceLoss", + # torch.nn.modules.module + "Module", + # torch.nn.modules.normalization + "CrossMapLRN2d", + "GroupNorm", + "LayerNorm", + "LocalResponseNorm", + # torch.nn.modules.padding + "CircularPad1d", + "CircularPad2d", + "CircularPad3d", + "ZeroPad1d", + "ZeroPad2d", + "ZeroPad3d", + # torch.nn.modules.pixelshuffle + "PixelShuffle", + "PixelUnshuffle", + # torch.nn.modules.pooling + "AdaptiveAvgPool1d", + "AdaptiveAvgPool2d", + "AdaptiveAvgPool3d", + "AdaptiveMaxPool1d", + "AdaptiveMaxPool2d", + "AdaptiveMaxPool3d", + "AvgPool1d", + "AvgPool2d", + "AvgPool3d", + 
"FractionalMaxPool2d", + "FractionalMaxPool3d", + "LPPool1d", + "LPPool2d", + "MaxPool1d", + "MaxPool2d", + "MaxPool3d", + "MaxUnpool1d", + "MaxUnpool2d", + "MaxUnpool3d", + # torch.nn.modules.rnn + "GRU", + "GRUCell", + "LSTM", + "LSTMCell", + "RNN", + "RNNBase", + "RNNCell", + "RNNCellBase", + # torch.nn.modules.sparse + "Embedding", + "EmbeddingBag", + # torch.nn.modules.upsampling + "Upsample", + # torch.nn.parallel.data_parallel + "DataParallel", + # torch.nn.parallel.distributed + "DistributedDataParallel", + # torch.nn.parameter + "UninitializedTensorMixin", + # torch.nn.utils.parametrize + "ParametrizationList", + # torch.nn.utils.prune + "CustomFromMask", + "Identity", + "L1Unstructured", + "RandomUnstructured", + # torch.nn.utils.rnn + "PackedSequence", + "PackedSequence_", + # torch.nn.utils.spectral_norm + "SpectralNorm", + "SpectralNormLoadStateDictPreHook", + "SpectralNormStateDictHook", + # torch.nn.utils.weight_norm + "WeightNorm", + # torch.onnx.errors + "OnnxExporterError", + "OnnxExporterWarning", + "SymbolicValueError", + "UnsupportedOperatorError", + # torch.onnx.verification + "OnnxBackend", + "OnnxTestCaseRepro", + # torch.optim.adadelta + "Adadelta", + # torch.optim.adagrad + "Adagrad", + # torch.optim.adam + "Adam", + # torch.optim.adamax + "Adamax", + # torch.optim.adamw + "AdamW", + # torch.optim.asgd + "ASGD", + # torch.optim.lbfgs + "LBFGS", + # torch.optim.lr_scheduler + "ChainedScheduler", + "ConstantLR", + "CosineAnnealingLR", + "CosineAnnealingWarmRestarts", + "CyclicLR", + "ExponentialLR", + "LRScheduler", + "LambdaLR", + "LinearLR", + "MultiStepLR", + "MultiplicativeLR", + "OneCycleLR", + "PolynomialLR", + "ReduceLROnPlateau", + "SequentialLR", + "StepLR", + # torch.optim.nadam + "NAdam", + # torch.optim.optimizer + "Optimizer", + # torch.optim.radam + "RAdam", + # torch.optim.rmsprop + "RMSprop", + # torch.optim.rprop + "Rprop", + # torch.optim.sgd + "SGD", + # torch.optim.sparse_adam + "SparseAdam", + # torch.optim.swa_utils + "AveragedModel", + "SWALR", + # torch.overrides + "BaseTorchFunctionMode", + "TorchFunctionMode", + # torch.package.file_structure_representation + "Directory", + # torch.package.glob_group + "GlobGroup", + # torch.package.importer + "Importer", + "ObjMismatchError", + "ObjNotFoundError", + "OrderedImporter", + # torch.package.package_exporter + "PackageExporter", + "PackagingErrorReason", + # torch.package.package_importer + "PackageImporter", + # torch.profiler.profiler + "ExecutionTraceObserver", + "profile", + # torch.return_types + "aminmax", + "aminmax_out", + "cummax", + "cummax_out", + "cummin", + "cummin_out", + "frexp", + "frexp_out", + "geqrf", + "geqrf_out", + "histogram", + "histogram_out", + "histogramdd", + "kthvalue", + "kthvalue_out", + "linalg_cholesky_ex", + "linalg_cholesky_ex_out", + "linalg_eig", + "linalg_eig_out", + "linalg_eigh", + "linalg_eigh_out", + "linalg_inv_ex", + "linalg_inv_ex_out", + "linalg_ldl_factor", + "linalg_ldl_factor_ex", + "linalg_ldl_factor_ex_out", + "linalg_ldl_factor_out", + "linalg_lstsq", + "linalg_lstsq_out", + "linalg_lu", + "linalg_lu_factor", + "linalg_lu_factor_ex", + "linalg_lu_factor_ex_out", + "linalg_lu_factor_out", + "linalg_lu_out", + "linalg_qr", + "linalg_qr_out", + "linalg_slogdet", + "linalg_slogdet_out", + "linalg_solve_ex", + "linalg_solve_ex_out", + "linalg_svd", + "linalg_svd_out", + "lu_unpack", + "lu_unpack_out", + "max", + "max_out", + "median", + "median_out", + "min", + "min_out", + "mode", + "mode_out", + "nanmedian", + "nanmedian_out", + "qr", + "qr_out", 
+ "slogdet", + "slogdet_out", + "sort", + "sort_out", + "svd", + "svd_out", + "topk", + "topk_out", + "triangular_solve", + "triangular_solve_out", + # torch.serialization + "LoadEndianness", + "SourceChangeWarning", + # torch.sparse.semi_structured + "SparseSemiStructuredTensor", + # torch.storage + "UntypedStorage", + # torch.torch_version + "TorchVersion", + # torch.types + "SymInt", + # torch.utils.benchmark.examples.blas_compare_setup + "SubEnvSpec", + # torch.utils.benchmark.examples.compare + "FauxTorch", + # torch.utils.benchmark.examples.spectral_ops_fuzz_test + "Benchmark", + # torch.utils.benchmark.op_fuzzers.binary + "BinaryOpFuzzer", + # torch.utils.benchmark.op_fuzzers.sparse_binary + "BinaryOpSparseFuzzer", + # torch.utils.benchmark.op_fuzzers.sparse_unary + "UnaryOpSparseFuzzer", + # torch.utils.benchmark.op_fuzzers.spectral + "SpectralOpFuzzer", + # torch.utils.benchmark.op_fuzzers.unary + "UnaryOpFuzzer", + # torch.utils.benchmark.utils.common + "Measurement", + "TaskSpec", + # torch.utils.benchmark.utils.compare + "Colorize", + "Compare", + "Table", + # torch.utils.benchmark.utils.fuzzer + "FuzzedParameter", + "FuzzedTensor", + "Fuzzer", + "ParameterAlias", + # torch.utils.benchmark.utils.sparse_fuzzer + "FuzzedSparseTensor", + # torch.utils.benchmark.utils.timer + "CPPTimer", + "Language", + "Timer", + # torch.utils.benchmark.utils.valgrind_wrapper.timer_interface + "CallgrindStats", + "CopyIfCallgrind", + "FunctionCount", + "FunctionCounts", + "GlobalsBridge", + "Serialization", + # torch.utils.bundled_inputs + "InflatableArg", + # torch.utils.checkpoint + "CheckpointError", + "CheckpointFunction", + "DefaultDeviceType", + # torch.utils.collect_env + "SystemEnv", + # torch.utils.cpp_extension + "BuildExtension", + # torch.utils.data.dataloader + "DataLoader", + # torch.utils.data.datapipes.dataframe.dataframe_wrapper + "PandasWrapper", + "default_wrapper", + # torch.utils.data.datapipes.dataframe.dataframes + "Capture", + "CaptureA", + "CaptureAdd", + "CaptureCall", + "CaptureControl", + "CaptureDataFrame", + "CaptureDataFrameWithDataPipeOps", + "CaptureF", + "CaptureGetAttr", + "CaptureGetItem", + "CaptureInitial", + "CaptureLikeMock", + "CaptureMul", + "CaptureSetItem", + "CaptureSub", + "CaptureVariable", + "CaptureVariableAssign", + "DataFrameTracedOps", + "DataFrameTracer", + # torch.utils.data.datapipes.dataframe.datapipes + "ConcatDataFramesPipe", + "DataFramesAsTuplesPipe", + "ExampleAggregateAsDataFrames", + "FilterDataFramesPipe", + "PerRowDataFramesPipe", + "ShuffleDataFramesPipe", + # torch.utils.data.datapipes.dataframe.structures + "DataChunkDF", + # torch.utils.data.datapipes.datapipe + "DFIterDataPipe", + "DataChunk", + "IterDataPipe", + "MapDataPipe", + # torch.utils.data.datapipes.iter.callable + "CollatorIterDataPipe", + "MapperIterDataPipe", + # torch.utils.data.datapipes.iter.combinatorics + "SamplerIterDataPipe", + "ShufflerIterDataPipe", + # torch.utils.data.datapipes.iter.combining + "ConcaterIterDataPipe", + "DemultiplexerIterDataPipe", + "ForkerIterDataPipe", + "MultiplexerIterDataPipe", + "ZipperIterDataPipe", + # torch.utils.data.datapipes.iter.filelister + "FileListerIterDataPipe", + # torch.utils.data.datapipes.iter.fileopener + "FileOpenerIterDataPipe", + # torch.utils.data.datapipes.iter.grouping + "BatcherIterDataPipe", + "GrouperIterDataPipe", + "UnBatcherIterDataPipe", + # torch.utils.data.datapipes.iter.routeddecoder + "RoutedDecoderIterDataPipe", + # torch.utils.data.datapipes.iter.selecting + "FilterIterDataPipe", + # 
torch.utils.data.datapipes.iter.sharding + "SHARDING_PRIORITIES", + "ShardingFilterIterDataPipe", + # torch.utils.data.datapipes.iter.utils + "IterableWrapperIterDataPipe", + # torch.utils.data.datapipes.map.callable + "MapperMapDataPipe", + # torch.utils.data.datapipes.map.combinatorics + "ShufflerIterDataPipe", + # torch.utils.data.datapipes.map.combining + "ConcaterMapDataPipe", + "ZipperMapDataPipe", + # torch.utils.data.datapipes.map.grouping + "BatcherMapDataPipe", + # torch.utils.data.datapipes.map.utils + "SequenceWrapperMapDataPipe", + # torch.utils.data.datapipes.utils.decoder + "Decoder", + "ImageHandler", + "MatHandler", + # torch.utils.data.dataset + "ConcatDataset", + # torch.utils.data.distributed + "DistributedSampler", + # torch.utils.dlpack + "DLDeviceType", + # torch.utils.file_baton + "FileBaton", + # torch.utils.flop_counter + "FlopCounterMode", + # torch.utils.hipify.hipify_python + "CurrentState", + "GeneratedFileCleaner", + "HipifyResult", + "InputError", + "Trie", + "bcolors", + # torch.utils.hooks + "BackwardHook", + "RemovableHandle", + # torch.utils.mkldnn + "MkldnnBatchNorm", + "MkldnnConv1d", + "MkldnnConv2d", + "MkldnnConv3d", + "MkldnnLinear", + "MkldnnPrelu", + # torch.utils.mobile_optimizer + "LintCode", + # torch.utils.show_pickle + "DumpUnpickler", + "FakeClass", + "FakeObject", + # torch.utils.tensorboard.writer + "FileWriter", + "SummaryWriter", + # torch.utils.throughput_benchmark + "ExecutionStats", + # torch.utils.weak + "WeakIdKeyDictionary", + "WeakIdRef", + "WeakTensorKeyDictionary", ] # The suffix(es) of source filenames. @@ -515,7 +3662,7 @@ def coverage_post_process(app, exception): for _, modname, ispkg in pkgutil.walk_packages( path=torch.__path__, prefix=torch.__name__ + "." ): - if ispkg and is_not_internal(modname): + if is_not_internal(modname): if modname not in modules: missing.add(modname) diff --git a/docs/source/cuda.rst b/docs/source/cuda.rst index e208da759dec..33b697a288f8 100644 --- a/docs/source/cuda.rst +++ b/docs/source/cuda.rst @@ -153,3 +153,18 @@ See the :doc:`documentation ` for information on how to use it. :hidden: cuda._sanitizer + + +.. This module needs to be documented. Adding here in the meantime +.. for tracking purposes +.. py:module:: torch.cuda.comm +.. py:module:: torch.cuda.error +.. py:module:: torch.cuda.graphs +.. py:module:: torch.cuda.jiterator +.. py:module:: torch.cuda.memory +.. py:module:: torch.cuda.nccl +.. py:module:: torch.cuda.nvtx +.. py:module:: torch.cuda.profiler +.. py:module:: torch.cuda.random +.. py:module:: torch.cuda.sparse +.. py:module:: torch.cuda.streams \ No newline at end of file diff --git a/docs/source/distributed.rst b/docs/source/distributed.rst index 9958b1cde1a2..a68f9fb307b8 100644 --- a/docs/source/distributed.rst +++ b/docs/source/distributed.rst @@ -874,3 +874,118 @@ Distributed components raise custom Exception types derived from `RuntimeError`: .. py:module:: torch.distributed.pipeline.sync .. py:module:: torch.distributed.pipeline.sync.skip .. py:module:: torch.distributed.tensor +.. py:module:: torch.distributed.algorithms.ddp_comm_hooks.ddp_zero_hook +.. py:module:: torch.distributed.algorithms.ddp_comm_hooks.debugging_hooks +.. py:module:: torch.distributed.algorithms.ddp_comm_hooks.default_hooks +.. py:module:: torch.distributed.algorithms.ddp_comm_hooks.mixed_precision_hooks +.. py:module:: torch.distributed.algorithms.ddp_comm_hooks.optimizer_overlap_hooks +.. py:module:: torch.distributed.algorithms.ddp_comm_hooks.post_localSGD_hook +.. 
py:module:: torch.distributed.algorithms.ddp_comm_hooks.powerSGD_hook +.. py:module:: torch.distributed.algorithms.ddp_comm_hooks.quantization_hooks +.. py:module:: torch.distributed.algorithms.join +.. py:module:: torch.distributed.algorithms.model_averaging.averagers +.. py:module:: torch.distributed.algorithms.model_averaging.hierarchical_model_averager +.. py:module:: torch.distributed.algorithms.model_averaging.utils +.. py:module:: torch.distributed.argparse_util +.. py:module:: torch.distributed.c10d_logger +.. py:module:: torch.distributed.checkpoint.api +.. py:module:: torch.distributed.checkpoint.default_planner +.. py:module:: torch.distributed.checkpoint.filesystem +.. py:module:: torch.distributed.checkpoint.metadata +.. py:module:: torch.distributed.checkpoint.optimizer +.. py:module:: torch.distributed.checkpoint.planner +.. py:module:: torch.distributed.checkpoint.planner_helpers +.. py:module:: torch.distributed.checkpoint.resharding +.. py:module:: torch.distributed.checkpoint.state_dict_loader +.. py:module:: torch.distributed.checkpoint.state_dict_saver +.. py:module:: torch.distributed.checkpoint.storage +.. py:module:: torch.distributed.checkpoint.utils +.. py:module:: torch.distributed.collective_utils +.. py:module:: torch.distributed.constants +.. py:module:: torch.distributed.distributed_c10d +.. py:module:: torch.distributed.elastic.agent.server.api +.. py:module:: torch.distributed.elastic.agent.server.local_elastic_agent +.. py:module:: torch.distributed.elastic.events.api +.. py:module:: torch.distributed.elastic.events.handlers +.. py:module:: torch.distributed.elastic.metrics.api +.. py:module:: torch.distributed.elastic.multiprocessing.api +.. py:module:: torch.distributed.elastic.multiprocessing.errors.error_handler +.. py:module:: torch.distributed.elastic.multiprocessing.errors.handlers +.. py:module:: torch.distributed.elastic.multiprocessing.redirects +.. py:module:: torch.distributed.elastic.multiprocessing.tail_log +.. py:module:: torch.distributed.elastic.rendezvous.api +.. py:module:: torch.distributed.elastic.rendezvous.c10d_rendezvous_backend +.. py:module:: torch.distributed.elastic.rendezvous.dynamic_rendezvous +.. py:module:: torch.distributed.elastic.rendezvous.etcd_rendezvous +.. py:module:: torch.distributed.elastic.rendezvous.etcd_rendezvous_backend +.. py:module:: torch.distributed.elastic.rendezvous.etcd_server +.. py:module:: torch.distributed.elastic.rendezvous.etcd_store +.. py:module:: torch.distributed.elastic.rendezvous.static_tcp_rendezvous +.. py:module:: torch.distributed.elastic.rendezvous.utils +.. py:module:: torch.distributed.elastic.timer.api +.. py:module:: torch.distributed.elastic.timer.file_based_local_timer +.. py:module:: torch.distributed.elastic.timer.local_timer +.. py:module:: torch.distributed.elastic.utils.api +.. py:module:: torch.distributed.elastic.utils.data.cycling_iterator +.. py:module:: torch.distributed.elastic.utils.data.elastic_distributed_sampler +.. py:module:: torch.distributed.elastic.utils.distributed +.. py:module:: torch.distributed.elastic.utils.log_level +.. py:module:: torch.distributed.elastic.utils.logging +.. py:module:: torch.distributed.elastic.utils.store +.. py:module:: torch.distributed.fsdp.api +.. py:module:: torch.distributed.fsdp.fully_sharded_data_parallel +.. py:module:: torch.distributed.fsdp.sharded_grad_scaler +.. py:module:: torch.distributed.fsdp.wrap +.. py:module:: torch.distributed.launcher.api +.. py:module:: torch.distributed.logging_handlers +.. 
py:module:: torch.distributed.nn.api.remote_module +.. py:module:: torch.distributed.nn.functional +.. py:module:: torch.distributed.nn.jit.instantiator +.. py:module:: torch.distributed.nn.jit.templates.remote_module_template +.. py:module:: torch.distributed.optim.apply_optimizer_in_backward +.. py:module:: torch.distributed.optim.functional_adadelta +.. py:module:: torch.distributed.optim.functional_adagrad +.. py:module:: torch.distributed.optim.functional_adam +.. py:module:: torch.distributed.optim.functional_adamax +.. py:module:: torch.distributed.optim.functional_adamw +.. py:module:: torch.distributed.optim.functional_rmsprop +.. py:module:: torch.distributed.optim.functional_rprop +.. py:module:: torch.distributed.optim.functional_sgd +.. py:module:: torch.distributed.optim.named_optimizer +.. py:module:: torch.distributed.optim.optimizer +.. py:module:: torch.distributed.optim.post_localSGD_optimizer +.. py:module:: torch.distributed.optim.utils +.. py:module:: torch.distributed.optim.zero_redundancy_optimizer +.. py:module:: torch.distributed.pipeline.sync.batchnorm +.. py:module:: torch.distributed.pipeline.sync.checkpoint +.. py:module:: torch.distributed.pipeline.sync.copy +.. py:module:: torch.distributed.pipeline.sync.dependency +.. py:module:: torch.distributed.pipeline.sync.microbatch +.. py:module:: torch.distributed.pipeline.sync.phony +.. py:module:: torch.distributed.pipeline.sync.pipe +.. py:module:: torch.distributed.pipeline.sync.pipeline +.. py:module:: torch.distributed.pipeline.sync.skip.layout +.. py:module:: torch.distributed.pipeline.sync.skip.namespace +.. py:module:: torch.distributed.pipeline.sync.skip.portal +.. py:module:: torch.distributed.pipeline.sync.skip.skippable +.. py:module:: torch.distributed.pipeline.sync.skip.tracker +.. py:module:: torch.distributed.pipeline.sync.stream +.. py:module:: torch.distributed.pipeline.sync.utils +.. py:module:: torch.distributed.pipeline.sync.worker +.. py:module:: torch.distributed.remote_device +.. py:module:: torch.distributed.rendezvous +.. py:module:: torch.distributed.rpc.api +.. py:module:: torch.distributed.rpc.backend_registry +.. py:module:: torch.distributed.rpc.constants +.. py:module:: torch.distributed.rpc.functions +.. py:module:: torch.distributed.rpc.internal +.. py:module:: torch.distributed.rpc.options +.. py:module:: torch.distributed.rpc.rref_proxy +.. py:module:: torch.distributed.rpc.server_process_global_profiler +.. py:module:: torch.distributed.tensor.parallel.api +.. py:module:: torch.distributed.tensor.parallel.ddp +.. py:module:: torch.distributed.tensor.parallel.fsdp +.. py:module:: torch.distributed.tensor.parallel.input_reshard +.. py:module:: torch.distributed.tensor.parallel.style +.. py:module:: torch.distributed.utils +.. py:module:: torch.distributed.checkpoint.state_dict diff --git a/docs/source/distributions.rst b/docs/source/distributions.rst index 14b620b5a3cc..bad68cdc6d0b 100644 --- a/docs/source/distributions.rst +++ b/docs/source/distributions.rst @@ -394,3 +394,47 @@ Probability distributions - torch.distributions .. automodule:: torch.distributions.constraint_registry :members: :member-order: bysource + +.. This module needs to be documented. Adding here in the meantime +.. for tracking purposes +.. py:module:: torch.distributions.bernoulli +.. py:module:: torch.distributions.beta +.. py:module:: torch.distributions.binomial +.. py:module:: torch.distributions.categorical +.. py:module:: torch.distributions.cauchy +.. py:module:: torch.distributions.chi2 +.. 
py:module:: torch.distributions.continuous_bernoulli +.. py:module:: torch.distributions.dirichlet +.. py:module:: torch.distributions.distribution +.. py:module:: torch.distributions.exp_family +.. py:module:: torch.distributions.exponential +.. py:module:: torch.distributions.fishersnedecor +.. py:module:: torch.distributions.gamma +.. py:module:: torch.distributions.geometric +.. py:module:: torch.distributions.gumbel +.. py:module:: torch.distributions.half_cauchy +.. py:module:: torch.distributions.half_normal +.. py:module:: torch.distributions.independent +.. py:module:: torch.distributions.kumaraswamy +.. py:module:: torch.distributions.laplace +.. py:module:: torch.distributions.lkj_cholesky +.. py:module:: torch.distributions.log_normal +.. py:module:: torch.distributions.logistic_normal +.. py:module:: torch.distributions.lowrank_multivariate_normal +.. py:module:: torch.distributions.mixture_same_family +.. py:module:: torch.distributions.multinomial +.. py:module:: torch.distributions.multivariate_normal +.. py:module:: torch.distributions.negative_binomial +.. py:module:: torch.distributions.normal +.. py:module:: torch.distributions.one_hot_categorical +.. py:module:: torch.distributions.pareto +.. py:module:: torch.distributions.poisson +.. py:module:: torch.distributions.relaxed_bernoulli +.. py:module:: torch.distributions.relaxed_categorical +.. py:module:: torch.distributions.studentT +.. py:module:: torch.distributions.transformed_distribution +.. py:module:: torch.distributions.uniform +.. py:module:: torch.distributions.utils +.. py:module:: torch.distributions.von_mises +.. py:module:: torch.distributions.weibull +.. py:module:: torch.distributions.wishart \ No newline at end of file diff --git a/docs/source/export.rst b/docs/source/export.rst index b26a7534e6f0..ead18491bb6a 100644 --- a/docs/source/export.rst +++ b/docs/source/export.rst @@ -577,3 +577,8 @@ API Reference .. autoclass:: ExportGraphSignature .. autoclass:: ModuleCallSignature .. autoclass:: ModuleCallEntry + + +.. This module needs to be documented. Adding here in the meantime +.. for tracking purposes +.. py:module:: torch.export.exported_program diff --git a/docs/source/fx.rst b/docs/source/fx.rst index 0aecbf38aaf7..44767cfee758 100644 --- a/docs/source/fx.rst +++ b/docs/source/fx.rst @@ -1125,3 +1125,75 @@ API Reference .. py:module:: torch.fx.experimental.migrate_gradual_types .. py:module:: torch.fx.passes.dialect .. py:module:: torch.fx.passes.dialect.common +.. py:module:: torch.fx.annotate +.. py:module:: torch.fx.config +.. py:module:: torch.fx.experimental.accelerator_partitioner +.. py:module:: torch.fx.experimental.const_fold +.. py:module:: torch.fx.experimental.debug +.. py:module:: torch.fx.experimental.graph_gradual_typechecker +.. py:module:: torch.fx.experimental.merge_matmul +.. py:module:: torch.fx.experimental.meta_tracer +.. py:module:: torch.fx.experimental.migrate_gradual_types.constraint +.. py:module:: torch.fx.experimental.migrate_gradual_types.constraint_generator +.. py:module:: torch.fx.experimental.migrate_gradual_types.constraint_transformation +.. py:module:: torch.fx.experimental.migrate_gradual_types.operation +.. py:module:: torch.fx.experimental.migrate_gradual_types.transform_to_z3 +.. py:module:: torch.fx.experimental.migrate_gradual_types.util +.. py:module:: torch.fx.experimental.migrate_gradual_types.z3_types +.. py:module:: torch.fx.experimental.normalize +.. py:module:: torch.fx.experimental.optimization +.. 
py:module:: torch.fx.experimental.partitioner_utils +.. py:module:: torch.fx.experimental.proxy_tensor +.. py:module:: torch.fx.experimental.recording +.. py:module:: torch.fx.experimental.refinement_types +.. py:module:: torch.fx.experimental.rewriter +.. py:module:: torch.fx.experimental.schema_type_annotation +.. py:module:: torch.fx.experimental.symbolic_shapes +.. py:module:: torch.fx.experimental.unification.core +.. py:module:: torch.fx.experimental.unification.dispatch +.. py:module:: torch.fx.experimental.unification.match +.. py:module:: torch.fx.experimental.unification.more +.. py:module:: torch.fx.experimental.unification.multipledispatch.conflict +.. py:module:: torch.fx.experimental.unification.multipledispatch.core +.. py:module:: torch.fx.experimental.unification.multipledispatch.dispatcher +.. py:module:: torch.fx.experimental.unification.multipledispatch.utils +.. py:module:: torch.fx.experimental.unification.multipledispatch.variadic +.. py:module:: torch.fx.experimental.unification.unification_tools +.. py:module:: torch.fx.experimental.unification.utils +.. py:module:: torch.fx.experimental.unification.variable +.. py:module:: torch.fx.experimental.unify_refinements +.. py:module:: torch.fx.experimental.validator +.. py:module:: torch.fx.graph +.. py:module:: torch.fx.graph_module +.. py:module:: torch.fx.immutable_collections +.. py:module:: torch.fx.interpreter +.. py:module:: torch.fx.node +.. py:module:: torch.fx.operator_schemas +.. py:module:: torch.fx.passes.annotate_getitem_nodes +.. py:module:: torch.fx.passes.backends.cudagraphs +.. py:module:: torch.fx.passes.dialect.common.cse_pass +.. py:module:: torch.fx.passes.fake_tensor_prop +.. py:module:: torch.fx.passes.graph_drawer +.. py:module:: torch.fx.passes.graph_manipulation +.. py:module:: torch.fx.passes.infra.partitioner +.. py:module:: torch.fx.passes.infra.pass_base +.. py:module:: torch.fx.passes.infra.pass_manager +.. py:module:: torch.fx.passes.net_min_base +.. py:module:: torch.fx.passes.operator_support +.. py:module:: torch.fx.passes.param_fetch +.. py:module:: torch.fx.passes.pass_manager +.. py:module:: torch.fx.passes.reinplace +.. py:module:: torch.fx.passes.shape_prop +.. py:module:: torch.fx.passes.split_module +.. py:module:: torch.fx.passes.split_utils +.. py:module:: torch.fx.passes.splitter_base +.. py:module:: torch.fx.passes.tests.test_pass_manager +.. py:module:: torch.fx.passes.tools_common +.. py:module:: torch.fx.passes.utils.common +.. py:module:: torch.fx.passes.utils.fuser_utils +.. py:module:: torch.fx.passes.utils.matcher_utils +.. py:module:: torch.fx.passes.utils.source_matcher_utils +.. py:module:: torch.fx.proxy +.. py:module:: torch.fx.subgraph_rewriter +.. py:module:: torch.fx.tensor_type +.. py:module:: torch.fx.traceback \ No newline at end of file diff --git a/docs/source/jit.rst b/docs/source/jit.rst index b9e43d1086d7..9d37c2a7d330 100644 --- a/docs/source/jit.rst +++ b/docs/source/jit.rst @@ -886,3 +886,7 @@ References .. This package is missing doc. Adding it here for coverage .. This does not add anything to the rendered page. .. py:module:: torch.jit.mobile +.. py:module:: torch.jit.annotations +.. py:module:: torch.jit.frontend +.. py:module:: torch.jit.generate_bytecode +.. py:module:: torch.jit.quantized diff --git a/docs/source/library.rst b/docs/source/library.rst index 7f5f9cfe005d..0cba2931de12 100644 --- a/docs/source/library.rst +++ b/docs/source/library.rst @@ -1,5 +1,6 @@ torch.library =================================== +.. 
py:module:: torch.library Python operator registration API provides capabilities for extending PyTorch's core library of operators with user defined operators. Currently, this can be done in two ways: diff --git a/docs/source/masked.rst b/docs/source/masked.rst index 139c267ac6ff..60dd67f643b8 100644 --- a/docs/source/masked.rst +++ b/docs/source/masked.rst @@ -295,3 +295,12 @@ The following ops are currently supported: Tensor.reshape Tensor.reshape_as Tensor.view + +.. This module needs to be documented. Adding here in the meantime +.. for tracking purposes +.. py:module:: torch.masked.maskedtensor.binary +.. py:module:: torch.masked.maskedtensor.core +.. py:module:: torch.masked.maskedtensor.creation +.. py:module:: torch.masked.maskedtensor.passthrough +.. py:module:: torch.masked.maskedtensor.reductions +.. py:module:: torch.masked.maskedtensor.unary \ No newline at end of file diff --git a/docs/source/mps.rst b/docs/source/mps.rst index 03ec57caa464..bff15666ff33 100644 --- a/docs/source/mps.rst +++ b/docs/source/mps.rst @@ -34,3 +34,9 @@ MPS Event :nosignatures: event.Event + + +.. This module needs to be documented. Adding here in the meantime +.. for tracking purposes +.. py:module:: torch.mps.event +.. py:module:: torch.mps.profiler \ No newline at end of file diff --git a/docs/source/multiprocessing.rst b/docs/source/multiprocessing.rst index 2d324f3eead5..78218c2f7e9a 100644 --- a/docs/source/multiprocessing.rst +++ b/docs/source/multiprocessing.rst @@ -174,10 +174,23 @@ The ``spawn`` function below addresses these concerns and takes care of error propagation, out of order termination, and will actively terminate processes upon detecting an error in one of them. +.. automodule:: torch.multiprocessing.spawn +.. currentmodule:: torch.multiprocessing.spawn + .. autofunction:: spawn +.. currentmodule:: torch.multiprocessing + + .. class:: SpawnContext Returned by :func:`~spawn` when called with ``join=False``. .. automethod:: join + + +.. This module needs to be documented. Adding here in the meantime +.. for tracking purposes +.. py:module:: torch.multiprocessing.pool +.. py:module:: torch.multiprocessing.queue +.. py:module:: torch.multiprocessing.reductions diff --git a/docs/source/nn.rst b/docs/source/nn.rst index 6e1b0f1a0377..123fa702ddb1 100644 --- a/docs/source/nn.rst +++ b/docs/source/nn.rst @@ -477,6 +477,55 @@ Lazy Modules Initialization nn.modules.lazy.LazyModuleMixin -.. This module is kept only for backward compatibility +.. This module needs to be documented. Adding here in the meantime +.. for tracking purposes .. py:module:: torch.nn.backends .. py:module:: torch.nn.utils.stateless +.. py:module:: torch.nn.backends.thnn +.. py:module:: torch.nn.common_types +.. py:module:: torch.nn.cpp +.. py:module:: torch.nn.functional +.. py:module:: torch.nn.grad +.. py:module:: torch.nn.init +.. py:module:: torch.nn.modules.activation +.. py:module:: torch.nn.modules.adaptive +.. py:module:: torch.nn.modules.batchnorm +.. py:module:: torch.nn.modules.channelshuffle +.. py:module:: torch.nn.modules.container +.. py:module:: torch.nn.modules.conv +.. py:module:: torch.nn.modules.distance +.. py:module:: torch.nn.modules.dropout +.. py:module:: torch.nn.modules.flatten +.. py:module:: torch.nn.modules.fold +.. py:module:: torch.nn.modules.instancenorm +.. py:module:: torch.nn.modules.lazy +.. py:module:: torch.nn.modules.linear +.. py:module:: torch.nn.modules.loss +.. py:module:: torch.nn.modules.module +.. py:module:: torch.nn.modules.normalization +.. 
py:module:: torch.nn.modules.padding +.. py:module:: torch.nn.modules.pixelshuffle +.. py:module:: torch.nn.modules.pooling +.. py:module:: torch.nn.modules.rnn +.. py:module:: torch.nn.modules.sparse +.. py:module:: torch.nn.modules.transformer +.. py:module:: torch.nn.modules.upsampling +.. py:module:: torch.nn.modules.utils +.. py:module:: torch.nn.parallel.comm +.. py:module:: torch.nn.parallel.data_parallel +.. py:module:: torch.nn.parallel.distributed +.. py:module:: torch.nn.parallel.parallel_apply +.. py:module:: torch.nn.parallel.replicate +.. py:module:: torch.nn.parallel.scatter_gather +.. py:module:: torch.nn.parameter +.. py:module:: torch.nn.utils.clip_grad +.. py:module:: torch.nn.utils.convert_parameters +.. py:module:: torch.nn.utils.fusion +.. py:module:: torch.nn.utils.init +.. py:module:: torch.nn.utils.memory_format +.. py:module:: torch.nn.utils.parametrizations +.. py:module:: torch.nn.utils.parametrize +.. py:module:: torch.nn.utils.prune +.. py:module:: torch.nn.utils.rnn +.. py:module:: torch.nn.utils.spectral_norm +.. py:module:: torch.nn.utils.weight_norm \ No newline at end of file diff --git a/docs/source/onnx.rst b/docs/source/onnx.rst index 56dc2d308df0..9a8f76691369 100644 --- a/docs/source/onnx.rst +++ b/docs/source/onnx.rst @@ -62,3 +62,24 @@ also be interested in reading our `development wiki >> torch.optim.swa_utils.update_bn(loader, ema_model) >>> # Use ema_model to make predictions on test data >>> preds = ema_model(test_input) + + +.. This module needs to be documented. Adding here in the meantime +.. for tracking purposes +.. py:module:: torch.optim.adadelta +.. py:module:: torch.optim.adagrad +.. py:module:: torch.optim.adam +.. py:module:: torch.optim.adamax +.. py:module:: torch.optim.adamw +.. py:module:: torch.optim.asgd +.. py:module:: torch.optim.lbfgs +.. py:module:: torch.optim.lr_scheduler +.. py:module:: torch.optim.nadam +.. py:module:: torch.optim.optimizer +.. py:module:: torch.optim.radam +.. py:module:: torch.optim.rmsprop +.. py:module:: torch.optim.rprop +.. py:module:: torch.optim.sgd +.. py:module:: torch.optim.sparse_adam +.. py:module:: torch.optim.swa_utils \ No newline at end of file diff --git a/docs/source/package.rst b/docs/source/package.rst index 9dc85cc1c662..d8d6e3e28f1f 100644 --- a/docs/source/package.rst +++ b/docs/source/package.rst @@ -515,7 +515,7 @@ The ``torch.package`` format makes no guarantees about the contents of ``.data/` Currently, the ``.data/`` directory contains the following items: * ``version``: a version number for the serialized format, so that the ``torch.package`` import infrastructures knows how to load this package. -* ``extern_modules``: a list of modules that are considered ``extern:class:`PackageImporter`. ``extern`` modules will be imported using the loading environment’s system importer. +* ``extern_modules``: a list of modules that are considered ``extern``. ``extern`` modules will be imported using the loading environment’s system importer. * ``*.storage``: serialized tensor data. @@ -817,3 +817,16 @@ API Reference .. autoclass:: torch.package.Directory :members: + + +.. This module needs to be documented. Adding here in the meantime +.. for tracking purposes +.. py:module:: torch.package.analyze.find_first_use_of_broken_modules +.. py:module:: torch.package.analyze.is_from_package +.. py:module:: torch.package.analyze.trace_dependencies +.. py:module:: torch.package.file_structure_representation +.. py:module:: torch.package.find_file_dependencies +.. 
py:module:: torch.package.glob_group +.. py:module:: torch.package.importer +.. py:module:: torch.package.package_exporter +.. py:module:: torch.package.package_importer diff --git a/docs/source/profiler.rst b/docs/source/profiler.rst index fa76234eff6f..38871882fa2a 100644 --- a/docs/source/profiler.rst +++ b/docs/source/profiler.rst @@ -37,3 +37,9 @@ Intel Instrumentation and Tracing Technology APIs .. autofunction:: torch.profiler.itt.range_push .. autofunction:: torch.profiler.itt.range_pop + +.. This module needs to be documented. Adding here in the meantime +.. for tracking purposes +.. py:module:: torch.profiler.itt +.. py:module:: torch.profiler.profiler +.. py:module:: torch.profiler.python_tracer \ No newline at end of file diff --git a/docs/source/quantization-support.rst b/docs/source/quantization-support.rst index f54888ec38cc..677816b58a35 100644 --- a/docs/source/quantization-support.rst +++ b/docs/source/quantization-support.rst @@ -31,7 +31,7 @@ Preparing model for quantization :nosignatures: :template: classtemplate.rst - fuse_modules + fuse_modules.fuse_modules QuantStub DeQuantStub QuantWrapper diff --git a/docs/source/quantization.rst b/docs/source/quantization.rst index dc9e4bc26579..ba6b14c772e8 100644 --- a/docs/source/quantization.rst +++ b/docs/source/quantization.rst @@ -427,7 +427,7 @@ to do the following in addition: determine output quantization parameters. 3. Fuse modules: combine operations/modules into a single module to obtain higher accuracy and performance. This is done using the - :func:`~torch.ao.quantization.fuse_modules` API, which takes in lists of modules + :func:`~torch.ao.quantization.fuse_modules.fuse_modules` API, which takes in lists of modules to be fused. We currently support the following fusions: [Conv, Relu], [Conv, BatchNorm], [Conv, BatchNorm, Relu], [Linear, Relu] @@ -1246,3 +1246,159 @@ Please take a look at `Limitations of Symbolic Tracing
-    n_regressed = len([i for i in results if i[2] > 0.05])
-    n_improved = len([i for i in results if i[2] < -0.05])
-    n_unchanged = n - n_improved - n_regressed
-    legends = ["Improved (>5%):", "Regressed (>5%):", "Within 5%:"]
-    for legend, count in zip(legends, [n_improved, n_regressed, n_unchanged]):
-        print(f"{legend:<17} {count:>6} ({count / len(results) * 100:>3.0f}%)")
-
-    keys_to_print = (
-        {i[0] for i in results[20:30]} |
-        {i[0] for i in results[int(n // 2 - 5):int(n // 2 + 5)]} |
-        {i[0] for i in results[-30:-20]}
-    )
-    ellipsis_after = {results[29][0], results[int(n // 2 + 4)][0]}
-
-    column_labels = (
-        f"Relative Δ Absolute Δ | numel{'':>8}dtype{'':>14}"
-        f"shape{'':>10}steps{'':>10}layout{'':>7}task specific\n{'=' * 126}"
-    )
-
-    _, result_log_file = tempfile.mkstemp(suffix=".log")
-    with open(result_log_file, "w") as f:
-        f.write(f"{device_str}\n\n{column_labels}\n")
-        print(f"\n{column_labels}\n[First twenty omitted (these tend to be noisy) ]")
-        for key, (r_ref, r_pr), rel_diff in results:
-            row = row_str(rel_diff, r_pr.median - r_ref.median, r_ref)
-            f.write(f"{row}\n")
-            if key in keys_to_print:
-                print(row)
-            if key in ellipsis_after:
-                print("...")
-        print("[Last twenty omitted (these tend to be noisy) ]")
-
-    print(textwrap.dedent("""
-        steps:
-            Indicates that `x` is sliced from a larger Tensor. For instance, if
-            shape is [12, 4] and steps are [2, 1], then a larger Tensor of size
-            [24, 4] was created, and then x = base_tensor[::2, ::1]. Omitted if
-            all elements are ones.
-
-        layout:
-            Indicates that `x` is not contiguous due to permutation. Invoking
-            `x.permute(layout)` (e.g. x.permute((2, 0, 1)) if layout = [2, 0, 1])
-            would produce a Tensor with physical memory layout matching logical
-            memory layout. (Though still not contiguous if `steps` contains
-            non-one elements.)
-    """))
-
-    print(f"\nComplete results in: {result_log_file}")
-
-
-def row_str(rel_diff, diff_seconds, measurement):
-    params = measurement.metadata["params"]
-    tensor_parameters = measurement.metadata["tensor_parameters"]
-
-    dim = params["dim"]
-    x_numel = tensor_parameters["x"]["numel"]
-    steps = [params[f"x_step_{i}"] for i in range(dim)]
-    order = tensor_parameters['x']["order"]
-    order = str("" if all(i == j for i, j in zip(order, range(dim))) else order)
-
-    task_specific = ""
-    if measurement.stmt.startswith("torch.topk"):
-        dim_str, k_str = measurement.stmt[:-1].replace("torch.topk(x, ", "").split(", ")
-        task_specific = f"{dim_str}, {k_str:<8}"
-    elif measurement.stmt.startswith("torch.std"):
-        pass
-    elif measurement.stmt.startswith("torch.sort"):
-        task_specific = measurement.stmt[:-1].replace("torch.sort(x, ", "")
-
-    return (
-        f"{rel_diff * 100:>5.0f}% {abs(diff_seconds) * 1e6:>11.1f} us{'':>6}|"
-        f"{x_numel:>12} {params['dtype_str']:>10} "
-        f"{str([params[f'k{i}'] for i in range(dim)]):>17} "
-        f"{str(steps) if not all(i == 1 for i in steps) else '':>12} {order:>12}"
-        f"{'':>8}{task_specific}"
-    )
-
-
-# \\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\
-# == Subprocess and environment management ====================================
-# /////////////////////////////////////////////////////////////////////////////
-def read_results(result_file: str):
-    output = []
-    with open(result_file, "rb") as f:
-        while True:
-            try:
-                output.append(pickle.load(f))
-            except EOFError:
-                break
-    return output
-
-
-def run(cmd, cuda_visible_devices=""):
-    return subprocess.run(
-        cmd,
-        env={
-            "CUDA_VISIBLE_DEVICES": str(cuda_visible_devices),
-            "PATH": os.getenv("PATH", ""),
-        },
-        stdout=subprocess.PIPE,
-        shell=True
-    )
-
-
-def test_source(envs):
-    """Ensure that subprocesses will be able to source the requisite environments."""
-    for env in envs:
-        result = run(f"source activate {env}")
-        if result.returncode != 0:
-            raise ValueError(f"Failed to source environment `{env}`")
-
-
-def map_fn(args):
-    seed, envs, pr, use_gpu, finished_counts, test_variance = args
-    gpu = _AVAILABLE_GPUS.get() if use_gpu else None
-    try:
-        _, result_file = tempfile.mkstemp(suffix=".pkl")
-        for env in envs:
-            cmd = _SUBPROCESS_CMD_TEMPLATE.format(
-                source_env=envs[0] if test_variance else env,
-                env=env, pr=pr, device=_GPU if use_gpu else _CPU,
-                result_file=result_file, seed=seed,
-            )
-            run(cmd=cmd, cuda_visible_devices=gpu if use_gpu else "")
-            finished_counts[_GPU if use_gpu else _CPU] += 1
-        return (seed, use_gpu), read_results(result_file)
-    except KeyboardInterrupt:
-        pass  # Handle ctrl-c gracefully.
-    finally:
-        if gpu is not None:
-            _AVAILABLE_GPUS.put(gpu)
-        if os.path.exists(result_file):
-            os.remove(result_file)
-
-
-def main(args):
-    test_source([
-        _REF_ENV_TEMPLATE.format(pr=args.pr),
-        _PR_ENV_TEMPLATE.format(pr=args.pr),
-    ])
-    _main(args)
-
-
-if __name__ == "__main__":
-    args = parse_args()
-
-    if args.DETAIL_context == "main":
-        main(args)
-
-    if args.DETAIL_context == "subprocess":
-        try:
-            subprocess_main(args)
-        except KeyboardInterrupt:
-            pass  # Handle ctrl-c gracefully.
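
The ``steps`` and ``layout`` glossary printed by the removed comparison script
describes two ways the fuzzed input ``x`` can end up non-contiguous: it may be
sliced out of a larger base tensor, or its dimensions may be permuted. A
minimal sketch of both effects, assuming nothing beyond stock PyTorch (the
shapes below are arbitrary, chosen only for illustration)::

    import torch

    # "steps": shape [12, 4] with steps [2, 1] means a [24, 4] base tensor
    # was allocated and then sliced. The slice is a strided view, so it is
    # not contiguous even though its logical shape is [12, 4].
    base_tensor = torch.empty(24, 4)
    x = base_tensor[::2, ::1]
    print(x.shape, x.is_contiguous())      # torch.Size([12, 4]) False

    # "layout": permuting dimensions reorders the logical view without moving
    # data, so physical and logical layouts diverge; .contiguous() copies the
    # data back into logical order.
    y = torch.empty(8, 16, 32).permute(2, 0, 1)
    print(y.is_contiguous())               # False
    print(y.contiguous().is_contiguous())  # True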