[3/3] Update .pyi Python stub files and enable 'UFMT' linter (#95268)

Changes:

- #95200

1. Recognize `.py.in` and `.pyi.in` files as Python in VS Code for a better development experience.
2. Fix deep setting merge in `tools/vscode_settings.py` (see the sketch below).
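
    Without a recursive merge, copying the recommended settings into a user's `.vscode/settings.json` replaces whole nested sections such as `"files.associations"` instead of adding to them. A minimal sketch of the idea (the helper name `deep_merge` and the sample settings are illustrative; the actual code in `tools/vscode_settings.py` may differ):

    ```python
    from typing import Any, Dict

    def deep_merge(base: Dict[str, Any], update: Dict[str, Any]) -> Dict[str, Any]:
        """Recursively merge ``update`` into ``base`` rather than overwriting nested dicts."""
        merged = dict(base)
        for key, value in update.items():
            if isinstance(value, dict) and isinstance(merged.get(key), dict):
                merged[key] = deep_merge(merged[key], value)
            else:
                merged[key] = value
        return merged

    # Hypothetical pre-existing user setting is preserved; recommended keys are added.
    user = {"files.associations": {"*.rst": "restructuredtext"}}
    recommended = {"files.associations": {"*.py.in": "python", "*.pyi.in": "python"}}
    assert deep_merge(user, recommended)["files.associations"] == {
        "*.rst": "restructuredtext",
        "*.py.in": "python",
        "*.pyi.in": "python",
    }
    ```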

- #95267

3. Use `NamedTuple` rather than `namedtuple + __annotations__` for `torch.nn.utils.rnn.PackedSequence_`:

    `namedtuple + __annotations__`:

    ```python
    PackedSequence_ = namedtuple('PackedSequence_',
                                 ['data', 'batch_sizes', 'sorted_indices', 'unsorted_indices'])

    # type annotation for PackedSequence_ to make it compatible with TorchScript
    PackedSequence_.__annotations__ = {'data': torch.Tensor, 'batch_sizes': torch.Tensor,
                                       'sorted_indices': Optional[torch.Tensor],
                                       'unsorted_indices': Optional[torch.Tensor]}
    ```

    `NamedTuple` (Python 3.6+):

    ```python
    class PackedSequence_(NamedTuple):
        data: torch.Tensor
        batch_sizes: torch.Tensor
        sorted_indices: Optional[torch.Tensor]
        unsorted_indices: Optional[torch.Tensor]
    ```
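
    Both spellings define the same runtime tuple type with the same fields; the class form attaches the annotations directly, so the separate `__annotations__` assignment is no longer needed. A quick sanity check (illustrative only; assumes `torch` is importable and the class definition above):

    ```python
    import torch

    ps = PackedSequence_(data=torch.zeros(1), batch_sizes=torch.ones(1),
                         sorted_indices=None, unsorted_indices=None)
    assert ps._fields == ("data", "batch_sizes", "sorted_indices", "unsorted_indices")
    assert ps.data.shape == (1,)
    ```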

- #95268 (this PR)

4. Sort import statements and remove unnecessary imports in `.pyi` and `.pyi.in` files.
5. Format `.pyi` and `.pyi.in` files and remove unnecessary ellipses (`...`) in type stubs; a small before/after sketch follows below.
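
    Concretely, items 4 and 5 amount to rewrites of the following shape (a hypothetical stub fragment, not taken from any particular file):

    ```python
    # Before: unsorted imports, redundant trailing ellipsis after a populated class body
    from typing import Optional, Any, List

    class Foo:
        def bar(self, xs: List[int]) -> Optional[Any]: ...
        ...

    # After: names sorted case-insensitively (matching the ordering in the diffs below),
    # redundant ellipsis dropped
    from typing import Any, List, Optional

    class Foo:
        def bar(self, xs: List[int]) -> Optional[Any]: ...
    ```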
Pull Request resolved: https://github.com/pytorch/pytorch/pull/95268
Approved by: https://github.com/huydhn
Author: Xuehai Pan
Date: 2023-03-01 23:50:52 +00:00
Committed by: PyTorch MergeBot
Commit: 1fd119948e (parent: b3d8fae042)
49 changed files with 1819 additions and 946 deletions

View File

@ -841,6 +841,7 @@ include_patterns = [
'torch/_refs/**/*.py',
'torch/_subclasses/**/*.py',
'torch/_*.py',
'torch/**/*.pyi',
'torch/testing/_internal/opinfo/**/*.py',
'torchgen/**/*.py',
'torch/_functorch/make_functional.py',

View File

@ -1,5 +1,6 @@
{
"recommendations": [
"ms-python.python"
"ms-python.python",
"omnilib.ufmt"
]
}

View File

@ -4,7 +4,8 @@
},
"files.associations": {
"*.py.in": "python",
"*.pyi.in": "python"
"*.pyi.in": "python",
"editor.defaultFormatter": "omnilib.ufmt"
},
"files.eol": "\n",
"files.insertFinalNewline": true,

View File

@ -1,12 +1,37 @@
# ${generated_comment}
from torch import Tensor, Generator, strided, memory_format, contiguous_format, strided, inf
from typing import List, Tuple, Optional, Union, Any, ContextManager, Callable, overload, Iterator, NamedTuple, Sequence, Literal, TypeVar
from torch.types import _int, _float, _bool, Number, _dtype, _device, _qscheme, _size, _layout, SymInt, Device
import torch
import builtins
from typing import (
Any,
Callable,
ContextManager,
Iterator,
List,
Literal,
NamedTuple,
Optional,
overload,
Sequence,
Tuple,
TypeVar,
Union,
)
import torch
from torch import contiguous_format, Generator, inf, memory_format, strided, Tensor
from torch.types import (
_bool,
_device,
_dtype,
_float,
_int,
_layout,
_qscheme,
_size,
Device,
Number,
SymInt,
)
${function_hints}

File diff suppressed because it is too large.

View File

@ -1,8 +1,14 @@
from typing import List, Set, Callable, Any, Union, Optional
from enum import Enum
from typing import Any, Callable, List, Optional, Set
import torch
from ._profiler import _ProfilerEvent, ActiveProfilerType, ProfilerActivity, ProfilerConfig
from ._profiler import (
_ProfilerEvent,
ActiveProfilerType,
ProfilerActivity,
ProfilerConfig,
)
# Defined in tools/autograd/init.cpp
@ -22,7 +28,6 @@ class DeviceType(Enum):
Meta = ...
Vulkan = ...
Metal = ...
...
class ProfilerEvent:
def cpu_elapsed_us(self, other: ProfilerEvent) -> float: ...
@ -41,7 +46,6 @@ class ProfilerEvent:
def thread_id(self) -> int: ...
def flops(self) -> float: ...
def is_async(self) -> bool: ...
...
class _KinetoEvent:
def name(self) -> str: ...
@ -50,7 +54,6 @@ class _KinetoEvent:
def duration_us(self) -> int: ...
def is_async(self) -> bool: ...
def linked_correlation_id(self) -> int: ...
...
class _ProfilerResult:
def events(self) -> List[_KinetoEvent]: ...
@ -58,11 +61,16 @@ class _ProfilerResult:
def save(self, path: str) -> None: ...
def experimental_event_tree(self) -> List[_ProfilerEvent]: ...
class SavedTensor:
...
class SavedTensor: ...
def _enable_profiler(config: ProfilerConfig, activities: Set[ProfilerActivity]) -> None: ...
def _prepare_profiler(config: ProfilerConfig, activities: Set[ProfilerActivity]) -> None: ...
def _enable_profiler(
config: ProfilerConfig,
activities: Set[ProfilerActivity],
) -> None: ...
def _prepare_profiler(
config: ProfilerConfig,
activities: Set[ProfilerActivity],
) -> None: ...
def _disable_profiler() -> _ProfilerResult: ...
def _profiler_enabled() -> bool: ...
def _add_metadata_json(key: str, value: str) -> None: ...
@ -73,15 +81,15 @@ def _record_function_with_args_exit(handle: torch.Tensor) -> None: ...
def _supported_activities() -> Set[ProfilerActivity]: ...
def _enable_record_function(enable: bool) -> None: ...
def _set_empty_test_observer(is_global: bool, sampling_prob: float) -> None: ...
def _push_saved_tensors_default_hooks(pack_hook: Callable, unpack_hook: Callable) -> None: ...
def _push_saved_tensors_default_hooks(
pack_hook: Callable,
unpack_hook: Callable,
) -> None: ...
def _pop_saved_tensors_default_hooks() -> None: ...
def _unsafe_set_version_counter(t: torch.Tensor, prev_version: int) -> None: ...
def _enable_profiler_legacy(config: ProfilerConfig) -> None: ...
def _disable_profiler_legacy() -> List[List[ProfilerEvent]]: ...
def _profiler_type() -> ActiveProfilerType: ...
def _saved_tensors_hooks_enable() -> None: ...
def _saved_tensors_hooks_disable(message: str) -> None: ...
def _saved_tensors_hooks_get_disabled_error_message() -> Optional[str]: ...

View File

@ -1,6 +1,6 @@
from enum import Enum
from torch.types import Tuple, Number, _bool
from torch.types import _bool, Tuple
# Defined in torch/csrc/cuda/shared/cudnn.cpp
is_cuda: _bool

View File

@ -1,5 +1,6 @@
from typing import Any, Dict, List, Set
import torch
from typing import Dict, List, Set, Any
# This module is defined in torch/csrc/distributed/autograd/init.cpp
@ -20,6 +21,6 @@ def _get_debug_info() -> Dict[str, str]: ...
def backward(
context_id: int,
roots: List[torch.Tensor],
retain_graph = False
retain_graph=False,
) -> None: ...
def get_gradients(context_id: int) -> Dict[torch.Tensor, torch.Tensor]: ...

View File

@ -17,7 +17,8 @@ class BuiltinCommHookType(Enum):
def _register_comm_hook(reducer: Reducer, state: Any, comm_hook: Any): ...
def _register_builtin_comm_hook(
reducer: Reducer, comm_hook_type: BuiltinCommHookType
reducer: Reducer,
comm_hook_type: BuiltinCommHookType,
): ...
class GradBucket:
@ -50,7 +51,9 @@ class Reducer:
def _get_zeros_like_grad_buckets(self) -> List[GradBucket]: ...
def _push_all_rebuilt_params(self) -> None: ...
def _set_forward_pass_work_handle(
self, work: Work, use_static_world_size: bool
self,
work: Work,
use_static_world_size: bool,
): ...
def _get_local_used_map(self) -> Tensor: ...
def _set_ddp_runtime_logging_sample_rate(self, sample_rate: int) -> None: ...
@ -90,7 +93,6 @@ class DebugLevel(Enum):
DETAIL = ...
class ReduceOp:
def __init__(self, op: "RedOpType"): ...
SUM = ...
@ -149,7 +151,10 @@ class Store:
def get(self, key: str) -> bytes: ...
def add(self, key: str, value: int) -> int: ...
def compare_set(
self, key: str, expected_value: str, desired_value: str
self,
key: str,
expected_value: str,
desired_value: str,
) -> bytes: ...
def delete_key(self, key: str) -> bool: ...
def num_keys(self) -> int: ...
@ -195,7 +200,6 @@ class Work:
def _source_rank(self) -> int: ...
def result(self) -> List[Tensor]: ...
def synchronize(self): ...
...
class ProcessGroup:
class Options: ...
@ -385,11 +389,9 @@ class ProcessGroupGloo(ProcessGroup):
timeout: timedelta,
): ...
@staticmethod
def create_device(hostname=str(), interface=str()) -> Device: ...
...
def create_device(hostname="", interface="") -> Device: ...
@staticmethod
def create_default_device() -> Device: ...
...
class _ProcessGroupWrapper(ProcessGroup):
def __init__(self, pg: ProcessGroup, gloo_pg: ProcessGroupGloo): ...
@ -409,7 +411,6 @@ class ProcessGroupNCCL(ProcessGroup):
def _group_start() -> None: ...
@staticmethod
def _group_end() -> None: ...
...
class ProcessGroupUCC(ProcessGroup):
def __init__(

View File

@ -1,8 +1,8 @@
from typing import Any, Dict, List, Optional, Tuple, Union, overload
from datetime import timedelta
import enum
from typing import Any, Dict, List, Optional, overload, Tuple
import torch
from torch.types import Device
from . import Future
from ._autograd import ProfilerEvent
from ._distributed_c10d import ProcessGroup, Store
@ -59,13 +59,12 @@ class PyRRef:
def remote(self, timeout: float = _UNSET_RPC_TIMEOUT) -> Any: ...
def _serialize(self) -> Tuple: ...
@staticmethod
def _deserialize(tp: Tuple) -> 'PyRRef': ...
def _deserialize(tp: Tuple) -> "PyRRef": ...
def _get_type(self) -> Any: ...
def _get_future(self) -> Future: ...
def _get_profiling_future(self) -> Future: ...
def _set_profiling_future(self, profilingFuture: Future): ...
def __repr__(self) -> str: ...
...
class _TensorPipeRpcBackendOptionsBase(RpcBackendOptions):
num_worker_threads: int
@ -79,8 +78,13 @@ class _TensorPipeRpcBackendOptionsBase(RpcBackendOptions):
rpc_timeout: float = _DEFAULT_RPC_TIMEOUT_SEC,
init_method: str = _DEFAULT_INIT_METHOD,
device_maps: Dict[str, Dict[torch.device, torch.device]] = {},
devices: List[torch.device] = list()): ...
def _set_device_map(self, to: str, device_map: Dict[torch.device, torch.device]): ...
devices: List[torch.device] = [],
): ...
def _set_device_map(
self,
to: str,
device_map: Dict[torch.device, torch.device],
): ...
class TensorPipeAgent(RpcAgent):
def __init__(
@ -108,7 +112,8 @@ class TensorPipeAgent(RpcAgent):
worker_info: WorkerInfo,
my_devices: List[torch.device],
reverse_device_map: Dict[str, Dict[torch.device, torch.device]],
is_join: bool): ...
is_join: bool,
): ...
def _get_backend_options(self) -> _TensorPipeRpcBackendOptionsBase: ...
@property
def is_static_group(self) -> bool: ...
@ -116,7 +121,7 @@ class TensorPipeAgent(RpcAgent):
def store(self) -> Store: ...
def _is_current_rpc_agent_set() -> bool: ...
def _get_current_rpc_agent()-> RpcAgent: ...
def _get_current_rpc_agent() -> RpcAgent: ...
def _set_and_start_rpc_agent(agent: RpcAgent): ...
def _reset_current_rpc_agent(): ...
def _delete_all_user_and_unforked_owner_rrefs(timeout: timedelta = ...): ...
@ -128,15 +133,15 @@ def _invoke_rpc_builtin(
opName: str,
rpcTimeoutSeconds: float,
*args: Any,
**kwargs: Any
): ...
**kwargs: Any,
): ...
def _invoke_rpc_python_udf(
dst: WorkerInfo,
pickledPythonUDF: str,
tensors: List[torch.Tensor],
rpcTimeoutSeconds: float,
isAsyncExecution: bool
): ...
isAsyncExecution: bool,
): ...
def _invoke_rpc_torchscript(
dstWorkerName: str,
qualifiedNameStr: str,
@ -144,29 +149,29 @@ def _invoke_rpc_torchscript(
kwargsDict: Dict,
rpcTimeoutSeconds: float,
isAsyncExecution: bool,
): ...
): ...
def _invoke_remote_builtin(
dst: WorkerInfo,
opName: str,
rpcTimeoutSeconds: float,
*args: Any,
**kwargs: Any
): ...
**kwargs: Any,
): ...
def _invoke_remote_python_udf(
dst: WorkerInfo,
pickledPythonUDF: str,
tensors: List[torch.Tensor],
rpcTimeoutSeconds: float,
isAsyncExecution: bool,
): ...
): ...
def _invoke_remote_torchscript(
dstWorkerName: WorkerInfo,
qualifiedNameStr: str,
rpcTimeoutSeconds: float,
isAsyncExecution: bool,
*args: Any,
**kwargs: Any
): ...
**kwargs: Any,
): ...
def get_rpc_timeout() -> float: ...
def enable_gil_profiling(flag: bool): ...
def _set_rpc_timeout(rpcTimeoutSeconds: float): ...

View File

@ -1,12 +1,13 @@
from typing import Dict, List
import torch
from ._distributed_c10d import ProcessGroup, Store
from ._distributed_rpc import (
_TensorPipeRpcBackendOptionsBase,
TensorPipeAgent,
WorkerInfo,
)
from typing import List, Dict, overload
from datetime import timedelta
# This module is defined in torch/csrc/distributed/rpc/testing/init.cpp

View File

@ -1,5 +1,5 @@
import types
from typing import Union
from torch._dynamo.types import DynamoCallback, DynamoGuardHook
def set_eval_frame(callback: DynamoCallback) -> DynamoCallback: ...

View File

@ -1,12 +1,11 @@
from torch import Tensor
from typing import AnyStr, List
from torch import Tensor
class UndefinedGrad:
def __init__(self) -> None: ...
def __call__(self, *inputs: Tensor) -> List[Tensor]: ...
...
class DelayedError:
def __init__(self, msg: AnyStr, num_inputs: int) -> None: ...
def __call__(self, inputs: List[Tensor]) -> List[Tensor]: ...
...

View File

@ -1,7 +1,8 @@
from torch import Tensor
from enum import Enum
from typing import Optional, Tuple
from torch import Tensor
# Defined in torch/csrc/functorch/init.cpp
def _set_dynamic_layer_keys_included(included: bool) -> None: ...
@ -18,7 +19,6 @@ def _wrap_for_grad(tensor: Tensor, level: int) -> Tensor: ...
def _unwrap_batched(tensor: Tensor, level: int) -> Tuple[Tensor, Optional[int]]: ...
def current_level() -> int: ...
def _add_batch_dim(tensor: Tensor, bdim: int, level: int) -> Tensor: ...
def set_single_level_autograd_function_allowed(allowed: bool) -> None: ...
def get_single_level_autograd_function_allowed() -> bool: ...

View File

@ -1,4 +1,5 @@
from typing import List
from torch import Tensor
# defined in torch/csrc/lazy/python/init.cpp

View File

@ -1,8 +1,11 @@
#defined in torch/csrc/lazy/python/init.cpp
# defined in torch/csrc/lazy/python/init.cpp
from typing import Any, List, Tuple
from typing import List, Tuple, Any
from torch import Tensor
def _init(): ...
def _get_tensors_ts_device_data_node(tensors: List[Tensor]) -> Tuple[List[int], List[Any]]: ...
def _get_tensors_ts_device_data_node(
tensors: List[Tensor],
) -> Tuple[List[int], List[Any]]: ...
def _run_cached_graph(hash_str: str, graph_inputs: List[Any]) -> List[Tensor]: ...

View File

@ -1,8 +1,8 @@
# Defined in torch/csrc/monitor/python_init.cpp
from typing import List, Dict, Callable, Union
from enum import Enum
import datetime
from enum import Enum
from typing import Callable, Dict, List, Union
class Aggregation(Enum):
VALUE = ...
@ -16,7 +16,10 @@ class Stat:
name: str
count: int
def __init__(
self, name: str, aggregations: List[Aggregation], window_size: int,
self,
name: str,
aggregations: List[Aggregation],
window_size: int,
max_samples: int = -1,
) -> None: ...
def add(self, v: float) -> None: ...

View File

@ -1,6 +1,7 @@
from torch import Tensor, memory_format
from typing import Callable, Optional, List, overload, Tuple
from torch.types import _bool, _dtype, _device
from typing import Callable, List, Optional, overload, Tuple
from torch import memory_format, Tensor
from torch.types import _bool, _device, _dtype
# Defined in tools/autograd/templates/python_nn_functions.cpp
@ -10,27 +11,56 @@ ${dispatched_hints}
def mkldnn_linear(input: Tensor, weight: Tensor, bias: Optional[Tensor]) -> Tensor: ...
# Defined at aten/src/ATen/native/mkldnn/MKLDNNConversions.cpp
def mkldnn_reorder_conv2d_weight(self: Tensor, padding: List, stride: List, dilatation: List, groups: int) -> Tensor: ...
def mkldnn_reorder_conv3d_weight(self: Tensor, padding: List, stride: List, dilatation: List, groups: int) -> Tensor: ...
def mkldnn_reorder_conv2d_weight(
self: Tensor,
padding: List,
stride: List,
dilatation: List,
groups: int,
) -> Tensor: ...
def mkldnn_reorder_conv3d_weight(
self: Tensor,
padding: List,
stride: List,
dilatation: List,
groups: int,
) -> Tensor: ...
# Defined in aten/src/ATen/native/mkldnn/Prelu.cpp
def mkldnn_prelu(input: Tensor, weight: Tensor) -> Tensor: ...
# Defined at tools/autograd/templates/python_nn_functions.cpp
@overload
def _parse_to(device: _device, dtype: _dtype, non_blocking: _bool, copy: _bool, *,
memory_format: memory_format) -> Tuple[_device, _dtype, _bool, memory_format]: ...
def _parse_to(
device: _device,
dtype: _dtype,
non_blocking: _bool,
copy: _bool,
*,
memory_format: memory_format,
) -> Tuple[_device, _dtype, _bool, memory_format]: ...
@overload
def _parse_to(dtype: _dtype, non_blocking: _bool, copy: _bool, *,
memory_format: memory_format) -> Tuple[_device, _dtype, _bool, memory_format]: ...
def _parse_to(
dtype: _dtype,
non_blocking: _bool,
copy: _bool,
*,
memory_format: memory_format,
) -> Tuple[_device, _dtype, _bool, memory_format]: ...
@overload
def _parse_to(tensor: Tensor, non_blocking: _bool, copy: _bool, *,
memory_format: memory_format) -> Tuple[_device, _dtype, _bool, memory_format]: ...
def _parse_to(
tensor: Tensor,
non_blocking: _bool,
copy: _bool,
*,
memory_format: memory_format,
) -> Tuple[_device, _dtype, _bool, memory_format]: ...
# Defined in aten/src/ATen/naitve/PadSequence.cpp
def pad_sequence(sequences: List[Tensor], batch_first: bool = False,
padding_value: float = ...) -> Tensor: ...
def pad_sequence(
sequences: List[Tensor],
batch_first: bool = False,
padding_value: float = ...,
) -> Tensor: ...
def flatten_dense_tensors(tensors: List[Tensor]) -> Tensor: ...
def unflatten_dense_tensors(flat: Tensor, tensors: List[Tensor]) -> List[Tensor]: ...

View File

@ -53,7 +53,6 @@ class _ExperimentalConfig:
profiler_measure_per_kernel: bool = ...,
verbose: bool = ...,
) -> None: ...
...
class ProfilerConfig:
def __init__(
@ -66,7 +65,6 @@ class ProfilerConfig:
with_modules: bool,
experimental_config: _ExperimentalConfig,
) -> None: ...
...
class _ProfilerEvent:
start_tid: int

View File

@ -1,8 +1,32 @@
# ${generated_comment}
from torch import Tensor, Generator, strided, memory_format, contiguous_format, strided, inf
from typing import List, Tuple, Optional, Union, Any, ContextManager, Callable, overload, Iterator, NamedTuple, Sequence, Literal, TypeVar
from typing import (
Any,
Callable,
ContextManager,
Iterator,
List,
Literal,
NamedTuple,
Optional,
overload,
Sequence,
Tuple,
TypeVar,
Union,
)
from torch.types import _int, _float, _bool, Number, _dtype, _device, _qscheme, _size, _layout
from torch import contiguous_format, Generator, inf, memory_format, strided, Tensor
from torch.types import (
_bool,
_device,
_dtype,
_float,
_int,
_layout,
_qscheme,
_size,
Number,
)
${namedtuple_defs}

View File

@ -1,7 +1,11 @@
from ._symbolic_trace import (
symbolic_trace as symbolic_trace,
Tracer as Tracer,
wrap as wrap,
)
from .graph import Graph as Graph
from .graph_module import GraphModule as GraphModule
from .node import Node as Node, map_arg as map_arg
from .proxy import Proxy as Proxy
from ._symbolic_trace import Tracer as Tracer, symbolic_trace as symbolic_trace, wrap as wrap
from .interpreter import Interpreter as Interpreter, Transformer as Transformer
from .node import map_arg as map_arg, Node as Node
from .proxy import Proxy as Proxy
from .subgraph_rewriter import replace_pattern as replace_pattern

View File

@ -1,7 +1,17 @@
from typing import Any, Callable, Dict, List, Optional, Sequence, Tuple, Union
from torch import Tensor
from torch.types import _size, _dtype
from typing import Any, Optional, Tuple, Dict, List, Callable, Sequence, Union
from .common_types import _ratio_any_t, _size_any_t, _size_1_t, _size_2_t, _size_3_t, _size_2_opt_t, _size_3_opt_t
from torch.types import _dtype, _size
from .common_types import (
_ratio_any_t,
_size_1_t,
_size_2_opt_t,
_size_2_t,
_size_3_opt_t,
_size_3_t,
_size_any_t,
)
# 'TypedDict' is a new accepted type that represents a dictionary with a fixed set of allowed keys.
# It is standards-track but not in `typing` yet. We leave this hear to be uncommented once the feature
@ -15,7 +25,6 @@ from .common_types import _ratio_any_t, _size_any_t, _size_1_t, _size_2_t, _size
GRID_SAMPLE_INTERPOLATION_MODES = Dict[str, int]
GRID_SAMPLE_PADDING_MODES = Dict[str, int]
# These stubs were generated by running stubgen (`stubgen --parse-only functional.py`), followed by manual cleaning.
#
# The 'BroadcastingList{1,2,3}' types were replaced by `_size` or _output_ratio, as appropriate.
@ -26,374 +35,570 @@ GRID_SAMPLE_PADDING_MODES = Dict[str, int]
# deleted from the stub and replaced by generated declarations. See `gen_pyi` for the implementation of the code
# generation logic for those functions. In the future, it might be worth looking into using the mypy plugin system
# to encode the type semantics of `_add_docstr`, should that system ever become widespread.
def fractional_max_pool2d_with_indices(input: Tensor, kernel_size: _size, output_size: Optional[_size] = ...,
output_ratio: Optional[_ratio_any_t] = ..., return_indices: bool = ...,
_random_samples: Optional[Tensor] = ...) -> Tuple[Tensor, Tensor]: ...
def fractional_max_pool3d_with_indices(input: Tensor, kernel_size: _size, output_size: Optional[_size] = ...,
output_ratio: Optional[_ratio_any_t] = ..., return_indices: bool = ...,
_random_samples: Optional[Tensor] = ...) -> Tuple[Tensor, Tensor]: ...
def max_pool1d_with_indices(input: Tensor, kernel_size: _size, stride: Optional[_size] = ..., padding: _size = ...,
dilation: _size = ..., ceil_mode: bool = ..., return_indices: bool = ...) -> Tuple[
Tensor, Tensor]: ...
def max_pool2d_with_indices(input: Tensor, kernel_size: _size, stride: Optional[_size] = ..., padding: _size = ...,
dilation: _size = ..., ceil_mode: bool = ..., return_indices: bool = ...) -> Tuple[
Tensor, Tensor]: ...
def max_pool3d_with_indices(input: Tensor, kernel_size: _size, stride: Optional[_size] = ..., padding: _size = ...,
dilation: _size = ..., ceil_mode: bool = ..., return_indices: bool = ...) -> Tuple[
Tensor, Tensor]: ...
def max_unpool1d(input: Tensor, indices: Tensor, kernel_size: _size, stride: Optional[_size] = ...,
padding: _size = ..., output_size: Optional[_size] = ...) -> Tensor: ...
def max_unpool2d(input: Tensor, indices: Tensor, kernel_size: _size, stride: Optional[_size] = ...,
padding: _size = ..., output_size: Optional[_size] = ...) -> Tensor: ...
def max_unpool3d(input: Tensor, indices: Tensor, kernel_size: _size, stride: Optional[_size] = ...,
padding: _size = ..., output_size: Optional[_size] = ...) -> Tensor: ...
def lp_pool1d(input: Tensor, norm_type: float, kernel_size: _size_1_t, stride: Union[Optional[_size], Optional[int]] = ...,
ceil_mode: bool = ...) -> Tensor: ...
def lp_pool2d(input: Tensor, norm_type: float, kernel_size: _size_2_t, stride: Union[Optional[_size], Optional[int]] = ...,
ceil_mode: bool = ...) -> Tensor: ...
def adaptive_max_pool1d_with_indices(input: Tensor, output_size: _size, return_indices: bool = ...) -> Tuple[
Tensor, Tensor]: ...
def adaptive_max_pool2d_with_indices(input: Tensor, output_size: _size_2_opt_t, return_indices: bool = ...) -> Tuple[
Tensor, Tensor]: ...
def adaptive_max_pool3d_with_indices(input: Tensor, output_size: _size_3_opt_t, return_indices: bool = ...) -> Tuple[
Tensor, Tensor]: ...
def fractional_max_pool2d_with_indices(
input: Tensor,
kernel_size: _size,
output_size: Optional[_size] = ...,
output_ratio: Optional[_ratio_any_t] = ...,
return_indices: bool = ...,
_random_samples: Optional[Tensor] = ...,
) -> Tuple[Tensor, Tensor]: ...
def fractional_max_pool3d_with_indices(
input: Tensor,
kernel_size: _size,
output_size: Optional[_size] = ...,
output_ratio: Optional[_ratio_any_t] = ...,
return_indices: bool = ...,
_random_samples: Optional[Tensor] = ...,
) -> Tuple[Tensor, Tensor]: ...
def max_pool1d_with_indices(
input: Tensor,
kernel_size: _size,
stride: Optional[_size] = ...,
padding: _size = ...,
dilation: _size = ...,
ceil_mode: bool = ...,
return_indices: bool = ...,
) -> Tuple[Tensor, Tensor]: ...
def max_pool2d_with_indices(
input: Tensor,
kernel_size: _size,
stride: Optional[_size] = ...,
padding: _size = ...,
dilation: _size = ...,
ceil_mode: bool = ...,
return_indices: bool = ...,
) -> Tuple[Tensor, Tensor]: ...
def max_pool3d_with_indices(
input: Tensor,
kernel_size: _size,
stride: Optional[_size] = ...,
padding: _size = ...,
dilation: _size = ...,
ceil_mode: bool = ...,
return_indices: bool = ...,
) -> Tuple[Tensor, Tensor]: ...
def max_unpool1d(
input: Tensor,
indices: Tensor,
kernel_size: _size,
stride: Optional[_size] = ...,
padding: _size = ...,
output_size: Optional[_size] = ...,
) -> Tensor: ...
def max_unpool2d(
input: Tensor,
indices: Tensor,
kernel_size: _size,
stride: Optional[_size] = ...,
padding: _size = ...,
output_size: Optional[_size] = ...,
) -> Tensor: ...
def max_unpool3d(
input: Tensor,
indices: Tensor,
kernel_size: _size,
stride: Optional[_size] = ...,
padding: _size = ...,
output_size: Optional[_size] = ...,
) -> Tensor: ...
def lp_pool1d(
input: Tensor,
norm_type: float,
kernel_size: _size_1_t,
stride: Union[Optional[_size], Optional[int]] = ...,
ceil_mode: bool = ...,
) -> Tensor: ...
def lp_pool2d(
input: Tensor,
norm_type: float,
kernel_size: _size_2_t,
stride: Union[Optional[_size], Optional[int]] = ...,
ceil_mode: bool = ...,
) -> Tensor: ...
def adaptive_max_pool1d_with_indices(
input: Tensor,
output_size: _size,
return_indices: bool = ...,
) -> Tuple[Tensor, Tensor]: ...
def adaptive_max_pool2d_with_indices(
input: Tensor,
output_size: _size_2_opt_t,
return_indices: bool = ...,
) -> Tuple[Tensor, Tensor]: ...
def adaptive_max_pool3d_with_indices(
input: Tensor,
output_size: _size_3_opt_t,
return_indices: bool = ...,
) -> Tuple[Tensor, Tensor]: ...
def adaptive_avg_pool1d(input: Tensor, output_size: _size_1_t) -> Tensor: ...
def adaptive_avg_pool2d(input: Tensor, output_size: _size_2_opt_t) -> Tensor: ...
def adaptive_avg_pool3d(input: Tensor, output_size: _size_3_opt_t) -> Tensor: ...
def dropout(input: Tensor, p: float = ..., training: bool = ..., inplace: bool = ...) -> Tensor: ...
def alpha_dropout(input: Tensor, p: float = ..., training: bool = ..., inplace: bool = ...) -> Tensor: ...
def dropout1d(input: Tensor, p: float = ..., training: bool = ..., inplace: bool = ...) -> Tensor: ...
def dropout2d(input: Tensor, p: float = ..., training: bool = ..., inplace: bool = ...) -> Tensor: ...
def dropout3d(input: Tensor, p: float = ..., training: bool = ..., inplace: bool = ...) -> Tensor: ...
def feature_alpha_dropout(input: Tensor, p: float = ..., training: bool = ..., inplace: bool = ...) -> Tensor: ...
def threshold(input: Tensor, threshold: float, value: float, inplace: bool = ...) -> Tensor: ...
def dropout(
input: Tensor,
p: float = ...,
training: bool = ...,
inplace: bool = ...,
) -> Tensor: ...
def alpha_dropout(
input: Tensor,
p: float = ...,
training: bool = ...,
inplace: bool = ...,
) -> Tensor: ...
def dropout1d(
input: Tensor,
p: float = ...,
training: bool = ...,
inplace: bool = ...,
) -> Tensor: ...
def dropout2d(
input: Tensor,
p: float = ...,
training: bool = ...,
inplace: bool = ...,
) -> Tensor: ...
def dropout3d(
input: Tensor,
p: float = ...,
training: bool = ...,
inplace: bool = ...,
) -> Tensor: ...
def feature_alpha_dropout(
input: Tensor,
p: float = ...,
training: bool = ...,
inplace: bool = ...,
) -> Tensor: ...
def threshold(
input: Tensor,
threshold: float,
value: float,
inplace: bool = ...,
) -> Tensor: ...
def relu(input: Tensor, inplace: bool = ...) -> Tensor: ...
def glu(input: Tensor, dim: int = ...) -> Tensor: ...
def hardtanh(input: Tensor, min_val: float = ..., max_val: float = ..., inplace: bool = ...) -> Tensor: ...
def hardtanh(
input: Tensor,
min_val: float = ...,
max_val: float = ...,
inplace: bool = ...,
) -> Tensor: ...
def relu6(input: Tensor, inplace: bool = ...) -> Tensor: ...
def elu(input: Tensor, alpha: float = ..., inplace: bool = ...) -> Tensor: ...
def selu(input: Tensor, inplace: bool = ...) -> Tensor: ...
def celu(input: Tensor, alpha: float = ..., inplace: bool = ...) -> Tensor: ...
def leaky_relu(input: Tensor, negative_slope: float = ..., inplace: bool = ...) -> Tensor: ...
def leaky_relu(
input: Tensor,
negative_slope: float = ...,
inplace: bool = ...,
) -> Tensor: ...
def prelu(input: Tensor, weight: Tensor) -> Tensor: ...
def rrelu(input: Tensor, lower: float = ..., upper: float = ..., training: bool = ...,
inplace: bool = ...) -> Tensor: ...
def rrelu(
input: Tensor,
lower: float = ...,
upper: float = ...,
training: bool = ...,
inplace: bool = ...,
) -> Tensor: ...
def gelu(input: Any, approximate: str = ...): ...
def hardshrink(input: Tensor, lambd: float = ...) -> Tensor: ...
def tanhshrink(input: Any): ...
def softsign(input: Any): ...
def softmin(input: Tensor, dim: Optional[int] = ..., _stacklevel: int = ..., dtype: Optional[_dtype] = ...) -> Tensor: ...
def softmax(input: Tensor, dim: Optional[int] = ..., _stacklevel: int = ..., dtype: Optional[_dtype] = ...) -> Tensor: ...
def gumbel_softmax(logits: Tensor, tau: float = ..., hard: bool = ..., eps: float = ..., dim: int = ...) -> Tensor: ...
def log_softmax(input: Tensor, dim: Optional[int] = ..., _stacklevel: int = ...,
dtype: Optional[_dtype] = ...) -> Tensor: ...
def softmin(
input: Tensor,
dim: Optional[int] = ...,
_stacklevel: int = ...,
dtype: Optional[_dtype] = ...,
) -> Tensor: ...
def softmax(
input: Tensor,
dim: Optional[int] = ...,
_stacklevel: int = ...,
dtype: Optional[_dtype] = ...,
) -> Tensor: ...
def gumbel_softmax(
logits: Tensor,
tau: float = ...,
hard: bool = ...,
eps: float = ...,
dim: int = ...,
) -> Tensor: ...
def log_softmax(
input: Tensor,
dim: Optional[int] = ...,
_stacklevel: int = ...,
dtype: Optional[_dtype] = ...,
) -> Tensor: ...
def tanh(input: Any): ...
def sigmoid(input: Any) -> Tensor: ...
def hardsigmoid(input: Tensor, inplace: bool = False) -> Tensor: ...
def linear(input: Tensor, weight: Tensor, bias: Optional[Tensor] = ...) -> Tensor: ...
def bilinear(input1: Tensor, input2: Tensor, weight: Tensor, bias: Optional[Tensor] = ...) -> Tensor: ...
def bilinear(
input1: Tensor,
input2: Tensor,
weight: Tensor,
bias: Optional[Tensor] = ...,
) -> Tensor: ...
def silu(input: Tensor, inplace: bool = False) -> Tensor: ...
def mish(input: Tensor, inplace: bool = False) -> Tensor: ...
def hardswish(input: Tensor, inplace: bool = False) -> Tensor: ...
def embedding(input: Tensor, weight: Tensor, padding_idx: Optional[int] = ..., max_norm: Optional[float] = ...,
norm_type: float = ..., scale_grad_by_freq: bool = ..., sparse: bool = ...) -> Tensor: ...
def embedding_bag(input: Tensor, weight: Tensor, offsets: Optional[Tensor] = ..., max_norm: Optional[float] = ...,
norm_type: float = ..., scale_grad_by_freq: bool = ..., mode: str = ...,
sparse: bool = ..., per_sample_weights: Optional[Tensor] = ...,
include_last_offset: bool = ..., padding_idx: Optional[int] = ...) -> Tensor: ...
def batch_norm(input: Tensor, running_mean: Optional[Tensor], running_var: Optional[Tensor],
weight: Optional[Tensor] = ..., bias: Optional[Tensor] = ..., training: bool = ...,
momentum: float = ..., eps: float = ...) -> Tensor: ...
def instance_norm(input: Tensor, running_mean: Optional[Tensor] = ..., running_var: Optional[Tensor] = ...,
weight: Optional[Tensor] = ..., bias: Optional[Tensor] = ..., use_input_stats: bool = ...,
momentum: float = ..., eps: float = ...) -> Tensor: ...
def layer_norm(input: Tensor, normalized_shape: Sequence[int], weight: Optional[Tensor] = ..., bias: Optional[Tensor] = ...,
eps: float = ...) -> Tensor: ...
def group_norm(input: Tensor, num_groups: int, weight: Optional[Tensor] = ..., bias: Optional[Tensor] = ...,
eps: float = ...) -> Tensor: ...
def local_response_norm(input: Tensor, size: int, alpha: float = ..., beta: float = ..., k: float = ...) -> Tensor: ...
def ctc_loss(log_probs: Tensor, targets: Tensor, input_lengths: Tensor, target_lengths: Tensor, blank: int = ...,
reduction: str = ..., zero_infinity: bool = ...) -> Tensor: ...
def nll_loss(input: Tensor, target: Tensor, weight: Optional[Tensor] = ..., size_average: Optional[bool] = ...,
ignore_index: int = ..., reduce: Optional[bool] = ..., reduction: str = ...) -> Tensor: ...
def poisson_nll_loss(input: Tensor, target: Tensor, log_input: bool = ..., full: bool = ...,
size_average: Optional[bool] = ..., eps: float = ..., reduce: Optional[bool] = ...,
reduction: str = ...) -> Tensor: ...
def gaussian_nll_loss(input: Tensor, target: Tensor, var: Tensor, full: Optional[bool] = ...,
eps: Optional[float] = ..., reduction: Optional[str] = ...) -> Tensor: ...
def kl_div(input: Tensor, target: Tensor, size_average: Optional[bool] = ..., reduce: Optional[bool] = ...,
reduction: str = ..., log_target: bool = ...) -> Tensor: ...
def cross_entropy(input: Tensor, target: Tensor, weight: Optional[Tensor] = ..., size_average: Optional[bool] = ...,
ignore_index: int = ..., reduce: Optional[bool] = ..., reduction: str = ...,
label_smoothing: float = ...) -> Tensor: ...
def binary_cross_entropy(input: Tensor, target: Tensor, weight: Optional[Tensor] = ...,
size_average: Optional[bool] = ..., reduce: Optional[bool] = ...,
reduction: str = ...) -> Tensor: ...
def binary_cross_entropy_with_logits(input: Tensor, target: Tensor, weight: Optional[Tensor] = ...,
size_average: Optional[bool] = ..., reduce: Optional[bool] = ...,
reduction: str = ..., pos_weight: Optional[Tensor] = ...) -> Tensor: ...
def smooth_l1_loss(input: Tensor, target: Tensor, size_average: Optional[bool] = ..., reduce: Optional[bool] = ...,
reduction: str = ..., beta: float = ...) -> Tensor: ...
def huber_loss(input: Tensor, target: Tensor, reduction: str = ..., delta: float = ...) -> Tensor: ...
def l1_loss(input: Tensor, target: Tensor, size_average: Optional[bool] = ..., reduce: Optional[bool] = ...,
reduction: str = ...) -> Tensor: ...
def mse_loss(input: Tensor, target: Tensor, size_average: Optional[bool] = ..., reduce: Optional[bool] = ...,
reduction: str = ...) -> Tensor: ...
def margin_ranking_loss(input1: Tensor, input2: Tensor, target: Tensor, margin: float = ...,
size_average: Optional[bool] = ..., reduce: Optional[bool] = ...,
reduction: str = ...) -> Tensor: ...
def hinge_embedding_loss(input: Tensor, target: Tensor, margin: float = ..., size_average: Optional[bool] = ...,
reduce: Optional[bool] = ..., reduction: str = ...) -> Tensor: ...
def multilabel_margin_loss(input: Tensor, target: Tensor, size_average: Optional[bool] = ...,
reduce: Optional[bool] = ..., reduction: str = ...) -> Tensor: ...
def soft_margin_loss(input: Tensor, target: Tensor, size_average: Optional[bool] = ..., reduce: Optional[bool] = ...,
reduction: str = ...) -> Tensor: ...
def multilabel_soft_margin_loss(input: Tensor, target: Tensor, weight: Optional[Tensor] = ...,
size_average: Optional[bool] = ..., reduce: Optional[bool] = ...,
reduction: str = ...) -> Tensor: ...
def cosine_embedding_loss(input1: Tensor, input2: Tensor, target: Tensor, margin: float = ...,
size_average: Optional[bool] = ..., reduce: Optional[bool] = ...,
reduction: str = ...) -> Tensor: ...
def multi_margin_loss(input: Tensor, target: Tensor, p: int = ..., margin: float = ..., weight: Optional[Tensor] = ...,
size_average: Optional[bool] = ..., reduce: Optional[bool] = ...,
reduction: str = ...) -> Tensor: ...
def upsample(input: Any, size: Optional[Any] = ..., scale_factor: Optional[Any] = ..., mode: str = ...,
align_corners: Optional[Any] = ...): ...
def interpolate(input: Any, size: Optional[Any] = ..., scale_factor: Optional[Any] = ..., mode: str = ...,
align_corners: Optional[Any] = ..., recompute_scale_factor: Optional[Any] = ...,
antialias: bool = ...): ...
def upsample_nearest(input: Any, size: Optional[Any] = ..., scale_factor: Optional[Any] = ...): ...
def upsample_bilinear(input: Any, size: Optional[Any] = ..., scale_factor: Optional[Any] = ...): ...
def grid_sample(input: Tensor, grid: Tensor, mode: str = ..., padding_mode: str = ...,
align_corners: Optional[Any] = ...) -> Tensor: ...
def affine_grid(theta: Tensor, size: List[int], align_corners: Optional[Any] = ...) -> Tensor: ...
def pad(input: Tensor, pad: Sequence[int], mode: str = ..., value: float = ...) -> Tensor: ...
def pairwise_distance(x1: Tensor, x2: Tensor, p: float = ..., eps: float = ..., keepdim: bool = ...) -> Tensor: ...
def triplet_margin_loss(anchor: Tensor, positive: Tensor, negative: Tensor, margin: float = ..., p: float = ...,
eps: float = ..., swap: bool = ..., size_average: Optional[bool] = ...,
reduce: Optional[bool] = ..., reduction: str = ...) -> Tensor: ...
def triplet_margin_with_distance_loss(anchor: Tensor, positive: Tensor, negative: Tensor, *,
distance_function: Optional[Callable[[Tensor, Tensor], Tensor]]=...,
margin: float=..., swap: bool=..., reduction: str=...) -> Tensor: ...
def normalize(input: Tensor, p: float = ..., dim: int = ..., eps: float = ...,
out: Optional[Tensor] = ...) -> Tensor: ...
def assert_int_or_pair(arg: Any, arg_name: Any, message: Any) -> None: ...
def unfold(input: Tensor, kernel_size: _size_any_t, dilation: _size_any_t = ..., padding: _size_any_t = ...,
stride: _size_any_t = ...) -> Tensor: ...
def fold(input: Tensor, output_size: _size_any_t, kernel_size: _size_any_t, dilation: _size_any_t = ..., padding: _size_any_t = ...,
stride: _size_any_t = ...) -> Tensor: ...
def embedding(
input: Tensor,
weight: Tensor,
padding_idx: Optional[int] = ...,
max_norm: Optional[float] = ...,
norm_type: float = ...,
scale_grad_by_freq: bool = ...,
sparse: bool = ...,
) -> Tensor: ...
def embedding_bag(
input: Tensor,
weight: Tensor,
offsets: Optional[Tensor] = ...,
max_norm: Optional[float] = ...,
norm_type: float = ...,
scale_grad_by_freq: bool = ...,
mode: str = ...,
sparse: bool = ...,
per_sample_weights: Optional[Tensor] = ...,
include_last_offset: bool = ...,
padding_idx: Optional[int] = ...,
) -> Tensor: ...
def batch_norm(
input: Tensor,
running_mean: Optional[Tensor],
running_var: Optional[Tensor],
weight: Optional[Tensor] = ...,
bias: Optional[Tensor] = ...,
training: bool = ...,
momentum: float = ...,
eps: float = ...,
) -> Tensor: ...
def instance_norm(
input: Tensor,
running_mean: Optional[Tensor] = ...,
running_var: Optional[Tensor] = ...,
weight: Optional[Tensor] = ...,
bias: Optional[Tensor] = ...,
use_input_stats: bool = ...,
momentum: float = ...,
eps: float = ...,
) -> Tensor: ...
def layer_norm(
input: Tensor,
normalized_shape: Sequence[int],
weight: Optional[Tensor] = ...,
bias: Optional[Tensor] = ...,
eps: float = ...,
) -> Tensor: ...
def group_norm(
input: Tensor,
num_groups: int,
weight: Optional[Tensor] = ...,
bias: Optional[Tensor] = ...,
eps: float = ...,
) -> Tensor: ...
def local_response_norm(
input: Tensor,
size: int,
alpha: float = ...,
beta: float = ...,
k: float = ...,
) -> Tensor: ...
def ctc_loss(
log_probs: Tensor,
targets: Tensor,
input_lengths: Tensor,
target_lengths: Tensor,
blank: int = ...,
reduction: str = ...,
zero_infinity: bool = ...,
) -> Tensor: ...
def nll_loss(
input: Tensor,
target: Tensor,
weight: Optional[Tensor] = ...,
size_average: Optional[bool] = ...,
ignore_index: int = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
) -> Tensor: ...
def poisson_nll_loss(
input: Tensor,
target: Tensor,
log_input: bool = ...,
full: bool = ...,
size_average: Optional[bool] = ...,
eps: float = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
) -> Tensor: ...
def gaussian_nll_loss(
input: Tensor,
target: Tensor,
var: Tensor,
full: Optional[bool] = ...,
eps: Optional[float] = ...,
reduction: Optional[str] = ...,
) -> Tensor: ...
def kl_div(
input: Tensor,
target: Tensor,
size_average: Optional[bool] = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
log_target: bool = ...,
) -> Tensor: ...
def cross_entropy(
input: Tensor,
target: Tensor,
weight: Optional[Tensor] = ...,
size_average: Optional[bool] = ...,
ignore_index: int = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
label_smoothing: float = ...,
) -> Tensor: ...
def binary_cross_entropy(
input: Tensor,
target: Tensor,
weight: Optional[Tensor] = ...,
size_average: Optional[bool] = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
) -> Tensor: ...
def binary_cross_entropy_with_logits(
input: Tensor,
target: Tensor,
weight: Optional[Tensor] = ...,
size_average: Optional[bool] = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
pos_weight: Optional[Tensor] = ...,
) -> Tensor: ...
def smooth_l1_loss(
input: Tensor,
target: Tensor,
size_average: Optional[bool] = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
beta: float = ...,
) -> Tensor: ...
def huber_loss(
input: Tensor,
target: Tensor,
reduction: str = ...,
delta: float = ...,
) -> Tensor: ...
def l1_loss(
input: Tensor,
target: Tensor,
size_average: Optional[bool] = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
) -> Tensor: ...
def mse_loss(
input: Tensor,
target: Tensor,
size_average: Optional[bool] = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
) -> Tensor: ...
def margin_ranking_loss(
input1: Tensor,
input2: Tensor,
target: Tensor,
margin: float = ...,
size_average: Optional[bool] = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
) -> Tensor: ...
def hinge_embedding_loss(
input: Tensor,
target: Tensor,
margin: float = ...,
size_average: Optional[bool] = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
) -> Tensor: ...
def multilabel_margin_loss(
input: Tensor,
target: Tensor,
size_average: Optional[bool] = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
) -> Tensor: ...
def soft_margin_loss(
input: Tensor,
target: Tensor,
size_average: Optional[bool] = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
) -> Tensor: ...
def multilabel_soft_margin_loss(
input: Tensor,
target: Tensor,
weight: Optional[Tensor] = ...,
size_average: Optional[bool] = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
) -> Tensor: ...
def cosine_embedding_loss(
input1: Tensor,
input2: Tensor,
target: Tensor,
margin: float = ...,
size_average: Optional[bool] = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
) -> Tensor: ...
def multi_margin_loss(
input: Tensor,
target: Tensor,
p: int = ...,
margin: float = ...,
weight: Optional[Tensor] = ...,
size_average: Optional[bool] = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
) -> Tensor: ...
def upsample(
input: Any,
size: Optional[Any] = ...,
scale_factor: Optional[Any] = ...,
mode: str = ...,
align_corners: Optional[Any] = ...,
): ...
def interpolate(
input: Any,
size: Optional[Any] = ...,
scale_factor: Optional[Any] = ...,
mode: str = ...,
align_corners: Optional[Any] = ...,
recompute_scale_factor: Optional[Any] = ...,
antialias: bool = ...,
): ...
def upsample_nearest(
input: Any,
size: Optional[Any] = ...,
scale_factor: Optional[Any] = ...,
): ...
def upsample_bilinear(
input: Any,
size: Optional[Any] = ...,
scale_factor: Optional[Any] = ...,
): ...
def grid_sample(
input: Tensor,
grid: Tensor,
mode: str = ...,
padding_mode: str = ...,
align_corners: Optional[Any] = ...,
) -> Tensor: ...
def affine_grid(
theta: Tensor,
size: List[int],
align_corners: Optional[Any] = ...,
) -> Tensor: ...
def pad(
input: Tensor,
pad: Sequence[int],
mode: str = ...,
value: float = ...,
) -> Tensor: ...
def pairwise_distance(
x1: Tensor,
x2: Tensor,
p: float = ...,
eps: float = ...,
keepdim: bool = ...,
) -> Tensor: ...
def triplet_margin_loss(
anchor: Tensor,
positive: Tensor,
negative: Tensor,
margin: float = ...,
p: float = ...,
eps: float = ...,
swap: bool = ...,
size_average: Optional[bool] = ...,
reduce: Optional[bool] = ...,
reduction: str = ...,
) -> Tensor: ...
def triplet_margin_with_distance_loss(
anchor: Tensor,
positive: Tensor,
negative: Tensor,
*,
distance_function: Optional[Callable[[Tensor, Tensor], Tensor]] = ...,
margin: float = ...,
swap: bool = ...,
reduction: str = ...,
) -> Tensor: ...
def normalize(
input: Tensor,
p: float = ...,
dim: int = ...,
eps: float = ...,
out: Optional[Tensor] = ...,
) -> Tensor: ...
def assert_int_or_pair(
arg: Any,
arg_name: Any,
message: Any,
) -> None: ...
def unfold(
input: Tensor,
kernel_size: _size_any_t,
dilation: _size_any_t = ...,
padding: _size_any_t = ...,
stride: _size_any_t = ...,
) -> Tensor: ...
def fold(
input: Tensor,
output_size: _size_any_t,
kernel_size: _size_any_t,
dilation: _size_any_t = ...,
padding: _size_any_t = ...,
stride: _size_any_t = ...,
) -> Tensor: ...
def _canonical_mask(
mask: Optional[Tensor],
mask_name: str,
other_type: Optional[_dtype],
other_name: str,
target_type: _dtype,
check_other: bool = True,
mask: Optional[Tensor],
mask_name: str,
other_type: Optional[_dtype],
other_name: str,
target_type: _dtype,
check_other: bool = True,
) -> Optional[Tensor]: ...
def _none_or_dtype(input: Optional[Tensor]) -> Optional[_dtype]: ...
def multi_head_attention_forward(query: Tensor,
key: Tensor,
value: Tensor,
embed_dim_to_check: int,
num_heads: int,
in_proj_weight: Optional[Tensor],
in_proj_bias: Optional[Tensor],
bias_k: Optional[Tensor],
bias_v: Optional[Tensor],
add_zero_attn: bool,
dropout_p: float,
out_proj_weight: Tensor,
out_proj_bias: Optional[Tensor],
training: bool = True,
key_padding_mask: Optional[Tensor] = None,
need_weights: bool = True,
attn_mask: Optional[Tensor] = None,
use_separate_proj_weight: bool = False,
q_proj_weight: Optional[Tensor] = None,
k_proj_weight: Optional[Tensor] = None,
v_proj_weight: Optional[Tensor] = None,
static_k: Optional[Tensor] = None,
static_v: Optional[Tensor] = None,
average_attn_weights: bool = True,
is_causal: bool = False
) -> Tuple[Tensor, Optional[Tensor]]: ...
def multi_head_attention_forward(
query: Tensor,
key: Tensor,
value: Tensor,
embed_dim_to_check: int,
num_heads: int,
in_proj_weight: Optional[Tensor],
in_proj_bias: Optional[Tensor],
bias_k: Optional[Tensor],
bias_v: Optional[Tensor],
add_zero_attn: bool,
dropout_p: float,
out_proj_weight: Tensor,
out_proj_bias: Optional[Tensor],
training: bool = True,
key_padding_mask: Optional[Tensor] = None,
need_weights: bool = True,
attn_mask: Optional[Tensor] = None,
use_separate_proj_weight: bool = False,
q_proj_weight: Optional[Tensor] = None,
k_proj_weight: Optional[Tensor] = None,
v_proj_weight: Optional[Tensor] = None,
static_k: Optional[Tensor] = None,
static_v: Optional[Tensor] = None,
average_attn_weights: bool = True,
is_causal: bool = False,
) -> Tuple[Tensor, Optional[Tensor]]: ...
${imported_hints}

View File

@ -1,4 +1,4 @@
from .data_parallel import DataParallel as DataParallel, data_parallel as data_parallel
from .data_parallel import data_parallel as data_parallel, DataParallel as DataParallel
from .distributed import DistributedDataParallel as DistributedDataParallel
from .parallel_apply import parallel_apply as parallel_apply
from .replicate import replicate as replicate

View File

@ -1,5 +1,6 @@
from typing import Union, Sequence
from ... import device
from typing import Sequence, Union
from torch import device
_device_t = Union[int, device]
_devices_t = Sequence[_device_t]

View File

@ -1,7 +1,8 @@
from typing import Any, Optional
from .common_types import _devices_t, _device_t
from torch import device, Tensor
from ..modules import Module
from ... import device, Tensor
from .common_types import _device_t, _devices_t
class DataParallel(Module):
module: Module = ...
@ -10,10 +11,19 @@ class DataParallel(Module):
output_device: _device_t = ...
src_device_obj: device = ...
def __init__(self, module: Module, device_ids: Optional[_devices_t] = ..., output_device: Optional[_device_t] = ...,
dim: int = ...) -> None: ...
def __init__(
self,
module: Module,
device_ids: Optional[_devices_t] = ...,
output_device: Optional[_device_t] = ...,
dim: int = ...,
) -> None: ...
def data_parallel(module: Module, inputs: Any, device_ids: Optional[_devices_t] = ...,
output_device: Optional[_device_t] = ..., dim: int = ...,
module_kwargs: Optional[Any] = ...) -> Tensor: ...
def data_parallel(
module: Module,
inputs: Any,
device_ids: Optional[_devices_t] = ...,
output_device: Optional[_device_t] = ...,
dim: int = ...,
module_kwargs: Optional[Any] = ...,
) -> Tensor: ...

View File

@ -1,7 +1,11 @@
from typing import Any, Optional, Sequence, List
from .common_types import _devices_t
from typing import Any, List, Optional, Sequence
from ..modules import Module
from .common_types import _devices_t
def parallel_apply(modules: Sequence[Module], inputs: Sequence[Any], kwargs_tup: Optional[Any] = ...,
devices: Optional[_devices_t] = ...) -> List[Any]: ...
def parallel_apply(
modules: Sequence[Module],
inputs: Sequence[Any],
kwargs_tup: Optional[Any] = ...,
devices: Optional[_devices_t] = ...,
) -> List[Any]: ...

View File

@ -1,7 +1,10 @@
from typing import List, Union, Sequence, TypeVar
from typing import List, Sequence, Union
from ..modules import Module
from .common_types import _devices_t
def replicate(network: Module, devices: Union[_devices_t, Sequence[_devices_t]], detach: bool = ...) -> List[
Module]: ...
def replicate(
network: Module,
devices: Union[_devices_t, Sequence[_devices_t]],
detach: bool = ...,
) -> List[Module]: ...

View File

@ -1,13 +1,17 @@
from typing import Any, Dict, List, Tuple, overload, TypeVar
from ... import Tensor
from typing import Any, Dict, List, overload, Tuple, TypeVar
from torch import Tensor
from .common_types import _device_t, _devices_t
T = TypeVar('T', Dict, List, Tuple)
T = TypeVar("T", Dict, List, Tuple)
# For some reason, 'scatter' returns a tuple when given a single Tensor input but a list otherwise.
@overload
def scatter(inputs: Tensor, target_gpus: _devices_t, dim: int = ...) -> Tuple[Tensor, ...]: ...
def scatter(
inputs: Tensor,
target_gpus: _devices_t,
dim: int = ...,
) -> Tuple[Tensor, ...]: ...
# flake8 will raise a spurious error here since `torch/__init__.pyi` has not been generated yet
# so mypy will interpret `Tensor` as `Any` since it is an import from what it believes to be an
@ -16,9 +20,11 @@ def scatter(inputs: Tensor, target_gpus: _devices_t, dim: int = ...) -> Tuple[Te
@overload
def scatter(inputs: T, target_gpus: _devices_t, dim: int = ...) -> List[T]: ...
# TODO More precise types here.
def scatter_kwargs(inputs: Any, kwargs: Any, target_gpus: _devices_t, dim: int = ...) -> Any: ...
def scatter_kwargs(
inputs: Any,
kwargs: Any,
target_gpus: _devices_t,
dim: int = ...,
) -> Any: ...
def gather(outputs: Any, target_device: _device_t, dim: int = ...) -> Any: ...

View File

@ -1,23 +1,40 @@
import torch
from .. import Tensor
from typing import Tuple, Optional
import builtins
from typing import Optional, Tuple
import torch
from torch import Tensor
class Parameter(Tensor):
def __init__(self, data: Tensor=..., requires_grad: builtins.bool=...): ...
...
def __init__(
self,
data: Tensor = ...,
requires_grad: builtins.bool = ...,
): ...
def is_lazy(param: Tensor): ...
class UninitializedParameter(Tensor):
def __init__(self, data: Tensor=..., requires_grad: builtins.bool=...): ...
def materialize(self, shape: Tuple[int, ...], device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None): ...
...
def __init__(
self,
data: Tensor = ...,
requires_grad: builtins.bool = ...,
): ...
def materialize(
self,
shape: Tuple[int, ...],
device: Optional[torch.device] = None,
dtype: Optional[torch.dtype] = None,
): ...
class UninitializedBuffer(Tensor):
def __init__(self, data: Tensor=..., requires_grad: builtins.bool=...): ...
def materialize(self, shape: Tuple[int, ...], device: Optional[torch.device] = None, dtype: Optional[torch.dtype] = None): ...
...
def __init__(
self,
data: Tensor = ...,
requires_grad: builtins.bool = ...,
): ...
def materialize(
self,
shape: Tuple[int, ...],
device: Optional[torch.device] = None,
dtype: Optional[torch.dtype] = None,
): ...

View File

@ -1,13 +1,13 @@
from typing import (
Any,
List,
NamedTuple,
Optional,
overload,
Sequence,
Tuple,
TypeVar,
Union,
NamedTuple,
overload,
)
from torch import Tensor
@ -21,68 +21,78 @@ class PackedSequence_(NamedTuple):
def bind(optional: Any, fn: Any): ...
T = TypeVar('T')
T = TypeVar("T")
class PackedSequence(PackedSequence_):
def __new__(cls, data: Tensor, batch_sizes: Optional[Tensor] = ..., sorted_indices: Optional[Tensor] = ...,
unsorted_indices: Optional[Tensor] = ...) -> PackedSequence: ...
def __new__(
cls,
data: Tensor,
batch_sizes: Optional[Tensor] = ...,
sorted_indices: Optional[Tensor] = ...,
unsorted_indices: Optional[Tensor] = ...,
) -> PackedSequence: ...
def pin_memory(self: T) -> T: ...
def cuda(self: T, *args: Any, **kwargs: Any) -> T: ...
def cpu(self: T) -> T: ...
def double(self: T) -> T: ...
def float(self: T) -> T: ...
def half(self: T) -> T: ...
def long(self: T) -> T: ...
def int(self: T) -> T: ...
def short(self: T) -> T: ...
def char(self: T) -> T: ...
def byte(self: T) -> T: ...
@overload
def to(self: T, dtype: _dtype, non_blocking: bool = False, copy: bool = False) -> T: ...
def to(
self: T,
dtype: _dtype,
non_blocking: bool = False,
copy: bool = False,
) -> T: ...
@overload
def to(self: T, device: Optional[Union[_device, str]] = None, dtype: Optional[_dtype] = None,
non_blocking: bool = False, copy: bool = False) -> T: ...
def to(
self: T,
device: Optional[Union[_device, str]] = None,
dtype: Optional[_dtype] = None,
non_blocking: bool = False,
copy: bool = False,
) -> T: ...
@overload
def to(self, other: Tensor, non_blocking: bool = False, copy: bool = False) -> T: ...
def to(
self,
other: Tensor,
non_blocking: bool = False,
copy: bool = False,
) -> T: ...
@property
def is_cuda(self) -> bool: ...
def is_pinned(self) -> bool: ...
def invert_permutation(permutation: Optional[Tensor]): ...
def pack_padded_sequence(input: Tensor, lengths: Tensor, batch_first: bool = ...,
enforce_sorted: bool = ...) -> PackedSequence: ...
def pad_packed_sequence(sequence: PackedSequence, batch_first: bool = ..., padding_value: float = ...,
total_length: Optional[int] = ...) -> Tuple[Tensor, ...]: ...
def pad_sequence(sequences: List[Tensor], batch_first: bool = False, padding_value: float = ...) -> Tensor: ...
def pack_sequence(sequences: Sequence[Tensor], enforce_sorted: bool = ...) -> PackedSequence: ...
def get_packed_sequence(data: Tensor, batch_sizes: Optional[Tensor], sorted_indices: Optional[Tensor],
unsorted_indices: Optional[Tensor]) -> PackedSequence: ...
def pack_padded_sequence(
input: Tensor,
lengths: Tensor,
batch_first: bool = ...,
enforce_sorted: bool = ...,
) -> PackedSequence: ...
def pad_packed_sequence(
sequence: PackedSequence,
batch_first: bool = ...,
padding_value: float = ...,
total_length: Optional[int] = ...,
) -> Tuple[Tensor, ...]: ...
def pad_sequence(
sequences: List[Tensor],
batch_first: bool = False,
padding_value: float = ...,
) -> Tensor: ...
def pack_sequence(
sequences: Sequence[Tensor],
enforce_sorted: bool = ...,
) -> PackedSequence: ...
def get_packed_sequence(
data: Tensor,
batch_sizes: Optional[Tensor],
sorted_indices: Optional[Tensor],
unsorted_indices: Optional[Tensor],
) -> PackedSequence: ...

View File

@ -1,5 +1,4 @@
from . import swa_utils as swa_utils
from . import lr_scheduler as lr_scheduler
from . import lr_scheduler as lr_scheduler, swa_utils as swa_utils
from .adadelta import Adadelta as Adadelta
from .adagrad import Adagrad as Adagrad
from .adam import Adam as Adam

View File

@ -1,4 +1,5 @@
from functools import partial
from torch import optim
Adam = partial(optim.Adam, foreach=True)

View File

@ -1,5 +1,11 @@
from typing import Tuple
from .optimizer import _params_t, Optimizer
class Adadelta(Optimizer):
def __init__(self, params: _params_t, lr: float=..., rho: float=..., eps: float=..., weight_decay: float=...) -> None: ...
def __init__(
self,
params: _params_t,
lr: float = ...,
rho: float = ...,
eps: float = ...,
weight_decay: float = ...,
) -> None: ...

View File

@ -1,5 +1,12 @@
from typing import Tuple
from .optimizer import _params_t, Optimizer
class Adagrad(Optimizer):
def __init__(self, params: _params_t, lr: float=..., lr_decay: float=..., weight_decay: float=..., initial_accumulator_value: float=..., eps: float=...) -> None: ...
def __init__(
self,
params: _params_t,
lr: float = ...,
lr_decay: float = ...,
weight_decay: float = ...,
initial_accumulator_value: float = ...,
eps: float = ...,
) -> None: ...

View File

@ -1,5 +1,20 @@
from typing import Tuple, Optional
from typing import Optional, Tuple
from .optimizer import _params_t, Optimizer
class Adam(Optimizer):
def __init__(self, params: _params_t, lr: float=..., betas: Tuple[float, float]=..., eps: float=..., weight_decay: float=..., amsgrad: bool = ..., *, foreach: Optional[bool] = ..., maximize: bool = ..., capturable: bool = ..., differentiable: bool = ..., fused: bool = ...) -> None: ...
def __init__(
self,
params: _params_t,
lr: float = ...,
betas: Tuple[float, float] = ...,
eps: float = ...,
weight_decay: float = ...,
amsgrad: bool = ...,
*,
foreach: Optional[bool] = ...,
maximize: bool = ...,
capturable: bool = ...,
differentiable: bool = ...,
fused: bool = ...,
) -> None: ...

View File

@ -1,5 +1,13 @@
from typing import Tuple
from .optimizer import _params_t, Optimizer
class Adamax(Optimizer):
def __init__(self, params: _params_t, lr: float=..., betas: Tuple[float, float]=..., eps: float=..., weight_decay: float=...) -> None: ...
def __init__(
self,
params: _params_t,
lr: float = ...,
betas: Tuple[float, float] = ...,
eps: float = ...,
weight_decay: float = ...,
) -> None: ...

View File

@ -1,5 +1,20 @@
from typing import Tuple, Optional
from typing import Optional, Tuple
from .optimizer import _params_t, Optimizer
class AdamW(Optimizer):
def __init__(self, params: _params_t, lr: float=..., betas: Tuple[float, float]=..., eps: float=..., weight_decay: float=..., amsgrad: bool = ..., *, foreach: Optional[bool] = ..., maximize: bool = ..., capturable: bool = ..., differentiable: bool = ..., fused: bool = ...) -> None: ...
def __init__(
self,
params: _params_t,
lr: float = ...,
betas: Tuple[float, float] = ...,
eps: float = ...,
weight_decay: float = ...,
amsgrad: bool = ...,
*,
foreach: Optional[bool] = ...,
maximize: bool = ...,
capturable: bool = ...,
differentiable: bool = ...,
fused: bool = ...,
) -> None: ...

View File

@ -1,5 +1,12 @@
from typing import Tuple
from .optimizer import _params_t, Optimizer
class ASGD(Optimizer):
def __init__(self, params: _params_t, lr: float=..., lambd: float=..., alpha: float=..., t0: float=..., weight_decay: float=...) -> None: ...
def __init__(
self,
params: _params_t,
lr: float = ...,
lambd: float = ...,
alpha: float = ...,
t0: float = ...,
weight_decay: float = ...,
) -> None: ...

View File

@ -1,5 +1,16 @@
from typing import Tuple, Optional
from typing import Optional
from .optimizer import _params_t, Optimizer
class LBFGS(Optimizer):
def __init__(self, params: _params_t, lr: float=..., max_iter: int=..., max_eval: Optional[int]=..., tolerance_grad: float=..., tolerance_change: float=..., history_size: int=..., line_search_fn: Optional[str]=...) -> None: ...
def __init__(
self,
params: _params_t,
lr: float = ...,
max_iter: int = ...,
max_eval: Optional[int] = ...,
tolerance_grad: float = ...,
tolerance_change: float = ...,
history_size: int = ...,
line_search_fn: Optional[str] = ...,
) -> None: ...
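
Unlike the other optimizers stubbed here, `LBFGS.step()` expects a closure that re-evaluates the loss; a small illustrative sketch on a toy quadratic (not from this PR):

```python
import torch

x = torch.nn.Parameter(torch.tensor([2.0, -3.0]))
opt = torch.optim.LBFGS([x], lr=1.0, max_iter=20)

def closure():
    # LBFGS may evaluate the objective several times per outer step,
    # so the forward/backward pass lives in a closure.
    opt.zero_grad()
    loss = (x ** 2).sum()
    loss.backward()
    return loss

opt.step(closure)
print(x)  # near zero after one outer step on this quadratic
```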

View File

@ -1,4 +1,5 @@
from typing import Iterable, Any, Optional, Callable, Union, List, Dict
from typing import Any, Callable, Dict, Iterable, List, Optional, Union
from .optimizer import Optimizer
class LRScheduler:
@ -6,60 +7,131 @@ class LRScheduler:
base_lrs: List[float] = ...
last_epoch: int = ...
verbose: bool = ...
def __init__(self, optimizer: Optimizer, last_epoch: int = ..., verbose: bool = ...) -> None: ...
def __init__(
self,
optimizer: Optimizer,
last_epoch: int = ...,
verbose: bool = ...,
) -> None: ...
def state_dict(self) -> Dict[str, Any]: ...
def load_state_dict(self, state_dict: Dict[str, Any]) -> None: ...
def get_last_lr(self) -> List[float]: ...
def get_lr(self) -> float: ...
def step(self, epoch: Optional[int] = ...) -> None: ...
def print_lr(self, is_verbose: bool, group: Dict[str, Any], lr: float, epoch: Optional[int] = ...) -> None: ...
def print_lr(
self,
is_verbose: bool,
group: Dict[str, Any],
lr: float,
epoch: Optional[int] = ...,
) -> None: ...
class _LRScheduler(LRScheduler):
...
class _LRScheduler(LRScheduler): ...
class LambdaLR(LRScheduler):
lr_lambdas: List[Callable[[int], float]] = ...
def __init__(self, optimizer: Optimizer, lr_lambda: Union[Callable[[int], float], List[Callable[[int], float]]], last_epoch: int = ..., verbose: bool = ...) -> None: ...
def __init__(
self,
optimizer: Optimizer,
lr_lambda: Union[Callable[[int], float], List[Callable[[int], float]]],
last_epoch: int = ...,
verbose: bool = ...,
) -> None: ...
class MultiplicativeLR(LRScheduler):
lr_lambdas: List[Callable[[int], float]] = ...
def __init__(self, optimizer: Optimizer, lr_lambda: Union[Callable[[int], float], List[Callable[[int], float]]], last_epoch: int = ..., verbose: bool = ...) -> None: ...
def __init__(
self,
optimizer: Optimizer,
lr_lambda: Union[Callable[[int], float], List[Callable[[int], float]]],
last_epoch: int = ...,
verbose: bool = ...,
) -> None: ...
class StepLR(LRScheduler):
step_size: int = ...
gamma: float = ...
def __init__(self, optimizer: Optimizer, step_size: int, gamma: float = ..., last_epoch: int = ..., verbose: bool = ...) -> None: ...
def __init__(
self,
optimizer: Optimizer,
step_size: int,
gamma: float = ...,
last_epoch: int = ...,
verbose: bool = ...,
) -> None: ...
class MultiStepLR(LRScheduler):
milestones: Iterable[int] = ...
gamma: float = ...
def __init__(self, optimizer: Optimizer, milestones: Iterable[int], gamma: float = ..., last_epoch: int = ..., verbose: bool = ...) -> None: ...
def __init__(
self,
optimizer: Optimizer,
milestones: Iterable[int],
gamma: float = ...,
last_epoch: int = ...,
verbose: bool = ...,
) -> None: ...
class ConstantLR(LRScheduler):
factor: float = ...
total_iters: int = ...
def __init__(self, optimizer: Optimizer, factor: float=..., total_iters: int=..., last_epoch: int=..., verbose: bool = ...) -> None: ...
def __init__(
self,
optimizer: Optimizer,
factor: float = ...,
total_iters: int = ...,
last_epoch: int = ...,
verbose: bool = ...,
) -> None: ...
class LinearLR(LRScheduler):
start_factor: float = ...
end_factor: float = ...
total_iters: int = ...
def __init__(self, optimizer: Optimizer, start_factor: float=..., end_factor: float= ..., total_iters: int= ..., last_epoch: int= ..., verbose: bool = ...) -> None: ...
def __init__(
self,
optimizer: Optimizer,
start_factor: float = ...,
end_factor: float = ...,
total_iters: int = ...,
last_epoch: int = ...,
verbose: bool = ...,
) -> None: ...
class ExponentialLR(LRScheduler):
gamma: float = ...
def __init__(self, optimizer: Optimizer, gamma: float, last_epoch: int = ..., verbose: bool = ...) -> None: ...
def __init__(
self,
optimizer: Optimizer,
gamma: float,
last_epoch: int = ...,
verbose: bool = ...,
) -> None: ...
class ChainedScheduler(LRScheduler):
def __init__(self, schedulers: List[LRScheduler]) -> None: ...
class SequentialLR(LRScheduler):
def __init__(self, optimizer: Optimizer, schedulers: List[LRScheduler], milestones: List[int], last_epoch: int=..., verbose: bool=...) -> None: ...
def __init__(
self,
optimizer: Optimizer,
schedulers: List[LRScheduler],
milestones: List[int],
last_epoch: int = ...,
verbose: bool = ...,
) -> None: ...
class CosineAnnealingLR(LRScheduler):
T_max: int = ...
eta_min: float = ...
def __init__(self, optimizer: Optimizer, T_max: int, eta_min: float = ..., last_epoch: int = ..., verbose: bool = ...) -> None: ...
def __init__(
self,
optimizer: Optimizer,
T_max: int,
eta_min: float = ...,
last_epoch: int = ...,
verbose: bool = ...,
) -> None: ...
class ReduceLROnPlateau:
factor: float = ...
@ -77,7 +149,19 @@ class ReduceLROnPlateau:
mode_worse: Optional[float] = ...
eps: float = ...
last_epoch: int = ...
def __init__(self, optimizer: Optimizer, mode: str = ..., factor: float = ..., patience: int = ..., threshold: float = ..., threshold_mode: str = ..., cooldown: int = ..., min_lr: Union[List[float], float] = ..., eps: float = ..., verbose: bool = ...) -> None: ...
def __init__(
self,
optimizer: Optimizer,
mode: str = ...,
factor: float = ...,
patience: int = ...,
threshold: float = ...,
threshold_mode: str = ...,
cooldown: int = ...,
min_lr: Union[List[float], float] = ...,
eps: float = ...,
verbose: bool = ...,
) -> None: ...
def step(self, metrics: Any, epoch: Optional[int] = ...) -> None: ...
@property
def in_cooldown(self) -> bool: ...
@ -95,7 +179,23 @@ class CyclicLR(LRScheduler):
cycle_momentum: bool = ...
base_momentums: List[float] = ...
max_momentums: List[float] = ...
def __init__(self, optimizer: Optimizer, base_lr: Union[float, List[float]], max_lr: Union[float, List[float]], step_size_up: int = ..., step_size_down: Optional[int] = ..., mode: str = ..., gamma: float = ..., scale_fn: Optional[Callable[[float], float]] = ..., scale_mode: str = ..., cycle_momentum: bool = ..., base_momentum: float = ..., max_momentum: float = ..., last_epoch: int = ..., verbose: bool = ...) -> None: ...
def __init__(
self,
optimizer: Optimizer,
base_lr: Union[float, List[float]],
max_lr: Union[float, List[float]],
step_size_up: int = ...,
step_size_down: Optional[int] = ...,
mode: str = ...,
gamma: float = ...,
scale_fn: Optional[Callable[[float], float]] = ...,
scale_mode: str = ...,
cycle_momentum: bool = ...,
base_momentum: float = ...,
max_momentum: float = ...,
last_epoch: int = ...,
verbose: bool = ...,
) -> None: ...
def scale_fn(self, x: Any) -> float: ...
class CosineAnnealingWarmRestarts(LRScheduler):
@ -104,7 +204,15 @@ class CosineAnnealingWarmRestarts(LRScheduler):
T_mult: Optional[int] = ...
eta_min: Optional[float] = ...
T_cur: Any = ...
def __init__(self, optimizer: Optimizer, T_0: int, T_mult: int = ..., eta_min: float = ..., last_epoch: int = ..., verbose: bool = ...) -> None: ...
def __init__(
self,
optimizer: Optimizer,
T_0: int,
T_mult: int = ...,
eta_min: float = ...,
last_epoch: int = ...,
verbose: bool = ...,
) -> None: ...
def step(self, epoch: Optional[Any] = ...): ...
class OneCycleLR(LRScheduler):
@ -112,9 +220,33 @@ class OneCycleLR(LRScheduler):
anneal_func: Callable[[float, float, float], float] = ...
cycle_momentum: bool = ...
use_beta1: bool = ...
def __init__(self, optimizer: Optimizer, max_lr: Union[float, List[float]], total_steps: int = ..., epochs: int = ..., steps_per_epoch: int = ..., pct_start: float = ..., anneal_strategy: str = ..., cycle_momentum: bool = ..., base_momentum: Union[float, List[float]] = ..., max_momentum: Union[float, List[float]] = ..., div_factor: float = ..., final_div_factor: float = ..., three_phase: bool = ..., last_epoch: int = ..., verbose: bool = ...) -> None: ...
def __init__(
self,
optimizer: Optimizer,
max_lr: Union[float, List[float]],
total_steps: int = ...,
epochs: int = ...,
steps_per_epoch: int = ...,
pct_start: float = ...,
anneal_strategy: str = ...,
cycle_momentum: bool = ...,
base_momentum: Union[float, List[float]] = ...,
max_momentum: Union[float, List[float]] = ...,
div_factor: float = ...,
final_div_factor: float = ...,
three_phase: bool = ...,
last_epoch: int = ...,
verbose: bool = ...,
) -> None: ...
class PolynomialLR(LRScheduler):
total_iters: int = ...
power: float = ...
def __init__(self, optimizer: Optimizer, total_iters: int = ..., power: float = ..., last_epoch: int = ..., verbose: bool = ...) -> None: ...
def __init__(
self,
optimizer: Optimizer,
total_iters: int = ...,
power: float = ...,
last_epoch: int = ...,
verbose: bool = ...,
) -> None: ...
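
As a usage sketch for the scheduler interface above (the model, learning rates, and epoch count are placeholder assumptions):

```python
import torch
from torch.optim import SGD
from torch.optim.lr_scheduler import StepLR

model = torch.nn.Linear(10, 2)
optimizer = SGD(model.parameters(), lr=0.1, momentum=0.9)
scheduler = StepLR(optimizer, step_size=2, gamma=0.5)  # halve the lr every 2 epochs

for epoch in range(6):
    # ... one epoch of training: optimizer.step() per batch in real code ...
    model(torch.randn(8, 10)).sum().backward()
    optimizer.step()
    optimizer.zero_grad()
    scheduler.step()  # advance the schedule once per epoch
    print(epoch, scheduler.get_last_lr())
```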

View File

@ -1,5 +1,14 @@
from typing import Tuple
from .optimizer import _params_t, Optimizer
class NAdam(Optimizer):
def __init__(self, params: _params_t, lr: float=..., betas: Tuple[float, float]=..., eps: float=..., weight_decay: float=..., momentum_decay: float=...) -> None: ...
def __init__(
self,
params: _params_t,
lr: float = ...,
betas: Tuple[float, float] = ...,
eps: float = ...,
weight_decay: float = ...,
momentum_decay: float = ...,
) -> None: ...

View File

@ -1,11 +1,11 @@
from typing import Iterable, Union, Callable, Optional, List, Dict, Any
from .. import Tensor
from typing import Any, Callable, Dict, Iterable, List, Optional, Union
from torch import Tensor
from torch.utils.hooks import RemovableHandle
_params_t = Union[Iterable[Tensor], Iterable[Dict[str, Any]]]
def register_optimizer_step_pre_hook(hook: Callable[..., None]) -> RemovableHandle: ...
def register_optimizer_step_post_hook(hook: Callable[..., None]) -> RemovableHandle: ...
class Optimizer:
@ -19,6 +19,6 @@ class Optimizer:
def register_step_post_hook(self, hook: Callable[..., None]) -> RemovableHandle: ...
def state_dict(self) -> Dict[str, Any]: ...
def load_state_dict(self, state_dict: Dict[str, Any]) -> None: ...
def zero_grad(self, set_to_none: bool=...) -> None: ...
def step(self, closure: Optional[Callable[[], float]]=...) -> Optional[float]: ...
def zero_grad(self, set_to_none: bool = ...) -> None: ...
def step(self, closure: Optional[Callable[[], float]] = ...) -> Optional[float]: ...
def add_param_group(self, param_group: Dict[str, Any]) -> None: ...
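
A hedged sketch of the module-level step hooks declared in this stub; the hook body and model are invented for illustration:

```python
import torch
from torch.optim.optimizer import register_optimizer_step_pre_hook

def log_step(optimizer, args, kwargs):
    # Runs before every optimizer.step() call, for every optimizer instance.
    print(f"stepping {type(optimizer).__name__} "
          f"({len(optimizer.param_groups)} param group(s))")

handle = register_optimizer_step_pre_hook(log_step)

model = torch.nn.Linear(4, 4)
opt = torch.optim.SGD(model.parameters(), lr=0.01)
model(torch.randn(2, 4)).sum().backward()
opt.step()        # triggers log_step first
handle.remove()   # unregister when no longer needed
```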

View File

@ -1,5 +1,13 @@
from typing import Tuple
from .optimizer import _params_t, Optimizer
class RAdam(Optimizer):
def __init__(self, params: _params_t, lr: float=..., betas: Tuple[float, float]=..., eps: float=..., weight_decay: float=...) -> None: ...
def __init__(
self,
params: _params_t,
lr: float = ...,
betas: Tuple[float, float] = ...,
eps: float = ...,
weight_decay: float = ...,
) -> None: ...

View File

@ -1,5 +1,13 @@
from typing import Tuple
from .optimizer import _params_t, Optimizer
class RMSprop(Optimizer):
def __init__(self, params: _params_t, lr: float=..., alpha: float=..., eps: float=..., weight_decay: float=..., momentum: float=..., centered: bool=...) -> None: ...
def __init__(
self,
params: _params_t,
lr: float = ...,
alpha: float = ...,
eps: float = ...,
weight_decay: float = ...,
momentum: float = ...,
centered: bool = ...,
) -> None: ...

View File

@ -1,5 +1,12 @@
from typing import Tuple
from .optimizer import _params_t, Optimizer
class Rprop(Optimizer):
def __init__(self, params: _params_t, lr: float=..., etas: Tuple[float, float]=..., step_sizes: Tuple[float, float]=...) -> None: ...
def __init__(
self,
params: _params_t,
lr: float = ...,
etas: Tuple[float, float] = ...,
step_sizes: Tuple[float, float] = ...,
) -> None: ...

View File

@ -1,4 +1,12 @@
from .optimizer import _params_t, Optimizer
class SGD(Optimizer):
def __init__(self, params: _params_t, lr: float, momentum: float=..., dampening: float=..., weight_decay:float=..., nesterov:bool=...) -> None: ...
def __init__(
self,
params: _params_t,
lr: float,
momentum: float = ...,
dampening: float = ...,
weight_decay: float = ...,
nesterov: bool = ...,
) -> None: ...

View File

@ -1,6 +1,12 @@
from typing import Tuple
from .optimizer import _params_t, Optimizer
class SparseAdam(Optimizer):
def __init__(self, params: _params_t, lr: float=..., betas: Tuple[float, float]=..., eps: float=...) -> None: ...
def __init__(
self,
params: _params_t,
lr: float = ...,
betas: Tuple[float, float] = ...,
eps: float = ...,
) -> None: ...

View File

@ -1,17 +1,31 @@
from .optimizer import Optimizer
from ..nn.modules import Module
from typing import Any, Callable, Iterable, Union
from torch import device, Tensor
from torch.nn.modules import Module
from .lr_scheduler import _LRScheduler
from .. import device, Tensor
from typing import Iterable, Any, Optional, Callable, Union, List
from .optimizer import Optimizer
class AveragedModel(Module):
def __init__(self, model: Module, device: Union[int, device]=...,
avg_fn: Callable[[Tensor, Tensor, int], Tensor]=...) -> None:...
def __init__(
self,
model: Module,
device: Union[int, device] = ...,
avg_fn: Callable[[Tensor, Tensor, int], Tensor] = ...,
) -> None: ...
def update_parameters(self, model: Module) -> None: ...
def update_parameters(self, model: Module) -> None:...
def update_bn(loader: Iterable[Any], model: Module, device: Union[int, device]=...) -> None:...
def update_bn(
loader: Iterable[Any],
model: Module,
device: Union[int, device] = ...,
) -> None: ...
class SWALR(_LRScheduler):
def __init__(self, optimizer: Optimizer, swa_lr: float, anneal_epochs: int,
anneal_strategy: str, last_epoch: int=...) -> None:...
def __init__(
self,
optimizer: Optimizer,
swa_lr: float,
anneal_epochs: int,
anneal_strategy: str,
last_epoch: int = ...,
) -> None: ...
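
A condensed, illustrative sketch of how these `swa_utils` interfaces fit together; the model, toy loader, and hyperparameters are assumptions rather than anything prescribed by the stub:

```python
import torch
from torch.optim.swa_utils import AveragedModel, SWALR, update_bn

model = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.BatchNorm1d(8), torch.nn.ReLU())
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
swa_model = AveragedModel(model)
swa_scheduler = SWALR(optimizer, swa_lr=0.05, anneal_epochs=3, anneal_strategy="cos")

loader = [(torch.randn(16, 8),) for _ in range(4)]  # stand-in for a real DataLoader
for epoch in range(5):
    for (batch,) in loader:
        optimizer.zero_grad()
        model(batch).sum().backward()
        optimizer.step()
    swa_model.update_parameters(model)  # fold current weights into the running average
    swa_scheduler.step()

update_bn(loader, swa_model)  # recompute BatchNorm statistics for the averaged model
```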

View File

@ -3,16 +3,16 @@
# Note that, for mypy, .pyi file takes precedence over .py file, such that we must define the interface for other
# classes/objects here, even though we are not injecting extra code into them at the moment.
from torch.utils.data.datapipes._typing import _DataPipeMeta, _IterDataPipeMeta
from typing import Any, Callable, Dict, Generic, Iterator, Optional, TypeVar
from torch.utils.data import Dataset, default_collate, IterableDataset
from torch.utils.data.datapipes._hook_iterator import _SnapshotState
from typing import Any, Callable, Dict, Generic, Iterator, List, Optional, TypeVar, Union
from torch.utils.data import Dataset, IterableDataset, default_collate
from torch.utils.data.datapipes._typing import _DataPipeMeta, _IterDataPipeMeta
T_co = TypeVar('T_co', covariant=True)
T = TypeVar('T')
T_co = TypeVar("T_co", covariant=True)
T = TypeVar("T")
UNTRACABLE_DATAFRAME_PIPES: Any
class MapDataPipe(Dataset[T_co], metaclass=_DataPipeMeta):
functions: Dict[str, Callable] = ...
reduce_ex_hook: Optional[Callable] = ...
@ -23,7 +23,11 @@ class MapDataPipe(Dataset[T_co], metaclass=_DataPipeMeta):
@classmethod
def register_function(cls, function_name: Any, function: Any) -> None: ...
@classmethod
def register_datapipe_as_function(cls, function_name: Any, cls_to_register: Any): ...
def register_datapipe_as_function(
cls,
function_name: Any,
cls_to_register: Any,
): ...
def __getstate__(self): ...
def __reduce_ex__(self, *args: Any, **kwargs: Any): ...
@classmethod
@ -32,7 +36,6 @@ class MapDataPipe(Dataset[T_co], metaclass=_DataPipeMeta):
def set_reduce_ex_hook(cls, hook_fn: Any) -> None: ...
${MapDataPipeMethods}
class IterDataPipe(IterableDataset[T_co], metaclass=_IterDataPipeMeta):
functions: Dict[str, Callable] = ...
reduce_ex_hook: Optional[Callable] = ...
@ -46,7 +49,12 @@ class IterDataPipe(IterableDataset[T_co], metaclass=_IterDataPipeMeta):
@classmethod
def register_function(cls, function_name: Any, function: Any) -> None: ...
@classmethod
def register_datapipe_as_function(cls, function_name: Any, cls_to_register: Any, enable_df_api_tracing: bool = ...): ...
def register_datapipe_as_function(
cls,
function_name: Any,
cls_to_register: Any,
enable_df_api_tracing: bool = ...,
): ...
def __getstate__(self): ...
def __reduce_ex__(self, *args: Any, **kwargs: Any): ...
@classmethod
@ -55,39 +63,29 @@ class IterDataPipe(IterableDataset[T_co], metaclass=_IterDataPipeMeta):
def set_reduce_ex_hook(cls, hook_fn: Any) -> None: ...
${IterDataPipeMethods}
class DFIterDataPipe(IterDataPipe):
def _is_dfpipe(self): ...
class _DataPipeSerializationWrapper:
def __init__(self, datapipe): ...
def __getstate__(self): ...
def __setstate__(self, state): ...
def __len__(self): ...
class _IterDataPipeSerializationWrapper(_DataPipeSerializationWrapper, IterDataPipe):
def __iter__(self): ...
class _MapDataPipeSerializationWrapper(_DataPipeSerializationWrapper, MapDataPipe):
def __getitem__(self, idx): ...
class DataChunk(list, Generic[T]):
def __init__(self, items):
super().__init__(items)
self.items = items
def as_str(self, indent=''):
def as_str(self, indent: str = "") -> str:
res = indent + "[" + ", ".join(str(i) for i in iter(self)) + "]"
return res
def __iter__(self) -> Iterator[T]:
for i in super().__iter__():
yield i
yield from super().__iter__()
def raw_iterator(self) -> T: # type: ignore[misc]
for i in self.items:
yield i
yield from self.items
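
For illustration, a tiny sketch of the `DataChunk` behavior defined above (values are arbitrary):

```python
from torch.utils.data.datapipes.datapipe import DataChunk

chunk = DataChunk([1, 2, 3])
print(list(chunk))                 # [1, 2, 3] — behaves like a list
print(chunk.as_str())              # "[1, 2, 3]"
print(list(chunk.raw_iterator()))  # [1, 2, 3] — iterates the stored items directly
```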