PEP585 update - torch/nn torch/optim torch/package torch/profiler torch/serialization torch/sparse torch/xpu (#145175)

See #145101 for details.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/145175
Approved by: https://github.com/bobrenjc93
Aaron Orenstein
2025-01-20 20:13:01 -08:00
committed by PyTorch MergeBot
parent 803017f3cb
commit 0afd335174
37 changed files with 495 additions and 546 deletions
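For context, the change pattern applied across these files is the PEP 585 migration: on Python 3.9+ the builtin container types are subscriptable, so annotations such as typing.List[Tensor] become list[Tensor] and the typing.List import can be dropped. A minimal before/after sketch follows; the function and parameter names are illustrative, not taken from this patch.

from typing import Optional

import torch
from torch import Tensor


# Old spelling (PEP 484): needs `from typing import List`.
#     def scale_all(grads: List[Tensor], factor: Optional[float] = None) -> List[Tensor]: ...
#
# New spelling (PEP 585, Python 3.9+): the builtin `list` is subscriptable directly,
# so the `typing.List` import can be removed.
def scale_all(grads: list[Tensor], factor: Optional[float] = None) -> list[Tensor]:
    # Illustrative helper, not code from this patch.
    if factor is None:
        return list(grads)
    return [g * factor for g in grads]


if __name__ == "__main__":
    print(scale_all([torch.ones(2), torch.zeros(2)], factor=0.5))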


@@ -1,5 +1,5 @@
 # mypy: allow-untyped-defs
-from typing import cast, List, Optional, Union
+from typing import cast, Optional, Union
 
 import torch
 from torch import Tensor
@@ -157,10 +157,10 @@ class Adagrad(Optimizer):
                 loss = closure()
 
         for group in self.param_groups:
-            params_with_grad: List[Tensor] = []
-            grads: List[Tensor] = []
-            state_sums: List[Tensor] = []
-            state_steps: List[Tensor] = []
+            params_with_grad: list[Tensor] = []
+            grads: list[Tensor] = []
+            state_sums: list[Tensor] = []
+            state_steps: list[Tensor] = []
 
             has_sparse_grad, has_complex = self._init_group(
                 group, params_with_grad, grads, state_sums, state_steps
@@ -240,10 +240,10 @@ Adagrad.__doc__ = (
 
 
 def adagrad(
-    params: List[Tensor],
-    grads: List[Tensor],
-    state_sums: List[Tensor],
-    state_steps: List[Tensor],
+    params: list[Tensor],
+    grads: list[Tensor],
+    state_sums: list[Tensor],
+    state_steps: list[Tensor],
     fused: Optional[bool] = None,
     grad_scale: Optional[Tensor] = None,
     found_inf: Optional[Tensor] = None,
@@ -319,10 +319,10 @@ def _make_sparse(grad, grad_indices, values):
 
 
 def _single_tensor_adagrad(
-    params: List[Tensor],
-    grads: List[Tensor],
-    state_sums: List[Tensor],
-    state_steps: List[Tensor],
+    params: list[Tensor],
+    grads: list[Tensor],
+    state_sums: list[Tensor],
+    state_steps: list[Tensor],
     grad_scale: Optional[Tensor],
     found_inf: Optional[Tensor],
     *,
@@ -380,10 +380,10 @@ def _single_tensor_adagrad(
 
 
 def _multi_tensor_adagrad(
-    params: List[Tensor],
-    grads: List[Tensor],
-    state_sums: List[Tensor],
-    state_steps: List[Tensor],
+    params: list[Tensor],
+    grads: list[Tensor],
+    state_sums: list[Tensor],
+    state_steps: list[Tensor],
     grad_scale: Optional[Tensor],
     found_inf: Optional[Tensor],
     *,
@@ -412,10 +412,10 @@ def _multi_tensor_adagrad(
         device_state_sums_,
         device_state_steps_,
     ), _ in grouped_tensorlists.values():
-        device_params = cast(List[Tensor], device_params_)
-        device_grads = cast(List[Tensor], device_grads_)
-        device_state_sums = cast(List[Tensor], device_state_sums_)
-        device_state_steps = cast(List[Tensor], device_state_steps_)
+        device_params = cast(list[Tensor], device_params_)
+        device_grads = cast(list[Tensor], device_grads_)
+        device_state_sums = cast(list[Tensor], device_state_sums_)
+        device_state_steps = cast(list[Tensor], device_state_steps_)
 
         device_has_sparse_grad = has_sparse_grad and any(
             grad.is_sparse for grad in device_grads
@@ -487,10 +487,10 @@ def _multi_tensor_adagrad(
 
 
 def _fused_adagrad(
-    params: List[Tensor],
-    grads: List[Tensor],
-    state_sums: List[Tensor],
-    state_steps: List[Tensor],
+    params: list[Tensor],
+    grads: list[Tensor],
+    state_sums: list[Tensor],
+    state_steps: list[Tensor],
     grad_scale: Optional[Tensor],
     found_inf: Optional[Tensor],
     *,
@@ -530,10 +530,10 @@ def _fused_adagrad(
         ),
         _,
     ) in grouped_tensors.items():
-        device_params = cast(List[Tensor], device_params_)
-        device_grads = cast(List[Tensor], device_grads_)
-        device_state_sums = cast(List[Tensor], device_state_sums_)
-        device_state_steps = cast(List[Tensor], device_state_steps_)
+        device_params = cast(list[Tensor], device_params_)
+        device_grads = cast(list[Tensor], device_grads_)
+        device_state_sums = cast(list[Tensor], device_state_sums_)
+        device_state_steps = cast(list[Tensor], device_state_steps_)
 
         device_grad_scale, device_found_inf = None, None
         if grad_scale is not None and grad_scale_dict is not None:
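One note on the cast(...) lines in the two hunks above: typing.cast performs no conversion or checking at runtime; it simply returns its argument, so replacing List[Tensor] with list[Tensor] inside the cast only changes what the type checker sees. A tiny sketch, with illustrative variable contents rather than code from the patch:

from typing import cast

import torch
from torch import Tensor

device_params_ = [torch.zeros(2), torch.ones(2)]

# cast() only informs static type checkers; at runtime it returns its argument
# unchanged, so the PEP 585 spelling behaves identically to typing.List here.
device_params = cast(list[Tensor], device_params_)
assert device_params is device_params_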