[1/N] Add strict parameter to Python zip calls (#165531)

Add an explicit `strict=True`/`strict=False` argument to `zip()` calls in the test utilities. `strict=True` is passed wherever possible; call sites whose iterables may legitimately differ in length keep the default truncating behaviour via an explicit `strict=False`.
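For context, here is a minimal standalone sketch (not taken from any of the changed files) of what the flag does on unequal-length inputs; `zip(..., strict=...)` requires Python 3.10 or newer:

```python
a = [1, 2, 3]
b = ["x", "y"]  # one element shorter than `a`

# Default behaviour (equivalent to strict=False): silently stops at the
# shorter input, which can hide bugs where two sequences drift out of sync.
print(list(zip(a, b, strict=False)))  # [(1, 'x'), (2, 'y')]

# strict=True: raises ValueError as soon as one input is exhausted early.
try:
    list(zip(a, b, strict=True))
except ValueError as err:
    print(err)  # e.g. "zip() argument 2 is shorter than argument 1"
```

Spelling the flag out at each call site documents whether a length mismatch is expected at that point, which is why `strict=True` is preferred wherever the lengths are known to match.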

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165531
Approved by: https://github.com/Skylion007
Author: Yuanyuan Chen
Date: 2025-10-18 05:26:29 +00:00
Committed by: PyTorch MergeBot
Parent: 0f0b4bf029
Commit: aaac8cb0f5
24 changed files with 111 additions and 74 deletions


@ -92,7 +92,9 @@ def default_tolerances(
f"Expected a torch.Tensor or a torch.dtype, but got {type(input)} instead."
)
dtype_precisions = dtype_precisions or _DTYPE_PRECISIONS
rtols, atols = zip(*[dtype_precisions.get(dtype, (0.0, 0.0)) for dtype in dtypes])
rtols, atols = zip(
*[dtype_precisions.get(dtype, (0.0, 0.0)) for dtype in dtypes], strict=True
)
return max(rtols), max(atols)


@ -437,7 +437,7 @@ class TestAutocast(TestCase):
if isinstance(first, torch.Tensor):
return torch.equal(first, second)
elif isinstance(first, collections.abc.Iterable):
return all(compare(f, s) for f, s in zip(first, second))
return all(compare(f, s) for f, s in zip(first, second, strict=False))
else:
return first == second


@ -252,7 +252,7 @@ def tf32_on_and_off(tf32_precision=1e-5, *, only_if=True):
@functools.wraps(f)
def wrapped(*args, **kwargs):
kwargs.update(zip(arg_names, args))
kwargs.update(zip(arg_names, args, strict=False))
cond = torch.cuda.is_tf32_supported() and only_if
if 'device' in kwargs:
cond = cond and (torch.device(kwargs['device']).type == 'cuda')
@ -325,7 +325,7 @@ def _create_scaling_models_optimizers(device="cuda", optimizer_ctor=torch.optim.
mod_control = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 8)).to(device=device)
mod_scaling = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 8)).to(device=device)
with torch.no_grad():
for c, s in zip(mod_control.parameters(), mod_scaling.parameters()):
for c, s in zip(mod_control.parameters(), mod_scaling.parameters(), strict=True):
s.copy_(c)
kwargs = {"lr": 1.0}


@ -1153,7 +1153,7 @@ def run_subtests(
subtest_config_values: list[list[Any]] = [item[1] for item in subtest_config_items]
for values in itertools.product(*subtest_config_values):
# Map keyword to chosen value
subtest_kwargs = dict(zip(subtest_config_keys, values))
subtest_kwargs = dict(zip(subtest_config_keys, values, strict=True))
with cls_inst.subTest(**subtest_kwargs):
torch._dynamo.reset()
test_fn(*test_args, **test_kwargs, **subtest_kwargs)


@ -157,7 +157,7 @@ def _assert_module_states(
assert rank0_states is not None # mypy
for state in olist[1:]:
assert state is not None # mypy
for (_, p1), (_, p2) in zip(rank0_states, state):
for (_, p1), (_, p2) in zip(rank0_states, state, strict=True):
assert_fn(p1, p2)
@ -1135,7 +1135,9 @@ def check_sharded_parity(
prefixes_to_ignore: tuple[str, ...] = (),
):
for (replicated_name, replicated_param), (sharded_name, sharded_param) in zip(
replicated_module.named_parameters(), sharded_module.named_parameters()
replicated_module.named_parameters(),
sharded_module.named_parameters(),
strict=True,
):
clean_sharded_name = sharded_name
for prefix in prefixes_to_ignore:


@ -135,7 +135,7 @@ def check_against_reference(self, func, reference_func, output_func, args, kwarg
self.assertEqual(outputs, outputs_test)
self.assertEqual(grads, grads_test)
for g2, g2_test in zip(grads2, grads2_test):
for g2, g2_test in zip(grads2, grads2_test, strict=True):
if g2 is None and g2_test is None:
continue
self.assertEqual(g2, g2_test, atol=5e-4, rtol=1e-4)


@ -449,7 +449,7 @@ def sample_inputs_batch_norm(op_info, device, dtype, requires_grad, **kwargs):
biases = [None, channels, None]
is_training = [True, False, False]
for weight, bias, training in zip(weights, biases, is_training):
for weight, bias, training in zip(weights, biases, is_training, strict=True):
yield SampleInput(
make_arg(input_shape),
args=(
@ -3631,7 +3631,7 @@ class _TestParamsMaxPoolBase:
def _gen_kwargs(self):
keys = self.kwargs.keys()
for values in product(*self.kwargs.values()):
yield dict(zip(keys, values))
yield dict(zip(keys, values, strict=True))
def gen_input_params(self):
yield from product(self._gen_shape(), self._gen_kwargs())
@ -4400,7 +4400,7 @@ def sample_inputs_instance_norm(opinfo, device, dtype, requires_grad, **kwargs):
weights = [channels, None]
biases = [None, None]
for weight_channels, bias_channels in zip(weights, biases):
for weight_channels, bias_channels in zip(weights, biases, strict=True):
running_mean = make_arg_without_requires_grad(channels, low=0)
running_var = make_arg_without_requires_grad(channels, low=0)
yield SampleInput(
@ -11625,7 +11625,7 @@ def reference_searchsorted(sorted_sequence, boundary, out_int32=False, right=Fal
split_sorter = [sorter[i] if (sorter is not None) else None for i in splits]
split_ret = [np.searchsorted(s_seq, b, side=side, sorter=s_sort)
for (s_seq, b, s_sort) in zip(split_sequence, split_boundary, split_sorter)]
for (s_seq, b, s_sort) in zip(split_sequence, split_boundary, split_sorter, strict=True)]
split_ret = [i.astype(np.int32) for i in split_ret] if out_int32 else split_ret
return np.stack(split_ret).reshape(orig_shape)


@ -91,7 +91,7 @@ def reduced_f32_on_and_off(bf32_precision=1e-2, tf32_precision=1e-5):
@functools.wraps(f)
def wrapped(*args, **kwargs):
kwargs.update(zip(arg_names, args))
kwargs.update(zip(arg_names, args, strict=False))
cond = True
if "device" in kwargs:
cond = cond and (torch.device(kwargs["device"]).type == "cpu")


@ -1413,7 +1413,7 @@ def module_inputs_torch_nn_L1Loss(module_info, device, dtype, requires_grad, tra
forward_input=FunctionInput(make_input((2, 3, 4)),
make_input((2, 3, 4))),
reference_fn=lambda m, p, i, t: 1. / i.numel() * sum((a - b).abs().sum()
for a, b in zip(i, t))),
for a, b in zip(i, t, strict=True))),
ModuleInput(constructor_input=FunctionInput(),
forward_input=FunctionInput(make_input(()), make_input(())),
reference_fn=lambda m, p, i, t: 1. / i.numel() * (i - t).abs().sum(),


@ -2633,7 +2633,7 @@ def get_new_module_tests():
# add conv padding mode tests:
for padding_mode, cpp_padding_mode in zip(
['reflect', 'circular', 'replicate', 'zeros'],
['torch::kReflect', 'torch::kCircular', 'torch::kReplicate', 'torch::kZeros']):
['torch::kReflect', 'torch::kCircular', 'torch::kReplicate', 'torch::kZeros'], strict=True):
# conv signature:
# in_channels, out_channels, kernel_size, stride=1,
# padding=0, dilation=1, groups=1,
@ -2848,8 +2848,8 @@ def nllloss_reference(input, target, weight=None, ignore_index=-100,
return (result, norm)
losses_and_weights = [nll_loss_helper(i, t, weight, ignore_index)
for i, t in zip(input, target)]
losses, weights = zip(*losses_and_weights)
for i, t in zip(input, target, strict=True)]
losses, weights = zip(*losses_and_weights, strict=True)
losses_tensor = input.new_tensor(losses)
if reduction == 'mean':
return sum(losses_tensor) / sum(weights)
@ -3268,7 +3268,7 @@ class NNTestCase(TestCase):
for i in range(output_size):
param, d_param = self._get_parameters(module)
# make non grad zeros
d_param = [torch.zeros_like(p) if d is None else d for (p, d) in zip(param, d_param)]
d_param = [torch.zeros_like(p) if d is None else d for (p, d) in zip(param, d_param, strict=True)]
d_out = torch.zeros_like(output)
flat_d_out = d_out.view(-1)
@ -3282,7 +3282,7 @@ class NNTestCase(TestCase):
d_input = self._backward(module, input, output, d_out)
if jacobian_input:
for jacobian_x, d_x in zip(flat_jacobian_input, _iter_tensors(d_input)):
for jacobian_x, d_x in zip(flat_jacobian_input, _iter_tensors(d_input), strict=True):
jacobian_x[:, i] = d_x.contiguous().view(-1)
if jacobian_parameters:
jacobian_param[:, i] = torch.cat(self._flatten_tensors(d_param), 0)
@ -3320,7 +3320,7 @@ class NNTestCase(TestCase):
numerical_t = list(_iter_tensors(numerical))
differences = []
for a, n in zip(analytical_t, numerical_t):
for a, n in zip(analytical_t, numerical_t, strict=True):
if a.numel() != 0:
differences.append(a.add(n, alpha=-1).abs().max())
# TODO: compare structure (ensure analytic jacobian has correct shape)
@ -3528,7 +3528,7 @@ class ModuleTest(TestBase):
gpu_module = self.constructor(*self.constructor_args).float().cuda()
cpu_param = test_case._get_parameters(cpu_module)
gpu_param = test_case._get_parameters(gpu_module)
for cpu_p, gpu_p in zip(cpu_param[0], gpu_param[0]):
for cpu_p, gpu_p in zip(cpu_param[0], gpu_param[0], strict=True):
gpu_p.data.copy_(cpu_p)
test_case._zero_grad_input(cpu_input_tuple)
@ -3549,7 +3549,7 @@ class ModuleTest(TestBase):
cpu_gradInput = test_case._backward(cpu_module, cpu_input_tuple, cpu_output, cpu_gradOutput)
gpu_gradInput = test_case._backward(gpu_module, gpu_input_tuple, gpu_output, gpu_gradOutput)
test_case.assertEqual(cpu_gradInput, gpu_gradInput, atol=self.precision, rtol=0, exact_dtype=False)
for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1]):
for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1], strict=True):
test_case.assertEqual(cpu_d_p, gpu_d_p, atol=self.precision, rtol=0)
# Run double-backwards on CPU and GPU and compare results
@ -3575,7 +3575,7 @@ class ModuleTest(TestBase):
gpu_gradOutput,
create_graph=True)
for cpu_d_i, gpu_d_i in zip(cpu_gradInputs, gpu_gradInputs):
for cpu_d_i, gpu_d_i in zip(cpu_gradInputs, gpu_gradInputs, strict=True):
test_case.assertEqual(cpu_d_i, gpu_d_i, atol=self.precision, rtol=0, exact_dtype=False)
# We mix output into the second backwards computation so that
@ -3598,7 +3598,7 @@ class ModuleTest(TestBase):
gpu_input_tuple + (gpu_gradOutput,) + tuple(gpu_module.parameters()),
retain_graph=True)
test_case.assertEqual(cpu_gradInput, gpu_gradInput, atol=self.precision, rtol=0, exact_dtype=False)
for cpu_d_p, gpu_d_p in zip(cpu_gg, gpu_gg):
for cpu_d_p, gpu_d_p in zip(cpu_gg, gpu_gg, strict=True):
test_case.assertEqual(cpu_d_p, gpu_d_p, atol=self.precision, rtol=0, exact_dtype=False)
self.test_noncontig(test_case, gpu_module, gpu_input_tuple)


@ -692,7 +692,7 @@ class parametrize(_TestParametrizer):
return f"{name}{idx}"
def _default_subtest_name(self, idx, values):
return '_'.join([self._formatted_str_repr(idx, a, v) for a, v in zip(self.arg_names, values)])
return '_'.join([self._formatted_str_repr(idx, a, v) for a, v in zip(self.arg_names, values, strict=True)])
def _get_subtest_name(self, idx, values, explicit_name=None):
if explicit_name:
@ -736,7 +736,7 @@ class parametrize(_TestParametrizer):
raise RuntimeError(f'Expected # values == # arg names, but got: {len(values)} '
f'values and {len(self.arg_names)} names for test "{test.__name__}"')
param_kwargs = dict(zip(self.arg_names, values))
param_kwargs = dict(zip(self.arg_names, values, strict=True))
test_name = self._get_subtest_name(idx, values, explicit_name=maybe_name)
@ -3696,7 +3696,7 @@ class TestCase(expecttest.TestCase):
n_compressed_dims, n_plain_dims = size[-1 - dense_dims] // blocksize1, size[-2 - dense_dims] // blocksize0
blocknnz = nnz // (blocksize0 * blocksize1)
sparse_tensors = [random_sparse_compressed(n_compressed_dims, n_plain_dims, blocknnz) for _ in range(n_batch)]
sparse_tensors_it = map(list, zip(*sparse_tensors))
sparse_tensors_it = map(list, zip(*sparse_tensors, strict=True))
values = torch.stack(next(sparse_tensors_it)).reshape(*batch_shape, blocknnz, *blocksize, *dense_size)
compressed_indices = torch.stack(next(sparse_tensors_it)).reshape(*batch_shape, -1)


@ -234,7 +234,7 @@ def generate_cct_and_mode(autograd_view_consistency=True):
# tensor results to be that of the tensors that alias the input
result = func(*args, **kwargs)
if isinstance(result, (tuple, list)):
for a, b in zip(rs, result):
for a, b in zip(rs, result, strict=True):
a.set_(b)
else:
rs.set_(result)
@ -303,7 +303,7 @@ def generate_subclass_choices(flat_args, CCT, cct_mode):
for which_args_are_wrapped in itertools.product(*subclass_options):
result = [maybe_map(partial(wrap, CCT=CCT, cct_mode=cct_mode), should_wrap_arg, arg)
for should_wrap_arg, arg in zip(which_args_are_wrapped, flat_args)]
for should_wrap_arg, arg in zip(which_args_are_wrapped, flat_args, strict=True)]
yield result, which_args_are_wrapped
@ -539,11 +539,11 @@ def check_forward_ad_formula(op: Callable, args, kwargs, gradcheck_wrapper=None,
return fwAD.make_dual(primal.detach(), tangent)
elif is_tensorlist(primal):
return tuple(fwAD.make_dual(pri.detach(), tang) if tang is not None else pri
for pri, tang in zip(primal, tangent))
for pri, tang in zip(primal, tangent, strict=True))
return primal
def compute_expected_grad(args, tangent_args, kwargs, tangent_kwargs):
op_args = tuple(map(maybe_make_dual, zip(args, tangent_args)))
op_args = tuple(map(maybe_make_dual, zip(args, tangent_args, strict=True)))
op_kwargs = {k: maybe_make_dual((v, tangent_kwargs[k])) for k, v in kwargs.items()}
if gradcheck_wrapper is None:
@ -572,7 +572,7 @@ def check_forward_ad_formula(op: Callable, args, kwargs, gradcheck_wrapper=None,
new_tang_args, new_tang_kwargs, \
which_tang_args_are_wrapped, which_tang_kwargs_are_wrapped = tang_choice
op_args = tuple(map(maybe_make_dual, zip(new_args, new_tang_args)))
op_args = tuple(map(maybe_make_dual, zip(new_args, new_tang_args, strict=True)))
op_kwargs = {k: maybe_make_dual((v, new_tang_kwargs[k])) for k, v in new_kwargs.items()}
try:


@ -144,7 +144,9 @@ class CustomTensorPlainOut(torch.Tensor):
new_out = pytree.tree_unflatten(
(
CustomTensorPlainOut(tensor1, tensor2)
for tensor1, tensor2 in zip(out_inner_flat_1, out_inner_flat_2)
for tensor1, tensor2 in zip(
out_inner_flat_1, out_inner_flat_2, strict=True
)
),
spec,
)


@ -60,7 +60,7 @@ class VerifyStateDictMixin:
dist_osd: dict[str, Any],
) -> None:
params = list(chain.from_iterable(g["params"] for g in optim.param_groups))
param_pid_mapping = dict(zip(params, range(len(params))))
param_pid_mapping = dict(zip(params, range(len(params)), strict=True))
fqn_pid_mapping = {}
for fqn, param in model.named_parameters():
pid = param_pid_mapping[param]
@ -90,7 +90,7 @@ class VerifyStateDictMixin:
dist_osd[_PG] = [new_pg]
self.assertEqual(len(osd[_PG]), len(dist_osd[_PG]))
for group, dist_group in zip(osd[_PG], dist_osd[_PG]):
for group, dist_group in zip(osd[_PG], dist_osd[_PG], strict=True):
self.assertEqual(len(group), len(dist_group))
for key, value in group.items():
# Below doesn't work because param_groups can have None


@ -238,7 +238,9 @@ class Trainer:
sparse_microbatch = torch.split(sparse_features, 2)
values_microbatch = torch.split(values, 2)
batches = []
for d, s, v in zip(dense_microbatch, sparse_microbatch, values_microbatch):
for d, s, v in zip(
dense_microbatch, sparse_microbatch, values_microbatch, strict=True
):
feature_set = FeatureSet(dense_features=d, sparse_features=s, values=v)
batches.append(feature_set)


@ -678,7 +678,7 @@ class DistributedTest:
# Verify buffers across ranks.
m1_buffers = list(m1.buffers())
m2_buffers = list(m2.buffers())
for buf1, buf2 in zip(m1_buffers, m2_buffers):
for buf1, buf2 in zip(m1_buffers, m2_buffers, strict=True):
gathered_bufs = [
torch.empty_like(buf1) for _ in range(dist.get_world_size())
]
@ -3045,7 +3045,7 @@ class DistributedTest:
curr_values = master_values if rank == src else worker_values
tensors = [
_build_tensor(src + 1, val, dtype=dtype)
for dtype, val in zip(dtypes, curr_values)
for dtype, val in zip(dtypes, curr_values, strict=True)
]
if cuda:
tensors = [t.cuda(rank_to_GPU[rank][0]) for t in tensors]
@ -3066,7 +3066,9 @@ class DistributedTest:
)
expected_tensors = [
_build_tensor(src + 1, expected_value, dtype=dtype)
for dtype, expected_value in zip(dtypes, expected_values)
for dtype, expected_value in zip(
dtypes, expected_values, strict=True
)
]
self.assertEqual(tensors, expected_tensors)
@ -3338,7 +3340,7 @@ class DistributedTest:
)
if rank == dest:
expected_tensors = [_build_tensor(dest + 1, i) for i in group]
for t1, t2 in zip(tensors, expected_tensors):
for t1, t2 in zip(tensors, expected_tensors, strict=True):
self.assertEqual(t1, t2)
self._barrier()
@ -3440,7 +3442,7 @@ class DistributedTest:
expected_tensors = [
_build_tensor(dest + 1, i, dtype=dtype) for i in group
]
for t1, t2 in zip(tensors, expected_tensors):
for t1, t2 in zip(tensors, expected_tensors, strict=True):
self.assertEqual(t1, t2)
self._barrier()
@ -3624,8 +3626,8 @@ class DistributedTest:
tensor_shapes=tensor_shapes,
)
for l1, l2 in zip(output_tensor_lists, expected_tensors):
for t1, t2 in zip(l1, l2):
for l1, l2 in zip(output_tensor_lists, expected_tensors, strict=True):
for t1, t2 in zip(l1, l2, strict=True):
if not torch.equal(t1, t2):
return False
return True
@ -3824,7 +3826,7 @@ class DistributedTest:
]
out_tensors = [t.cuda(rank_to_GPU[rank][0]) for t in out_tensors]
dist.all_to_all(out_tensors, in_tensors, group=group_id)
for t1, t2 in zip(out_tensors, expected_tensors):
for t1, t2 in zip(out_tensors, expected_tensors, strict=True):
self.assertEqual(t1, t2)
self._barrier()
@ -4203,7 +4205,7 @@ class DistributedTest:
def _assert_equal_param(self, param_gpu, param_DDP):
self.assertEqual(len(param_gpu), len(param_DDP))
for p_gpu, p_DDP in zip(param_gpu, param_DDP):
for p_gpu, p_DDP in zip(param_gpu, param_DDP, strict=True):
self.assertEqual(p_gpu, p_DDP)
def _test_DDP_niter(
@ -4618,6 +4620,7 @@ class DistributedTest:
for hook_param, allreduce_param in zip(
ddp_model_with_optimizer_hook.parameters(),
ddp_model_with_no_hook.parameters(),
strict=True,
):
self.assertEqual(hook_param, allreduce_param)
@ -4649,6 +4652,7 @@ class DistributedTest:
for hook_param, allreduce_param in zip(
ddp_model_with_optimizer_hook.parameters(),
ddp_model_with_no_hook.parameters(),
strict=True,
):
self.assertEqual(hook_param, allreduce_param)
@ -4825,7 +4829,9 @@ class DistributedTest:
optimizer_kwargs=optim_kwargs,
)
for p1, p2 in zip(model.parameters(), model_optim_in_bwd.parameters()):
for p1, p2 in zip(
model.parameters(), model_optim_in_bwd.parameters(), strict=True
):
self.assertEqual(p1, p2, "Parameters not initially equal!")
# Enable determinism in cudnn operators
with torch.backends.cudnn.flags(
@ -4843,7 +4849,9 @@ class DistributedTest:
inp
).sum().backward() # runs optimizer as well
for p1, p2 in zip(
model.parameters(), model_optim_in_bwd.parameters()
model.parameters(),
model_optim_in_bwd.parameters(),
strict=True,
):
self.assertEqual(
p1, p2, f"Params not equal at iteration {i}"
@ -5323,7 +5331,9 @@ class DistributedTest:
# sync grads
step_model(ddp_model, ddp_input, ddp_target)
for i, j in zip(model.parameters(), ddp_model.parameters()):
for i, j in zip(
model.parameters(), ddp_model.parameters(), strict=True
):
if not i.requires_grad:
continue
if iteration % 2 == 0:
@ -5562,6 +5572,7 @@ class DistributedTest:
for i, j in zip(
ddp_model_grad_not_view.parameters(),
ddp_model_grad_is_view.parameters(),
strict=True,
):
self.assertEqual(i, j)
@ -5667,7 +5678,9 @@ class DistributedTest:
target,
)
for p1, p2 in zip(
net.parameters(), net_using_post_localSGD_opt.parameters()
net.parameters(),
net_using_post_localSGD_opt.parameters(),
strict=True,
):
self.assertEqual(p1.data, p2.data)
@ -6817,7 +6830,7 @@ class DistributedTest:
# they are the same as new_model on rank_to_broadcast.
if rank == rank_to_broadcast:
expected_states = new_model.state_dict().values()
for t, expected in zip(net_module_states, expected_states):
for t, expected in zip(net_module_states, expected_states, strict=True):
self.assertEqual(t, expected)
@skip_if_lt_x_gpu(2)
@ -7134,7 +7147,9 @@ class DistributedTest:
# Validate model state dicts are equal
for (_, local_tensor), (_, dist_tensor) in zip(
local_model.state_dict().items(), net.module.state_dict().items()
local_model.state_dict().items(),
net.module.state_dict().items(),
strict=True,
):
self.assertEqual(local_tensor, dist_tensor)
@ -7722,13 +7737,17 @@ class DistributedTest:
# materialized param grad is not touched by DDP, so its grad should
# be the same as if running locally.
for materialized_param, local_param in zip(
ddp.module.fc2.parameters(), local_model.fc2.parameters()
ddp.module.fc2.parameters(),
local_model.fc2.parameters(),
strict=True,
):
self.assertEqual(materialized_param.grad, local_param.grad)
# fc1 parameter grad should still be different, due to allreduce.
for synced_param, local_param in zip(
ddp.module.fc1.parameters(), local_model.fc1.parameters()
ddp.module.fc1.parameters(),
local_model.fc1.parameters(),
strict=True,
):
self.assertFalse(synced_param.grad == local_param.grad)
@ -8581,7 +8600,7 @@ class DistributedTest:
# Verify grads are the same
for local_param, dist_param in zip(
local_net.parameters(), net.parameters()
local_net.parameters(), net.parameters(), strict=True
):
local_grad = local_param.grad
dist_grad = dist_param.grad
@ -8631,7 +8650,7 @@ class DistributedTest:
torch._C._functions.UndefinedGrad()(out).backward()
torch._C._functions.UndefinedGrad()(local_out).backward()
for (dist_param_name, dist_param), (local_param_name, local_param) in zip(
net.named_parameters(), local_net.named_parameters()
net.named_parameters(), local_net.named_parameters(), strict=True
):
dist_grad = dist_param.grad
local_grad = local_param.grad
@ -8689,7 +8708,9 @@ class DistributedTest:
self.assertTrue(
static_model._get_ddp_logging_data().get("has_rebuilt_buckets", 0)
)
for i, j in zip(base_model.parameters(), static_model.parameters()):
for i, j in zip(
base_model.parameters(), static_model.parameters(), strict=True
):
self.assertEqual(i, j)
@require_backend_is_available({"gloo"})
@ -9297,7 +9318,7 @@ class DistributedTest:
loss_static.backward()
self._model_step(model_static_graph)
for p, p_static in zip(
model.parameters(), model_static_graph.parameters()
model.parameters(), model_static_graph.parameters(), strict=True
):
self.assertEqual(p, p_static)
@ -9974,7 +9995,7 @@ class DistributedTest:
p.grad.data = p.grad / iters
for p_ddp, p_local in zip(
model.parameters(), local_model.parameters()
model.parameters(), local_model.parameters(), strict=True
):
self.assertTrue(
torch.allclose(p_ddp.grad, p_local.grad),
@ -10191,7 +10212,9 @@ class DistributedTest:
# (refer to https://github.com/numpy/numpy/blob/266aad7478bc7fbcc55eea7f942a0d373b838396/numpy/random/mtrand.pyi)
# To make sure random state was restored properly, all entries should equal the original
for entry1, entry2 in zip(
hook_state.rng.get_state(), dummy_hook_state.rng.get_state()
hook_state.rng.get_state(),
dummy_hook_state.rng.get_state(),
strict=True,
):
np.testing.assert_array_equal(entry1, entry2)
@ -10212,7 +10235,7 @@ class DistributedTest:
# Check that gradients after 10 epochs are the same
for orig_param, dummy_param in zip(
ddp_model.parameters(), dummy_ddp_model.parameters()
ddp_model.parameters(), dummy_ddp_model.parameters(), strict=True
):
self.assertEqual(orig_param.grad, dummy_param.grad)
@ -10299,7 +10322,9 @@ class DistributedTest:
self.assertEqual(out_ddp, out_ddp_static)
out_ddp.backward()
out_ddp_static.backward()
for p1, p2 in zip(ddp.parameters(), ddp_static.parameters()):
for p1, p2 in zip(
ddp.parameters(), ddp_static.parameters(), strict=True
):
self.assertEqual(p1.grad, p2.grad)
@skip_if_lt_x_gpu(2)
@ -10392,7 +10417,9 @@ class DistributedTest:
test_model_1._get_ddp_logging_data().get("num_buckets_reduced"), 1
)
for i, j in zip(base_model.parameters(), test_model_1.parameters()):
for i, j in zip(
base_model.parameters(), test_model_1.parameters(), strict=True
):
self.assertEqual(i, j)


@ -457,7 +457,9 @@ class ProcessLocalGroup(dist.ProcessGroup):
):
works = [
self._reduce_scatter_base(output_tensor, input_tensor, opts)
for output_tensor, input_tensor in zip(output_tensors, input_tensors)
for output_tensor, input_tensor in zip(
output_tensors, input_tensors, strict=True
)
]
for work in works[:-1]:
work.wait()
@ -467,7 +469,7 @@ class ProcessLocalGroup(dist.ProcessGroup):
self, output_tensor_list, input_tensor_list, opts=AllgatherOptions()
):
res = None
for o_t, i_t in zip(output_tensor_list, input_tensor_list):
for o_t, i_t in zip(output_tensor_list, input_tensor_list, strict=True):
res = self._allgather_base(o_t, i_t)
return res


@ -2749,7 +2749,7 @@ class TensorPipeCudaDistAutogradTest(RpcAgentTestFixture):
for i in range(len(futs)):
local_gradients = [p.grad for p in local_layers[i].parameters()]
for g1, g2 in zip(futs[i].wait(), local_gradients):
for g1, g2 in zip(futs[i].wait(), local_gradients, strict=True):
self.assertEqual(g1, g2)
rpc.shutdown()


@ -46,7 +46,7 @@ class BatchUpdateParameterServer:
@rpc.functions.async_execution
def update_and_fetch_model(ps_rref, grads):
self = ps_rref.local_value()
for p, g in zip(self.model.parameters(), grads):
for p, g in zip(self.model.parameters(), grads, strict=True):
if p.grad is None:
p.grad = g
else:


@ -216,7 +216,7 @@ class Agent:
returns.insert(0, R)
returns = torch.tensor(returns)
returns = (returns - returns.mean()) / (returns.std() + self.eps)
for log_prob, R in zip(probs, returns):
for log_prob, R in zip(probs, returns, strict=True):
policy_loss.append(-log_prob * R)
self.optimizer.zero_grad()
policy_loss = torch.cat(policy_loss).sum()


@ -249,7 +249,7 @@ class JitTestCase(JitCommonTestCase):
saved_module_buffer_2.seek(0)
code_files_2, _debug_files_2 = extract_files(saved_module_buffer_2)
for a, b in zip(code_files, code_files_2):
for a, b in zip(code_files, code_files_2, strict=True):
self.assertMultiLineEqual(a, b)
if isinstance(m, torch._C.ScriptModule):
@ -617,7 +617,7 @@ class JitTestCase(JitCommonTestCase):
self.assertEqual(outputs, outputs_ge)
if inputs_require_grads:
self.assertEqual(grads, grads_ge, atol=grad_atol, rtol=grad_rtol)
for g2, g2_ge in zip(grads2, grads2_ge):
for g2, g2_ge in zip(grads2, grads2_ge, strict=True):
if g2 is None and g2_ge is None:
continue
self.assertEqual(g2, g2_ge, atol=8e-4, rtol=8e-4)


@ -228,11 +228,11 @@ def multiple_logs_to_string(module: str, *log_options: str) -> tuple[list[io.Str
def tmp_redirect_logs():
loggers = [torch._logging.getArtifactLogger(module, option) for option in log_options]
try:
for logger, handler in zip(loggers, handlers):
for logger, handler in zip(loggers, handlers, strict=True):
logger.addHandler(handler)
yield
finally:
for logger, handler in zip(loggers, handlers):
for logger, handler in zip(loggers, handlers, strict=True):
logger.removeHandler(handler)
def ctx_manager() -> AbstractContextManager[None]:


@ -402,9 +402,9 @@ def sample_inputs_masked_logaddexp(op_info, device, dtype, requires_grad, **kwar
make_tensor, dtype=dtype, device=device, requires_grad=requires_grad
)
for shape, input_masks, other_masks in zip(
shapes, input_mask_lists, other_mask_lists
shapes, input_mask_lists, other_mask_lists, strict=True
):
for input_mask, other_mask in zip(input_masks, other_masks):
for input_mask, other_mask in zip(input_masks, other_masks, strict=True):
yield SampleInput(
make_arg(shape),
make_arg(shape),


@ -78,7 +78,7 @@ class TwoTensor(torch.Tensor):
# our two inner tensors return the same value
out_flat = [
cls(o_a, o_b) if isinstance(o_a, torch.Tensor) else o_a
for o_a, o_b in zip(out_a_flat, out_b_flat)
for o_a, o_b in zip(out_a_flat, out_b_flat, strict=True)
]
out = pytree.tree_unflatten(out_flat, spec)
from torch._higher_order_ops.cond import cond_op