diff --git a/torch/testing/_comparison.py b/torch/testing/_comparison.py index 6c4506f1a8a9..1d4a050b8047 100644 --- a/torch/testing/_comparison.py +++ b/torch/testing/_comparison.py @@ -92,7 +92,9 @@ def default_tolerances( f"Expected a torch.Tensor or a torch.dtype, but got {type(input)} instead." ) dtype_precisions = dtype_precisions or _DTYPE_PRECISIONS - rtols, atols = zip(*[dtype_precisions.get(dtype, (0.0, 0.0)) for dtype in dtypes]) + rtols, atols = zip( + *[dtype_precisions.get(dtype, (0.0, 0.0)) for dtype in dtypes], strict=True + ) return max(rtols), max(atols) diff --git a/torch/testing/_internal/autocast_test_lists.py b/torch/testing/_internal/autocast_test_lists.py index 11cfb179a97e..b3616fede6ce 100644 --- a/torch/testing/_internal/autocast_test_lists.py +++ b/torch/testing/_internal/autocast_test_lists.py @@ -437,7 +437,7 @@ class TestAutocast(TestCase): if isinstance(first, torch.Tensor): return torch.equal(first, second) elif isinstance(first, collections.abc.Iterable): - return all(compare(f, s) for f, s in zip(first, second)) + return all(compare(f, s) for f, s in zip(first, second, strict=False)) else: return first == second diff --git a/torch/testing/_internal/common_cuda.py b/torch/testing/_internal/common_cuda.py index 916221d33651..8202a32ae8ad 100644 --- a/torch/testing/_internal/common_cuda.py +++ b/torch/testing/_internal/common_cuda.py @@ -252,7 +252,7 @@ def tf32_on_and_off(tf32_precision=1e-5, *, only_if=True): @functools.wraps(f) def wrapped(*args, **kwargs): - kwargs.update(zip(arg_names, args)) + kwargs.update(zip(arg_names, args, strict=False)) cond = torch.cuda.is_tf32_supported() and only_if if 'device' in kwargs: cond = cond and (torch.device(kwargs['device']).type == 'cuda') @@ -325,7 +325,7 @@ def _create_scaling_models_optimizers(device="cuda", optimizer_ctor=torch.optim. mod_control = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 8)).to(device=device) mod_scaling = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 8)).to(device=device) with torch.no_grad(): - for c, s in zip(mod_control.parameters(), mod_scaling.parameters()): + for c, s in zip(mod_control.parameters(), mod_scaling.parameters(), strict=True): s.copy_(c) kwargs = {"lr": 1.0} diff --git a/torch/testing/_internal/common_distributed.py b/torch/testing/_internal/common_distributed.py index 64ea87852a86..719713e7c9f6 100644 --- a/torch/testing/_internal/common_distributed.py +++ b/torch/testing/_internal/common_distributed.py @@ -1153,7 +1153,7 @@ def run_subtests( subtest_config_values: list[list[Any]] = [item[1] for item in subtest_config_items] for values in itertools.product(*subtest_config_values): # Map keyword to chosen value - subtest_kwargs = dict(zip(subtest_config_keys, values)) + subtest_kwargs = dict(zip(subtest_config_keys, values, strict=True)) with cls_inst.subTest(**subtest_kwargs): torch._dynamo.reset() test_fn(*test_args, **test_kwargs, **subtest_kwargs) diff --git a/torch/testing/_internal/common_fsdp.py b/torch/testing/_internal/common_fsdp.py index c18fbccb795d..dd211599cf14 100644 --- a/torch/testing/_internal/common_fsdp.py +++ b/torch/testing/_internal/common_fsdp.py @@ -157,7 +157,7 @@ def _assert_module_states( assert rank0_states is not None # mypy for state in olist[1:]: assert state is not None # mypy - for (_, p1), (_, p2) in zip(rank0_states, state): + for (_, p1), (_, p2) in zip(rank0_states, state, strict=True): assert_fn(p1, p2) @@ -1135,7 +1135,9 @@ def check_sharded_parity( prefixes_to_ignore: tuple[str, ...] 
= (), ): for (replicated_name, replicated_param), (sharded_name, sharded_param) in zip( - replicated_module.named_parameters(), sharded_module.named_parameters() + replicated_module.named_parameters(), + sharded_module.named_parameters(), + strict=True, ): clean_sharded_name = sharded_name for prefix in prefixes_to_ignore: diff --git a/torch/testing/_internal/common_jit.py b/torch/testing/_internal/common_jit.py index 6ca05c51189b..ac6e851d7e28 100644 --- a/torch/testing/_internal/common_jit.py +++ b/torch/testing/_internal/common_jit.py @@ -135,7 +135,7 @@ def check_against_reference(self, func, reference_func, output_func, args, kwarg self.assertEqual(outputs, outputs_test) self.assertEqual(grads, grads_test) - for g2, g2_test in zip(grads2, grads2_test): + for g2, g2_test in zip(grads2, grads2_test, strict=True): if g2 is None and g2_test is None: continue self.assertEqual(g2, g2_test, atol=5e-4, rtol=1e-4) diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index bafe4b241d3c..82e630519eb8 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -449,7 +449,7 @@ def sample_inputs_batch_norm(op_info, device, dtype, requires_grad, **kwargs): biases = [None, channels, None] is_training = [True, False, False] - for weight, bias, training in zip(weights, biases, is_training): + for weight, bias, training in zip(weights, biases, is_training, strict=True): yield SampleInput( make_arg(input_shape), args=( @@ -3631,7 +3631,7 @@ class _TestParamsMaxPoolBase: def _gen_kwargs(self): keys = self.kwargs.keys() for values in product(*self.kwargs.values()): - yield dict(zip(keys, values)) + yield dict(zip(keys, values, strict=True)) def gen_input_params(self): yield from product(self._gen_shape(), self._gen_kwargs()) @@ -4400,7 +4400,7 @@ def sample_inputs_instance_norm(opinfo, device, dtype, requires_grad, **kwargs): weights = [channels, None] biases = [None, None] - for weight_channels, bias_channels in zip(weights, biases): + for weight_channels, bias_channels in zip(weights, biases, strict=True): running_mean = make_arg_without_requires_grad(channels, low=0) running_var = make_arg_without_requires_grad(channels, low=0) yield SampleInput( @@ -11625,7 +11625,7 @@ def reference_searchsorted(sorted_sequence, boundary, out_int32=False, right=Fal split_sorter = [sorter[i] if (sorter is not None) else None for i in splits] split_ret = [np.searchsorted(s_seq, b, side=side, sorter=s_sort) - for (s_seq, b, s_sort) in zip(split_sequence, split_boundary, split_sorter)] + for (s_seq, b, s_sort) in zip(split_sequence, split_boundary, split_sorter, strict=True)] split_ret = [i.astype(np.int32) for i in split_ret] if out_int32 else split_ret return np.stack(split_ret).reshape(orig_shape) diff --git a/torch/testing/_internal/common_mkldnn.py b/torch/testing/_internal/common_mkldnn.py index 44da60a5ad1f..70ab98137bd7 100644 --- a/torch/testing/_internal/common_mkldnn.py +++ b/torch/testing/_internal/common_mkldnn.py @@ -91,7 +91,7 @@ def reduced_f32_on_and_off(bf32_precision=1e-2, tf32_precision=1e-5): @functools.wraps(f) def wrapped(*args, **kwargs): - kwargs.update(zip(arg_names, args)) + kwargs.update(zip(arg_names, args, strict=False)) cond = True if "device" in kwargs: cond = cond and (torch.device(kwargs["device"]).type == "cpu") diff --git a/torch/testing/_internal/common_modules.py b/torch/testing/_internal/common_modules.py index 2cd6a89a0452..120a76eb5ef3 100644 
--- a/torch/testing/_internal/common_modules.py +++ b/torch/testing/_internal/common_modules.py @@ -1413,7 +1413,7 @@ def module_inputs_torch_nn_L1Loss(module_info, device, dtype, requires_grad, tra forward_input=FunctionInput(make_input((2, 3, 4)), make_input((2, 3, 4))), reference_fn=lambda m, p, i, t: 1. / i.numel() * sum((a - b).abs().sum() - for a, b in zip(i, t))), + for a, b in zip(i, t, strict=True))), ModuleInput(constructor_input=FunctionInput(), forward_input=FunctionInput(make_input(()), make_input(())), reference_fn=lambda m, p, i, t: 1. / i.numel() * (i - t).abs().sum(), diff --git a/torch/testing/_internal/common_nn.py b/torch/testing/_internal/common_nn.py index aaca0efe1eb4..68a35e8c40a1 100644 --- a/torch/testing/_internal/common_nn.py +++ b/torch/testing/_internal/common_nn.py @@ -2633,7 +2633,7 @@ def get_new_module_tests(): # add conv padding mode tests: for padding_mode, cpp_padding_mode in zip( ['reflect', 'circular', 'replicate', 'zeros'], - ['torch::kReflect', 'torch::kCircular', 'torch::kReplicate', 'torch::kZeros']): + ['torch::kReflect', 'torch::kCircular', 'torch::kReplicate', 'torch::kZeros'], strict=True): # conv signature: # in_channels, out_channels, kernel_size, stride=1, # padding=0, dilation=1, groups=1, @@ -2848,8 +2848,8 @@ def nllloss_reference(input, target, weight=None, ignore_index=-100, return (result, norm) losses_and_weights = [nll_loss_helper(i, t, weight, ignore_index) - for i, t in zip(input, target)] - losses, weights = zip(*losses_and_weights) + for i, t in zip(input, target, strict=True)] + losses, weights = zip(*losses_and_weights, strict=True) losses_tensor = input.new_tensor(losses) if reduction == 'mean': return sum(losses_tensor) / sum(weights) @@ -3268,7 +3268,7 @@ class NNTestCase(TestCase): for i in range(output_size): param, d_param = self._get_parameters(module) # make non grad zeros - d_param = [torch.zeros_like(p) if d is None else d for (p, d) in zip(param, d_param)] + d_param = [torch.zeros_like(p) if d is None else d for (p, d) in zip(param, d_param, strict=True)] d_out = torch.zeros_like(output) flat_d_out = d_out.view(-1) @@ -3282,7 +3282,7 @@ class NNTestCase(TestCase): d_input = self._backward(module, input, output, d_out) if jacobian_input: - for jacobian_x, d_x in zip(flat_jacobian_input, _iter_tensors(d_input)): + for jacobian_x, d_x in zip(flat_jacobian_input, _iter_tensors(d_input), strict=True): jacobian_x[:, i] = d_x.contiguous().view(-1) if jacobian_parameters: jacobian_param[:, i] = torch.cat(self._flatten_tensors(d_param), 0) @@ -3320,7 +3320,7 @@ class NNTestCase(TestCase): numerical_t = list(_iter_tensors(numerical)) differences = [] - for a, n in zip(analytical_t, numerical_t): + for a, n in zip(analytical_t, numerical_t, strict=True): if a.numel() != 0: differences.append(a.add(n, alpha=-1).abs().max()) # TODO: compare structure (ensure analytic jacobian has correct shape) @@ -3528,7 +3528,7 @@ class ModuleTest(TestBase): gpu_module = self.constructor(*self.constructor_args).float().cuda() cpu_param = test_case._get_parameters(cpu_module) gpu_param = test_case._get_parameters(gpu_module) - for cpu_p, gpu_p in zip(cpu_param[0], gpu_param[0]): + for cpu_p, gpu_p in zip(cpu_param[0], gpu_param[0], strict=True): gpu_p.data.copy_(cpu_p) test_case._zero_grad_input(cpu_input_tuple) @@ -3549,7 +3549,7 @@ class ModuleTest(TestBase): cpu_gradInput = test_case._backward(cpu_module, cpu_input_tuple, cpu_output, cpu_gradOutput) gpu_gradInput = test_case._backward(gpu_module, gpu_input_tuple, gpu_output, gpu_gradOutput) 
test_case.assertEqual(cpu_gradInput, gpu_gradInput, atol=self.precision, rtol=0, exact_dtype=False) - for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1]): + for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1], strict=True): test_case.assertEqual(cpu_d_p, gpu_d_p, atol=self.precision, rtol=0) # Run double-backwards on CPU and GPU and compare results @@ -3575,7 +3575,7 @@ class ModuleTest(TestBase): gpu_gradOutput, create_graph=True) - for cpu_d_i, gpu_d_i in zip(cpu_gradInputs, gpu_gradInputs): + for cpu_d_i, gpu_d_i in zip(cpu_gradInputs, gpu_gradInputs, strict=True): test_case.assertEqual(cpu_d_i, gpu_d_i, atol=self.precision, rtol=0, exact_dtype=False) # We mix output into the second backwards computation so that @@ -3598,7 +3598,7 @@ class ModuleTest(TestBase): gpu_input_tuple + (gpu_gradOutput,) + tuple(gpu_module.parameters()), retain_graph=True) test_case.assertEqual(cpu_gradInput, gpu_gradInput, atol=self.precision, rtol=0, exact_dtype=False) - for cpu_d_p, gpu_d_p in zip(cpu_gg, gpu_gg): + for cpu_d_p, gpu_d_p in zip(cpu_gg, gpu_gg, strict=True): test_case.assertEqual(cpu_d_p, gpu_d_p, atol=self.precision, rtol=0, exact_dtype=False) self.test_noncontig(test_case, gpu_module, gpu_input_tuple) diff --git a/torch/testing/_internal/common_utils.py b/torch/testing/_internal/common_utils.py index 0146f37e4baf..284a3bdcfbd7 100644 --- a/torch/testing/_internal/common_utils.py +++ b/torch/testing/_internal/common_utils.py @@ -692,7 +692,7 @@ class parametrize(_TestParametrizer): return f"{name}{idx}" def _default_subtest_name(self, idx, values): - return '_'.join([self._formatted_str_repr(idx, a, v) for a, v in zip(self.arg_names, values)]) + return '_'.join([self._formatted_str_repr(idx, a, v) for a, v in zip(self.arg_names, values, strict=True)]) def _get_subtest_name(self, idx, values, explicit_name=None): if explicit_name: @@ -736,7 +736,7 @@ class parametrize(_TestParametrizer): raise RuntimeError(f'Expected # values == # arg names, but got: {len(values)} ' f'values and {len(self.arg_names)} names for test "{test.__name__}"') - param_kwargs = dict(zip(self.arg_names, values)) + param_kwargs = dict(zip(self.arg_names, values, strict=True)) test_name = self._get_subtest_name(idx, values, explicit_name=maybe_name) @@ -3696,7 +3696,7 @@ class TestCase(expecttest.TestCase): n_compressed_dims, n_plain_dims = size[-1 - dense_dims] // blocksize1, size[-2 - dense_dims] // blocksize0 blocknnz = nnz // (blocksize0 * blocksize1) sparse_tensors = [random_sparse_compressed(n_compressed_dims, n_plain_dims, blocknnz) for _ in range(n_batch)] - sparse_tensors_it = map(list, zip(*sparse_tensors)) + sparse_tensors_it = map(list, zip(*sparse_tensors, strict=True)) values = torch.stack(next(sparse_tensors_it)).reshape(*batch_shape, blocknnz, *blocksize, *dense_size) compressed_indices = torch.stack(next(sparse_tensors_it)).reshape(*batch_shape, -1) diff --git a/torch/testing/_internal/composite_compliance.py b/torch/testing/_internal/composite_compliance.py index c44c0f50ff5d..527fc8a5826e 100644 --- a/torch/testing/_internal/composite_compliance.py +++ b/torch/testing/_internal/composite_compliance.py @@ -234,7 +234,7 @@ def generate_cct_and_mode(autograd_view_consistency=True): # tensor results to be that of the tensors that alias the input result = func(*args, **kwargs) if isinstance(result, (tuple, list)): - for a, b in zip(rs, result): + for a, b in zip(rs, result, strict=True): a.set_(b) else: rs.set_(result) @@ -303,7 +303,7 @@ def generate_subclass_choices(flat_args, CCT, cct_mode): for 
which_args_are_wrapped in itertools.product(*subclass_options): result = [maybe_map(partial(wrap, CCT=CCT, cct_mode=cct_mode), should_wrap_arg, arg) - for should_wrap_arg, arg in zip(which_args_are_wrapped, flat_args)] + for should_wrap_arg, arg in zip(which_args_are_wrapped, flat_args, strict=True)] yield result, which_args_are_wrapped @@ -539,11 +539,11 @@ def check_forward_ad_formula(op: Callable, args, kwargs, gradcheck_wrapper=None, return fwAD.make_dual(primal.detach(), tangent) elif is_tensorlist(primal): return tuple(fwAD.make_dual(pri.detach(), tang) if tang is not None else pri - for pri, tang in zip(primal, tangent)) + for pri, tang in zip(primal, tangent, strict=True)) return primal def compute_expected_grad(args, tangent_args, kwargs, tangent_kwargs): - op_args = tuple(map(maybe_make_dual, zip(args, tangent_args))) + op_args = tuple(map(maybe_make_dual, zip(args, tangent_args, strict=True))) op_kwargs = {k: maybe_make_dual((v, tangent_kwargs[k])) for k, v in kwargs.items()} if gradcheck_wrapper is None: @@ -572,7 +572,7 @@ def check_forward_ad_formula(op: Callable, args, kwargs, gradcheck_wrapper=None, new_tang_args, new_tang_kwargs, \ which_tang_args_are_wrapped, which_tang_kwargs_are_wrapped = tang_choice - op_args = tuple(map(maybe_make_dual, zip(new_args, new_tang_args))) + op_args = tuple(map(maybe_make_dual, zip(new_args, new_tang_args, strict=True))) op_kwargs = {k: maybe_make_dual((v, new_tang_kwargs[k])) for k, v in new_kwargs.items()} try: diff --git a/torch/testing/_internal/custom_tensor.py b/torch/testing/_internal/custom_tensor.py index 9fa6f79ec68a..de1b44ba8dac 100644 --- a/torch/testing/_internal/custom_tensor.py +++ b/torch/testing/_internal/custom_tensor.py @@ -144,7 +144,9 @@ class CustomTensorPlainOut(torch.Tensor): new_out = pytree.tree_unflatten( ( CustomTensorPlainOut(tensor1, tensor2) - for tensor1, tensor2 in zip(out_inner_flat_1, out_inner_flat_2) + for tensor1, tensor2 in zip( + out_inner_flat_1, out_inner_flat_2, strict=True + ) ), spec, ) diff --git a/torch/testing/_internal/distributed/common_state_dict.py b/torch/testing/_internal/distributed/common_state_dict.py index 76b7800a8d2a..a78e312306ba 100644 --- a/torch/testing/_internal/distributed/common_state_dict.py +++ b/torch/testing/_internal/distributed/common_state_dict.py @@ -60,7 +60,7 @@ class VerifyStateDictMixin: dist_osd: dict[str, Any], ) -> None: params = list(chain.from_iterable(g["params"] for g in optim.param_groups)) - param_pid_mapping = dict(zip(params, range(len(params)))) + param_pid_mapping = dict(zip(params, range(len(params)), strict=True)) fqn_pid_mapping = {} for fqn, param in model.named_parameters(): pid = param_pid_mapping[param] @@ -90,7 +90,7 @@ class VerifyStateDictMixin: dist_osd[_PG] = [new_pg] self.assertEqual(len(osd[_PG]), len(dist_osd[_PG])) - for group, dist_group in zip(osd[_PG], dist_osd[_PG]): + for group, dist_group in zip(osd[_PG], dist_osd[_PG], strict=True): self.assertEqual(len(group), len(dist_group)) for key, value in group.items(): # Below doesn't work because param_groups can have None diff --git a/torch/testing/_internal/distributed/ddp_under_dist_autograd_test.py b/torch/testing/_internal/distributed/ddp_under_dist_autograd_test.py index 428224022a45..ca9bc297010a 100644 --- a/torch/testing/_internal/distributed/ddp_under_dist_autograd_test.py +++ b/torch/testing/_internal/distributed/ddp_under_dist_autograd_test.py @@ -238,7 +238,9 @@ class Trainer: sparse_microbatch = torch.split(sparse_features, 2) values_microbatch = torch.split(values, 2) 
batches = [] - for d, s, v in zip(dense_microbatch, sparse_microbatch, values_microbatch): + for d, s, v in zip( + dense_microbatch, sparse_microbatch, values_microbatch, strict=True + ): feature_set = FeatureSet(dense_features=d, sparse_features=s, values=v) batches.append(feature_set) diff --git a/torch/testing/_internal/distributed/distributed_test.py b/torch/testing/_internal/distributed/distributed_test.py index 62ef8d4a5eca..c41602d43994 100644 --- a/torch/testing/_internal/distributed/distributed_test.py +++ b/torch/testing/_internal/distributed/distributed_test.py @@ -678,7 +678,7 @@ class DistributedTest: # Verify buffers across ranks. m1_buffers = list(m1.buffers()) m2_buffers = list(m2.buffers()) - for buf1, buf2 in zip(m1_buffers, m2_buffers): + for buf1, buf2 in zip(m1_buffers, m2_buffers, strict=True): gathered_bufs = [ torch.empty_like(buf1) for _ in range(dist.get_world_size()) ] @@ -3045,7 +3045,7 @@ class DistributedTest: curr_values = master_values if rank == src else worker_values tensors = [ _build_tensor(src + 1, val, dtype=dtype) - for dtype, val in zip(dtypes, curr_values) + for dtype, val in zip(dtypes, curr_values, strict=True) ] if cuda: tensors = [t.cuda(rank_to_GPU[rank][0]) for t in tensors] @@ -3066,7 +3066,9 @@ class DistributedTest: ) expected_tensors = [ _build_tensor(src + 1, expected_value, dtype=dtype) - for dtype, expected_value in zip(dtypes, expected_values) + for dtype, expected_value in zip( + dtypes, expected_values, strict=True + ) ] self.assertEqual(tensors, expected_tensors) @@ -3338,7 +3340,7 @@ class DistributedTest: ) if rank == dest: expected_tensors = [_build_tensor(dest + 1, i) for i in group] - for t1, t2 in zip(tensors, expected_tensors): + for t1, t2 in zip(tensors, expected_tensors, strict=True): self.assertEqual(t1, t2) self._barrier() @@ -3440,7 +3442,7 @@ class DistributedTest: expected_tensors = [ _build_tensor(dest + 1, i, dtype=dtype) for i in group ] - for t1, t2 in zip(tensors, expected_tensors): + for t1, t2 in zip(tensors, expected_tensors, strict=True): self.assertEqual(t1, t2) self._barrier() @@ -3624,8 +3626,8 @@ class DistributedTest: tensor_shapes=tensor_shapes, ) - for l1, l2 in zip(output_tensor_lists, expected_tensors): - for t1, t2 in zip(l1, l2): + for l1, l2 in zip(output_tensor_lists, expected_tensors, strict=True): + for t1, t2 in zip(l1, l2, strict=True): if not torch.equal(t1, t2): return False return True @@ -3824,7 +3826,7 @@ class DistributedTest: ] out_tensors = [t.cuda(rank_to_GPU[rank][0]) for t in out_tensors] dist.all_to_all(out_tensors, in_tensors, group=group_id) - for t1, t2 in zip(out_tensors, expected_tensors): + for t1, t2 in zip(out_tensors, expected_tensors, strict=True): self.assertEqual(t1, t2) self._barrier() @@ -4203,7 +4205,7 @@ class DistributedTest: def _assert_equal_param(self, param_gpu, param_DDP): self.assertEqual(len(param_gpu), len(param_DDP)) - for p_gpu, p_DDP in zip(param_gpu, param_DDP): + for p_gpu, p_DDP in zip(param_gpu, param_DDP, strict=True): self.assertEqual(p_gpu, p_DDP) def _test_DDP_niter( @@ -4618,6 +4620,7 @@ class DistributedTest: for hook_param, allreduce_param in zip( ddp_model_with_optimizer_hook.parameters(), ddp_model_with_no_hook.parameters(), + strict=True, ): self.assertEqual(hook_param, allreduce_param) @@ -4649,6 +4652,7 @@ class DistributedTest: for hook_param, allreduce_param in zip( ddp_model_with_optimizer_hook.parameters(), ddp_model_with_no_hook.parameters(), + strict=True, ): self.assertEqual(hook_param, allreduce_param) @@ -4825,7 +4829,9 @@ class 
DistributedTest: optimizer_kwargs=optim_kwargs, ) - for p1, p2 in zip(model.parameters(), model_optim_in_bwd.parameters()): + for p1, p2 in zip( + model.parameters(), model_optim_in_bwd.parameters(), strict=True + ): self.assertEqual(p1, p2, "Parameters not initially equal!") # Enable determinism in cudnn operators with torch.backends.cudnn.flags( @@ -4843,7 +4849,9 @@ class DistributedTest: inp ).sum().backward() # runs optimizer as well for p1, p2 in zip( - model.parameters(), model_optim_in_bwd.parameters() + model.parameters(), + model_optim_in_bwd.parameters(), + strict=True, ): self.assertEqual( p1, p2, f"Params not equal at iteration {i}" @@ -5323,7 +5331,9 @@ class DistributedTest: # sync grads step_model(ddp_model, ddp_input, ddp_target) - for i, j in zip(model.parameters(), ddp_model.parameters()): + for i, j in zip( + model.parameters(), ddp_model.parameters(), strict=True + ): if not i.requires_grad: continue if iteration % 2 == 0: @@ -5562,6 +5572,7 @@ class DistributedTest: for i, j in zip( ddp_model_grad_not_view.parameters(), ddp_model_grad_is_view.parameters(), + strict=True, ): self.assertEqual(i, j) @@ -5667,7 +5678,9 @@ class DistributedTest: target, ) for p1, p2 in zip( - net.parameters(), net_using_post_localSGD_opt.parameters() + net.parameters(), + net_using_post_localSGD_opt.parameters(), + strict=True, ): self.assertEqual(p1.data, p2.data) @@ -6817,7 +6830,7 @@ class DistributedTest: # they are the same as new_model on rank_to_broadcast. if rank == rank_to_broadcast: expected_states = new_model.state_dict().values() - for t, expected in zip(net_module_states, expected_states): + for t, expected in zip(net_module_states, expected_states, strict=True): self.assertEqual(t, expected) @skip_if_lt_x_gpu(2) @@ -7134,7 +7147,9 @@ class DistributedTest: # Validate model state dicts are equal for (_, local_tensor), (_, dist_tensor) in zip( - local_model.state_dict().items(), net.module.state_dict().items() + local_model.state_dict().items(), + net.module.state_dict().items(), + strict=True, ): self.assertEqual(local_tensor, dist_tensor) @@ -7722,13 +7737,17 @@ class DistributedTest: # materialized param grad is not touched by DDP, so its grad should # be the same as if running locally. for materialized_param, local_param in zip( - ddp.module.fc2.parameters(), local_model.fc2.parameters() + ddp.module.fc2.parameters(), + local_model.fc2.parameters(), + strict=True, ): self.assertEqual(materialized_param.grad, local_param.grad) # fc1 parameter grad should still be different, due to allreduce. 
for synced_param, local_param in zip( - ddp.module.fc1.parameters(), local_model.fc1.parameters() + ddp.module.fc1.parameters(), + local_model.fc1.parameters(), + strict=True, ): self.assertFalse(synced_param.grad == local_param.grad) @@ -8581,7 +8600,7 @@ class DistributedTest: # Verify grads are the same for local_param, dist_param in zip( - local_net.parameters(), net.parameters() + local_net.parameters(), net.parameters(), strict=True ): local_grad = local_param.grad dist_grad = dist_param.grad @@ -8631,7 +8650,7 @@ class DistributedTest: torch._C._functions.UndefinedGrad()(out).backward() torch._C._functions.UndefinedGrad()(local_out).backward() for (dist_param_name, dist_param), (local_param_name, local_param) in zip( - net.named_parameters(), local_net.named_parameters() + net.named_parameters(), local_net.named_parameters(), strict=True ): dist_grad = dist_param.grad local_grad = local_param.grad @@ -8689,7 +8708,9 @@ class DistributedTest: self.assertTrue( static_model._get_ddp_logging_data().get("has_rebuilt_buckets", 0) ) - for i, j in zip(base_model.parameters(), static_model.parameters()): + for i, j in zip( + base_model.parameters(), static_model.parameters(), strict=True + ): self.assertEqual(i, j) @require_backend_is_available({"gloo"}) @@ -9297,7 +9318,7 @@ class DistributedTest: loss_static.backward() self._model_step(model_static_graph) for p, p_static in zip( - model.parameters(), model_static_graph.parameters() + model.parameters(), model_static_graph.parameters(), strict=True ): self.assertEqual(p, p_static) @@ -9974,7 +9995,7 @@ class DistributedTest: p.grad.data = p.grad / iters for p_ddp, p_local in zip( - model.parameters(), local_model.parameters() + model.parameters(), local_model.parameters(), strict=True ): self.assertTrue( torch.allclose(p_ddp.grad, p_local.grad), @@ -10191,7 +10212,9 @@ class DistributedTest: # (refer to https://github.com/numpy/numpy/blob/266aad7478bc7fbcc55eea7f942a0d373b838396/numpy/random/mtrand.pyi) # To make sure random state was restored properly, all entries should equal the original for entry1, entry2 in zip( - hook_state.rng.get_state(), dummy_hook_state.rng.get_state() + hook_state.rng.get_state(), + dummy_hook_state.rng.get_state(), + strict=True, ): np.testing.assert_array_equal(entry1, entry2) @@ -10212,7 +10235,7 @@ class DistributedTest: # Check that gradients after 10 epochs are the same for orig_param, dummy_param in zip( - ddp_model.parameters(), dummy_ddp_model.parameters() + ddp_model.parameters(), dummy_ddp_model.parameters(), strict=True ): self.assertEqual(orig_param.grad, dummy_param.grad) @@ -10299,7 +10322,9 @@ class DistributedTest: self.assertEqual(out_ddp, out_ddp_static) out_ddp.backward() out_ddp_static.backward() - for p1, p2 in zip(ddp.parameters(), ddp_static.parameters()): + for p1, p2 in zip( + ddp.parameters(), ddp_static.parameters(), strict=True + ): self.assertEqual(p1.grad, p2.grad) @skip_if_lt_x_gpu(2) @@ -10392,7 +10417,9 @@ class DistributedTest: test_model_1._get_ddp_logging_data().get("num_buckets_reduced"), 1 ) - for i, j in zip(base_model.parameters(), test_model_1.parameters()): + for i, j in zip( + base_model.parameters(), test_model_1.parameters(), strict=True + ): self.assertEqual(i, j) diff --git a/torch/testing/_internal/distributed/multi_threaded_pg.py b/torch/testing/_internal/distributed/multi_threaded_pg.py index 1f5d1ef1bdbd..2cc22cb7c23a 100644 --- a/torch/testing/_internal/distributed/multi_threaded_pg.py +++ b/torch/testing/_internal/distributed/multi_threaded_pg.py @@ -457,7 
+457,9 @@ class ProcessLocalGroup(dist.ProcessGroup): ): works = [ self._reduce_scatter_base(output_tensor, input_tensor, opts) - for output_tensor, input_tensor in zip(output_tensors, input_tensors) + for output_tensor, input_tensor in zip( + output_tensors, input_tensors, strict=True + ) ] for work in works[:-1]: work.wait() @@ -467,7 +469,7 @@ class ProcessLocalGroup(dist.ProcessGroup): self, output_tensor_list, input_tensor_list, opts=AllgatherOptions() ): res = None - for o_t, i_t in zip(output_tensor_list, input_tensor_list): + for o_t, i_t in zip(output_tensor_list, input_tensor_list, strict=True): res = self._allgather_base(o_t, i_t) return res diff --git a/torch/testing/_internal/distributed/rpc/dist_autograd_test.py b/torch/testing/_internal/distributed/rpc/dist_autograd_test.py index f7cb2075e373..1d6c7500c5ad 100644 --- a/torch/testing/_internal/distributed/rpc/dist_autograd_test.py +++ b/torch/testing/_internal/distributed/rpc/dist_autograd_test.py @@ -2749,7 +2749,7 @@ class TensorPipeCudaDistAutogradTest(RpcAgentTestFixture): for i in range(len(futs)): local_gradients = [p.grad for p in local_layers[i].parameters()] - for g1, g2 in zip(futs[i].wait(), local_gradients): + for g1, g2 in zip(futs[i].wait(), local_gradients, strict=True): self.assertEqual(g1, g2) rpc.shutdown() diff --git a/torch/testing/_internal/distributed/rpc/examples/parameter_server_test.py b/torch/testing/_internal/distributed/rpc/examples/parameter_server_test.py index f84ba5225c6e..ad0b7fbe2207 100644 --- a/torch/testing/_internal/distributed/rpc/examples/parameter_server_test.py +++ b/torch/testing/_internal/distributed/rpc/examples/parameter_server_test.py @@ -46,7 +46,7 @@ class BatchUpdateParameterServer: @rpc.functions.async_execution def update_and_fetch_model(ps_rref, grads): self = ps_rref.local_value() - for p, g in zip(self.model.parameters(), grads): + for p, g in zip(self.model.parameters(), grads, strict=True): if p.grad is None: p.grad = g else: diff --git a/torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py b/torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py index beb08a25484d..57008aed17db 100644 --- a/torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py +++ b/torch/testing/_internal/distributed/rpc/examples/reinforcement_learning_rpc_test.py @@ -216,7 +216,7 @@ class Agent: returns.insert(0, R) returns = torch.tensor(returns) returns = (returns - returns.mean()) / (returns.std() + self.eps) - for log_prob, R in zip(probs, returns): + for log_prob, R in zip(probs, returns, strict=True): policy_loss.append(-log_prob * R) self.optimizer.zero_grad() policy_loss = torch.cat(policy_loss).sum() diff --git a/torch/testing/_internal/jit_utils.py b/torch/testing/_internal/jit_utils.py index 4bc0738ec2f3..e98d0e482683 100644 --- a/torch/testing/_internal/jit_utils.py +++ b/torch/testing/_internal/jit_utils.py @@ -249,7 +249,7 @@ class JitTestCase(JitCommonTestCase): saved_module_buffer_2.seek(0) code_files_2, _debug_files_2 = extract_files(saved_module_buffer_2) - for a, b in zip(code_files, code_files_2): + for a, b in zip(code_files, code_files_2, strict=True): self.assertMultiLineEqual(a, b) if isinstance(m, torch._C.ScriptModule): @@ -617,7 +617,7 @@ class JitTestCase(JitCommonTestCase): self.assertEqual(outputs, outputs_ge) if inputs_require_grads: self.assertEqual(grads, grads_ge, atol=grad_atol, rtol=grad_rtol) - for g2, g2_ge in zip(grads2, grads2_ge): + for g2, g2_ge in zip(grads2, grads2_ge, 
strict=True): if g2 is None and g2_ge is None: continue self.assertEqual(g2, g2_ge, atol=8e-4, rtol=8e-4) diff --git a/torch/testing/_internal/logging_utils.py b/torch/testing/_internal/logging_utils.py index 1632149c6584..1e1ecf8f4f70 100644 --- a/torch/testing/_internal/logging_utils.py +++ b/torch/testing/_internal/logging_utils.py @@ -228,11 +228,11 @@ def multiple_logs_to_string(module: str, *log_options: str) -> tuple[list[io.Str def tmp_redirect_logs(): loggers = [torch._logging.getArtifactLogger(module, option) for option in log_options] try: - for logger, handler in zip(loggers, handlers): + for logger, handler in zip(loggers, handlers, strict=True): logger.addHandler(handler) yield finally: - for logger, handler in zip(loggers, handlers): + for logger, handler in zip(loggers, handlers, strict=True): logger.removeHandler(handler) def ctx_manager() -> AbstractContextManager[None]: diff --git a/torch/testing/_internal/opinfo/definitions/_masked.py b/torch/testing/_internal/opinfo/definitions/_masked.py index 4ff16b343715..d65fbef658a4 100644 --- a/torch/testing/_internal/opinfo/definitions/_masked.py +++ b/torch/testing/_internal/opinfo/definitions/_masked.py @@ -402,9 +402,9 @@ def sample_inputs_masked_logaddexp(op_info, device, dtype, requires_grad, **kwar make_tensor, dtype=dtype, device=device, requires_grad=requires_grad ) for shape, input_masks, other_masks in zip( - shapes, input_mask_lists, other_mask_lists + shapes, input_mask_lists, other_mask_lists, strict=True ): - for input_mask, other_mask in zip(input_masks, other_masks): + for input_mask, other_mask in zip(input_masks, other_masks, strict=True): yield SampleInput( make_arg(shape), make_arg(shape), diff --git a/torch/testing/_internal/two_tensor.py b/torch/testing/_internal/two_tensor.py index 3a503c741e88..8197829ac7f4 100644 --- a/torch/testing/_internal/two_tensor.py +++ b/torch/testing/_internal/two_tensor.py @@ -78,7 +78,7 @@ class TwoTensor(torch.Tensor): # our two inner tensors return the same value out_flat = [ cls(o_a, o_b) if isinstance(o_a, torch.Tensor) else o_a - for o_a, o_b in zip(out_a_flat, out_b_flat) + for o_a, o_b in zip(out_a_flat, out_b_flat, strict=True) ] out = pytree.tree_unflatten(out_flat, spec) from torch._higher_order_ops.cond import cond_op
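
Note: every updated call site above relies on the `strict=` keyword argument to `zip()`, available since Python 3.10 (PEP 618). The snippet below is a minimal sketch (not part of the patch; the variable names are illustrative only) of the behavior these changes opt into, and of why a few sites deliberately pass `strict=False`:

    # Plain zip() silently truncates to the shortest iterable, which can hide
    # a length mismatch between e.g. parameters and their gradients.
    params = [1, 2, 3]
    grads = [10, 20]

    list(zip(params, grads))                # [(1, 10), (2, 20)] -- mismatch goes unnoticed

    try:
        list(zip(params, grads, strict=True))
    except ValueError as exc:
        # strict=True raises ValueError because the second iterable is shorter,
        # turning the silent truncation into a loud test failure.
        print(exc)

    # strict=False keeps the old truncating behavior but makes the intent explicit,
    # e.g. kwargs.update(zip(arg_names, args, strict=False)) where positional args
    # are expected to cover only a prefix of the argument names.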