Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-21 05:34:18 +08:00
[1/N] Add strict parameter to Python zip calls (#165531)
Add `strict=True/False` to `zip()` calls in the test utilities. `strict=True` is passed wherever the zipped iterables are known to have equal length; the remaining call sites use `strict=False` to keep the previous truncating behavior.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/165531
Approved by: https://github.com/Skylion007
Committed by: PyTorch MergeBot
Parent: 0f0b4bf029
Commit: aaac8cb0f5
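For context (not part of the diff): `zip(..., strict=...)` is the Python 3.10+ mechanism this change relies on. With `strict=True`, a length mismatch between the zipped iterables raises `ValueError` instead of being silently truncated; `strict=False` keeps the historical truncating behavior, presumably why the decorator-style call sites below (e.g. `kwargs.update(zip(arg_names, args))`, where callers may pass fewer positional arguments than there are parameter names) stay non-strict. A minimal sketch with made-up lists:

```python
# Minimal sketch of the zip(strict=...) behavior (Python 3.10+).
# The names below are illustrative only; they do not come from the diff.
arg_names = ["device", "dtype"]
args = ("cuda",)  # caller supplied fewer positional args than there are names

# strict=False (same as the pre-3.10 default): the unmatched name is dropped.
assert dict(zip(arg_names, args, strict=False)) == {"device": "cuda"}

# strict=True: the same mismatch becomes a loud error.
try:
    dict(zip(arg_names, args, strict=True))
except ValueError as exc:
    print(f"caught length mismatch: {exc}")
```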
@@ -92,7 +92,9 @@ def default_tolerances(
                 f"Expected a torch.Tensor or a torch.dtype, but got {type(input)} instead."
             )
     dtype_precisions = dtype_precisions or _DTYPE_PRECISIONS
-    rtols, atols = zip(*[dtype_precisions.get(dtype, (0.0, 0.0)) for dtype in dtypes])
+    rtols, atols = zip(
+        *[dtype_precisions.get(dtype, (0.0, 0.0)) for dtype in dtypes], strict=True
+    )
     return max(rtols), max(atols)
@@ -437,7 +437,7 @@ class TestAutocast(TestCase):
             if isinstance(first, torch.Tensor):
                 return torch.equal(first, second)
             elif isinstance(first, collections.abc.Iterable):
-                return all(compare(f, s) for f, s in zip(first, second))
+                return all(compare(f, s) for f, s in zip(first, second, strict=False))
             else:
                 return first == second
@@ -252,7 +252,7 @@ def tf32_on_and_off(tf32_precision=1e-5, *, only_if=True):
 
         @functools.wraps(f)
         def wrapped(*args, **kwargs):
-            kwargs.update(zip(arg_names, args))
+            kwargs.update(zip(arg_names, args, strict=False))
             cond = torch.cuda.is_tf32_supported() and only_if
             if 'device' in kwargs:
                 cond = cond and (torch.device(kwargs['device']).type == 'cuda')
@@ -325,7 +325,7 @@ def _create_scaling_models_optimizers(device="cuda", optimizer_ctor=torch.optim.
     mod_control = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 8)).to(device=device)
     mod_scaling = torch.nn.Sequential(torch.nn.Linear(8, 8), torch.nn.Linear(8, 8)).to(device=device)
     with torch.no_grad():
-        for c, s in zip(mod_control.parameters(), mod_scaling.parameters()):
+        for c, s in zip(mod_control.parameters(), mod_scaling.parameters(), strict=True):
            s.copy_(c)
 
     kwargs = {"lr": 1.0}
@@ -1153,7 +1153,7 @@ def run_subtests(
     subtest_config_values: list[list[Any]] = [item[1] for item in subtest_config_items]
     for values in itertools.product(*subtest_config_values):
         # Map keyword to chosen value
-        subtest_kwargs = dict(zip(subtest_config_keys, values))
+        subtest_kwargs = dict(zip(subtest_config_keys, values, strict=True))
         with cls_inst.subTest(**subtest_kwargs):
             torch._dynamo.reset()
             test_fn(*test_args, **test_kwargs, **subtest_kwargs)
@@ -157,7 +157,7 @@ def _assert_module_states(
     assert rank0_states is not None  # mypy
     for state in olist[1:]:
         assert state is not None  # mypy
-        for (_, p1), (_, p2) in zip(rank0_states, state):
+        for (_, p1), (_, p2) in zip(rank0_states, state, strict=True):
             assert_fn(p1, p2)
@@ -1135,7 +1135,9 @@ def check_sharded_parity(
     prefixes_to_ignore: tuple[str, ...] = (),
 ):
     for (replicated_name, replicated_param), (sharded_name, sharded_param) in zip(
-        replicated_module.named_parameters(), sharded_module.named_parameters()
+        replicated_module.named_parameters(),
+        sharded_module.named_parameters(),
+        strict=True,
     ):
         clean_sharded_name = sharded_name
         for prefix in prefixes_to_ignore:
@@ -135,7 +135,7 @@ def check_against_reference(self, func, reference_func, output_func, args, kwarg
 
     self.assertEqual(outputs, outputs_test)
     self.assertEqual(grads, grads_test)
-    for g2, g2_test in zip(grads2, grads2_test):
+    for g2, g2_test in zip(grads2, grads2_test, strict=True):
         if g2 is None and g2_test is None:
             continue
         self.assertEqual(g2, g2_test, atol=5e-4, rtol=1e-4)
@@ -449,7 +449,7 @@ def sample_inputs_batch_norm(op_info, device, dtype, requires_grad, **kwargs):
     biases = [None, channels, None]
     is_training = [True, False, False]
 
-    for weight, bias, training in zip(weights, biases, is_training):
+    for weight, bias, training in zip(weights, biases, is_training, strict=True):
         yield SampleInput(
             make_arg(input_shape),
             args=(
@@ -3631,7 +3631,7 @@ class _TestParamsMaxPoolBase:
     def _gen_kwargs(self):
         keys = self.kwargs.keys()
         for values in product(*self.kwargs.values()):
-            yield dict(zip(keys, values))
+            yield dict(zip(keys, values, strict=True))
 
     def gen_input_params(self):
         yield from product(self._gen_shape(), self._gen_kwargs())
@@ -4400,7 +4400,7 @@ def sample_inputs_instance_norm(opinfo, device, dtype, requires_grad, **kwargs):
     weights = [channels, None]
     biases = [None, None]
 
-    for weight_channels, bias_channels in zip(weights, biases):
+    for weight_channels, bias_channels in zip(weights, biases, strict=True):
         running_mean = make_arg_without_requires_grad(channels, low=0)
         running_var = make_arg_without_requires_grad(channels, low=0)
         yield SampleInput(
@@ -11625,7 +11625,7 @@ def reference_searchsorted(sorted_sequence, boundary, out_int32=False, right=Fal
        split_sorter = [sorter[i] if (sorter is not None) else None for i in splits]
 
        split_ret = [np.searchsorted(s_seq, b, side=side, sorter=s_sort)
-                    for (s_seq, b, s_sort) in zip(split_sequence, split_boundary, split_sorter)]
+                    for (s_seq, b, s_sort) in zip(split_sequence, split_boundary, split_sorter, strict=True)]
        split_ret = [i.astype(np.int32) for i in split_ret] if out_int32 else split_ret
        return np.stack(split_ret).reshape(orig_shape)
@@ -91,7 +91,7 @@ def reduced_f32_on_and_off(bf32_precision=1e-2, tf32_precision=1e-5):
 
         @functools.wraps(f)
         def wrapped(*args, **kwargs):
-            kwargs.update(zip(arg_names, args))
+            kwargs.update(zip(arg_names, args, strict=False))
             cond = True
             if "device" in kwargs:
                 cond = cond and (torch.device(kwargs["device"]).type == "cpu")
@@ -1413,7 +1413,7 @@ def module_inputs_torch_nn_L1Loss(module_info, device, dtype, requires_grad, tra
                     forward_input=FunctionInput(make_input((2, 3, 4)),
                                                 make_input((2, 3, 4))),
                     reference_fn=lambda m, p, i, t: 1. / i.numel() * sum((a - b).abs().sum()
-                                                                         for a, b in zip(i, t))),
+                                                                         for a, b in zip(i, t, strict=True))),
         ModuleInput(constructor_input=FunctionInput(),
                     forward_input=FunctionInput(make_input(()), make_input(())),
                     reference_fn=lambda m, p, i, t: 1. / i.numel() * (i - t).abs().sum(),
@@ -2633,7 +2633,7 @@ def get_new_module_tests():
     # add conv padding mode tests:
     for padding_mode, cpp_padding_mode in zip(
             ['reflect', 'circular', 'replicate', 'zeros'],
-            ['torch::kReflect', 'torch::kCircular', 'torch::kReplicate', 'torch::kZeros']):
+            ['torch::kReflect', 'torch::kCircular', 'torch::kReplicate', 'torch::kZeros'], strict=True):
         # conv signature:
         # in_channels, out_channels, kernel_size, stride=1,
         # padding=0, dilation=1, groups=1,
@@ -2848,8 +2848,8 @@ def nllloss_reference(input, target, weight=None, ignore_index=-100,
         return (result, norm)
 
     losses_and_weights = [nll_loss_helper(i, t, weight, ignore_index)
-                          for i, t in zip(input, target)]
-    losses, weights = zip(*losses_and_weights)
+                          for i, t in zip(input, target, strict=True)]
+    losses, weights = zip(*losses_and_weights, strict=True)
     losses_tensor = input.new_tensor(losses)
     if reduction == 'mean':
         return sum(losses_tensor) / sum(weights)
@@ -3268,7 +3268,7 @@ class NNTestCase(TestCase):
         for i in range(output_size):
             param, d_param = self._get_parameters(module)
             # make non grad zeros
-            d_param = [torch.zeros_like(p) if d is None else d for (p, d) in zip(param, d_param)]
+            d_param = [torch.zeros_like(p) if d is None else d for (p, d) in zip(param, d_param, strict=True)]
 
             d_out = torch.zeros_like(output)
             flat_d_out = d_out.view(-1)
@@ -3282,7 +3282,7 @@ class NNTestCase(TestCase):
             d_input = self._backward(module, input, output, d_out)
 
             if jacobian_input:
-                for jacobian_x, d_x in zip(flat_jacobian_input, _iter_tensors(d_input)):
+                for jacobian_x, d_x in zip(flat_jacobian_input, _iter_tensors(d_input), strict=True):
                     jacobian_x[:, i] = d_x.contiguous().view(-1)
             if jacobian_parameters:
                 jacobian_param[:, i] = torch.cat(self._flatten_tensors(d_param), 0)
@@ -3320,7 +3320,7 @@ class NNTestCase(TestCase):
         numerical_t = list(_iter_tensors(numerical))
 
         differences = []
-        for a, n in zip(analytical_t, numerical_t):
+        for a, n in zip(analytical_t, numerical_t, strict=True):
             if a.numel() != 0:
                 differences.append(a.add(n, alpha=-1).abs().max())
         # TODO: compare structure (ensure analytic jacobian has correct shape)
@@ -3528,7 +3528,7 @@ class ModuleTest(TestBase):
         gpu_module = self.constructor(*self.constructor_args).float().cuda()
         cpu_param = test_case._get_parameters(cpu_module)
         gpu_param = test_case._get_parameters(gpu_module)
-        for cpu_p, gpu_p in zip(cpu_param[0], gpu_param[0]):
+        for cpu_p, gpu_p in zip(cpu_param[0], gpu_param[0], strict=True):
             gpu_p.data.copy_(cpu_p)
 
         test_case._zero_grad_input(cpu_input_tuple)
@@ -3549,7 +3549,7 @@ class ModuleTest(TestBase):
         cpu_gradInput = test_case._backward(cpu_module, cpu_input_tuple, cpu_output, cpu_gradOutput)
         gpu_gradInput = test_case._backward(gpu_module, gpu_input_tuple, gpu_output, gpu_gradOutput)
         test_case.assertEqual(cpu_gradInput, gpu_gradInput, atol=self.precision, rtol=0, exact_dtype=False)
-        for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1]):
+        for cpu_d_p, gpu_d_p in zip(cpu_param[1], gpu_param[1], strict=True):
             test_case.assertEqual(cpu_d_p, gpu_d_p, atol=self.precision, rtol=0)
 
         # Run double-backwards on CPU and GPU and compare results
@@ -3575,7 +3575,7 @@ class ModuleTest(TestBase):
                                               gpu_gradOutput,
                                               create_graph=True)
 
-        for cpu_d_i, gpu_d_i in zip(cpu_gradInputs, gpu_gradInputs):
+        for cpu_d_i, gpu_d_i in zip(cpu_gradInputs, gpu_gradInputs, strict=True):
             test_case.assertEqual(cpu_d_i, gpu_d_i, atol=self.precision, rtol=0, exact_dtype=False)
 
         # We mix output into the second backwards computation so that
@@ -3598,7 +3598,7 @@ class ModuleTest(TestBase):
             gpu_input_tuple + (gpu_gradOutput,) + tuple(gpu_module.parameters()),
             retain_graph=True)
         test_case.assertEqual(cpu_gradInput, gpu_gradInput, atol=self.precision, rtol=0, exact_dtype=False)
-        for cpu_d_p, gpu_d_p in zip(cpu_gg, gpu_gg):
+        for cpu_d_p, gpu_d_p in zip(cpu_gg, gpu_gg, strict=True):
            test_case.assertEqual(cpu_d_p, gpu_d_p, atol=self.precision, rtol=0, exact_dtype=False)
 
         self.test_noncontig(test_case, gpu_module, gpu_input_tuple)
@@ -692,7 +692,7 @@ class parametrize(_TestParametrizer):
             return f"{name}{idx}"
 
     def _default_subtest_name(self, idx, values):
-        return '_'.join([self._formatted_str_repr(idx, a, v) for a, v in zip(self.arg_names, values)])
+        return '_'.join([self._formatted_str_repr(idx, a, v) for a, v in zip(self.arg_names, values, strict=True)])
 
     def _get_subtest_name(self, idx, values, explicit_name=None):
         if explicit_name:
@@ -736,7 +736,7 @@ class parametrize(_TestParametrizer):
                 raise RuntimeError(f'Expected # values == # arg names, but got: {len(values)} '
                                    f'values and {len(self.arg_names)} names for test "{test.__name__}"')
 
-            param_kwargs = dict(zip(self.arg_names, values))
+            param_kwargs = dict(zip(self.arg_names, values, strict=True))
 
             test_name = self._get_subtest_name(idx, values, explicit_name=maybe_name)
 
@@ -3696,7 +3696,7 @@ class TestCase(expecttest.TestCase):
             n_compressed_dims, n_plain_dims = size[-1 - dense_dims] // blocksize1, size[-2 - dense_dims] // blocksize0
             blocknnz = nnz // (blocksize0 * blocksize1)
             sparse_tensors = [random_sparse_compressed(n_compressed_dims, n_plain_dims, blocknnz) for _ in range(n_batch)]
-            sparse_tensors_it = map(list, zip(*sparse_tensors))
+            sparse_tensors_it = map(list, zip(*sparse_tensors, strict=True))
 
             values = torch.stack(next(sparse_tensors_it)).reshape(*batch_shape, blocknnz, *blocksize, *dense_size)
             compressed_indices = torch.stack(next(sparse_tensors_it)).reshape(*batch_shape, -1)
@@ -234,7 +234,7 @@ def generate_cct_and_mode(autograd_view_consistency=True):
                 # tensor results to be that of the tensors that alias the input
                 result = func(*args, **kwargs)
                 if isinstance(result, (tuple, list)):
-                    for a, b in zip(rs, result):
+                    for a, b in zip(rs, result, strict=True):
                         a.set_(b)
                 else:
                     rs.set_(result)
@@ -303,7 +303,7 @@ def generate_subclass_choices(flat_args, CCT, cct_mode):
     for which_args_are_wrapped in itertools.product(*subclass_options):
 
         result = [maybe_map(partial(wrap, CCT=CCT, cct_mode=cct_mode), should_wrap_arg, arg)
-                  for should_wrap_arg, arg in zip(which_args_are_wrapped, flat_args)]
+                  for should_wrap_arg, arg in zip(which_args_are_wrapped, flat_args, strict=True)]
         yield result, which_args_are_wrapped
 
 
@@ -539,11 +539,11 @@ def check_forward_ad_formula(op: Callable, args, kwargs, gradcheck_wrapper=None,
             return fwAD.make_dual(primal.detach(), tangent)
         elif is_tensorlist(primal):
             return tuple(fwAD.make_dual(pri.detach(), tang) if tang is not None else pri
-                         for pri, tang in zip(primal, tangent))
+                         for pri, tang in zip(primal, tangent, strict=True))
         return primal
 
     def compute_expected_grad(args, tangent_args, kwargs, tangent_kwargs):
-        op_args = tuple(map(maybe_make_dual, zip(args, tangent_args)))
+        op_args = tuple(map(maybe_make_dual, zip(args, tangent_args, strict=True)))
         op_kwargs = {k: maybe_make_dual((v, tangent_kwargs[k])) for k, v in kwargs.items()}
 
         if gradcheck_wrapper is None:
@@ -572,7 +572,7 @@ def check_forward_ad_formula(op: Callable, args, kwargs, gradcheck_wrapper=None,
         new_tang_args, new_tang_kwargs, \
             which_tang_args_are_wrapped, which_tang_kwargs_are_wrapped = tang_choice
 
-        op_args = tuple(map(maybe_make_dual, zip(new_args, new_tang_args)))
+        op_args = tuple(map(maybe_make_dual, zip(new_args, new_tang_args, strict=True)))
         op_kwargs = {k: maybe_make_dual((v, new_tang_kwargs[k])) for k, v in new_kwargs.items()}
 
         try:
@@ -144,7 +144,9 @@ class CustomTensorPlainOut(torch.Tensor):
         new_out = pytree.tree_unflatten(
             (
                 CustomTensorPlainOut(tensor1, tensor2)
-                for tensor1, tensor2 in zip(out_inner_flat_1, out_inner_flat_2)
+                for tensor1, tensor2 in zip(
+                    out_inner_flat_1, out_inner_flat_2, strict=True
+                )
             ),
             spec,
         )
@@ -60,7 +60,7 @@ class VerifyStateDictMixin:
         dist_osd: dict[str, Any],
     ) -> None:
         params = list(chain.from_iterable(g["params"] for g in optim.param_groups))
-        param_pid_mapping = dict(zip(params, range(len(params))))
+        param_pid_mapping = dict(zip(params, range(len(params)), strict=True))
         fqn_pid_mapping = {}
         for fqn, param in model.named_parameters():
             pid = param_pid_mapping[param]
@@ -90,7 +90,7 @@ class VerifyStateDictMixin:
             dist_osd[_PG] = [new_pg]
 
         self.assertEqual(len(osd[_PG]), len(dist_osd[_PG]))
-        for group, dist_group in zip(osd[_PG], dist_osd[_PG]):
+        for group, dist_group in zip(osd[_PG], dist_osd[_PG], strict=True):
             self.assertEqual(len(group), len(dist_group))
             for key, value in group.items():
                 # Below doesn't work because param_groups can have None
@@ -238,7 +238,9 @@ class Trainer:
         sparse_microbatch = torch.split(sparse_features, 2)
         values_microbatch = torch.split(values, 2)
         batches = []
-        for d, s, v in zip(dense_microbatch, sparse_microbatch, values_microbatch):
+        for d, s, v in zip(
+            dense_microbatch, sparse_microbatch, values_microbatch, strict=True
+        ):
             feature_set = FeatureSet(dense_features=d, sparse_features=s, values=v)
             batches.append(feature_set)
 
@@ -678,7 +678,7 @@ class DistributedTest:
             # Verify buffers across ranks.
             m1_buffers = list(m1.buffers())
             m2_buffers = list(m2.buffers())
-            for buf1, buf2 in zip(m1_buffers, m2_buffers):
+            for buf1, buf2 in zip(m1_buffers, m2_buffers, strict=True):
                 gathered_bufs = [
                     torch.empty_like(buf1) for _ in range(dist.get_world_size())
                 ]
@@ -3045,7 +3045,7 @@ class DistributedTest:
             curr_values = master_values if rank == src else worker_values
             tensors = [
                 _build_tensor(src + 1, val, dtype=dtype)
-                for dtype, val in zip(dtypes, curr_values)
+                for dtype, val in zip(dtypes, curr_values, strict=True)
             ]
             if cuda:
                 tensors = [t.cuda(rank_to_GPU[rank][0]) for t in tensors]
@@ -3066,7 +3066,9 @@ class DistributedTest:
             )
             expected_tensors = [
                 _build_tensor(src + 1, expected_value, dtype=dtype)
-                for dtype, expected_value in zip(dtypes, expected_values)
+                for dtype, expected_value in zip(
+                    dtypes, expected_values, strict=True
+                )
             ]
             self.assertEqual(tensors, expected_tensors)
 
@@ -3338,7 +3340,7 @@ class DistributedTest:
             )
             if rank == dest:
                 expected_tensors = [_build_tensor(dest + 1, i) for i in group]
-                for t1, t2 in zip(tensors, expected_tensors):
+                for t1, t2 in zip(tensors, expected_tensors, strict=True):
                     self.assertEqual(t1, t2)
 
             self._barrier()
@@ -3440,7 +3442,7 @@ class DistributedTest:
                 expected_tensors = [
                     _build_tensor(dest + 1, i, dtype=dtype) for i in group
                 ]
-                for t1, t2 in zip(tensors, expected_tensors):
+                for t1, t2 in zip(tensors, expected_tensors, strict=True):
                     self.assertEqual(t1, t2)
 
             self._barrier()
@@ -3624,8 +3626,8 @@ class DistributedTest:
                 tensor_shapes=tensor_shapes,
             )
 
-            for l1, l2 in zip(output_tensor_lists, expected_tensors):
-                for t1, t2 in zip(l1, l2):
+            for l1, l2 in zip(output_tensor_lists, expected_tensors, strict=True):
+                for t1, t2 in zip(l1, l2, strict=True):
                     if not torch.equal(t1, t2):
                         return False
             return True
@@ -3824,7 +3826,7 @@ class DistributedTest:
             ]
             out_tensors = [t.cuda(rank_to_GPU[rank][0]) for t in out_tensors]
             dist.all_to_all(out_tensors, in_tensors, group=group_id)
-            for t1, t2 in zip(out_tensors, expected_tensors):
+            for t1, t2 in zip(out_tensors, expected_tensors, strict=True):
                 self.assertEqual(t1, t2)
             self._barrier()
 
@@ -4203,7 +4205,7 @@ class DistributedTest:
 
         def _assert_equal_param(self, param_gpu, param_DDP):
            self.assertEqual(len(param_gpu), len(param_DDP))
-            for p_gpu, p_DDP in zip(param_gpu, param_DDP):
+            for p_gpu, p_DDP in zip(param_gpu, param_DDP, strict=True):
                 self.assertEqual(p_gpu, p_DDP)
 
         def _test_DDP_niter(
@@ -4618,6 +4620,7 @@ class DistributedTest:
             for hook_param, allreduce_param in zip(
                 ddp_model_with_optimizer_hook.parameters(),
                 ddp_model_with_no_hook.parameters(),
+                strict=True,
             ):
                 self.assertEqual(hook_param, allreduce_param)
 
@@ -4649,6 +4652,7 @@ class DistributedTest:
             for hook_param, allreduce_param in zip(
                 ddp_model_with_optimizer_hook.parameters(),
                 ddp_model_with_no_hook.parameters(),
+                strict=True,
             ):
                 self.assertEqual(hook_param, allreduce_param)
 
@@ -4825,7 +4829,9 @@ class DistributedTest:
                 optimizer_kwargs=optim_kwargs,
             )
 
-            for p1, p2 in zip(model.parameters(), model_optim_in_bwd.parameters()):
+            for p1, p2 in zip(
+                model.parameters(), model_optim_in_bwd.parameters(), strict=True
+            ):
                 self.assertEqual(p1, p2, "Parameters not initially equal!")
             # Enable determinism in cudnn operators
             with torch.backends.cudnn.flags(
@@ -4843,7 +4849,9 @@ class DistributedTest:
                     inp
                 ).sum().backward()  # runs optimizer as well
                 for p1, p2 in zip(
-                    model.parameters(), model_optim_in_bwd.parameters()
+                    model.parameters(),
+                    model_optim_in_bwd.parameters(),
+                    strict=True,
                 ):
                     self.assertEqual(
                         p1, p2, f"Params not equal at iteration {i}"
@@ -5323,7 +5331,9 @@ class DistributedTest:
                 # sync grads
                 step_model(ddp_model, ddp_input, ddp_target)
 
-                for i, j in zip(model.parameters(), ddp_model.parameters()):
+                for i, j in zip(
+                    model.parameters(), ddp_model.parameters(), strict=True
+                ):
                     if not i.requires_grad:
                         continue
                     if iteration % 2 == 0:
@@ -5562,6 +5572,7 @@ class DistributedTest:
             for i, j in zip(
                 ddp_model_grad_not_view.parameters(),
                 ddp_model_grad_is_view.parameters(),
+                strict=True,
             ):
                 self.assertEqual(i, j)
 
@@ -5667,7 +5678,9 @@ class DistributedTest:
                     target,
                 )
                 for p1, p2 in zip(
-                    net.parameters(), net_using_post_localSGD_opt.parameters()
+                    net.parameters(),
+                    net_using_post_localSGD_opt.parameters(),
+                    strict=True,
                 ):
                     self.assertEqual(p1.data, p2.data)
 
@@ -6817,7 +6830,7 @@ class DistributedTest:
             # they are the same as new_model on rank_to_broadcast.
             if rank == rank_to_broadcast:
                 expected_states = new_model.state_dict().values()
-                for t, expected in zip(net_module_states, expected_states):
+                for t, expected in zip(net_module_states, expected_states, strict=True):
                     self.assertEqual(t, expected)
 
         @skip_if_lt_x_gpu(2)
@@ -7134,7 +7147,9 @@ class DistributedTest:
 
             # Validate model state dicts are equal
             for (_, local_tensor), (_, dist_tensor) in zip(
-                local_model.state_dict().items(), net.module.state_dict().items()
+                local_model.state_dict().items(),
+                net.module.state_dict().items(),
+                strict=True,
             ):
                 self.assertEqual(local_tensor, dist_tensor)
 
@@ -7722,13 +7737,17 @@ class DistributedTest:
             # materialized param grad is not touched by DDP, so its grad should
             # be the same as if running locally.
             for materialized_param, local_param in zip(
-                ddp.module.fc2.parameters(), local_model.fc2.parameters()
+                ddp.module.fc2.parameters(),
+                local_model.fc2.parameters(),
+                strict=True,
             ):
                 self.assertEqual(materialized_param.grad, local_param.grad)
 
             # fc1 parameter grad should still be different, due to allreduce.
             for synced_param, local_param in zip(
-                ddp.module.fc1.parameters(), local_model.fc1.parameters()
+                ddp.module.fc1.parameters(),
+                local_model.fc1.parameters(),
+                strict=True,
             ):
                 self.assertFalse(synced_param.grad == local_param.grad)
 
@@ -8581,7 +8600,7 @@ class DistributedTest:
 
             # Verify grads are the same
             for local_param, dist_param in zip(
-                local_net.parameters(), net.parameters()
+                local_net.parameters(), net.parameters(), strict=True
             ):
                 local_grad = local_param.grad
                 dist_grad = dist_param.grad
@@ -8631,7 +8650,7 @@ class DistributedTest:
             torch._C._functions.UndefinedGrad()(out).backward()
             torch._C._functions.UndefinedGrad()(local_out).backward()
             for (dist_param_name, dist_param), (local_param_name, local_param) in zip(
-                net.named_parameters(), local_net.named_parameters()
+                net.named_parameters(), local_net.named_parameters(), strict=True
             ):
                 dist_grad = dist_param.grad
                 local_grad = local_param.grad
@@ -8689,7 +8708,9 @@ class DistributedTest:
             self.assertTrue(
                 static_model._get_ddp_logging_data().get("has_rebuilt_buckets", 0)
             )
-            for i, j in zip(base_model.parameters(), static_model.parameters()):
+            for i, j in zip(
+                base_model.parameters(), static_model.parameters(), strict=True
+            ):
                 self.assertEqual(i, j)
 
         @require_backend_is_available({"gloo"})
@@ -9297,7 +9318,7 @@ class DistributedTest:
                 loss_static.backward()
                 self._model_step(model_static_graph)
                 for p, p_static in zip(
-                    model.parameters(), model_static_graph.parameters()
+                    model.parameters(), model_static_graph.parameters(), strict=True
                 ):
                     self.assertEqual(p, p_static)
 
@@ -9974,7 +9995,7 @@ class DistributedTest:
                     p.grad.data = p.grad / iters
 
             for p_ddp, p_local in zip(
-                model.parameters(), local_model.parameters()
+                model.parameters(), local_model.parameters(), strict=True
             ):
                 self.assertTrue(
                     torch.allclose(p_ddp.grad, p_local.grad),
@@ -10191,7 +10212,9 @@ class DistributedTest:
             # (refer to https://github.com/numpy/numpy/blob/266aad7478bc7fbcc55eea7f942a0d373b838396/numpy/random/mtrand.pyi)
             # To make sure random state was restored properly, all entries should equal the original
             for entry1, entry2 in zip(
-                hook_state.rng.get_state(), dummy_hook_state.rng.get_state()
+                hook_state.rng.get_state(),
+                dummy_hook_state.rng.get_state(),
+                strict=True,
             ):
                 np.testing.assert_array_equal(entry1, entry2)
 
@@ -10212,7 +10235,7 @@ class DistributedTest:
 
            # Check that gradients after 10 epochs are the same
            for orig_param, dummy_param in zip(
-                ddp_model.parameters(), dummy_ddp_model.parameters()
+                ddp_model.parameters(), dummy_ddp_model.parameters(), strict=True
            ):
                self.assertEqual(orig_param.grad, dummy_param.grad)
 
@@ -10299,7 +10322,9 @@ class DistributedTest:
             self.assertEqual(out_ddp, out_ddp_static)
             out_ddp.backward()
             out_ddp_static.backward()
-            for p1, p2 in zip(ddp.parameters(), ddp_static.parameters()):
+            for p1, p2 in zip(
+                ddp.parameters(), ddp_static.parameters(), strict=True
+            ):
                 self.assertEqual(p1.grad, p2.grad)
 
         @skip_if_lt_x_gpu(2)
@@ -10392,7 +10417,9 @@ class DistributedTest:
                 test_model_1._get_ddp_logging_data().get("num_buckets_reduced"), 1
             )
 
-            for i, j in zip(base_model.parameters(), test_model_1.parameters()):
+            for i, j in zip(
+                base_model.parameters(), test_model_1.parameters(), strict=True
+            ):
                 self.assertEqual(i, j)
 
@@ -457,7 +457,9 @@ class ProcessLocalGroup(dist.ProcessGroup):
     ):
         works = [
             self._reduce_scatter_base(output_tensor, input_tensor, opts)
-            for output_tensor, input_tensor in zip(output_tensors, input_tensors)
+            for output_tensor, input_tensor in zip(
+                output_tensors, input_tensors, strict=True
+            )
         ]
         for work in works[:-1]:
             work.wait()
@@ -467,7 +469,7 @@ class ProcessLocalGroup(dist.ProcessGroup):
         self, output_tensor_list, input_tensor_list, opts=AllgatherOptions()
     ):
         res = None
-        for o_t, i_t in zip(output_tensor_list, input_tensor_list):
+        for o_t, i_t in zip(output_tensor_list, input_tensor_list, strict=True):
             res = self._allgather_base(o_t, i_t)
         return res
 
@@ -2749,7 +2749,7 @@ class TensorPipeCudaDistAutogradTest(RpcAgentTestFixture):
 
         for i in range(len(futs)):
             local_gradients = [p.grad for p in local_layers[i].parameters()]
-            for g1, g2 in zip(futs[i].wait(), local_gradients):
+            for g1, g2 in zip(futs[i].wait(), local_gradients, strict=True):
                 self.assertEqual(g1, g2)
 
         rpc.shutdown()
@@ -46,7 +46,7 @@ class BatchUpdateParameterServer:
     @rpc.functions.async_execution
     def update_and_fetch_model(ps_rref, grads):
         self = ps_rref.local_value()
-        for p, g in zip(self.model.parameters(), grads):
+        for p, g in zip(self.model.parameters(), grads, strict=True):
             if p.grad is None:
                 p.grad = g
             else:
@@ -216,7 +216,7 @@ class Agent:
             returns.insert(0, R)
         returns = torch.tensor(returns)
         returns = (returns - returns.mean()) / (returns.std() + self.eps)
-        for log_prob, R in zip(probs, returns):
+        for log_prob, R in zip(probs, returns, strict=True):
             policy_loss.append(-log_prob * R)
         self.optimizer.zero_grad()
         policy_loss = torch.cat(policy_loss).sum()
@@ -249,7 +249,7 @@ class JitTestCase(JitCommonTestCase):
             saved_module_buffer_2.seek(0)
             code_files_2, _debug_files_2 = extract_files(saved_module_buffer_2)
 
-            for a, b in zip(code_files, code_files_2):
+            for a, b in zip(code_files, code_files_2, strict=True):
                 self.assertMultiLineEqual(a, b)
 
             if isinstance(m, torch._C.ScriptModule):
@@ -617,7 +617,7 @@ class JitTestCase(JitCommonTestCase):
             self.assertEqual(outputs, outputs_ge)
             if inputs_require_grads:
                 self.assertEqual(grads, grads_ge, atol=grad_atol, rtol=grad_rtol)
-                for g2, g2_ge in zip(grads2, grads2_ge):
+                for g2, g2_ge in zip(grads2, grads2_ge, strict=True):
                     if g2 is None and g2_ge is None:
                         continue
                     self.assertEqual(g2, g2_ge, atol=8e-4, rtol=8e-4)
@@ -228,11 +228,11 @@ def multiple_logs_to_string(module: str, *log_options: str) -> tuple[list[io.Str
     def tmp_redirect_logs():
         loggers = [torch._logging.getArtifactLogger(module, option) for option in log_options]
         try:
-            for logger, handler in zip(loggers, handlers):
+            for logger, handler in zip(loggers, handlers, strict=True):
                 logger.addHandler(handler)
             yield
         finally:
-            for logger, handler in zip(loggers, handlers):
+            for logger, handler in zip(loggers, handlers, strict=True):
                 logger.removeHandler(handler)
 
     def ctx_manager() -> AbstractContextManager[None]:
@@ -402,9 +402,9 @@ def sample_inputs_masked_logaddexp(op_info, device, dtype, requires_grad, **kwar
         make_tensor, dtype=dtype, device=device, requires_grad=requires_grad
     )
     for shape, input_masks, other_masks in zip(
-        shapes, input_mask_lists, other_mask_lists
+        shapes, input_mask_lists, other_mask_lists, strict=True
     ):
-        for input_mask, other_mask in zip(input_masks, other_masks):
+        for input_mask, other_mask in zip(input_masks, other_masks, strict=True):
             yield SampleInput(
                 make_arg(shape),
                 make_arg(shape),
@@ -78,7 +78,7 @@ class TwoTensor(torch.Tensor):
         # our two inner tensors return the same value
         out_flat = [
             cls(o_a, o_b) if isinstance(o_a, torch.Tensor) else o_a
-            for o_a, o_b in zip(out_a_flat, out_b_flat)
+            for o_a, o_b in zip(out_a_flat, out_b_flat, strict=True)
         ]
         out = pytree.tree_unflatten(out_flat, spec)
         from torch._higher_order_ops.cond import cond_op