[InductorBench] Fix accuracy validation logic for MPS (#156385)

Since the MPS backend does not support fp64, validate against float32 instead.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/156385
Approved by: https://github.com/Skylion007
Author: Nikita Shulga
Date: 2025-06-18 18:01:25 -07:00
Committed by: PyTorch MergeBot
parent ce8180a61d
commit 242eb19c83
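
For context: the MPS backend cannot materialize float64 tensors at all, so the usual fp64 golden reference simply cannot be produced on that device. A quick way to observe this, as a hedged sketch (behavior and exact error text may vary across PyTorch builds):

    import torch

    if torch.backends.mps.is_available():
        try:
            # Metal has no float64 support, so this is expected to raise.
            torch.ones(2, device="mps", dtype=torch.float64)
        except Exception as e:
            print(e)  # e.g. "...MPS framework doesn't support float64..."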


@@ -2037,7 +2037,8 @@ class BenchmarkRunner:
     ):
         """
         Checks accuracy.
-        1) Collect the outputs with fp64 datatype. This is useful for error checking.
+        1) Collect the outputs with reference datatype. This is useful for error checking.
+           (reference datatype is fp64 if supported, otherwise fp32)
         2) Checks if eager itself has variations.
         """
         start_stats = get_dynamo_stats()
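
The two steps in this docstring amount to: pick the widest dtype the device supports, collect golden outputs in that dtype, and separately verify that eager mode is deterministic. A standalone sketch of that flow, assuming a model that returns a single tensor (`pick_ref_dtype` and `eager_is_deterministic` are illustrative names, not the benchmark's API):

    import torch

    def pick_ref_dtype(device: str) -> torch.dtype:
        # Mirrors the diff below: MPS has no float64, so fall back to fp32.
        return torch.float64 if device != "mps" else torch.float32

    def eager_is_deterministic(model, inputs) -> bool:
        # Step 2: run eager twice; if outputs differ, later accuracy
        # failures cannot be attributed to the compiler alone.
        with torch.no_grad():
            first = model(*inputs)
            second = model(*inputs)
        return torch.allclose(first, second)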
@@ -2091,35 +2092,38 @@ class BenchmarkRunner:
         if self.args.backend == "torchao":
             return record_status("pass_due_to_skip", dynamo_start_stats=start_stats)
+        ref_dtype = torch.float64 if current_device != "mps" else torch.float32
         with self.pick_grad(name, self.args.training):
             # Collect the fp64 reference outputs to be used later for accuracy checking.
-            fp64_outputs = None
-            model_fp64 = None
-            inputs_fp64 = None
+            ref_outputs = None
+            model_ref = None
+            inputs_ref = None
             try:
-                model_fp64, inputs_fp64 = cast_to_fp64(
+                model_ref, inputs_ref = cast_to(
+                    ref_dtype,
                     self.deepcopy_and_maybe_parallelize(model),
                     clone_inputs(example_inputs),
                 )
-                self.init_optimizer(name, current_device, model_fp64.parameters())
-                fp64_outputs = self.run_n_iterations(
-                    model_fp64, inputs_fp64, self.model_iter_fn
+                self.init_optimizer(name, current_device, model_ref.parameters())
+                ref_outputs = self.run_n_iterations(
+                    model_ref, inputs_ref, self.model_iter_fn
                 )
-                fp64_outputs = tree_map(
-                    lambda x: x.to(torch.float64)
+                ref_outputs = tree_map(
+                    lambda x: x.to(ref_dtype)
                     if isinstance(x, torch.Tensor) and x.is_floating_point()
                     else x,
-                    fp64_outputs,
+                    ref_outputs,
                 )
             except Exception:
                 log.warning(
-                    "fp64 golden ref were not generated for %s. Setting accuracy check to cosine",
+                    "%s golden ref were not generated for %s. Setting accuracy check to cosine",
+                    ref_dtype.__name__,
                     name,
                 )
                 self.args.cosine = True
-                fp64_outputs = None
+                ref_outputs = None
             finally:
-                del model_fp64, inputs_fp64
+                del model_ref, inputs_ref
                 empty_gpu_cache(current_device)

         tolerance, cos_similarity = self.get_tolerance_and_cosine_flag(
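
This hunk swaps the hardcoded `cast_to_fp64(...)` call for a generic `cast_to(ref_dtype, ...)`. The helper itself is defined elsewhere in the harness and not shown in the diff; a plausible sketch consistent with the call site above, assuming the standard pytree `tree_map` (illustrative, not the exact implementation):

    import torch
    from torch.utils._pytree import tree_map

    def cast_to(dtype, model, inputs):
        # Move the model and every floating-point tensor input to the
        # reference dtype; integer and non-tensor inputs stay untouched.
        model = model.to(dtype)
        inputs = tree_map(
            lambda x: x.to(dtype)
            if isinstance(x, torch.Tensor) and x.is_floating_point()
            else x,
            inputs,
        )
        return model, inputs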
@@ -2254,12 +2258,12 @@ class BenchmarkRunner:
             ):
                 correct_result = process_fn(correct_result)
                 new_result = process_fn(new_result)
-                fp64_outputs = process_fn(fp64_outputs)
+                ref_outputs = process_fn(ref_outputs)
             if not same(
                 correct_result,
                 new_result,
-                fp64_outputs,
+                ref_outputs,
                 equal_nan=self.equal_nan,
                 use_larger_multiplier_for_smaller_tensor=self.use_larger_multiplier_for_smaller_tensor(
                     name
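
Downstream, `ref_outputs` is passed as the golden-reference argument of `same(...)` from `torch._dynamo.utils`, which uses the higher-precision run as a tiebreaker: the compiled result passes when its error against the golden reference stays within a small multiple of eager's own error. A simplified, standalone sketch of that idea (the real `same` also handles nested structures, dtypes, and cosine similarity; `three_way_close` and `multiplier` here are illustrative):

    import torch

    def three_way_close(eager, compiled, golden, multiplier=2.0):
        # Compare in fp64 on CPU so the check itself adds no device noise.
        e = eager.detach().cpu().double()
        c = compiled.detach().cpu().double()
        g = golden.detach().cpu().double()
        eager_err = torch.sqrt(torch.mean((e - g) ** 2))
        compiled_err = torch.sqrt(torch.mean((c - g) ** 2))
        return bool(compiled_err <= multiplier * eager_err)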