[InductorBench] Fix accuracy validation logic for MPS (#156385)

Since the MPS backend does not support fp64, validate against float32 instead.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/156385
Approved by: https://github.com/Skylion007
Author: Nikita Shulga
Date: 2025-06-18 18:01:25 -07:00
Committed by: PyTorch MergeBot
parent ce8180a61d
commit 242eb19c83
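
For context: the MPS backend cannot materialize float64 tensors at all, so the usual fp64 golden reference simply cannot be produced on that device. A quick way to observe this, as a hedged sketch (behavior and exact error text may vary across PyTorch builds):

    import torch

    if torch.backends.mps.is_available():
        try:
            # Metal has no float64 support, so this is expected to raise.
            torch.ones(2, device="mps", dtype=torch.float64)
        except Exception as e:
            print(e)  # e.g. "...MPS framework doesn't support float64..."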


@@ -2037,7 +2037,8 @@ class BenchmarkRunner:
     ):
         """
         Checks accuracy.
-        1) Collect the outputs with fp64 datatype. This is useful for error checking.
+        1) Collect the outputs with reference datatype. This is useful for error checking.
+           (reference datatype is fp64 if supported, otherwise fp32)
         2) Checks if eager itself has variations.
         """
         start_stats = get_dynamo_stats()
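
The two steps in this docstring amount to: pick the widest dtype the device supports, collect golden outputs in that dtype, and separately verify that eager mode is deterministic. A standalone sketch of that flow, assuming a model that returns a single tensor (`pick_ref_dtype` and `eager_is_deterministic` are illustrative names, not the benchmark's API):

    import torch

    def pick_ref_dtype(device: str) -> torch.dtype:
        # Mirrors the diff below: MPS has no float64, so fall back to fp32.
        return torch.float64 if device != "mps" else torch.float32

    def eager_is_deterministic(model, inputs) -> bool:
        # Step 2: run eager twice; if outputs differ, later accuracy
        # failures cannot be attributed to the compiler alone.
        with torch.no_grad():
            first = model(*inputs)
            second = model(*inputs)
        return torch.allclose(first, second)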
@@ -2091,35 +2092,38 @@ class BenchmarkRunner:
         if self.args.backend == "torchao":
             return record_status("pass_due_to_skip", dynamo_start_stats=start_stats)
+        ref_dtype = torch.float64 if current_device != "mps" else torch.float32
         with self.pick_grad(name, self.args.training):
             # Collect the fp64 reference outputs to be used later for accuracy checking.
-            fp64_outputs = None
-            model_fp64 = None
-            inputs_fp64 = None
+            ref_outputs = None
+            model_ref = None
+            inputs_ref = None
             try:
-                model_fp64, inputs_fp64 = cast_to_fp64(
+                model_ref, inputs_ref = cast_to(
+                    ref_dtype,
                     self.deepcopy_and_maybe_parallelize(model),
                     clone_inputs(example_inputs),
                 )
-                self.init_optimizer(name, current_device, model_fp64.parameters())
-                fp64_outputs = self.run_n_iterations(
-                    model_fp64, inputs_fp64, self.model_iter_fn
+                self.init_optimizer(name, current_device, model_ref.parameters())
+                ref_outputs = self.run_n_iterations(
+                    model_ref, inputs_ref, self.model_iter_fn
                 )
-                fp64_outputs = tree_map(
-                    lambda x: x.to(torch.float64)
+                ref_outputs = tree_map(
+                    lambda x: x.to(ref_dtype)
                     if isinstance(x, torch.Tensor) and x.is_floating_point()
                     else x,
-                    fp64_outputs,
+                    ref_outputs,
                 )
             except Exception:
                 log.warning(
-                    "fp64 golden ref were not generated for %s. Setting accuracy check to cosine",
+                    "%s golden ref were not generated for %s. Setting accuracy check to cosine",
+                    ref_dtype.__name__,
                     name,
                 )
                 self.args.cosine = True
-                fp64_outputs = None
+                ref_outputs = None
             finally:
-                del model_fp64, inputs_fp64
+                del model_ref, inputs_ref
                 empty_gpu_cache(current_device)

         tolerance, cos_similarity = self.get_tolerance_and_cosine_flag(
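
This hunk swaps the hardcoded `cast_to_fp64(...)` call for a generic `cast_to(ref_dtype, ...)`. The helper itself is defined elsewhere in the harness and not shown in the diff; a plausible sketch consistent with the call site above, assuming the standard pytree `tree_map` (illustrative, not the exact implementation):

    import torch
    from torch.utils._pytree import tree_map

    def cast_to(dtype, model, inputs):
        # Move the model and every floating-point tensor input to the
        # reference dtype; integer and non-tensor inputs stay untouched.
        model = model.to(dtype)
        inputs = tree_map(
            lambda x: x.to(dtype)
            if isinstance(x, torch.Tensor) and x.is_floating_point()
            else x,
            inputs,
        )
        return model, inputs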
@@ -2254,12 +2258,12 @@ class BenchmarkRunner:
             ):
                 correct_result = process_fn(correct_result)
                 new_result = process_fn(new_result)
-                fp64_outputs = process_fn(fp64_outputs)
+                ref_outputs = process_fn(ref_outputs)
             if not same(
                 correct_result,
                 new_result,
-                fp64_outputs,
+                ref_outputs,
                 equal_nan=self.equal_nan,
                 use_larger_multiplier_for_smaller_tensor=self.use_larger_multiplier_for_smaller_tensor(
                     name
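
Downstream, `ref_outputs` is passed as the golden-reference argument of `same(...)` from `torch._dynamo.utils`, which uses the higher-precision run as a tiebreaker: the compiled result passes when its error against the golden reference stays within a small multiple of eager's own error. A simplified, standalone sketch of that idea (the real `same` also handles nested structures, dtypes, and cosine similarity; `three_way_close` and `multiplier` here are illustrative):

    import torch

    def three_way_close(eager, compiled, golden, multiplier=2.0):
        # Compare in fp64 on CPU so the check itself adds no device noise.
        e = eager.detach().cpu().double()
        c = compiled.detach().cpu().double()
        g = golden.detach().cpu().double()
        eager_err = torch.sqrt(torch.mean((e - g) ** 2))
        compiled_err = torch.sqrt(torch.mean((c - g) ** 2))
        return bool(compiled_err <= multiplier * eager_err)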