From 53f6cc75296bf04c5e48e427e0ceb495c57c76b5 Mon Sep 17 00:00:00 2001
From: bobrenjc93 <bobren@meta.com>
Date: Sun, 5 Oct 2025 23:02:00 -0700
Subject: [PATCH] [torchfuzz] make ops_fuzzer deterministic (#164694)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/164694
Approved by: https://github.com/pianpwk
ghstack dependencies: #164432, #164434, #164514, #164646, #164647, #164649, #164687, #164688, #164693
---
 tools/experimental/torchfuzz/ops_fuzzer.py    |  5 +-
 .../torchfuzz/test_determinism.py             | 51 +++++++++++++++----
 2 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/tools/experimental/torchfuzz/ops_fuzzer.py b/tools/experimental/torchfuzz/ops_fuzzer.py
index 577fdfb69d57..3ff17bb5b559 100644
--- a/tools/experimental/torchfuzz/ops_fuzzer.py
+++ b/tools/experimental/torchfuzz/ops_fuzzer.py
@@ -294,11 +294,14 @@ def fuzz_op(
     available_operators = _get_template_filtered_operators(template, supported_ops)
 
     # Filter operators that can produce the target spec
+    # IMPORTANT: iterate in sorted-name order so the pre-shuffle list does not depend on dict insertion order
     compatible_ops = []
-    for op_name, operator in available_operators.items():
+    for op_name in sorted(available_operators.keys()):
+        operator = available_operators[op_name]
         if operator.can_produce(target_spec):
             compatible_ops.append((op_name, operator))
 
+    # Shuffle with seeded RNG (caller seeds random), but from a deterministic base order
     random.shuffle(compatible_ops)
 
     if not compatible_ops:
diff --git a/tools/experimental/torchfuzz/test_determinism.py b/tools/experimental/torchfuzz/test_determinism.py
index 361e454df42d..7c621d2e0cf2 100644
--- a/tools/experimental/torchfuzz/test_determinism.py
+++ b/tools/experimental/torchfuzz/test_determinism.py
@@ -8,7 +8,7 @@ from pathlib import Path
 
 def run_fuzzer_with_seed(seed):
     """Run the fuzzer with a specific seed and return the generated code."""
-    cmd = [sys.executable, "fuzzer.py", "--seed", str(seed)]
+    cmd = [sys.executable, "fuzzer.py", "--seed", str(seed), "--template", "unbacked"]
 
     # Clear the output directory first
     torchfuzz_dir = Path("/tmp/torchfuzz")
@@ -20,26 +20,57 @@ def run_fuzzer_with_seed(seed):
         cmd, capture_output=True, text=True, cwd=Path(__file__).parent
     )
 
+    # Do not bail out on a non-zero exit code: log it, then still try to recover the generated program below.
     if result.returncode != 0:
         print(f"Fuzzer failed with return code {result.returncode}")
         print(f"stdout: {result.stdout}")
         print(f"stderr: {result.stderr}")
-        return None
 
-    # Find the generated Python file in /tmp/torchfuzz/
-    py_files = list(torchfuzz_dir.glob("fuzz_*.py"))
-    if not py_files:
-        print("No Python files generated in /tmp/torchfuzz/")
-        return None
+    # Prefer to compare the exact Program Source that the fuzzer printed in stdout,
+    # which reflects the executed code even if files are overwritten between runs.
+    src_block = None
+    lines = result.stdout.splitlines()
+    for i, line in enumerate(lines):
+        if line.strip() == "=== Program Source ===":
+            # Collect lines until the next line that starts with "===" or the end of stdout
+            j = i + 1
+            block_lines = []
+            while j < len(lines) and not lines[j].startswith("==="):
+                block_lines.append(lines[j])
+                j += 1
+            src_block = "\n".join(block_lines)
+            break
 
-    # Read the content of the generated file
-    with open(py_files[0]) as f:
+    if src_block:
+        return src_block
+
+    # Fallback: parse the exact path the fuzzer ran from stdout: "Running: /tmp/torchfuzz/fuzz_XXXX.py"
+    path = None
+    for line in lines:
+        if line.startswith("Running: ") and line.strip().endswith(".py"):
+            path = line.split("Running: ", 1)[1].strip()
+            break
+
+    if path is None:
+        # Last resort (stdout named no path): pick the most recently modified fuzz_*.py in /tmp/torchfuzz
+        py_files = sorted(
+            torchfuzz_dir.glob("fuzz_*.py"),
+            key=lambda p: p.stat().st_mtime,
+            reverse=True,
+        )
+        if not py_files:
+            print("No Python files generated in /tmp/torchfuzz/")
+            return None
+        path = str(py_files[0])
+
+    # Read the content of the generated file that was actually executed
+    with open(path) as f:
         return f.read()
 
 
 def test_deterministic_output():
     """Test that the fuzzer produces identical output for the same seed."""
-    seed = 115306  # Use the seed mentioned in the user's issue
+    seed = 13157  # Use the seed mentioned in the user's issue
     num_runs = 3
 
     outputs = []