From 53f6cc75296bf04c5e48e427e0ceb495c57c76b5 Mon Sep 17 00:00:00 2001 From: bobrenjc93 Date: Sun, 5 Oct 2025 23:02:00 -0700 Subject: [PATCH] [torchfuzz] make ops_fuzzer deterministic (#164694) Pull Request resolved: https://github.com/pytorch/pytorch/pull/164694 Approved by: https://github.com/pianpwk ghstack dependencies: #164432, #164434, #164514, #164646, #164647, #164649, #164687, #164688, #164693 --- tools/experimental/torchfuzz/ops_fuzzer.py | 5 +- .../torchfuzz/test_determinism.py | 51 +++++++++++++++---- 2 files changed, 45 insertions(+), 11 deletions(-) diff --git a/tools/experimental/torchfuzz/ops_fuzzer.py b/tools/experimental/torchfuzz/ops_fuzzer.py index 577fdfb69d57..3ff17bb5b559 100644 --- a/tools/experimental/torchfuzz/ops_fuzzer.py +++ b/tools/experimental/torchfuzz/ops_fuzzer.py @@ -294,11 +294,14 @@ def fuzz_op( available_operators = _get_template_filtered_operators(template, supported_ops) # Filter operators that can produce the target spec + # IMPORTANT: iterate in a deterministic order to avoid dict-order nondeterminism compatible_ops = [] - for op_name, operator in available_operators.items(): + for op_name in sorted(available_operators.keys()): + operator = available_operators[op_name] if operator.can_produce(target_spec): compatible_ops.append((op_name, operator)) + # Shuffle with seeded RNG (caller seeds random), but from a deterministic base order random.shuffle(compatible_ops) if not compatible_ops: diff --git a/tools/experimental/torchfuzz/test_determinism.py b/tools/experimental/torchfuzz/test_determinism.py index 361e454df42d..7c621d2e0cf2 100644 --- a/tools/experimental/torchfuzz/test_determinism.py +++ b/tools/experimental/torchfuzz/test_determinism.py @@ -8,7 +8,7 @@ from pathlib import Path def run_fuzzer_with_seed(seed): """Run the fuzzer with a specific seed and return the generated code.""" - cmd = [sys.executable, "fuzzer.py", "--seed", str(seed)] + cmd = [sys.executable, "fuzzer.py", "--seed", str(seed), "--template", "unbacked"] # Clear the output directory first torchfuzz_dir = Path("/tmp/torchfuzz") @@ -20,26 +20,57 @@ def run_fuzzer_with_seed(seed): cmd, capture_output=True, text=True, cwd=Path(__file__).parent ) + # Always attempt to read the generated file even if execution failed. if result.returncode != 0: print(f"Fuzzer failed with return code {result.returncode}") print(f"stdout: {result.stdout}") print(f"stderr: {result.stderr}") - return None - # Find the generated Python file in /tmp/torchfuzz/ - py_files = list(torchfuzz_dir.glob("fuzz_*.py")) - if not py_files: - print("No Python files generated in /tmp/torchfuzz/") - return None + # Prefer to compare the exact Program Source that the fuzzer printed in stdout, + # which reflects the executed code even if files are overwritten between runs. + src_block = None + lines = result.stdout.splitlines() + for i, line in enumerate(lines): + if line.strip() == "=== Program Source ===": + # Collect until the next delimiter line of === or the end + j = i + 1 + block_lines = [] + while j < len(lines) and not lines[j].startswith("==="): + block_lines.append(lines[j]) + j += 1 + src_block = "\n".join(block_lines) + break - # Read the content of the generated file - with open(py_files[0]) as f: + if src_block: + return src_block + + # Fallback: parse the exact path the fuzzer ran from stdout: "Running: /tmp/torchfuzz/fuzz_XXXX.py" + path = None + for line in lines: + if line.startswith("Running: ") and line.strip().endswith(".py"): + path = line.split("Running: ", 1)[1].strip() + break + + if path is None: + # Fallback: pick the most recently modified fuzz_*.py in /tmp/torchfuzz + py_files = sorted( + torchfuzz_dir.glob("fuzz_*.py"), + key=lambda p: p.stat().st_mtime, + reverse=True, + ) + if not py_files: + print("No Python files generated in /tmp/torchfuzz/") + return None + path = str(py_files[0]) + + # Read the content of the generated file that was actually executed + with open(path) as f: return f.read() def test_deterministic_output(): """Test that the fuzzer produces identical output for the same seed.""" - seed = 115306 # Use the seed mentioned in the user's issue + seed = 13157 # Use the seed mentioned in the user's issue num_runs = 3 outputs = []