[torchfuzz] make ops_fuzzer deterministic (#164694)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/164694
Approved by: https://github.com/pianpwk
ghstack dependencies: #164432, #164434, #164514, #164646, #164647, #164649, #164687, #164688, #164693
This commit is contained in:
bobrenjc93
2025-10-05 23:02:00 -07:00
committed by PyTorch MergeBot
parent ac901bf79a
commit 53f6cc7529
2 changed files with 45 additions and 11 deletions

View File

@ -294,11 +294,14 @@ def fuzz_op(
available_operators = _get_template_filtered_operators(template, supported_ops)
# Filter operators that can produce the target spec
# IMPORTANT: iterate in a deterministic order to avoid dict-order nondeterminism
compatible_ops = []
for op_name, operator in available_operators.items():
for op_name in sorted(available_operators.keys()):
operator = available_operators[op_name]
if operator.can_produce(target_spec):
compatible_ops.append((op_name, operator))
# Shuffle with seeded RNG (caller seeds random), but from a deterministic base order
random.shuffle(compatible_ops)
if not compatible_ops:

View File

@ -8,7 +8,7 @@ from pathlib import Path
def run_fuzzer_with_seed(seed):
"""Run the fuzzer with a specific seed and return the generated code."""
cmd = [sys.executable, "fuzzer.py", "--seed", str(seed)]
cmd = [sys.executable, "fuzzer.py", "--seed", str(seed), "--template", "unbacked"]
# Clear the output directory first
torchfuzz_dir = Path("/tmp/torchfuzz")
@ -20,26 +20,57 @@ def run_fuzzer_with_seed(seed):
cmd, capture_output=True, text=True, cwd=Path(__file__).parent
)
# Always attempt to read the generated file even if execution failed.
if result.returncode != 0:
print(f"Fuzzer failed with return code {result.returncode}")
print(f"stdout: {result.stdout}")
print(f"stderr: {result.stderr}")
return None
# Find the generated Python file in /tmp/torchfuzz/
py_files = list(torchfuzz_dir.glob("fuzz_*.py"))
if not py_files:
print("No Python files generated in /tmp/torchfuzz/")
return None
# Prefer to compare the exact Program Source that the fuzzer printed in stdout,
# which reflects the executed code even if files are overwritten between runs.
src_block = None
lines = result.stdout.splitlines()
for i, line in enumerate(lines):
if line.strip() == "=== Program Source ===":
# Collect until the next delimiter line of === or the end
j = i + 1
block_lines = []
while j < len(lines) and not lines[j].startswith("==="):
block_lines.append(lines[j])
j += 1
src_block = "\n".join(block_lines)
break
# Read the content of the generated file
with open(py_files[0]) as f:
if src_block:
return src_block
# Fallback: parse the exact path the fuzzer ran from stdout: "Running: /tmp/torchfuzz/fuzz_XXXX.py"
path = None
for line in lines:
if line.startswith("Running: ") and line.strip().endswith(".py"):
path = line.split("Running: ", 1)[1].strip()
break
if path is None:
# Fallback: pick the most recently modified fuzz_*.py in /tmp/torchfuzz
py_files = sorted(
torchfuzz_dir.glob("fuzz_*.py"),
key=lambda p: p.stat().st_mtime,
reverse=True,
)
if not py_files:
print("No Python files generated in /tmp/torchfuzz/")
return None
path = str(py_files[0])
# Read the content of the generated file that was actually executed
with open(path) as f:
return f.read()
def test_deterministic_output():
"""Test that the fuzzer produces identical output for the same seed."""
seed = 115306 # Use the seed mentioned in the user's issue
seed = 13157 # Use the seed mentioned in the user's issue
num_runs = 3
outputs = []