Files
pytorch/torch/_dynamo/test_minifier_common.py
Edward Z. Yang 0a479d9b9c Simplify minifier testing by incorporating fault injection in prod code (#100357)
Previously, minifier testing injected faults by injecting extra code
into the repro scripts, and then ensuring this code got propagated to
all subsequent subprocess calls.  This was not only quite complicated,
but also induced a big slowdown on the minifier, because to inject the
faults, you had to import torch._inductor, which would cause the
compilation threads to immediately get initialized before you even got
to do anything else in the repro script.

This new approach fixes this problem by incorporating the fault
injection into "prod" code.  Essentially, for inductor fault injection
we introduce some new config flags that let you "configure" Inductor to
be buggy; for Dynamo fault injection we just permanently keep the buggy
testing backends registered.  This is MUCH simpler: we only have to
propagate the buggy config (which is something we're already doing),
and it saves the minifier scripts from having to immediately initialize
inductor on entry.

Also, I enable the test for Triton runtime errors, now that tl.device_assert is here.

Signed-off-by: Edward Z. Yang <ezyang@meta.com>

Pull Request resolved: https://github.com/pytorch/pytorch/pull/100357
Approved by: https://github.com/voznesenskym
2023-05-02 11:44:06 +00:00

115 lines
4.0 KiB
Python

import os
import re
import shutil
import subprocess
import tempfile
import unittest
import torch
import torch._dynamo
import torch._dynamo.test_case
class MinifierTestBase(torch._dynamo.test_case.TestCase):
DEBUG_DIR = tempfile.mkdtemp()
@classmethod
def setUpClass(cls):
super().setUpClass()
cls._exit_stack.enter_context(
unittest.mock.patch.object(
torch._dynamo.config,
"debug_dir_root",
cls.DEBUG_DIR,
)
)
@classmethod
def tearDownClass(cls):
if os.getenv("PYTORCH_KEEP_TMPDIR", "0") != "1":
shutil.rmtree(cls.DEBUG_DIR)
else:
print("test_minifier_common tmpdir kept at: {cls.DEBUG_DIR}")
cls._exit_stack.close()
# Run `code` in a separate python process.
# Returns the completed process state and the directory containing the
# minifier launcher script, if `code` outputted it.
def _run_test_code(self, code):
proc = subprocess.run(
["python3", "-c", code], capture_output=True, cwd=self.DEBUG_DIR
)
print("stdout:", proc.stdout.decode("utf-8"))
print("stderr:", proc.stderr.decode("utf-8"))
repro_dir_match = re.search(
r"(\S+)minifier_launcher.py", proc.stderr.decode("utf-8")
)
if repro_dir_match is not None:
return proc, repro_dir_match.group(1)
return proc, None
# Runs the minifier launcher script in `repro_dir`
def _run_minifier_launcher(self, repro_dir):
self.assertIsNotNone(repro_dir)
launch_file = os.path.join(repro_dir, "minifier_launcher.py")
with open(launch_file, "r") as f:
launch_code = f.read()
self.assertTrue(os.path.exists(launch_file))
launch_proc = subprocess.run(
["python3", launch_file],
capture_output=True,
cwd=repro_dir,
)
print("minifier stdout:", launch_proc.stdout.decode("utf-8"))
print("minifier stderr:", launch_proc.stderr.decode("utf-8"))
return launch_proc, launch_code
# Runs the repro script in `repro_dir`
def _run_repro(self, repro_dir):
self.assertIsNotNone(repro_dir)
repro_file = os.path.join(repro_dir, "repro.py")
with open(repro_file, "r") as f:
repro_code = f.read()
self.assertTrue(os.path.exists(repro_file))
repro_proc = subprocess.run(
["python3", repro_file], capture_output=True, cwd=repro_dir
)
return repro_proc, repro_code
# Template for testing code.
# `run_code` is the code to run for the test case.
# `patch_code` is the code to be patched in every generated file; usually
# just use this to turn on bugs via the config
def _gen_test_code(self, run_code, repro_after, repro_level, patch_code):
return f"""\
import torch
import torch._dynamo
{patch_code}
torch._dynamo.config.repro_after = "{repro_after}"
torch._dynamo.config.repro_level = {repro_level}
torch._dynamo.config.debug_dir_root = "{self.DEBUG_DIR}"
{run_code}
"""
# Runs a full minifier test.
# Minifier tests generally consist of 3 stages:
# 1. Run the problematic code (in a separate process since it could segfault)
# 2. Run the generated minifier launcher script
# 3. Run the generated repro script
def _run_full_test(self, run_code, repro_after, repro_level, patch_code):
test_code = self._gen_test_code(run_code, repro_after, repro_level, patch_code)
test_proc, repro_dir = self._run_test_code(test_code)
self.assertIsNotNone(repro_dir)
print("running minifier")
launch_proc, launch_code = self._run_minifier_launcher(repro_dir)
print("running repro")
repro_proc, repro_code = self._run_repro(repro_dir)
return (test_proc, launch_proc, repro_proc), (launch_code, repro_code)
def _run_full_test_nocode(self, run_code, repro_after, repro_level, patch_code):
tbs, _ = self._run_full_test(run_code, repro_after, repro_level, patch_code)
return tbs