[dynamo] Add run_inductor_tests entrypoint (#113278)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/113278
Approved by: https://github.com/yanboliang

Author: Jason Ansel
Date: 2023-11-10 21:47:47 -08:00
Committed by: PyTorch MergeBot
Parent: fb9a136383
Commit: b00311ce9e
55 changed files with 352 additions and 518 deletions
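
The same mechanical migration repeats across the 55 files below: each test file's __main__ block used to pair run_tests() with a hand-rolled guard (HAS_CPU/HAS_CUDA, ROCm, ASAN, macOS, Windows CI, big-GPU checks), and many files carried their own Windows-CI preamble and try/except fallback around "import triton". All of that gating moves into a single run_inductor_tests() entrypoint in torch.testing._internal.inductor_utils, with the former conditions expressed as keyword flags. An illustrative composite of the before/after, assembled from the hunks below:

# Before: every file re-implemented its own gating.
if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests

    if (HAS_CPU or HAS_CUDA) and not TEST_WITH_ROCM:
        run_tests(needs="filelock")

# After: the gating becomes keyword flags on the shared entrypoint.
if __name__ == "__main__":
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests(skip_rocm=True)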

View File

@ -32,6 +32,7 @@ from torch.testing._internal.common_utils import (
disable_translation_validation_if_dynamic_shapes,
skipIfRocm,
)
from torch.testing._internal.inductor_utils import requires_cuda
# Defines all the kernels for tests
from torch.testing._internal.triton_utils import * # noqa: F403

View File

@ -1,7 +1,6 @@
# Owner(s): ["module: inductor"]
import copy
import os
import sys
import tempfile
import unittest
from typing import Dict
@ -18,38 +17,21 @@ from torch._inductor.utils import aot_inductor_launcher, cache_dir
from torch.testing import FileCheck
from torch.testing._internal import common_utils
from torch.testing._internal.common_utils import (
IS_CI,
IS_FBCODE,
IS_WINDOWS,
TEST_WITH_ROCM,
TestCase,
)
from torch.testing._internal.common_utils import IS_FBCODE, TestCase
from torch.testing._internal.inductor_utils import (
copy_tests,
HAS_CUDA,
requires_cuda,
requires_multigpu,
TestFailure,
)
from torch.testing._internal.triton_utils import (
add_kernel,
add_kernel_2d_autotuned,
add_kernel_autotuned,
triton,
)
from torch.utils import _pytree as pytree
if HAS_CUDA:
import triton
from torch.testing._internal.triton_utils import (
add_kernel,
add_kernel_2d_autotuned,
add_kernel_autotuned,
)
if IS_WINDOWS and IS_CI:
sys.stderr.write(
"Windows CI does not have necessary dependencies for test_torchinductor yet\n"
)
if __name__ == "__main__":
sys.exit(0)
raise unittest.SkipTest("requires sympy/functorch/filelock")
class AOTInductorModelRunner:
@classmethod
@ -1262,8 +1244,6 @@ copy_tests(
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
# cpp_extension N/A in fbcode
if HAS_CUDA and not TEST_WITH_ROCM:
run_tests(needs="filelock")
run_inductor_tests(skip_rocm=True, triton=True)

View File

@ -1,41 +1,25 @@
# Owner(s): ["module: inductor"]
import contextlib
import math
import os
import sys
import unittest
import torch
from torch._inductor import config
from torch._inductor.scheduler import Scheduler
from torch.testing._internal.common_utils import (
IS_CI,
IS_WINDOWS,
skipIfRocm,
TEST_WITH_ASAN,
TestCase as TorchTestCase,
)
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
# Make the helper files in test/ importable
pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
sys.path.append(pytorch_test_dir)
import contextlib
import unittest
from torch._inductor import config
from torch._inductor.scheduler import Scheduler
if IS_WINDOWS and IS_CI:
sys.stderr.write(
"Windows CI does not have necessary dependencies for test_torchinductor yet\n"
)
if __name__ == "__main__":
sys.exit(0)
raise unittest.SkipTest("requires sympy/functorch/filelock")
from torch.testing._internal.inductor_utils import (
check_model,
check_model_cuda,
copy_tests,
HAS_CPU,
HAS_CUDA,
)
@ -137,7 +121,6 @@ if HAS_CPU and not torch.backends.mps.is_available():
copy_tests(BenchmarkFusionTestTemplate, BenchmarkFusionCpuTest, "cpu")
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if HAS_CPU or HAS_CUDA:
run_tests()
run_inductor_tests()

View File

@ -1,16 +1,13 @@
# Owner(s): ["module: inductor"]
import functools
import importlib
import itertools
import sys
import unittest
import torch
from torch import nn
from torch._dynamo.testing import load_test_module
from torch._inductor import config as inductor_config
from torch.testing._internal.common_cuda import TEST_CUDNN
from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, TEST_WITH_ASAN
from torch.testing._internal.common_utils import TEST_WITH_ASAN
from torch.testing._internal.inductor_utils import (
check_model,
check_model_cuda,
@ -20,20 +17,7 @@ from torch.testing._internal.inductor_utils import (
skipCUDAIf,
)
if IS_WINDOWS and IS_CI:
sys.stderr.write(
"Windows CI does not have necessary dependencies for test_torchinductor yet\n"
)
if __name__ == "__main__":
sys.exit(0)
raise unittest.SkipTest("requires sympy/functorch/filelock")
TestCase = load_test_module(__file__, "inductor.test_inductor_freezing").TestCase
importlib.import_module("functorch")
importlib.import_module("filelock")
aten = torch.ops.aten
@ -252,7 +236,6 @@ if HAS_CUDA and not TEST_WITH_ASAN:
del BinaryFoldingTemplate
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if HAS_CPU or HAS_CUDA:
run_tests(needs="filelock")
run_inductor_tests()

View File

@ -1,12 +1,11 @@
# Owner(s): ["module: inductor"]
import functools
import pickle
import tempfile
import unittest
from unittest.mock import patch
import torch
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import counters
from torch._inductor import config
from torch._inductor.codecache import (
@ -22,13 +21,7 @@ from torch.testing._internal.common_utils import (
instantiate_parametrized_tests,
parametrize,
)
from torch.testing._internal.inductor_utils import HAS_CUDA
from torch.utils._triton import has_triton
HAS_TRITON = has_triton()
requires_cuda = functools.partial(unittest.skipIf, not HAS_CUDA, "requires cuda")
requires_triton = functools.partial(unittest.skipIf, not HAS_TRITON, "requires triton")
from torch.testing._internal.inductor_utils import HAS_CUDA, requires_cuda
class MyModel(torch.nn.Module):
@ -96,7 +89,7 @@ class TestFxGraphCache(TestCase):
super().setUp()
counters.clear()
@requires_triton()
@requires_cuda()
@config.patch({"fx_graph_cache": True})
@parametrize("device", ("cuda", "cpu"))
@parametrize("dtype", (torch.float32, torch.bfloat16))
@ -137,7 +130,7 @@ class TestFxGraphCache(TestCase):
self.assertEqual(counters["inductor"]["fxgraph_cache_miss"], 2)
self.assertEqual(counters["inductor"]["fxgraph_cache_hit"], 1)
@requires_triton()
@requires_cuda()
@config.patch({"fx_graph_cache": True})
@parametrize("device", ("cuda", "cpu"))
@parametrize("dtype", (torch.float32, torch.float64))
@ -482,4 +475,6 @@ class TestFxGraphCacheHashing(TestCase):
if __name__ == "__main__":
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests()

View File

@ -12,7 +12,6 @@ from torch._inductor.graph import GraphLowering
from torch._inductor.virtualized import V
from torch.testing._internal.common_utils import TestCase as TorchTestCase
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
class TestCodegenTriton(TorchTestCase):
@ -71,7 +70,6 @@ class TestCodegenTriton(TorchTestCase):
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if HAS_CPU or HAS_CUDA:
run_tests("sympy")
run_inductor_tests()

View File

@ -7,12 +7,10 @@ import torch
import torch.nn as nn
from torch import _inductor as inductor
from torch._dynamo import compiled_autograd
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.testing import load_test_module
from torch._dynamo.utils import counters
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
# note: these tests are not run on windows due to inductor_utils.HAS_CPU
from torch.testing._internal.inductor_utils import HAS_CUDA
def compiler_fn(gm):
@ -535,5 +533,6 @@ for name, fn in test_autograd.TestAutograd.__dict__.items():
if __name__ == "__main__":
if HAS_CPU:
run_tests(needs="filelock")
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests()

View File

@ -10,13 +10,11 @@ import torch._inductor
# The rest of the optimizers not yet imported: Adamax, LBFGS, RAdam, SGD, SparseAdam
from torch.optim import Adadelta, Adagrad, Adam, AdamW, ASGD, NAdam, RMSprop, Rprop
from torch.testing._internal.common_utils import TEST_WITH_ROCM, TestCase
from torch.testing._internal.common_utils import TestCase
from torch.testing._internal.inductor_utils import (
check_model,
check_model_cuda,
HAS_CPU,
HAS_CUDA,
requires_cuda,
)
@ -215,7 +213,6 @@ class CompiledOptimizerTests(TestCase):
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if (HAS_CPU or HAS_CUDA) and not TEST_WITH_ROCM:
run_tests(needs="filelock")
run_inductor_tests(skip_rocm=True)

View File

@ -4,7 +4,7 @@ import unittest
import torch
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._inductor import config
from torch.testing._internal.inductor_utils import HAS_CPU
@ -235,4 +235,6 @@ class TestInductorConfig(TestCase):
if __name__ == "__main__":
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests()

View File

@ -1,27 +1,13 @@
# Owner(s): ["module: inductor"]
import sys
import unittest
from unittest import mock
import torch
from torch._dynamo.test_case import run_tests, TestCase
from torch.testing._internal.common_utils import IS_LINUX
from torch.testing._internal.inductor_utils import HAS_CUDA
try:
import triton
except ImportError:
if __name__ == "__main__":
sys.exit(0)
raise unittest.SkipTest("requires triton") # noqa: TRY200
from torch._dynamo.test_case import TestCase
from torch._inductor import config
from torch._inductor.coordinate_descent_tuner import CoordescTuner
from torch.testing._internal.triton_utils import triton
config.benchmark_kernel = True
config.coordinate_descent_tuning = True
orig_compare_config = CoordescTuner.compare_config
@ -44,6 +30,7 @@ def mock_compare_config_prefer_larger_XBLOCK(
return orig_compare_config(self, func, candidate_config, best_config, best_timing)
@config.patch(benchmark_kernel=True, coordinate_descent_tuning=True)
class TestCoordinateDescentTuner(TestCase):
def test_abs_function(self):
"""
@ -100,5 +87,6 @@ class TestCoordinateDescentTuner(TestCase):
if __name__ == "__main__":
if IS_LINUX and HAS_CUDA:
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests(triton=True)

View File

@ -379,8 +379,8 @@ if RUN_CUDA:
test_failures_cuda_wrapper,
)
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
if RUN_CPU or RUN_CUDA:
run_tests(needs="filelock")
if __name__ == "__main__":
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests(skip_asan=True, skip_rocm=True, skip_mac=True)

View File

@ -34,7 +34,7 @@ from torch._inductor.utils import timed
from torch._inductor.virtualized import V
from torch.fx.experimental.proxy_tensor import make_fx
from torch.nn import functional as F
from torch.testing._internal.common_utils import IS_MACOS, slowTest
from torch.testing._internal.common_utils import slowTest
from torch.testing._internal.inductor_utils import (
check_model,
run_and_get_cpp_code,
@ -2600,8 +2600,6 @@ class CPUReproTests(TestCase):
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import HAS_CPU
from torch.testing._internal.inductor_utils import run_inductor_tests
if HAS_CPU and not IS_MACOS:
run_tests(needs="filelock")
run_inductor_tests(skip_mac=True)

View File

@ -1,6 +1,5 @@
# Owner(s): ["module: inductor"]
import math
import sys
import unittest
import torch
@ -20,17 +19,9 @@ from torch.testing._internal.common_utils import (
freeze_rng_state,
IS_FBCODE,
skipIfRocm,
TEST_WITH_ASAN,
)
from torch.testing._internal.inductor_utils import check_model_cuda, TestCase, ToTuple
try:
import triton
from triton import language as tl
except ImportError:
if __name__ == "__main__":
sys.exit(0)
raise
from torch.testing._internal.triton_utils import tl, triton
aten = torch.ops.aten
@ -1063,8 +1054,6 @@ class CudaReproTests(TestCase):
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import HAS_CUDA
from torch.testing._internal.inductor_utils import run_inductor_tests
if HAS_CUDA and not TEST_WITH_ASAN:
run_tests(needs="filelock")
run_inductor_tests(triton=True, skip_asan=True)

View File

@ -7,7 +7,6 @@ import torch
from torch._inductor import config
from torch._inductor.codecache import AsyncCompile, CUDACodeCache
from torch._inductor.codegen.cuda.cuda_env import nvcc_exist
from torch._inductor.exc import CUDACompileError
from torch.testing._internal.common_utils import TestCase as TorchTestCase
@ -86,7 +85,6 @@ class TestCUDACodeCache(TorchTestCase):
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if nvcc_exist():
run_tests("cuda")
run_inductor_tests(nvcc=True, triton=True)

View File

@ -1,9 +1,6 @@
# Owner(s): ["module: inductor"]
import contextlib
import functools
import gc
import importlib
import sys
import unittest
import warnings
@ -18,35 +15,15 @@ from torch.fx.experimental.proxy_tensor import make_fx
from torch.testing import FileCheck
from torch.testing._internal.common_utils import (
IS_CI,
IS_LINUX,
IS_WINDOWS,
skipIfRocm,
TEST_CUDA_GRAPH,
TEST_WITH_ASAN,
TestCase as TorchTestCase,
)
from torch.testing._internal.inductor_utils import HAS_CUDA, requires_multigpu
from torch.utils._python_dispatch import TorchDispatchMode
if IS_WINDOWS and IS_CI:
sys.stderr.write(
"Windows CI does not have necessary dependencies for test_torchinductor yet\n"
)
if __name__ == "__main__":
sys.exit(0)
raise unittest.SkipTest("requires sympy/functorch/filelock")
importlib.import_module("functorch")
importlib.import_module("filelock")
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
HAS_MULTIGPU = HAS_CUDA and torch.cuda.device_count() >= 2
aten = torch.ops.aten
requires_cuda = functools.partial(unittest.skipIf, not HAS_CUDA, "requires cuda")
requires_multigpu = functools.partial(
unittest.skipIf, not HAS_MULTIGPU, "requires multiple cuda devices"
)
def cdata(t):
@ -1310,12 +1287,6 @@ if HAS_CUDA and not TEST_WITH_ASAN:
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if not TEST_CUDA_GRAPH:
if __name__ == "__main__":
sys.exit(0)
raise unittest.SkipTest("cuda graph test is skipped")
if HAS_CPU or HAS_CUDA:
run_tests(needs="filelock")
run_inductor_tests(cudagraphs=True)

View File

@ -9,7 +9,7 @@ from torch._inductor.lowering import register_lowering
from torch._inductor.virtualized import ops
from torch.testing._internal.common_utils import TestCase as TorchTestCase
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
from torch.testing._internal.inductor_utils import HAS_CUDA
# These tests check issues for lowerings that aren't in the main pytorch repo
@ -139,7 +139,6 @@ class TestCustomLowering(TorchTestCase):
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if HAS_CPU or HAS_CUDA:
run_tests(needs="filelock")
run_inductor_tests()

View File

@ -4,16 +4,13 @@ import contextlib
import torch
import torch._inductor.pattern_matcher as pattern_matcher
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import counters
from torch._inductor import config
from torch._inductor.lowering import lowerings as L
from torch._inductor.pattern_matcher import Arg, CallFunction, PatternMatcherPass
from torch.testing._internal.common_utils import IS_LINUX
from torch.testing._internal.inductor_utils import HAS_CPU
@config.patch({"freezing": True})
class TestCustomPassBase(TestCase):
@ -168,5 +165,6 @@ class TestPostGradCustomPrePostPass(TestCustomPassBase):
if __name__ == "__main__":
if IS_LINUX and HAS_CPU and torch.backends.mkldnn.is_available():
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests(mkl=True)

View File

@ -145,8 +145,6 @@ buf2.node.kernel = extern_kernels.mm""",
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import HAS_CPU
from torch.testing._internal.inductor_utils import run_inductor_tests
if HAS_CPU:
run_tests(needs="filelock")
run_inductor_tests()

View File

@ -9,7 +9,7 @@ from torch._inductor.ir import Buffer, FixedLayout, Pointwise
from torch._inductor.virtualized import ops, V
from torch.testing._internal.common_utils import TestCase as TorchTestCase
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
from torch.testing._internal.inductor_utils import HAS_CUDA
class TestDependencies(TorchTestCase):
@ -58,7 +58,6 @@ class TestDependencies(TorchTestCase):
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if HAS_CPU or HAS_CUDA:
run_tests("sympy")
run_inductor_tests()

View File

@ -1,8 +1,6 @@
# Owner(s): ["module: inductor"]
import copy
import itertools
import sys
import unittest
import torch
from torch import nn
@ -11,18 +9,10 @@ from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import counters
from torch._inductor import config as inductor_config
from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, TEST_WITH_ASAN
from torch.testing._internal.common_utils import TEST_WITH_ASAN
from torch.testing._internal.inductor_utils import copy_tests, HAS_CPU, HAS_CUDA
if IS_WINDOWS and IS_CI:
sys.stderr.write(
"Windows CI does not have necessary dependencies for test_torchinductor yet\n"
)
if __name__ == "__main__":
sys.exit(0)
raise unittest.SkipTest("requires sympy/functorch/filelock")
class ConvOp(nn.Module):
expected_optimization_count = 1
@ -191,7 +181,6 @@ if HAS_CUDA and not TEST_WITH_ASAN:
del EfficientConvBNEvalTemplate
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if HAS_CPU or HAS_CUDA:
run_tests(needs="filelock")
run_inductor_tests()

View File

@ -8,27 +8,23 @@ import torch
import torch._dynamo
import torch.utils.cpp_extension
try:
from extension_backends.extension_codegen_backend import (
ExtensionScheduling,
ExtensionWrapperCodegen,
)
except ImportError:
from .extension_backends.extension_codegen_backend import (
ExtensionScheduling,
ExtensionWrapperCodegen,
)
from torch._C import FileCheck
from torch._dynamo.testing import load_test_module
from torch._inductor import metrics
from torch._inductor.codegen.common import (
get_scheduling_for_device,
get_wrapper_codegen_for_device,
register_backend_for_device,
)
from torch.testing._internal.common_utils import IS_FBCODE, IS_MACOS
from torch.testing._internal.common_utils import IS_FBCODE
from torch.testing._internal.inductor_utils import run_and_get_cpp_code, TestCase
extension_codegen_backend = load_test_module(
__name__, "inductor.extension_backends.extension_codegen_backend"
)
ExtensionScheduling = extension_codegen_backend.ExtensionScheduling
ExtensionWrapperCodegen = extension_codegen_backend.ExtensionWrapperCodegen
def remove_build_path():
if sys.platform == "win32":
@ -127,9 +123,6 @@ class ExtensionBackendTests(TestCase):
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import HAS_CPU
from torch.testing._internal.inductor_utils import run_inductor_tests
# cpp_extension doesn't work in fbcode right now
if HAS_CPU and not IS_MACOS and not IS_FBCODE:
run_tests(needs="filelock")
run_inductor_tests(skip_fbcode=True, skip_mac=True)

View File

@ -10,15 +10,12 @@ from torch.testing._internal.common_utils import (
instantiate_parametrized_tests,
IS_FBCODE,
parametrize,
TEST_WITH_ROCM,
TestCase,
)
from torch.testing._internal.inductor_utils import (
check_model,
check_model_cuda,
HAS_CPU,
HAS_CUDA,
requires_cuda,
)
@ -590,7 +587,6 @@ class ForeachTests(TestCase):
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if (HAS_CPU or HAS_CUDA) and not TEST_WITH_ROCM:
run_tests(needs="filelock")
run_inductor_tests(skip_rocm=True)

View File

@ -5,7 +5,7 @@ import unittest
import torch
from torch import Tensor
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._inductor import utils
from torch.testing._internal.common_cuda import SM90OrLater
from torch.testing._internal.common_utils import (
@ -13,7 +13,6 @@ from torch.testing._internal.common_utils import (
parametrize,
TEST_WITH_ROCM,
)
from torch.testing._internal.inductor_utils import HAS_CUDA
torch.set_float32_matmul_precision("high")
@ -302,5 +301,6 @@ class TestFP8Types(TestCase):
if __name__ == "__main__":
if HAS_CUDA:
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests(triton=True, skip_rocm=True)

View File

@ -6,14 +6,14 @@ import math
import torch
import torch._inductor.config
import torch.utils.checkpoint
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import counters
from torch._inductor.utils import run_and_get_code
from torch.testing._internal.common_cuda import (
PLATFORM_SUPPORTS_FUSED_ATTENTION,
SM80OrLater,
)
from torch.testing._internal.common_utils import IS_LINUX, skipIfRocm
from torch.testing._internal.common_utils import skipIfRocm
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
@ -664,5 +664,6 @@ if HAS_CPU:
if __name__ == "__main__":
if IS_LINUX:
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests()

View File

@ -12,7 +12,7 @@ from torch._inductor.fx_passes.pre_grad import (
transpose_matmul,
)
from torch.fx.passes.shape_prop import ShapeProp
from torch.testing._internal.common_utils import run_tests, TestCase
from torch.testing._internal.common_utils import TestCase
PassFunc = Callable[[torch.fx.GraphModule, Any], torch.fx.GraphModule]
@ -154,4 +154,6 @@ class TestFxFusion(TestCase):
if __name__ == "__main__":
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests()

View File

@ -4,7 +4,7 @@ import unittest
import torch
import torch._inductor
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import counters
from torch.testing._internal.inductor_utils import requires_cuda
@ -15,7 +15,6 @@ try:
has_fbgemm = True
except Exception:
has_fbgemm = False
pass
class MyModule(torch.nn.Module):
@ -433,4 +432,6 @@ class TestGroupBatchFusion(TestCase):
if __name__ == "__main__":
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests()

View File

@ -268,8 +268,6 @@ class ExprPrinterTests(TorchTestCase):
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
from torch.testing._internal.inductor_utils import run_inductor_tests
if HAS_CPU or HAS_CUDA:
run_tests("sympy")
run_inductor_tests()

View File

@ -2,7 +2,6 @@
import contextlib
import functools
import itertools
import sys
import unittest
import weakref
@ -15,8 +14,6 @@ from torch.testing import FileCheck
from torch.testing._internal.common_cuda import SM80OrLater
from torch.testing._internal.common_utils import (
IS_CI,
IS_WINDOWS,
skipIfRocm,
TEST_WITH_ASAN,
TestCase as TorchTestCase,
@ -27,21 +24,11 @@ from torch.testing._internal.inductor_utils import (
copy_tests,
HAS_CPU,
HAS_CUDA,
requires_cuda,
)
if IS_WINDOWS and IS_CI:
sys.stderr.write(
"Windows CI does not have necessary dependencies for test_torchinductor yet\n"
)
if __name__ == "__main__":
sys.exit(0)
raise unittest.SkipTest("requires sympy/functorch/filelock")
HAS_MULTIGPU = HAS_CUDA and torch.cuda.device_count() >= 2
aten = torch.ops.aten
prims = torch.ops.prims
requires_cuda = functools.partial(unittest.skipIf, not HAS_CUDA, "requires cuda")
class TestCase(TorchTestCase):
@ -652,7 +639,6 @@ del OptimizeForInferenceTemplate
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if HAS_CPU or HAS_CUDA:
run_tests(needs="filelock")
run_inductor_tests()

View File

@ -5,7 +5,7 @@ import logging
import torch
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._inductor.utils import do_bench, do_bench_using_profiling
@ -33,4 +33,6 @@ class TestBench(TestCase):
if __name__ == "__main__":
run_tests("cuda")
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests(triton=True)

View File

@ -1,9 +1,7 @@
# Owner(s): ["module: inductor"]
import torch
from torch._dynamo.test_case import run_tests, TestCase
from torch.testing._internal.common_utils import IS_LINUX
from torch.testing._internal.inductor_utils import HAS_CUDA
from torch._dynamo.test_case import TestCase
aten = torch.ops.aten
@ -65,5 +63,6 @@ class TestReinplacingPassCorrectness(TestCase):
if __name__ == "__main__":
if IS_LINUX and HAS_CUDA:
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests(triton=True)

View File

@ -5,11 +5,10 @@ import sys
from unittest.mock import patch
import torch
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._inductor import config
from torch._inductor.codecache import PyCodeCache
from torch.testing import FileCheck
from torch.testing._internal.inductor_utils import HAS_CUDA
class TestKernelBenchmark(TestCase):
@ -105,5 +104,6 @@ class TestKernelBenchmark(TestCase):
if __name__ == "__main__":
if HAS_CUDA:
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests(triton=True)

View File

@ -5,10 +5,9 @@ import random
import torch
from torch import nn
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import same
from torch._inductor import config
from torch.testing._internal.inductor_utils import HAS_CUDA
USE_DDP_WRAPPER = os.environ.get("USE_DDP_WRAPPER", "1") == "1"
@ -285,5 +284,6 @@ class TestLayoutOptim(TestCase):
if __name__ == "__main__":
if HAS_CUDA:
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests(triton=True)

View File

@ -6,7 +6,7 @@ from typing import Callable, List, Optional
import torch
from torch import multiprocessing as mp
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.testing import reset_rng_state
from torch._dynamo.utils import counters
from torch._inductor import config
@ -34,7 +34,7 @@ from torch.testing._internal.common_utils import (
skipIfRocm,
)
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
from torch.testing._internal.inductor_utils import HAS_CUDA
torch.set_float32_matmul_precision("high")
if HAS_CUDA:
@ -755,8 +755,6 @@ class TestTuningProcess(TestCase):
if __name__ == "__main__":
from torch._inductor.utils import is_big_gpu
from torch.testing._internal.inductor_utils import run_inductor_tests
# Set env to make it work in CI.
if HAS_CUDA and HAS_CPU and is_big_gpu(0):
run_tests()
run_inductor_tests(triton=True, big_gpu=True)

View File

@ -1,30 +1,18 @@
# Owner(s): ["module: inductor"]
import sys
import unittest
from typing import List
import torch
from torch._C import FileCheck
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.testing import load_test_module
from torch._dynamo.utils import same
from torch._inductor import config
from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, skipIfRocm
from torch.testing._internal.common_utils import skipIfRocm
from torch.testing._internal.inductor_utils import run_and_get_cpp_code
from torch.utils._triton import has_triton
if IS_WINDOWS and IS_CI:
sys.stderr.write(
"Windows CI does not have necessary dependencies for test_memory_planning yet\n"
)
if __name__ == "__main__":
sys.exit(0)
raise unittest.SkipTest("requires sympy/functorch/filelock")
@unittest.skipIf(not has_triton(), "Inductor+gpu needs triton and recent GPU arch")
@config.patch(memory_planning=True)
class TestMemoryPlanning(TestCase):
def _generate(self, *, device):
@ -118,4 +106,6 @@ class TestMemoryPlanning(TestCase):
if __name__ == "__main__":
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests(triton=True)

View File

@ -1,5 +1,4 @@
# Owner(s): ["module: inductor"]
import functools
import unittest
from unittest.mock import patch
@ -7,11 +6,8 @@ import torch._dynamo.config as dynamo_config
import torch._inductor.config as inductor_config
from torch._dynamo.test_minifier_common import MinifierTestBase
from torch._inductor import config
from torch.testing._internal.common_utils import IS_JETSON, IS_MACOS, TEST_WITH_ASAN
from torch.utils._triton import has_triton
_HAS_TRITON = has_triton()
requires_cuda = functools.partial(unittest.skipIf, not _HAS_TRITON, "requires cuda")
from torch.testing._internal.common_utils import IS_JETSON
from torch.testing._internal.inductor_utils import requires_cuda
class MinifierTests(MinifierTestBase):
@ -173,9 +169,6 @@ inner(torch.randn(20, 20))
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
# Skip CI tests on mac since CPU inductor does not seem to work due to C++ compile errors,
# also skip on ASAN due to https://github.com/pytorch/pytorch/issues/98262
if not IS_MACOS and not TEST_WITH_ASAN:
run_tests()
run_inductor_tests(skip_mac=True, skip_asan=True)

View File

@ -1,19 +1,11 @@
# Owner(s): ["module: inductor"]
import functools
import sys
import unittest
import torch._inductor.config as inductor_config
from torch._dynamo.test_minifier_common import MinifierTestBase
from torch.testing._internal.common_utils import (
IS_JETSON,
IS_MACOS,
skipIfRocm,
TEST_WITH_ASAN,
)
from torch.utils._triton import has_triton
_HAS_TRITON = has_triton()
requires_cuda = functools.partial(unittest.skipIf, not _HAS_TRITON, "requires cuda")
from torch.testing._internal.common_utils import IS_JETSON, skipIfRocm
from torch.testing._internal.inductor_utils import requires_cuda
# These minifier tests are slow, because they must be run in separate
@ -45,12 +37,10 @@ inner(torch.randn(2, 2).to("{device}"))
if __name__ == "__main__":
import sys
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
# Skip CI tests on mac since CPU inductor does not seem to work due to C++ compile errors,
# also skip on ASAN due to https://github.com/pytorch/pytorch/issues/98262
# also skip on Py 3.11+ since unhandled exceptions can cause segfaults
if not IS_MACOS and not TEST_WITH_ASAN and sys.version_info < (3, 11):
run_tests()
if sys.version_info < (3, 11):
run_inductor_tests(skip_mac=True, skip_asan=True)

View File

@ -6,7 +6,7 @@ import torch
import torch.ao.quantization.quantizer.x86_inductor_quantizer as xiq
from torch._dynamo import config as dynamo_config
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import counters
from torch._export import capture_pre_autograd_graph
from torch._inductor import config
@ -23,8 +23,8 @@ from torch.testing._internal.common_quantization import (
skipIfNoONEDNN,
skipIfNoONEDNNBF16,
)
from torch.testing._internal.common_utils import IS_LINUX, skipIfRocm
from torch.testing._internal.inductor_utils import _check_has_dynamic_shape, HAS_CPU
from torch.testing._internal.common_utils import skipIfRocm
from torch.testing._internal.inductor_utils import _check_has_dynamic_shape
# The dict value is match_nodes(computation_op+unary_op)
@ -1533,5 +1533,6 @@ class TestDynamicPatternMatcher(TestPatternMatcherBase):
if __name__ == "__main__":
if IS_LINUX and HAS_CPU and torch.backends.mkldnn.is_available():
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests(mkl=True)

View File

@ -7,12 +7,7 @@ from typing import List, Tuple, Union
import torch
from torch.testing._internal.common_device_type import instantiate_device_type_tests
from torch.testing._internal.common_nn import NNTestCase
from torch.testing._internal.common_utils import (
IS_WINDOWS,
parametrize,
run_tests,
TEST_CUDA,
)
from torch.testing._internal.common_utils import parametrize, TEST_CUDA
from torch.utils._triton import has_triton
@ -180,7 +175,8 @@ class TestDecomp(NNTestCase):
device_types = ("cpu", "cuda")
instantiate_device_type_tests(TestDecomp, globals(), only_for=device_types)
if __name__ == "__main__":
# We don't support torch.compile() on Windows presently
if not IS_WINDOWS:
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests()

View File

@ -5,7 +5,7 @@ import unittest
import torch
import torch._dynamo.config as dynamo_config
import torch._inductor.config as inductor_config
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import count_calls, counters
from torch._higher_order_ops.out_dtype import out_dtype
from torch._inductor.fx_passes import joint_graph
@ -29,8 +29,7 @@ from torch._inductor.utils import run_and_get_code
from torch._inductor.virtualized import V
from torch.testing import FileCheck
from torch.testing._internal.common_cuda import SM80OrLater
from torch.testing._internal.common_utils import IS_LINUX, skipIfRocm
from torch.testing._internal.inductor_utils import HAS_CUDA
from torch.testing._internal.common_utils import skipIfRocm
class TestPatternMatcher(TestCase):
@ -1068,5 +1067,6 @@ class TestPatternMatcher(TestCase):
if __name__ == "__main__":
if IS_LINUX and HAS_CUDA:
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests(triton=True)

View File

@ -13,12 +13,10 @@ from torch.testing._internal.common_utils import (
skipIfRocm,
TestCase as TorchTestCase,
)
from torch.testing._internal.inductor_utils import requires_cuda
# Defines all the kernels for tests
from torch.testing._internal.triton_utils import HAS_CUDA, requires_cuda
if HAS_CUDA:
from torch.testing._internal.triton_utils import add_kernel
from torch.testing._internal.triton_utils import add_kernel
aten = torch.ops.aten
@ -840,7 +838,6 @@ class WouldBeNiceIfItWorked:
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if HAS_CUDA:
run_tests(needs="filelock")
run_inductor_tests(triton=True)

View File

@ -9,7 +9,7 @@ import torch._inductor.utils
from torch._inductor import config
from torch.profiler import ProfilerActivity
from torch.testing._internal.common_utils import TemporaryFileName, TEST_WITH_ROCM
from torch.testing._internal.common_utils import TemporaryFileName
from torch.utils._triton import has_triton
@ -120,7 +120,6 @@ class DynamoProfilerTests(torch._dynamo.test_case.TestCase):
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if not TEST_WITH_ROCM:
run_tests()
run_inductor_tests(skip_rocm=True)

View File

@ -7,13 +7,12 @@ import torch._dynamo.config as dynamo_config
import torch._inductor.config as inductor_config
import torch._inductor.select_algorithm as select_algorithm
import torch.nn.functional as F
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.testing import expectedFailureDynamicWrapper
from torch._dynamo.utils import counters
from torch._inductor.autotune_process import TritonBenchmarkRequest
from torch.testing._internal.common_utils import IS_LINUX, skipIfRocm
from torch.testing._internal.inductor_utils import HAS_CUDA
from torch.testing._internal.common_utils import skipIfRocm
aten = torch.ops.aten
@ -346,7 +345,6 @@ class TestSelectAlgorithm(TestCase):
if __name__ == "__main__":
from torch._inductor.utils import is_big_gpu
from torch.testing._internal.inductor_utils import run_inductor_tests
if IS_LINUX and HAS_CUDA and is_big_gpu(0):
run_tests()
run_inductor_tests(triton=True, big_gpu=True)

View File

@ -5,7 +5,7 @@ import unittest
import torch
import torch._logging
from torch.testing._internal.common_utils import IS_LINUX, TestCase
from torch.testing._internal.common_utils import TestCase
from torch.testing._internal.inductor_utils import HAS_CUDA
@ -59,8 +59,6 @@ class SmokeTest(TestCase):
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if IS_LINUX and torch.cuda.is_available():
if torch.cuda.get_device_properties(0).major > 5:
run_tests()
run_inductor_tests(triton=True, big_gpu=True)

View File

@ -5,7 +5,6 @@ import torch
from torch._inductor import metrics
from torch._inductor.compile_fx import compile_fx, count_bytes_inner
from torch.testing._internal.common_utils import TestCase as TorchTestCase
from torch.testing._internal.inductor_utils import HAS_CUDA
aten = torch.ops.aten
@ -166,7 +165,6 @@ class MemoryBoundedTests(TestCase):
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if HAS_CUDA:
run_tests(needs="filelock")
run_inductor_tests(triton=True)

View File

@ -1,10 +1,8 @@
# Owner(s): ["module: inductor"]
import torch
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import counters
from torch.testing._internal.common_utils import IS_LINUX
from torch.testing._internal.inductor_utils import HAS_CUDA
def patch(f):
@ -1066,5 +1064,6 @@ class TestSplitCatFxPasses(TestCase):
if __name__ == "__main__":
if IS_LINUX and HAS_CUDA:
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests(triton=True)

View File

@ -1,11 +1,10 @@
# Owner(s): ["module: inductor"]
import torch
from torch import _dynamo as dynamo, _inductor as inductor
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._inductor.utils import gen_gm_and_inputs
from torch.fx import symbolic_trace
from torch.fx.experimental.proxy_tensor import make_fx
from torch.testing._internal.inductor_utils import HAS_CPU
class MyModule(torch.nn.Module):
@ -111,5 +110,6 @@ class TestStandaloneInductor(TestCase):
if __name__ == "__main__":
if HAS_CPU:
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests(triton=True)

View File

@ -4,7 +4,6 @@ import copy
import dataclasses
import functools
import gc
import importlib
import itertools
import math
import operator
@ -32,9 +31,14 @@ from torch._dynamo.testing import (
rand_strided,
same,
)
from torch._inductor import config, test_operators
from torch._inductor.codegen.common import DataTypePropagation, OptimizationContext
from torch._inductor.compile_fx import compile_fx, compile_fx_inner
from torch._inductor.utils import (
add_scheduler_init_hook,
has_torchvision_roi_align,
run_and_get_code,
run_and_get_triton_code,
)
@ -51,34 +55,15 @@ from torch.testing._internal.common_cuda import (
from torch.testing._internal.common_device_type import _has_sufficient_memory
from torch.testing._internal.common_dtype import all_types
from torch.testing._internal.common_utils import (
DeterministicGuard,
IS_CI,
IS_FBCODE,
IS_WINDOWS,
IS_X86,
skipIfRocm,
slowTest,
TEST_WITH_ASAN,
)
from torch.utils import _pytree as pytree
from torch.utils._python_dispatch import TorchDispatchMode
from torch.utils.weak import WeakTensorKeyDictionary
if IS_WINDOWS and IS_CI:
sys.stderr.write(
"Windows CI does not have necessary dependencies for test_torchinductor yet\n"
)
sys.exit(0)
importlib.import_module("functorch")
importlib.import_module("filelock")
from torch._inductor import config, test_operators
from torch._inductor.compile_fx import compile_fx, compile_fx_inner
from torch._inductor.utils import has_torchvision_roi_align
from torch.testing._internal.common_utils import slowTest
from torch.testing._internal.inductor_utils import (
check_model,
check_model_cuda,
@ -89,11 +74,15 @@ from torch.testing._internal.inductor_utils import (
requires_cuda,
requires_multigpu,
run_and_get_cpp_code,
skip_if_mac,
skip_if_x86_mac,
skipCUDAIf,
TestCase,
ToTuple,
)
from torch.utils import _pytree as pytree
from torch.utils._python_dispatch import TorchDispatchMode
from torch.utils.weak import WeakTensorKeyDictionary
aten = torch.ops.aten
@ -759,6 +748,7 @@ class CommonTemplate:
for dtype in dtypes:
self.common(fn, (torch.randn(8, 8).to(dtype), torch.randn(8, 8).to(dtype)))
@skip_if_mac()
def test_min_max_reduction_nan(self):
def fn(a):
return (torch.max(a), torch.min(a))
@ -8014,7 +8004,6 @@ if HAS_CPU:
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if HAS_CPU or HAS_CUDA:
run_tests(needs="filelock")
run_inductor_tests()

View File

@ -1,17 +1,10 @@
# Owner(s): ["module: inductor"]
import sys
import unittest
import torch
from torch._dynamo.testing import load_test_module
from torch._inductor.compile_fx import compile_fx
from torch._inductor.utils import run_and_get_triton_code
from torch.testing._internal.common_utils import (
IS_CI,
IS_WINDOWS,
TEST_WITH_ASAN,
TestCase,
)
from torch.testing._internal.common_utils import TEST_WITH_ASAN, TestCase
from torch.testing._internal.inductor_utils import (
_check_has_dynamic_shape,
@ -28,15 +21,6 @@ CommonTemplate = load_test_module(
).CommonTemplate
if IS_WINDOWS and IS_CI:
sys.stderr.write(
"Windows CI does not have necessary dependencies for test_torchinductor_codegen_dynamic_shapes yet\n"
)
if __name__ == "__main__":
sys.exit(0)
raise unittest.SkipTest("requires sympy/functorch/filelock")
# Checks for patterns in generated C++/Triton code to see if it's dynamic
def check_codegen(
self: TestCase,
@ -328,7 +312,6 @@ if HAS_CUDA and not TEST_WITH_ASAN:
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
if HAS_CPU or HAS_CUDA:
run_tests(needs="filelock")
run_inductor_tests()

View File

@ -1,8 +1,6 @@
# Owner(s): ["module: inductor"]
import contextlib
import math
import sys
import unittest
from functools import partial
import torch
@ -15,8 +13,6 @@ from torch.testing._internal.common_device_type import (
onlyCUDA,
)
from torch.testing._internal.common_utils import (
IS_CI,
IS_WINDOWS,
TEST_WITH_ASAN,
TEST_WITH_ROCM,
TestCase,
@ -36,15 +32,6 @@ CommonTemplate = load_test_module(
__file__, "inductor.test_torchinductor"
).CommonTemplate
if IS_WINDOWS and IS_CI:
sys.stderr.write(
"Windows CI does not have necessary dependencies for test_torchinductor_dynamic_shapes yet\n"
)
if __name__ == "__main__":
sys.exit(0)
raise unittest.SkipTest("requires sympy/functorch/filelock")
# xfail by default, set is_skip=True to skip
test_failures = {
"test_kwargs_dynamic_shapes": TestFailure(("cpu",)),
@ -435,8 +422,7 @@ class TestInductorDynamic(TestCase):
instantiate_device_type_tests(TestInductorDynamic, globals())
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch.testing._internal.inductor_utils import run_inductor_tests
# Slow on ASAN after https://github.com/pytorch/pytorch/pull/94068
if (HAS_CPU or HAS_CUDA) and not TEST_WITH_ASAN:
run_tests(needs="filelock")
run_inductor_tests(skip_asan=True)

View File

@ -12,7 +12,6 @@ from unittest.mock import patch
import torch
from torch._dispatch.python import enable_python_dispatcher
from torch._dynamo.test_case import run_tests
from torch._subclasses.fake_tensor import (
DataDependentOutputException,
DynamicOutputShapeException,
@ -617,4 +616,6 @@ class TestInductorOpInfo(TestCase):
instantiate_device_type_tests(TestInductorOpInfo, globals())
if __name__ == "__main__":
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests()

View File

@ -1,19 +1,7 @@
# Owner(s): ["module: inductor"]
import sys
import unittest
from torch.testing._internal.common_utils import IS_LINUX
from torch.testing._internal.inductor_utils import HAS_CUDA
try:
import triton # noqa: F401
except ImportError:
if __name__ == "__main__":
sys.exit(0)
raise unittest.SkipTest("requires triton") # noqa: TRY200
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._inductor import config
from torch._inductor.triton_heuristics import triton_config
@ -32,5 +20,6 @@ class TestTritonHeuristics(TestCase):
if __name__ == "__main__":
if IS_LINUX and HAS_CUDA:
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests(triton=True)

View File

@ -4,9 +4,8 @@ import subprocess
import sys
import torch
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._inductor.codecache import PyCodeCache
from torch.testing._internal.inductor_utils import HAS_CUDA
class TestTritonWrapper(TestCase):
@ -49,5 +48,6 @@ class TestTritonWrapper(TestCase):
if __name__ == "__main__":
if HAS_CUDA:
run_tests()
from torch.testing._internal.inductor_utils import run_inductor_tests
run_inductor_tests(triton=True)

View File

@ -5,8 +5,7 @@ import torch
from torch._dynamo import config as dynamo_config
from torch._inductor import config as inductor_config
from torch.testing._internal.common_utils import IS_LINUX, TestCase as TorchTestCase
from torch.testing._internal.inductor_utils import HAS_CUDA
from torch.testing._internal.common_utils import TestCase as TorchTestCase
class TestUnbackedSymints(TorchTestCase):
@ -56,8 +55,6 @@ class TestUnbackedSymints(TorchTestCase):
if __name__ == "__main__":
from torch._dynamo.test_case import run_tests
from torch._inductor.utils import is_big_gpu
from torch.testing._internal.inductor_utils import run_inductor_tests
if IS_LINUX and HAS_CUDA and is_big_gpu(0):
run_tests()
run_inductor_tests(triton=True, big_gpu=True)

View File

@ -1,6 +1,7 @@
import contextlib
import os
import pathlib
import sys
import time
from subprocess import CalledProcessError
@ -14,6 +15,11 @@ from torch.testing._internal.common_utils import (
IS_FBCODE,
IS_MACOS,
IS_X86,
IS_WINDOWS,
IS_CI,
TEST_WITH_ASAN,
TEST_CUDA_GRAPH,
TEST_WITH_ROCM,
)
from torch._dynamo.backends.registry import register_backend
from torch._inductor.compile_fx import compile_fx, count_bytes_inner
@ -28,6 +34,9 @@ from torch.utils import _pytree as pytree
from torch.utils._pytree import tree_flatten, tree_unflatten
from typing import Tuple
from torch._dynamo.testing import make_test_cls_with_patches
from torch._inductor.codegen.cuda.cuda_env import nvcc_exist
from torch._inductor.utils import is_big_gpu
def test_cpu():
try:
@ -41,14 +50,26 @@ def test_cpu():
):
return False
HAS_CPU = LazyVal(test_cpu)
def gpu_is_old():
if has_triton() and torch.cuda.is_available() and not TEST_WITH_ROCM:
device_props = torch.cuda.get_device_properties(0)
# some of our CI machines use M60's which can't run Triton
if device_props.major < 7:
return True
return False
HAS_CPU = LazyVal(test_cpu)
HAS_CUDA = has_triton() and not gpu_is_old()
HAS_CUDA = has_triton()
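# (Hedged reading, since this rendering omits the diff's +/- markers: the hunk
# grows from 14 to 26 lines, so gpu_is_old() and the `and not gpu_is_old()`
# qualifier are plausibly the added side, meaning HAS_CUDA now also excludes
# GPUs with compute capability < 7.0, such as the M60 CI machines.)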
@register_backend
def count_bytes_inductor(gm, example_inputs):
return compile_fx(gm, example_inputs, inner_compile=count_bytes_inner)
def _check_has_dynamic_shape(
self: TestCase,
code,
@ -91,16 +112,21 @@ requires_multigpu = functools.partial(
skip_if_x86_mac = functools.partial(
unittest.skipIf, IS_MACOS and IS_X86, "Does not work on x86 Mac"
)
skip_if_mac = functools.partial(
unittest.skipIf, IS_MACOS, "Does not work on Mac"
)
vec_dtypes = [torch.float, torch.bfloat16, torch.float16]
@dataclasses.dataclass
class TestFailure:
suffixes: Tuple[str]
is_skip: bool = False
__test__: bool = False
def copy_tests(
my_cls, other_cls, suffix, test_failures=None, xfail_prop=None
my_cls, other_cls, suffix, test_failures=None, xfail_prop=None
): # noqa: B902
for name, value in my_cls.__dict__.items():
if name.startswith("test_"):
@ -146,7 +172,6 @@ def clone_preserve_strides(x, device=None):
return out
def compute_grads(args, kwrags, results, grads):
def gather_leaf_tensors(args, kwargs):
args = pytree.arg_tree_leaves(*args, **kwargs)
@ -171,22 +196,22 @@ def compute_grads(args, kwrags, results, grads):
def check_model(
self: TestCase,
model,
example_inputs,
kwargs=None,
*,
atol=None,
rtol=None,
check_lowp=True,
exact_dtype=True,
nopython=True,
copy_to_cuda=True,
reference_in_float=True,
assert_equal=True,
check_gradient=False,
check_has_compiled=True,
output_process_fn_grad=lambda x: x,
self: TestCase,
model,
example_inputs,
kwargs=None,
*,
atol=None,
rtol=None,
check_lowp=True,
exact_dtype=True,
nopython=True,
copy_to_cuda=True,
reference_in_float=True,
assert_equal=True,
check_gradient=False,
check_has_compiled=True,
output_process_fn_grad=lambda x: x,
):
kwargs = kwargs or {}
torch._dynamo.reset()
@ -201,7 +226,7 @@ def check_model(
def upcast_fn(x):
nonlocal has_lowp_args
if isinstance(x, torch.Tensor) and (
x.dtype == torch.float16 or x.dtype == torch.bfloat16
x.dtype == torch.float16 or x.dtype == torch.bfloat16
):
has_lowp_args = True
return x.float()
@ -350,22 +375,22 @@ def check_model(
@torch._inductor.config.patch("triton.cudagraphs", False)
def check_model_cuda(
self: TestCase,
model,
example_inputs,
kwargs=None,
*,
atol=None,
rtol=None,
check_lowp=True,
exact_dtype=True,
nopython=True,
copy_to_cuda=True,
reference_in_float=True,
assert_equal=True,
check_gradient=False,
check_has_compiled=True,
output_process_fn_grad=lambda x: x,
self: TestCase,
model,
example_inputs,
kwargs=None,
*,
atol=None,
rtol=None,
check_lowp=True,
exact_dtype=True,
nopython=True,
copy_to_cuda=True,
reference_in_float=True,
assert_equal=True,
check_gradient=False,
check_has_compiled=True,
output_process_fn_grad=lambda x: x,
):
kwargs = kwargs or {}
if hasattr(model, "to"):
@ -421,6 +446,8 @@ def check_model_cuda(
check_has_compiled=check_has_compiled,
output_process_fn_grad=output_process_fn_grad,
)
def run_and_get_cpp_code(fn, *args, **kwargs):
# We use the patch context manager instead of using it as a decorator.
# In this way, we can ensure that the attribute is patched and unpatched correctly
@ -443,6 +470,7 @@ def run_and_get_cpp_code(fn, *args, **kwargs):
output_code_log.removeHandler(ch)
return result, s
class TestCase(TorchTestCase):
@classmethod
def setUpClass(cls):
@ -478,6 +506,8 @@ class TestCase(TorchTestCase):
if os.environ.get("ERROR_ON_SLOW") == "1":
elapsed = time.perf_counter() - self._start
assert elapsed < 120
class ToTuple(torch.nn.Module):
def forward(self, x):
return (x,)
@ -494,3 +524,56 @@ def make_dynamic_cls(cls, xfail_prop="_expected_failure_dynamic"):
def filesize(filename: pathlib.Path):
assert filename.exists(), f"{filename} is missing"
return os.stat(filename).st_size
def run_inductor_tests(
*,
skip_rocm=False,
skip_asan=False,
nvcc=False,
cudagraphs=False,
mkl=False,
skip_fbcode=False,
skip_mac=False,
triton=False,
big_gpu=False,
):
if IS_WINDOWS and IS_CI:
sys.stderr.write(
"Windows CI does not have necessary dependencies for inductor yet\n"
)
return
if not (HAS_CPU or HAS_CUDA):
sys.stderr.write("Missing both CPU compiler and Triton compiler\n")
return
if skip_rocm and TEST_WITH_ROCM:
sys.stderr.write("Skipping due to rocm\n")
return
if skip_asan and TEST_WITH_ASAN:
sys.stderr.write("Skipping due to asan\n")
return
if nvcc and not nvcc_exist():
sys.stderr.write("Skipping due to nvcc\n")
return
if cudagraphs and not TEST_CUDA_GRAPH:
sys.stderr.write("Skipping due to cudagraphs\n")
return
if mkl and not torch.backends.mkldnn.is_available():
sys.stderr.write("Skipping due to mkl\n")
return
if skip_fbcode and IS_FBCODE:
sys.stderr.write("Skipping due to fbcode\n")
return
if skip_mac and IS_MACOS:
sys.stderr.write("Skipping due to mac\n")
return
if (triton or big_gpu) and not HAS_CUDA:
sys.stderr.write("Skipping due to triton\n")
return
if big_gpu and not is_big_gpu(0):
sys.stderr.write("Skipping due to is_big_gpu\n")
return
from torch._dynamo.test_case import run_tests
return run_tests(("filelock", "sympy"))
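
Each flag turns a formerly file-local skip condition into a declarative argument; a failed precondition writes its reason to stderr and returns (so a __main__ invocation exits cleanly, as the old sys.exit(0) paths did) rather than raising unittest.SkipTest, and the tuple handed to run_tests replaces the per-file needs="filelock" / "sympy" / "cuda" declarations. Flags compose, as the call sites above show; an illustrative pair taken from them:

# From one of the call sites above: gate on ASAN, ROCm and macOS at once.
run_inductor_tests(skip_asan=True, skip_rocm=True, skip_mac=True)

# Unconditional callers still get the baseline Windows-CI and
# HAS_CPU/HAS_CUDA compiler checks.
run_inductor_tests()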

View File

@ -1,10 +1,5 @@
import functools
import unittest
from torch.testing._internal.inductor_utils import HAS_CUDA
requires_cuda = functools.partial(unittest.skipIf, not HAS_CUDA, "requires cuda")
if HAS_CUDA:
import triton
from triton import language as tl
@ -134,3 +129,14 @@ if HAS_CUDA:
mul2_inplace_kernel(in_ptr0, n_elements, BLOCK_SIZE=BLOCK_SIZE)
x = tl.load(in_ptr0 + offsets, mask=mask)
tl.store(out_ptr + offsets, x, mask=mask)
else:
triton = None
tl = None
add_kernel = None
add_kernel_autotuned = None
add_kernel_2d_autotuned = None
mul2_kernel = None
mul2_inplace_kernel = None
zero_negs = None
indirection_kernel = None
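
Because the kernels fall back to None instead of raising, test files can now import them unconditionally and rely on CUDA gating to keep CPU-only runs away from them, which is what lets the try/except ImportError blocks around "import triton" disappear from the files above. A sketch of the assumed consuming pattern (ExampleTest is hypothetical, not part of this diff):

from torch._dynamo.test_case import TestCase
from torch.testing._internal.inductor_utils import requires_cuda
from torch.testing._internal.triton_utils import add_kernel  # None without CUDA

class ExampleTest(TestCase):
    @requires_cuda()
    def test_add_kernel(self):
        # Only reached when CUDA (and hence triton) is available, so
        # add_kernel is a real triton kernel here, never None.
        ...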