[dynamo] Add run_inductor_tests entrypoint (#113278)

Pull Request resolved: https://github.com/pytorch/pytorch/pull/113278
Approved by: https://github.com/yanboliang

Committed by: PyTorch MergeBot
Parent: fb9a136383
Commit: b00311ce9e
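This change collapses the per-file `if __name__ == "__main__":` boilerplate across the inductor test suite into a single run_inductor_tests entrypoint in torch.testing._internal.inductor_utils. Each test file previously re-implemented its own environment gating (HAS_CPU/HAS_CUDA checks, ROCm/ASAN/macOS/fbcode skips, nvcc and big-GPU probes) before calling run_tests; now it declares its requirements as keyword flags. The implementation of run_inductor_tests itself is not visible in the hunks below, only its call sites and supporting changes, so the following is a rough sketch inferred from the flags this diff uses (triton, nvcc, mkl, big_gpu, cudagraphs, skip_rocm, skip_asan, skip_mac, skip_fbcode); treat the body as an assumption, not the actual code:

def run_inductor_tests(
    *,
    triton=False,       # needs a working Triton + CUDA setup
    nvcc=False,         # needs nvcc available (CUDA codecache tests)
    mkl=False,          # needs mkldnn support
    big_gpu=False,      # needs a GPU large enough for autotuning tests
    cudagraphs=False,   # needs CUDA graph support
    skip_rocm=False,
    skip_asan=False,
    skip_mac=False,
    skip_fbcode=False,
):
    """Hypothetical sketch: run the calling test file under standard inductor CI gates."""
    import torch
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.common_utils import (
        IS_FBCODE,
        IS_MACOS,
        TEST_WITH_ASAN,
        TEST_WITH_ROCM,
    )
    from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

    # Bail out silently when the environment cannot run this file.
    if skip_rocm and TEST_WITH_ROCM:
        return
    if skip_asan and TEST_WITH_ASAN:
        return
    if skip_mac and IS_MACOS:
        return
    if skip_fbcode and IS_FBCODE:
        return
    if mkl and not torch.backends.mkldnn.is_available():
        return
    if (triton or cudagraphs) and not HAS_CUDA:
        return
    if nvcc:
        from torch._inductor.codegen.cuda.cuda_env import nvcc_exist

        if not nvcc_exist():
            return
    if big_gpu:
        from torch._inductor.utils import is_big_gpu

        if not (HAS_CUDA and is_big_gpu(0)):
            return
    if not (HAS_CPU or HAS_CUDA):
        return
    run_tests(needs="filelock")

The payoff is visible in every hunk below: a multi-line, file-specific guard shrinks to one declarative call such as run_inductor_tests(skip_rocm=True, triton=True).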
@@ -32,6 +32,7 @@ from torch.testing._internal.common_utils import (
    disable_translation_validation_if_dynamic_shapes,
    skipIfRocm,
)
from torch.testing._internal.inductor_utils import requires_cuda

# Defines all the kernels for tests
from torch.testing._internal.triton_utils import *  # noqa: F403
@@ -1,7 +1,6 @@
# Owner(s): ["module: inductor"]
import copy
import os
import sys
import tempfile
import unittest
from typing import Dict
@@ -18,38 +17,21 @@ from torch._inductor.utils import aot_inductor_launcher, cache_dir
from torch.testing import FileCheck
from torch.testing._internal import common_utils

from torch.testing._internal.common_utils import (
    IS_CI,
    IS_FBCODE,
    IS_WINDOWS,
    TEST_WITH_ROCM,
    TestCase,
)
from torch.testing._internal.common_utils import IS_FBCODE, TestCase
from torch.testing._internal.inductor_utils import (
    copy_tests,
    HAS_CUDA,
    requires_cuda,
    requires_multigpu,
    TestFailure,
)
from torch.testing._internal.triton_utils import (
    add_kernel,
    add_kernel_2d_autotuned,
    add_kernel_autotuned,
    triton,
)
from torch.utils import _pytree as pytree

if HAS_CUDA:
    import triton
    from torch.testing._internal.triton_utils import (
        add_kernel,
        add_kernel_2d_autotuned,
        add_kernel_autotuned,
    )

if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_torchinductor yet\n"
    )
    if __name__ == "__main__":
        sys.exit(0)
    raise unittest.SkipTest("requires sympy/functorch/filelock")


class AOTInductorModelRunner:
    @classmethod
@@ -1262,8 +1244,6 @@ copy_tests(


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    # cpp_extension N/A in fbcode
    if HAS_CUDA and not TEST_WITH_ROCM:
        run_tests(needs="filelock")
    run_inductor_tests(skip_rocm=True, triton=True)
@@ -1,41 +1,25 @@
# Owner(s): ["module: inductor"]
import contextlib
import math
import os
import sys
import unittest

import torch

from torch._inductor import config
from torch._inductor.scheduler import Scheduler
from torch.testing._internal.common_utils import (
    IS_CI,
    IS_WINDOWS,
    skipIfRocm,
    TEST_WITH_ASAN,
    TestCase as TorchTestCase,
)
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

# Make the helper files in test/ importable
pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
sys.path.append(pytorch_test_dir)

import contextlib
import unittest

from torch._inductor import config
from torch._inductor.scheduler import Scheduler


if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_torchinductor yet\n"
    )
    if __name__ == "__main__":
        sys.exit(0)
    raise unittest.SkipTest("requires sympy/functorch/filelock")

from torch.testing._internal.inductor_utils import (
    check_model,
    check_model_cuda,
    copy_tests,
    HAS_CPU,
    HAS_CUDA,
)

@@ -137,7 +121,6 @@ if HAS_CPU and not torch.backends.mps.is_available():
copy_tests(BenchmarkFusionTestTemplate, BenchmarkFusionCpuTest, "cpu")

if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if HAS_CPU or HAS_CUDA:
        run_tests()
    run_inductor_tests()
@@ -1,16 +1,13 @@
# Owner(s): ["module: inductor"]
import functools
import importlib
import itertools
import sys
import unittest

import torch
from torch import nn
from torch._dynamo.testing import load_test_module
from torch._inductor import config as inductor_config
from torch.testing._internal.common_cuda import TEST_CUDNN
from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, TEST_WITH_ASAN
from torch.testing._internal.common_utils import TEST_WITH_ASAN
from torch.testing._internal.inductor_utils import (
    check_model,
    check_model_cuda,
@@ -20,20 +17,7 @@ from torch.testing._internal.inductor_utils import (
    skipCUDAIf,
)

if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_torchinductor yet\n"
    )
    if __name__ == "__main__":
        sys.exit(0)
    raise unittest.SkipTest("requires sympy/functorch/filelock")

TestCase = load_test_module(__file__, "inductor.test_inductor_freezing").TestCase

importlib.import_module("functorch")
importlib.import_module("filelock")


aten = torch.ops.aten


@@ -252,7 +236,6 @@ if HAS_CUDA and not TEST_WITH_ASAN:
del BinaryFoldingTemplate

if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if HAS_CPU or HAS_CUDA:
        run_tests(needs="filelock")
    run_inductor_tests()
@@ -1,12 +1,11 @@
# Owner(s): ["module: inductor"]
import functools
import pickle
import tempfile
import unittest
from unittest.mock import patch

import torch
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import counters
from torch._inductor import config
from torch._inductor.codecache import (
@@ -22,13 +21,7 @@ from torch.testing._internal.common_utils import (
    instantiate_parametrized_tests,
    parametrize,
)
from torch.testing._internal.inductor_utils import HAS_CUDA
from torch.utils._triton import has_triton

HAS_TRITON = has_triton()

requires_cuda = functools.partial(unittest.skipIf, not HAS_CUDA, "requires cuda")
requires_triton = functools.partial(unittest.skipIf, not HAS_TRITON, "requires triton")
from torch.testing._internal.inductor_utils import HAS_CUDA, requires_cuda


class MyModel(torch.nn.Module):
@@ -96,7 +89,7 @@ class TestFxGraphCache(TestCase):
        super().setUp()
        counters.clear()

    @requires_triton()
    @requires_cuda()
    @config.patch({"fx_graph_cache": True})
    @parametrize("device", ("cuda", "cpu"))
    @parametrize("dtype", (torch.float32, torch.bfloat16))
@@ -137,7 +130,7 @@ class TestFxGraphCache(TestCase):
        self.assertEqual(counters["inductor"]["fxgraph_cache_miss"], 2)
        self.assertEqual(counters["inductor"]["fxgraph_cache_hit"], 1)

    @requires_triton()
    @requires_cuda()
    @config.patch({"fx_graph_cache": True})
    @parametrize("device", ("cuda", "cpu"))
    @parametrize("dtype", (torch.float32, torch.float64))
@@ -482,4 +475,6 @@ class TestFxGraphCacheHashing(TestCase):


if __name__ == "__main__":
    run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests()
@@ -12,7 +12,6 @@ from torch._inductor.graph import GraphLowering
from torch._inductor.virtualized import V

from torch.testing._internal.common_utils import TestCase as TorchTestCase
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA


class TestCodegenTriton(TorchTestCase):
@@ -71,7 +70,6 @@ class TestCodegenTriton(TorchTestCase):


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if HAS_CPU or HAS_CUDA:
        run_tests("sympy")
    run_inductor_tests()
@@ -7,12 +7,10 @@ import torch
import torch.nn as nn
from torch import _inductor as inductor
from torch._dynamo import compiled_autograd
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.testing import load_test_module
from torch._dynamo.utils import counters
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

# note: these tests are not run on windows due to inductor_utils.HAS_CPU
from torch.testing._internal.inductor_utils import HAS_CUDA


def compiler_fn(gm):
@@ -535,5 +533,6 @@ for name, fn in test_autograd.TestAutograd.__dict__.items():


if __name__ == "__main__":
    if HAS_CPU:
        run_tests(needs="filelock")
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests()
@@ -10,13 +10,11 @@ import torch._inductor
# The rest of the optimizers not yet imported: Adamax, LBFGS, RAdam, SGD, SparseAdam
from torch.optim import Adadelta, Adagrad, Adam, AdamW, ASGD, NAdam, RMSprop, Rprop

from torch.testing._internal.common_utils import TEST_WITH_ROCM, TestCase
from torch.testing._internal.common_utils import TestCase

from torch.testing._internal.inductor_utils import (
    check_model,
    check_model_cuda,
    HAS_CPU,
    HAS_CUDA,
    requires_cuda,
)

@@ -215,7 +213,6 @@ class CompiledOptimizerTests(TestCase):


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if (HAS_CPU or HAS_CUDA) and not TEST_WITH_ROCM:
        run_tests(needs="filelock")
    run_inductor_tests(skip_rocm=True)
@@ -4,7 +4,7 @@ import unittest

import torch

from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase

from torch._inductor import config
from torch.testing._internal.inductor_utils import HAS_CPU
@@ -235,4 +235,6 @@ class TestInductorConfig(TestCase):


if __name__ == "__main__":
    run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests()
@@ -1,27 +1,13 @@
# Owner(s): ["module: inductor"]

import sys
import unittest
from unittest import mock

import torch

from torch._dynamo.test_case import run_tests, TestCase
from torch.testing._internal.common_utils import IS_LINUX
from torch.testing._internal.inductor_utils import HAS_CUDA

try:
    import triton
except ImportError:
    if __name__ == "__main__":
        sys.exit(0)
    raise unittest.SkipTest("requires triton")  # noqa: TRY200

from torch._dynamo.test_case import TestCase
from torch._inductor import config
from torch._inductor.coordinate_descent_tuner import CoordescTuner
from torch.testing._internal.triton_utils import triton

config.benchmark_kernel = True
config.coordinate_descent_tuning = True

orig_compare_config = CoordescTuner.compare_config

@@ -44,6 +30,7 @@ def mock_compare_config_prefer_larger_XBLOCK(
    return orig_compare_config(self, func, candidate_config, best_config, best_timing)


@config.patch(benchmark_kernel=True, coordinate_descent_tuning=True)
class TestCoordinateDescentTuner(TestCase):
    def test_abs_function(self):
        """
@@ -100,5 +87,6 @@ class TestCoordinateDescentTuner(TestCase):


if __name__ == "__main__":
    if IS_LINUX and HAS_CUDA:
        run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests(triton=True)
@@ -379,8 +379,8 @@ if RUN_CUDA:
        test_failures_cuda_wrapper,
    )

if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests

    if RUN_CPU or RUN_CUDA:
        run_tests(needs="filelock")
if __name__ == "__main__":
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests(skip_asan=True, skip_rocm=True, skip_mac=True)
@@ -34,7 +34,7 @@ from torch._inductor.utils import timed
from torch._inductor.virtualized import V
from torch.fx.experimental.proxy_tensor import make_fx
from torch.nn import functional as F
from torch.testing._internal.common_utils import IS_MACOS, slowTest
from torch.testing._internal.common_utils import slowTest
from torch.testing._internal.inductor_utils import (
    check_model,
    run_and_get_cpp_code,
@@ -2600,8 +2600,6 @@ class CPUReproTests(TestCase):


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import HAS_CPU
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if HAS_CPU and not IS_MACOS:
        run_tests(needs="filelock")
    run_inductor_tests(skip_mac=True)
@@ -1,6 +1,5 @@
# Owner(s): ["module: inductor"]
import math
import sys
import unittest

import torch
@@ -20,17 +19,9 @@ from torch.testing._internal.common_utils import (
    freeze_rng_state,
    IS_FBCODE,
    skipIfRocm,
    TEST_WITH_ASAN,
)
from torch.testing._internal.inductor_utils import check_model_cuda, TestCase, ToTuple

try:
    import triton
    from triton import language as tl
except ImportError:
    if __name__ == "__main__":
        sys.exit(0)
    raise
from torch.testing._internal.triton_utils import tl, triton

aten = torch.ops.aten

@@ -1063,8 +1054,6 @@ class CudaReproTests(TestCase):


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import HAS_CUDA
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if HAS_CUDA and not TEST_WITH_ASAN:
        run_tests(needs="filelock")
    run_inductor_tests(triton=True, skip_asan=True)
@@ -7,7 +7,6 @@ import torch

from torch._inductor import config
from torch._inductor.codecache import AsyncCompile, CUDACodeCache
from torch._inductor.codegen.cuda.cuda_env import nvcc_exist
from torch._inductor.exc import CUDACompileError
from torch.testing._internal.common_utils import TestCase as TorchTestCase

@@ -86,7 +85,6 @@ class TestCUDACodeCache(TorchTestCase):


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if nvcc_exist():
        run_tests("cuda")
    run_inductor_tests(nvcc=True, triton=True)
@@ -1,9 +1,6 @@
# Owner(s): ["module: inductor"]
import contextlib
import functools
import gc
import importlib
import sys
import unittest
import warnings

@@ -18,35 +15,15 @@ from torch.fx.experimental.proxy_tensor import make_fx
from torch.testing import FileCheck

from torch.testing._internal.common_utils import (
    IS_CI,
    IS_LINUX,
    IS_WINDOWS,
    skipIfRocm,
    TEST_CUDA_GRAPH,
    TEST_WITH_ASAN,
    TestCase as TorchTestCase,
)
from torch.testing._internal.inductor_utils import HAS_CUDA, requires_multigpu
from torch.utils._python_dispatch import TorchDispatchMode

if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_torchinductor yet\n"
    )
    if __name__ == "__main__":
        sys.exit(0)
    raise unittest.SkipTest("requires sympy/functorch/filelock")

importlib.import_module("functorch")
importlib.import_module("filelock")

from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

HAS_MULTIGPU = HAS_CUDA and torch.cuda.device_count() >= 2
aten = torch.ops.aten
requires_cuda = functools.partial(unittest.skipIf, not HAS_CUDA, "requires cuda")
requires_multigpu = functools.partial(
    unittest.skipIf, not HAS_MULTIGPU, "requires multiple cuda devices"
)


def cdata(t):
@@ -1310,12 +1287,6 @@ if HAS_CUDA and not TEST_WITH_ASAN:


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if not TEST_CUDA_GRAPH:
        if __name__ == "__main__":
            sys.exit(0)
        raise unittest.SkipTest("cuda graph test is skipped")

    if HAS_CPU or HAS_CUDA:
        run_tests(needs="filelock")
    run_inductor_tests(cudagraphs=True)
@@ -9,7 +9,7 @@ from torch._inductor.lowering import register_lowering
from torch._inductor.virtualized import ops

from torch.testing._internal.common_utils import TestCase as TorchTestCase
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
from torch.testing._internal.inductor_utils import HAS_CUDA


# These tests check issues for lowerings that aren't in the main pytorch repo
@@ -139,7 +139,6 @@ class TestCustomLowering(TorchTestCase):


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if HAS_CPU or HAS_CUDA:
        run_tests(needs="filelock")
    run_inductor_tests()
@@ -4,16 +4,13 @@ import contextlib
import torch
import torch._inductor.pattern_matcher as pattern_matcher

from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import counters

from torch._inductor import config
from torch._inductor.lowering import lowerings as L
from torch._inductor.pattern_matcher import Arg, CallFunction, PatternMatcherPass

from torch.testing._internal.common_utils import IS_LINUX
from torch.testing._internal.inductor_utils import HAS_CPU


@config.patch({"freezing": True})
class TestCustomPassBase(TestCase):
@@ -168,5 +165,6 @@ class TestPostGradCustomPrePostPass(TestCustomPassBase):


if __name__ == "__main__":
    if IS_LINUX and HAS_CPU and torch.backends.mkldnn.is_available():
        run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests(mkl=True)
@@ -145,8 +145,6 @@ buf2.node.kernel = extern_kernels.mm""",


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import HAS_CPU
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if HAS_CPU:
        run_tests(needs="filelock")
    run_inductor_tests()
@@ -9,7 +9,7 @@ from torch._inductor.ir import Buffer, FixedLayout, Pointwise
from torch._inductor.virtualized import ops, V

from torch.testing._internal.common_utils import TestCase as TorchTestCase
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
from torch.testing._internal.inductor_utils import HAS_CUDA


class TestDependencies(TorchTestCase):
@@ -58,7 +58,6 @@ class TestDependencies(TorchTestCase):


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if HAS_CPU or HAS_CUDA:
        run_tests("sympy")
    run_inductor_tests()
@@ -1,8 +1,6 @@
# Owner(s): ["module: inductor"]
import copy
import itertools
import sys
import unittest

import torch
from torch import nn
@@ -11,18 +9,10 @@ from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import counters
from torch._inductor import config as inductor_config

from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, TEST_WITH_ASAN
from torch.testing._internal.common_utils import TEST_WITH_ASAN

from torch.testing._internal.inductor_utils import copy_tests, HAS_CPU, HAS_CUDA

if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_torchinductor yet\n"
    )
    if __name__ == "__main__":
        sys.exit(0)
    raise unittest.SkipTest("requires sympy/functorch/filelock")


class ConvOp(nn.Module):
    expected_optimization_count = 1
@@ -191,7 +181,6 @@ if HAS_CUDA and not TEST_WITH_ASAN:
del EfficientConvBNEvalTemplate

if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if HAS_CPU or HAS_CUDA:
        run_tests(needs="filelock")
    run_inductor_tests()
@@ -8,27 +8,23 @@ import torch
import torch._dynamo
import torch.utils.cpp_extension

try:
    from extension_backends.extension_codegen_backend import (
        ExtensionScheduling,
        ExtensionWrapperCodegen,
    )
except ImportError:
    from .extension_backends.extension_codegen_backend import (
        ExtensionScheduling,
        ExtensionWrapperCodegen,
    )

from torch._C import FileCheck
from torch._dynamo.testing import load_test_module
from torch._inductor import metrics
from torch._inductor.codegen.common import (
    get_scheduling_for_device,
    get_wrapper_codegen_for_device,
    register_backend_for_device,
)
from torch.testing._internal.common_utils import IS_FBCODE, IS_MACOS
from torch.testing._internal.common_utils import IS_FBCODE
from torch.testing._internal.inductor_utils import run_and_get_cpp_code, TestCase

extension_codegen_backend = load_test_module(
    __name__, "inductor.extension_backends.extension_codegen_backend"
)
ExtensionScheduling = extension_codegen_backend.ExtensionScheduling
ExtensionWrapperCodegen = extension_codegen_backend.ExtensionWrapperCodegen


def remove_build_path():
    if sys.platform == "win32":
@@ -127,9 +123,6 @@ class ExtensionBackendTests(TestCase):


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import HAS_CPU
    from torch.testing._internal.inductor_utils import run_inductor_tests

    # cpp_extension doesn't work in fbcode right now
    if HAS_CPU and not IS_MACOS and not IS_FBCODE:
        run_tests(needs="filelock")
    run_inductor_tests(skip_fbcode=True, skip_mac=True)
@@ -10,15 +10,12 @@ from torch.testing._internal.common_utils import (
    instantiate_parametrized_tests,
    IS_FBCODE,
    parametrize,
    TEST_WITH_ROCM,
    TestCase,
)

from torch.testing._internal.inductor_utils import (
    check_model,
    check_model_cuda,
    HAS_CPU,
    HAS_CUDA,
    requires_cuda,
)

@@ -590,7 +587,6 @@ class ForeachTests(TestCase):


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if (HAS_CPU or HAS_CUDA) and not TEST_WITH_ROCM:
        run_tests(needs="filelock")
    run_inductor_tests(skip_rocm=True)
@@ -5,7 +5,7 @@ import unittest

import torch
from torch import Tensor
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._inductor import utils
from torch.testing._internal.common_cuda import SM90OrLater
from torch.testing._internal.common_utils import (
@@ -13,7 +13,6 @@ from torch.testing._internal.common_utils import (
    parametrize,
    TEST_WITH_ROCM,
)
from torch.testing._internal.inductor_utils import HAS_CUDA

torch.set_float32_matmul_precision("high")

@@ -302,5 +301,6 @@ class TestFP8Types(TestCase):


if __name__ == "__main__":
    if HAS_CUDA:
        run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests(triton=True, skip_rocm=True)
@@ -6,14 +6,14 @@ import math
import torch
import torch._inductor.config
import torch.utils.checkpoint
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import counters
from torch._inductor.utils import run_and_get_code
from torch.testing._internal.common_cuda import (
    PLATFORM_SUPPORTS_FUSED_ATTENTION,
    SM80OrLater,
)
from torch.testing._internal.common_utils import IS_LINUX, skipIfRocm
from torch.testing._internal.common_utils import skipIfRocm
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA


@@ -664,5 +664,6 @@ if HAS_CPU:


if __name__ == "__main__":
    if IS_LINUX:
        run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests()
@@ -12,7 +12,7 @@ from torch._inductor.fx_passes.pre_grad import (
    transpose_matmul,
)
from torch.fx.passes.shape_prop import ShapeProp
from torch.testing._internal.common_utils import run_tests, TestCase
from torch.testing._internal.common_utils import TestCase

PassFunc = Callable[[torch.fx.GraphModule, Any], torch.fx.GraphModule]

@@ -154,4 +154,6 @@ class TestFxFusion(TestCase):


if __name__ == "__main__":
    run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests()
@@ -4,7 +4,7 @@ import unittest

import torch
import torch._inductor
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import counters
from torch.testing._internal.inductor_utils import requires_cuda

@@ -15,7 +15,6 @@ try:
    has_fbgemm = True
except Exception:
    has_fbgemm = False
    pass


class MyModule(torch.nn.Module):
@@ -433,4 +432,6 @@ class TestGroupBatchFusion(TestCase):


if __name__ == "__main__":
    run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests()
@@ -268,8 +268,6 @@ class ExprPrinterTests(TorchTestCase):


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if HAS_CPU or HAS_CUDA:
        run_tests("sympy")
    run_inductor_tests()
@@ -2,7 +2,6 @@
import contextlib
import functools
import itertools
import sys
import unittest
import weakref

@@ -15,8 +14,6 @@ from torch.testing import FileCheck
from torch.testing._internal.common_cuda import SM80OrLater

from torch.testing._internal.common_utils import (
    IS_CI,
    IS_WINDOWS,
    skipIfRocm,
    TEST_WITH_ASAN,
    TestCase as TorchTestCase,
@@ -27,21 +24,11 @@ from torch.testing._internal.inductor_utils import (
    copy_tests,
    HAS_CPU,
    HAS_CUDA,
    requires_cuda,
)

if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_torchinductor yet\n"
    )
    if __name__ == "__main__":
        sys.exit(0)
    raise unittest.SkipTest("requires sympy/functorch/filelock")


HAS_MULTIGPU = HAS_CUDA and torch.cuda.device_count() >= 2
aten = torch.ops.aten
prims = torch.ops.prims
requires_cuda = functools.partial(unittest.skipIf, not HAS_CUDA, "requires cuda")


class TestCase(TorchTestCase):
@@ -652,7 +639,6 @@ del OptimizeForInferenceTemplate


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if HAS_CPU or HAS_CUDA:
        run_tests(needs="filelock")
    run_inductor_tests()
@@ -5,7 +5,7 @@ import logging

import torch

from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase

from torch._inductor.utils import do_bench, do_bench_using_profiling

@@ -33,4 +33,6 @@ class TestBench(TestCase):


if __name__ == "__main__":
    run_tests("cuda")
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests(triton=True)
@@ -1,9 +1,7 @@
# Owner(s): ["module: inductor"]

import torch
from torch._dynamo.test_case import run_tests, TestCase
from torch.testing._internal.common_utils import IS_LINUX
from torch.testing._internal.inductor_utils import HAS_CUDA
from torch._dynamo.test_case import TestCase

aten = torch.ops.aten

@@ -65,5 +63,6 @@ class TestReinplacingPassCorrectness(TestCase):


if __name__ == "__main__":
    if IS_LINUX and HAS_CUDA:
        run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests(triton=True)
@@ -5,11 +5,10 @@ import sys
from unittest.mock import patch

import torch
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._inductor import config
from torch._inductor.codecache import PyCodeCache
from torch.testing import FileCheck
from torch.testing._internal.inductor_utils import HAS_CUDA


class TestKernelBenchmark(TestCase):
@@ -105,5 +104,6 @@ class TestKernelBenchmark(TestCase):


if __name__ == "__main__":
    if HAS_CUDA:
        run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests(triton=True)
@@ -5,10 +5,9 @@ import random

import torch
from torch import nn
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import same
from torch._inductor import config
from torch.testing._internal.inductor_utils import HAS_CUDA

USE_DDP_WRAPPER = os.environ.get("USE_DDP_WRAPPER", "1") == "1"

@@ -285,5 +284,6 @@ class TestLayoutOptim(TestCase):


if __name__ == "__main__":
    if HAS_CUDA:
        run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests(triton=True)
@@ -6,7 +6,7 @@ from typing import Callable, List, Optional

import torch
from torch import multiprocessing as mp
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.testing import reset_rng_state
from torch._dynamo.utils import counters
from torch._inductor import config
@@ -34,7 +34,7 @@ from torch.testing._internal.common_utils import (
    skipIfRocm,
)

from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
from torch.testing._internal.inductor_utils import HAS_CUDA

torch.set_float32_matmul_precision("high")
if HAS_CUDA:
@@ -755,8 +755,6 @@ class TestTuningProcess(TestCase):


if __name__ == "__main__":
    from torch._inductor.utils import is_big_gpu
    from torch.testing._internal.inductor_utils import run_inductor_tests

    # Set env to make it work in CI.
    if HAS_CUDA and HAS_CPU and is_big_gpu(0):
        run_tests()
    run_inductor_tests(triton=True, big_gpu=True)
@@ -1,30 +1,18 @@
# Owner(s): ["module: inductor"]

import sys
import unittest
from typing import List

import torch
from torch._C import FileCheck
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.testing import load_test_module
from torch._dynamo.utils import same
from torch._inductor import config

from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, skipIfRocm
from torch.testing._internal.common_utils import skipIfRocm
from torch.testing._internal.inductor_utils import run_and_get_cpp_code
from torch.utils._triton import has_triton

if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_memory_planning yet\n"
    )
    if __name__ == "__main__":
        sys.exit(0)
    raise unittest.SkipTest("requires sympy/functorch/filelock")


@unittest.skipIf(not has_triton(), "Inductor+gpu needs triton and recent GPU arch")
@config.patch(memory_planning=True)
class TestMemoryPlanning(TestCase):
    def _generate(self, *, device):
@@ -118,4 +106,6 @@ class TestMemoryPlanning(TestCase):


if __name__ == "__main__":
    run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests(triton=True)
@@ -1,5 +1,4 @@
# Owner(s): ["module: inductor"]
import functools
import unittest
from unittest.mock import patch

@@ -7,11 +6,8 @@ import torch._dynamo.config as dynamo_config
import torch._inductor.config as inductor_config
from torch._dynamo.test_minifier_common import MinifierTestBase
from torch._inductor import config
from torch.testing._internal.common_utils import IS_JETSON, IS_MACOS, TEST_WITH_ASAN
from torch.utils._triton import has_triton

_HAS_TRITON = has_triton()
requires_cuda = functools.partial(unittest.skipIf, not _HAS_TRITON, "requires cuda")
from torch.testing._internal.common_utils import IS_JETSON
from torch.testing._internal.inductor_utils import requires_cuda


class MinifierTests(MinifierTestBase):
@@ -173,9 +169,6 @@ inner(torch.randn(20, 20))


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    # Skip CI tests on mac since CPU inductor does not seem to work due to C++ compile errors,
    # also skip on ASAN due to https://github.com/pytorch/pytorch/issues/98262
    if not IS_MACOS and not TEST_WITH_ASAN:
        run_tests()
    run_inductor_tests(skip_mac=True, skip_asan=True)
@@ -1,19 +1,11 @@
# Owner(s): ["module: inductor"]
import functools
import sys
import unittest

import torch._inductor.config as inductor_config
from torch._dynamo.test_minifier_common import MinifierTestBase
from torch.testing._internal.common_utils import (
    IS_JETSON,
    IS_MACOS,
    skipIfRocm,
    TEST_WITH_ASAN,
)
from torch.utils._triton import has_triton

_HAS_TRITON = has_triton()
requires_cuda = functools.partial(unittest.skipIf, not _HAS_TRITON, "requires cuda")
from torch.testing._internal.common_utils import IS_JETSON, skipIfRocm
from torch.testing._internal.inductor_utils import requires_cuda


# These minifier tests are slow, because they must be run in separate
@@ -45,12 +37,10 @@ inner(torch.randn(2, 2).to("{device}"))


if __name__ == "__main__":
    import sys

    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    # Skip CI tests on mac since CPU inductor does not seem to work due to C++ compile errors,
    # also skip on ASAN due to https://github.com/pytorch/pytorch/issues/98262
    # also skip on Py 3.11+ since unhandled exceptions can cause segfaults
    if not IS_MACOS and not TEST_WITH_ASAN and sys.version_info < (3, 11):
        run_tests()
    if sys.version_info < (3, 11):
        run_inductor_tests(skip_mac=True, skip_asan=True)
@@ -6,7 +6,7 @@ import torch
import torch.ao.quantization.quantizer.x86_inductor_quantizer as xiq

from torch._dynamo import config as dynamo_config
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import counters
from torch._export import capture_pre_autograd_graph
from torch._inductor import config
@@ -23,8 +23,8 @@ from torch.testing._internal.common_quantization import (
    skipIfNoONEDNN,
    skipIfNoONEDNNBF16,
)
from torch.testing._internal.common_utils import IS_LINUX, skipIfRocm
from torch.testing._internal.inductor_utils import _check_has_dynamic_shape, HAS_CPU
from torch.testing._internal.common_utils import skipIfRocm
from torch.testing._internal.inductor_utils import _check_has_dynamic_shape


# The dict value is match_nodes(computation_op+unary_op)
@@ -1533,5 +1533,6 @@ class TestDynamicPatternMatcher(TestPatternMatcherBase):


if __name__ == "__main__":
    if IS_LINUX and HAS_CPU and torch.backends.mkldnn.is_available():
        run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests(mkl=True)
@@ -7,12 +7,7 @@ from typing import List, Tuple, Union
import torch
from torch.testing._internal.common_device_type import instantiate_device_type_tests
from torch.testing._internal.common_nn import NNTestCase
from torch.testing._internal.common_utils import (
    IS_WINDOWS,
    parametrize,
    run_tests,
    TEST_CUDA,
)
from torch.testing._internal.common_utils import parametrize, TEST_CUDA
from torch.utils._triton import has_triton


@@ -180,7 +175,8 @@ class TestDecomp(NNTestCase):
device_types = ("cpu", "cuda")
instantiate_device_type_tests(TestDecomp, globals(), only_for=device_types)


if __name__ == "__main__":
    # We don't support torch.compile() on Windows presently
    if not IS_WINDOWS:
        run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests()
@@ -5,7 +5,7 @@ import unittest
import torch
import torch._dynamo.config as dynamo_config
import torch._inductor.config as inductor_config
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import count_calls, counters
from torch._higher_order_ops.out_dtype import out_dtype
from torch._inductor.fx_passes import joint_graph
@@ -29,8 +29,7 @@ from torch._inductor.utils import run_and_get_code
from torch._inductor.virtualized import V
from torch.testing import FileCheck
from torch.testing._internal.common_cuda import SM80OrLater
from torch.testing._internal.common_utils import IS_LINUX, skipIfRocm
from torch.testing._internal.inductor_utils import HAS_CUDA
from torch.testing._internal.common_utils import skipIfRocm


class TestPatternMatcher(TestCase):
@@ -1068,5 +1067,6 @@ class TestPatternMatcher(TestCase):


if __name__ == "__main__":
    if IS_LINUX and HAS_CUDA:
        run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests(triton=True)
@@ -13,12 +13,10 @@ from torch.testing._internal.common_utils import (
    skipIfRocm,
    TestCase as TorchTestCase,
)
from torch.testing._internal.inductor_utils import requires_cuda

# Defines all the kernels for tests
from torch.testing._internal.triton_utils import HAS_CUDA, requires_cuda

if HAS_CUDA:
    from torch.testing._internal.triton_utils import add_kernel
from torch.testing._internal.triton_utils import add_kernel

aten = torch.ops.aten

@@ -840,7 +838,6 @@ class WouldBeNiceIfItWorked:


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if HAS_CUDA:
        run_tests(needs="filelock")
    run_inductor_tests(triton=True)
@@ -9,7 +9,7 @@ import torch._inductor.utils
from torch._inductor import config
from torch.profiler import ProfilerActivity

from torch.testing._internal.common_utils import TemporaryFileName, TEST_WITH_ROCM
from torch.testing._internal.common_utils import TemporaryFileName

from torch.utils._triton import has_triton

@@ -120,7 +120,6 @@ class DynamoProfilerTests(torch._dynamo.test_case.TestCase):


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if not TEST_WITH_ROCM:
        run_tests()
    run_inductor_tests(skip_rocm=True)
@@ -7,13 +7,12 @@ import torch._dynamo.config as dynamo_config
import torch._inductor.config as inductor_config
import torch._inductor.select_algorithm as select_algorithm
import torch.nn.functional as F
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.testing import expectedFailureDynamicWrapper
from torch._dynamo.utils import counters
from torch._inductor.autotune_process import TritonBenchmarkRequest

from torch.testing._internal.common_utils import IS_LINUX, skipIfRocm
from torch.testing._internal.inductor_utils import HAS_CUDA
from torch.testing._internal.common_utils import skipIfRocm

aten = torch.ops.aten

@@ -346,7 +345,6 @@ class TestSelectAlgorithm(TestCase):


if __name__ == "__main__":
    from torch._inductor.utils import is_big_gpu
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if IS_LINUX and HAS_CUDA and is_big_gpu(0):
        run_tests()
    run_inductor_tests(triton=True, big_gpu=True)
@@ -5,7 +5,7 @@ import unittest
import torch
import torch._logging

from torch.testing._internal.common_utils import IS_LINUX, TestCase
from torch.testing._internal.common_utils import TestCase
from torch.testing._internal.inductor_utils import HAS_CUDA


@@ -59,8 +59,6 @@ class SmokeTest(TestCase):


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if IS_LINUX and torch.cuda.is_available():
        if torch.cuda.get_device_properties(0).major > 5:
            run_tests()
    run_inductor_tests(triton=True, big_gpu=True)
@@ -5,7 +5,6 @@ import torch
from torch._inductor import metrics
from torch._inductor.compile_fx import compile_fx, count_bytes_inner
from torch.testing._internal.common_utils import TestCase as TorchTestCase
from torch.testing._internal.inductor_utils import HAS_CUDA

aten = torch.ops.aten

@@ -166,7 +165,6 @@ class MemoryBoundedTests(TestCase):


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if HAS_CUDA:
        run_tests(needs="filelock")
    run_inductor_tests(triton=True)
@@ -1,10 +1,8 @@
# Owner(s): ["module: inductor"]

import torch
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._dynamo.utils import counters
from torch.testing._internal.common_utils import IS_LINUX
from torch.testing._internal.inductor_utils import HAS_CUDA


def patch(f):
@@ -1066,5 +1064,6 @@ class TestSplitCatFxPasses(TestCase):


if __name__ == "__main__":
    if IS_LINUX and HAS_CUDA:
        run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests(triton=True)
@@ -1,11 +1,10 @@
# Owner(s): ["module: inductor"]
import torch
from torch import _dynamo as dynamo, _inductor as inductor
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._inductor.utils import gen_gm_and_inputs
from torch.fx import symbolic_trace
from torch.fx.experimental.proxy_tensor import make_fx
from torch.testing._internal.inductor_utils import HAS_CPU


class MyModule(torch.nn.Module):
@@ -111,5 +110,6 @@ class TestStandaloneInductor(TestCase):


if __name__ == "__main__":
    if HAS_CPU:
        run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests(triton=True)
@@ -4,7 +4,6 @@ import copy
import dataclasses
import functools
import gc
import importlib
import itertools
import math
import operator
@@ -32,9 +31,14 @@ from torch._dynamo.testing import (
    rand_strided,
    same,
)

from torch._inductor import config, test_operators
from torch._inductor.codegen.common import DataTypePropagation, OptimizationContext

from torch._inductor.compile_fx import compile_fx, compile_fx_inner
from torch._inductor.utils import (
    add_scheduler_init_hook,
    has_torchvision_roi_align,
    run_and_get_code,
    run_and_get_triton_code,
)
@@ -51,34 +55,15 @@ from torch.testing._internal.common_cuda import (

from torch.testing._internal.common_device_type import _has_sufficient_memory
from torch.testing._internal.common_dtype import all_types

from torch.testing._internal.common_utils import (
    DeterministicGuard,
    IS_CI,
    IS_FBCODE,
    IS_WINDOWS,
    IS_X86,
    skipIfRocm,
    slowTest,
    TEST_WITH_ASAN,
)
from torch.utils import _pytree as pytree
from torch.utils._python_dispatch import TorchDispatchMode
from torch.utils.weak import WeakTensorKeyDictionary

if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_torchinductor yet\n"
    )
    sys.exit(0)

importlib.import_module("functorch")
importlib.import_module("filelock")

from torch._inductor import config, test_operators

from torch._inductor.compile_fx import compile_fx, compile_fx_inner
from torch._inductor.utils import has_torchvision_roi_align

from torch.testing._internal.common_utils import slowTest
from torch.testing._internal.inductor_utils import (
    check_model,
    check_model_cuda,
@@ -89,11 +74,15 @@ from torch.testing._internal.inductor_utils import (
    requires_cuda,
    requires_multigpu,
    run_and_get_cpp_code,
    skip_if_mac,
    skip_if_x86_mac,
    skipCUDAIf,
    TestCase,
    ToTuple,
)
from torch.utils import _pytree as pytree
from torch.utils._python_dispatch import TorchDispatchMode
from torch.utils.weak import WeakTensorKeyDictionary

aten = torch.ops.aten

@@ -759,6 +748,7 @@ class CommonTemplate:
        for dtype in dtypes:
            self.common(fn, (torch.randn(8, 8).to(dtype), torch.randn(8, 8).to(dtype)))

    @skip_if_mac()
    def test_min_max_reduction_nan(self):
        def fn(a):
            return (torch.max(a), torch.min(a))
@@ -8014,7 +8004,6 @@ if HAS_CPU:


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if HAS_CPU or HAS_CUDA:
        run_tests(needs="filelock")
    run_inductor_tests()
@@ -1,17 +1,10 @@
# Owner(s): ["module: inductor"]
import sys
import unittest

import torch
from torch._dynamo.testing import load_test_module
from torch._inductor.compile_fx import compile_fx
from torch._inductor.utils import run_and_get_triton_code
from torch.testing._internal.common_utils import (
    IS_CI,
    IS_WINDOWS,
    TEST_WITH_ASAN,
    TestCase,
)
from torch.testing._internal.common_utils import TEST_WITH_ASAN, TestCase

from torch.testing._internal.inductor_utils import (
    _check_has_dynamic_shape,
@@ -28,15 +21,6 @@ CommonTemplate = load_test_module(
).CommonTemplate


if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_torchinductor_codegen_dynamic_shapes yet\n"
    )
    if __name__ == "__main__":
        sys.exit(0)
    raise unittest.SkipTest("requires sympy/functorch/filelock")


# Checks for patterns in generated C++/Triton code to see if it's dynamic
def check_codegen(
    self: TestCase,
@@ -328,7 +312,6 @@ if HAS_CUDA and not TEST_WITH_ASAN:


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if HAS_CPU or HAS_CUDA:
        run_tests(needs="filelock")
    run_inductor_tests()
@@ -1,8 +1,6 @@
# Owner(s): ["module: inductor"]
import contextlib
import math
import sys
import unittest
from functools import partial

import torch
@@ -15,8 +13,6 @@ from torch.testing._internal.common_device_type import (
    onlyCUDA,
)
from torch.testing._internal.common_utils import (
    IS_CI,
    IS_WINDOWS,
    TEST_WITH_ASAN,
    TEST_WITH_ROCM,
    TestCase,
@@ -36,15 +32,6 @@ CommonTemplate = load_test_module(
    __file__, "inductor.test_torchinductor"
).CommonTemplate

if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_torchinductor_dynamic_shapes yet\n"
    )
    if __name__ == "__main__":
        sys.exit(0)
    raise unittest.SkipTest("requires sympy/functorch/filelock")


# xfail by default, set is_skip=True to skip
test_failures = {
    "test_kwargs_dynamic_shapes": TestFailure(("cpu",)),
@@ -435,8 +422,7 @@ class TestInductorDynamic(TestCase):
instantiate_device_type_tests(TestInductorDynamic, globals())

if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch.testing._internal.inductor_utils import run_inductor_tests

    # Slow on ASAN after https://github.com/pytorch/pytorch/pull/94068
    if (HAS_CPU or HAS_CUDA) and not TEST_WITH_ASAN:
        run_tests(needs="filelock")
    run_inductor_tests(skip_asan=True)
@@ -12,7 +12,6 @@ from unittest.mock import patch
import torch

from torch._dispatch.python import enable_python_dispatcher
from torch._dynamo.test_case import run_tests
from torch._subclasses.fake_tensor import (
    DataDependentOutputException,
    DynamicOutputShapeException,
@@ -617,4 +616,6 @@ class TestInductorOpInfo(TestCase):
instantiate_device_type_tests(TestInductorOpInfo, globals())

if __name__ == "__main__":
    run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests()
@@ -1,19 +1,7 @@
# Owner(s): ["module: inductor"]

import sys
import unittest

from torch.testing._internal.common_utils import IS_LINUX
from torch.testing._internal.inductor_utils import HAS_CUDA

try:
    import triton  # noqa: F401
except ImportError:
    if __name__ == "__main__":
        sys.exit(0)
    raise unittest.SkipTest("requires triton")  # noqa: TRY200

from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._inductor import config
from torch._inductor.triton_heuristics import triton_config

@@ -32,5 +20,6 @@ class TestTritonHeuristics(TestCase):


if __name__ == "__main__":
    if IS_LINUX and HAS_CUDA:
        run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests(triton=True)
@@ -4,9 +4,8 @@ import subprocess
import sys

import torch
from torch._dynamo.test_case import run_tests, TestCase
from torch._dynamo.test_case import TestCase
from torch._inductor.codecache import PyCodeCache
from torch.testing._internal.inductor_utils import HAS_CUDA


class TestTritonWrapper(TestCase):
@@ -49,5 +48,6 @@ class TestTritonWrapper(TestCase):


if __name__ == "__main__":
    if HAS_CUDA:
        run_tests()
    from torch.testing._internal.inductor_utils import run_inductor_tests

    run_inductor_tests(triton=True)
@@ -5,8 +5,7 @@ import torch
from torch._dynamo import config as dynamo_config
from torch._inductor import config as inductor_config

from torch.testing._internal.common_utils import IS_LINUX, TestCase as TorchTestCase
from torch.testing._internal.inductor_utils import HAS_CUDA
from torch.testing._internal.common_utils import TestCase as TorchTestCase


class TestUnbackedSymints(TorchTestCase):
@@ -56,8 +55,6 @@ class TestUnbackedSymints(TorchTestCase):


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests
    from torch._inductor.utils import is_big_gpu
    from torch.testing._internal.inductor_utils import run_inductor_tests

    if IS_LINUX and HAS_CUDA and is_big_gpu(0):
        run_tests()
    run_inductor_tests(triton=True, big_gpu=True)
@ -1,6 +1,7 @@
import contextlib
import os
import pathlib
import sys
import time
from subprocess import CalledProcessError

@ -14,6 +15,11 @@ from torch.testing._internal.common_utils import (
    IS_FBCODE,
    IS_MACOS,
    IS_X86,
    IS_WINDOWS,
    IS_CI,
    TEST_WITH_ASAN,
    TEST_CUDA_GRAPH,
    TEST_WITH_ROCM,
)
from torch._dynamo.backends.registry import register_backend
from torch._inductor.compile_fx import compile_fx, count_bytes_inner

@ -28,6 +34,9 @@ from torch.utils import _pytree as pytree
from torch.utils._pytree import tree_flatten, tree_unflatten
from typing import Tuple
from torch._dynamo.testing import make_test_cls_with_patches
from torch._inductor.codegen.cuda.cuda_env import nvcc_exist
from torch._inductor.utils import is_big_gpu


def test_cpu():
    try:

@ -41,14 +50,26 @@ def test_cpu():
    ):
        return False


HAS_CPU = LazyVal(test_cpu)


def gpu_is_old():
    if has_triton() and torch.cuda.is_available() and not TEST_WITH_ROCM:
        device_props = torch.cuda.get_device_properties(0)
        # some of our CI machines use M60's which can't run Triton
        if device_props.major < 7:
            return True
    return False


HAS_CPU = LazyVal(test_cpu)
HAS_CUDA = has_triton() and not gpu_is_old()


HAS_CUDA = has_triton()


@register_backend
def count_bytes_inductor(gm, example_inputs):
    return compile_fx(gm, example_inputs, inner_compile=count_bytes_inner)

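For context: because count_bytes_inductor is registered with dynamo's backend registry, it can be selected by name. A minimal sketch of that usage, not part of this patch (the function f is illustrative):

import torch
import torch.testing._internal.inductor_utils  # noqa: F401  (importing registers the backend)

@torch.compile(backend="count_bytes_inductor")
def f(x):
    return x * 2 + 1

f(torch.randn(8))  # compiled through compile_fx with inner_compile=count_bytes_inner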
def _check_has_dynamic_shape(
    self: TestCase,
    code,

@ -91,16 +112,21 @@ requires_multigpu = functools.partial(
skip_if_x86_mac = functools.partial(
    unittest.skipIf, IS_MACOS and IS_X86, "Does not work on x86 Mac"
)
skip_if_mac = functools.partial(
    unittest.skipIf, IS_MACOS, "Does not work on Mac"
)
vec_dtypes = [torch.float, torch.bfloat16, torch.float16]


@dataclasses.dataclass
class TestFailure:
    suffixes: Tuple[str]
    is_skip: bool = False
    __test__: bool = False


def copy_tests(
    my_cls, other_cls, suffix, test_failures=None, xfail_prop=None
    my_cls, other_cls, suffix, test_failures=None, xfail_prop=None
):  # noqa: B902
    for name, value in my_cls.__dict__.items():
        if name.startswith("test_"):
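A minimal sketch of how TestFailure and copy_tests combine, mirroring the test_failures dict in the dynamic-shapes change above (class names and test bodies are illustrative):

from torch.testing._internal.inductor_utils import copy_tests, TestCase, TestFailure

# Keys are test method names on the template class; suffixes name the
# device variants the annotation applies to (xfail by default,
# is_skip=True skips instead).
test_failures = {
    "test_kwargs": TestFailure(("cpu",)),
    "test_flaky": TestFailure(("cuda",), is_skip=True),
}


class CommonTemplate:
    def test_kwargs(self):
        ...

    def test_flaky(self):
        ...


class CpuTests(TestCase):
    pass


# Clones each test_* method onto CpuTests as test_*_cpu, applying any
# matching TestFailure whose suffixes include "cpu".
copy_tests(CommonTemplate, CpuTests, "cpu", test_failures)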
@ -146,7 +172,6 @@ def clone_preserve_strides(x, device=None):
    return out


def compute_grads(args, kwrags, results, grads):
    def gather_leaf_tensors(args, kwargs):
        args = pytree.arg_tree_leaves(*args, **kwargs)

@ -171,22 +196,22 @@ def compute_grads(args, kwrags, results, grads):


def check_model(
    self: TestCase,
    model,
    example_inputs,
    kwargs=None,
    *,
    atol=None,
    rtol=None,
    check_lowp=True,
    exact_dtype=True,
    nopython=True,
    copy_to_cuda=True,
    reference_in_float=True,
    assert_equal=True,
    check_gradient=False,
    check_has_compiled=True,
    output_process_fn_grad=lambda x: x,
    self: TestCase,
    model,
    example_inputs,
    kwargs=None,
    *,
    atol=None,
    rtol=None,
    check_lowp=True,
    exact_dtype=True,
    nopython=True,
    copy_to_cuda=True,
    reference_in_float=True,
    assert_equal=True,
    check_gradient=False,
    check_has_compiled=True,
    output_process_fn_grad=lambda x: x,
):
    kwargs = kwargs or {}
    torch._dynamo.reset()

@ -201,7 +226,7 @@ def check_model(
    def upcast_fn(x):
        nonlocal has_lowp_args
        if isinstance(x, torch.Tensor) and (
            x.dtype == torch.float16 or x.dtype == torch.bfloat16
            x.dtype == torch.float16 or x.dtype == torch.bfloat16
        ):
            has_lowp_args = True
            return x.float()

@ -350,22 +375,22 @@ def check_model(

@torch._inductor.config.patch("triton.cudagraphs", False)
def check_model_cuda(
    self: TestCase,
    model,
    example_inputs,
    kwargs=None,
    *,
    atol=None,
    rtol=None,
    check_lowp=True,
    exact_dtype=True,
    nopython=True,
    copy_to_cuda=True,
    reference_in_float=True,
    assert_equal=True,
    check_gradient=False,
    check_has_compiled=True,
    output_process_fn_grad=lambda x: x,
    self: TestCase,
    model,
    example_inputs,
    kwargs=None,
    *,
    atol=None,
    rtol=None,
    check_lowp=True,
    exact_dtype=True,
    nopython=True,
    copy_to_cuda=True,
    reference_in_float=True,
    assert_equal=True,
    check_gradient=False,
    check_has_compiled=True,
    output_process_fn_grad=lambda x: x,
):
    kwargs = kwargs or {}
    if hasattr(model, "to"):

@ -421,6 +446,8 @@ def check_model_cuda(
        check_has_compiled=check_has_compiled,
        output_process_fn_grad=output_process_fn_grad,
    )


def run_and_get_cpp_code(fn, *args, **kwargs):
    # We use the patch context manager instead of using it as a decorator.
    # In this way, we can ensure that the attribute is patched and unpatched correctly

@ -443,6 +470,7 @@ def run_and_get_cpp_code(fn, *args, **kwargs):
    output_code_log.removeHandler(ch)
    return result, s


class TestCase(TorchTestCase):
    @classmethod
    def setUpClass(cls):

@ -478,6 +506,8 @@ class TestCase(TorchTestCase):
        if os.environ.get("ERROR_ON_SLOW") == "1":
            elapsed = time.perf_counter() - self._start
            assert elapsed < 120


class ToTuple(torch.nn.Module):
    def forward(self, x):
        return (x,)
@ -494,3 +524,56 @@ def make_dynamic_cls(cls, xfail_prop="_expected_failure_dynamic"):

def filesize(filename: pathlib.Path):
    assert filename.exists(), f"{filename} is missing"
    return os.stat(filename).st_size


def run_inductor_tests(
    *,
    skip_rocm=False,
    skip_asan=False,
    nvcc=False,
    cudagraphs=False,
    mkl=False,
    skip_fbcode=False,
    skip_mac=False,
    triton=False,
    big_gpu=False,
):
    if IS_WINDOWS and IS_CI:
        sys.stderr.write(
            "Windows CI does not have necessary dependencies for inductor yet\n"
        )
        return
    if not (HAS_CPU or HAS_CUDA):
        sys.stderr.write("Missing both CPU compiler and Triton compiler\n")
        return
    if skip_rocm and TEST_WITH_ROCM:
        sys.stderr.write("Skipping due to rocm\n")
        return
    if skip_asan and TEST_WITH_ASAN:
        sys.stderr.write("Skipping due to asan\n")
        return
    if nvcc and not nvcc_exist():
        sys.stderr.write("Skipping due to nvcc\n")
        return
    if cudagraphs and not TEST_CUDA_GRAPH:
        sys.stderr.write("Skipping due to cudagraphs\n")
        return
    if mkl and not torch.backends.mkldnn.is_available():
        sys.stderr.write("Skipping due to mkl\n")
        return
    if skip_fbcode and IS_FBCODE:
        sys.stderr.write("Skipping due to fbcode\n")
        return
    if skip_mac and IS_MACOS:
        sys.stderr.write("Skipping due to mac\n")
        return
    if (triton or big_gpu) and not HAS_CUDA:
        sys.stderr.write("Skipping due to triton\n")
        return
    if big_gpu and not is_big_gpu(0):
        sys.stderr.write("Skipping due to is_big_gpu\n")
        return

    from torch._dynamo.test_case import run_tests

    return run_tests(("filelock", "sympy"))

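With this entrypoint in place, the per-file __main__ boilerplate seen in the test changes above reduces to one call. A minimal sketch of a consuming test module, not part of the patch (class and test body are illustrative):

# Owner(s): ["module: inductor"]
from torch._dynamo.test_case import TestCase


class ExampleInductorTest(TestCase):
    def test_example(self):
        ...


if __name__ == "__main__":
    from torch.testing._internal.inductor_utils import run_inductor_tests

    # When a guard fails (e.g. under ROCm with skip_rocm=True, or when
    # Triton is unavailable with triton=True), the entrypoint writes the
    # reason to stderr and returns without running any tests.
    run_inductor_tests(skip_rocm=True, triton=True)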
@ -1,10 +1,5 @@
import functools
import unittest

from torch.testing._internal.inductor_utils import HAS_CUDA

requires_cuda = functools.partial(unittest.skipIf, not HAS_CUDA, "requires cuda")

if HAS_CUDA:
    import triton
    from triton import language as tl

@ -134,3 +129,14 @@ if HAS_CUDA:
            mul2_inplace_kernel(in_ptr0, n_elements, BLOCK_SIZE=BLOCK_SIZE)
        x = tl.load(in_ptr0 + offsets, mask=mask)
        tl.store(out_ptr + offsets, x, mask=mask)

else:
    triton = None
    tl = None
    add_kernel = None
    add_kernel_autotuned = None
    add_kernel_2d_autotuned = None
    mul2_kernel = None
    mul2_inplace_kernel = None
    zero_negs = None
    indirection_kernel = None
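The None fallbacks above are what let test files import these kernels unconditionally and gate individual tests instead of skipping at import time. A minimal sketch of that pattern (test class and body are illustrative):

import unittest

from torch.testing._internal.inductor_utils import requires_cuda
from torch.testing._internal.triton_utils import add_kernel  # None without CUDA


class ExampleTritonTest(unittest.TestCase):
    @requires_cuda()
    def test_uses_kernel(self):
        # Only reached when HAS_CUDA, so add_kernel is a real triton kernel
        assert add_kernel is not None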