mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-21 05:34:18 +08:00
Revert "[BE][Easy][12/19] enforce style for empty lines in import segments in test/i*/
(#129763)"
This reverts commit aecc746fccc4495313167e3a7f94210daf457e1d. Reverted https://github.com/pytorch/pytorch/pull/129763 on behalf of https://github.com/XuehaiPan due to need reland after rerunning lintrunner on main ([comment](https://github.com/pytorch/pytorch/pull/129763#issuecomment-2235736732))
This commit is contained in:
@ -1,6 +1,7 @@
|
||||
import sys
|
||||
|
||||
import torch
|
||||
|
||||
from torch.testing._internal.inductor_utils import GPU_TYPE
|
||||
|
||||
|
||||
|
@ -4,7 +4,6 @@
|
||||
# https://docs.google.com/document/d/18L9e7bZSBpJ7gGbwlUV13LasmjiEX2lree2pl-SdbCU/edit
|
||||
import os
|
||||
|
||||
|
||||
os.environ["TORCHDYNAMO_REPRO_AFTER"] = "dynamo"
|
||||
import torch
|
||||
import torch._dynamo as torchdynamo
|
||||
|
@ -3,7 +3,6 @@ import subprocess
|
||||
|
||||
from torch.testing._internal.common_methods_invocations import op_db
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
i = 0
|
||||
while i < len(op_db):
|
||||
|
@ -20,6 +20,7 @@ from torch._inductor import config
|
||||
from torch._inductor.exc import CppWrapperCodeGenError
|
||||
from torch._inductor.runtime.runtime_utils import cache_dir
|
||||
from torch._inductor.test_case import TestCase
|
||||
|
||||
from torch.export import Dim, export
|
||||
from torch.testing import FileCheck
|
||||
from torch.testing._internal import common_utils
|
||||
@ -36,10 +37,10 @@ from torch.testing._internal.common_utils import (
|
||||
skipIfRocm,
|
||||
TEST_WITH_ROCM,
|
||||
)
|
||||
|
||||
from torch.testing._internal.triton_utils import HAS_CUDA, requires_cuda
|
||||
from torch.utils import _pytree as pytree
|
||||
|
||||
|
||||
if HAS_CUDA:
|
||||
import triton
|
||||
|
||||
|
@ -5,7 +5,9 @@ import torch._export
|
||||
import torch._inductor
|
||||
import torch.export._trace
|
||||
import torch.fx._pytree as fx_pytree
|
||||
|
||||
from torch.testing._internal.common_utils import IS_FBCODE
|
||||
|
||||
from torch.utils import _pytree as pytree
|
||||
|
||||
|
||||
|
@ -3,7 +3,9 @@ import os
|
||||
import unittest
|
||||
|
||||
import torch
|
||||
|
||||
import torch._inductor.config as inductor_config
|
||||
|
||||
from torch._inductor.autoheuristic.autoheuristic import (
|
||||
AHContext,
|
||||
AutoHeuristic,
|
||||
|
@ -13,8 +13,8 @@ from torch.testing._internal.common_utils import (
|
||||
slowTest,
|
||||
TEST_WITH_ASAN,
|
||||
)
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
|
||||
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
|
||||
|
||||
# Make the helper files in test/ importable
|
||||
pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||
|
@ -11,7 +11,6 @@ from torch import nn
|
||||
from torch._inductor import config as inductor_config
|
||||
from torch.testing._internal.common_cuda import TEST_CUDNN
|
||||
|
||||
|
||||
# Make the helper files in test/ importable
|
||||
pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||
sys.path.append(pytorch_test_dir)
|
||||
@ -19,7 +18,6 @@ sys.path.append(pytorch_test_dir)
|
||||
from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, TEST_WITH_ASAN
|
||||
from torch.testing._internal.inductor_utils import skipCUDAIf
|
||||
|
||||
|
||||
if IS_WINDOWS and IS_CI:
|
||||
sys.stderr.write(
|
||||
"Windows CI does not have necessary dependencies for test_torchinductor yet\n"
|
||||
@ -31,13 +29,11 @@ if IS_WINDOWS and IS_CI:
|
||||
from inductor.test_inductor_freezing import TestCase
|
||||
from inductor.test_torchinductor import check_model, check_model_gpu, copy_tests
|
||||
|
||||
|
||||
importlib.import_module("functorch")
|
||||
importlib.import_module("filelock")
|
||||
|
||||
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CPU, HAS_GPU
|
||||
|
||||
|
||||
aten = torch.ops.aten
|
||||
|
||||
|
||||
|
@ -6,13 +6,13 @@ import unittest
|
||||
import torch
|
||||
from torch._inductor import config
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
|
||||
from torch.testing._internal.common_utils import (
|
||||
instantiate_parametrized_tests,
|
||||
parametrize,
|
||||
)
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
|
||||
|
||||
|
||||
torch.set_float32_matmul_precision("high")
|
||||
if HAS_CUDA:
|
||||
torch.cuda.memory._set_allocator_settings("expandable_segments:False")
|
||||
|
@ -40,7 +40,6 @@ from torch.testing._internal.inductor_utils import (
|
||||
)
|
||||
from torch.utils._triton import has_triton
|
||||
|
||||
|
||||
HAS_TRITON = has_triton()
|
||||
|
||||
if HAS_TRITON:
|
||||
|
@ -4,12 +4,14 @@ import contextlib
|
||||
import sympy
|
||||
|
||||
import torch
|
||||
|
||||
import torch._inductor.config as inductor_config
|
||||
from torch._inductor.codegen import triton_utils
|
||||
from torch._inductor.codegen.common import SizeArg
|
||||
from torch._inductor.graph import GraphLowering
|
||||
from torch._inductor.test_case import TestCase as InductorTestCase
|
||||
from torch._inductor.virtualized import V
|
||||
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_GPU
|
||||
|
||||
|
||||
|
@ -7,6 +7,7 @@ from torch._inductor.compile_worker.subproc_pool import (
|
||||
SubprocException,
|
||||
SubprocPool,
|
||||
)
|
||||
|
||||
from torch._inductor.test_case import TestCase
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU
|
||||
|
||||
|
@ -20,7 +20,6 @@ from torch._inductor.test_case import run_tests, TestCase
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
|
||||
from torch.testing._internal.logging_utils import logs_to_string
|
||||
|
||||
|
||||
# note: these tests are not run on windows due to inductor_utils.HAS_CPU
|
||||
|
||||
|
||||
|
@ -4,15 +4,19 @@ import sys
|
||||
import unittest
|
||||
import weakref
|
||||
from contextlib import ExitStack
|
||||
|
||||
from copy import deepcopy
|
||||
from typing import NamedTuple
|
||||
|
||||
import torch
|
||||
|
||||
import torch._inductor
|
||||
import torch._inductor.cudagraph_trees
|
||||
import torch.optim.lr_scheduler
|
||||
from torch._inductor import config
|
||||
|
||||
from torch._inductor.test_case import TestCase
|
||||
|
||||
from torch.optim import (
|
||||
Adadelta,
|
||||
Adagrad,
|
||||
@ -27,6 +31,7 @@ from torch.optim import (
|
||||
SGD,
|
||||
SparseAdam,
|
||||
)
|
||||
|
||||
from torch.optim.lr_scheduler import (
|
||||
ChainedScheduler,
|
||||
ConstantLR,
|
||||
@ -43,15 +48,18 @@ from torch.optim.lr_scheduler import (
|
||||
ReduceLROnPlateau,
|
||||
StepLR,
|
||||
)
|
||||
|
||||
from torch.testing._internal.common_device_type import (
|
||||
instantiate_device_type_tests,
|
||||
skipCUDAIf,
|
||||
)
|
||||
|
||||
from torch.testing._internal.common_optimizers import (
|
||||
_get_optim_inputs_including_global_cliquey_kwargs,
|
||||
optim_db,
|
||||
optims,
|
||||
)
|
||||
|
||||
from torch.testing._internal.common_utils import parametrize
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA, has_triton
|
||||
from torch.testing._internal.triton_utils import requires_cuda
|
||||
|
@ -3,7 +3,9 @@ import math
|
||||
import unittest
|
||||
|
||||
import torch
|
||||
|
||||
from torch._inductor import config
|
||||
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU
|
||||
|
||||
|
@ -3,6 +3,7 @@ import itertools
|
||||
|
||||
import torch
|
||||
import torch._dynamo.testing
|
||||
|
||||
from torch._inductor.test_case import TestCase
|
||||
from torch.testing._internal.common_utils import (
|
||||
instantiate_parametrized_tests,
|
||||
|
@ -6,11 +6,11 @@ from unittest import mock
|
||||
|
||||
import torch
|
||||
from torch._inductor.runtime.hints import TRITON_MAX_BLOCK
|
||||
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
from torch.testing._internal.common_utils import IS_LINUX
|
||||
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU
|
||||
|
||||
|
||||
try:
|
||||
import triton
|
||||
except ImportError:
|
||||
@ -21,7 +21,6 @@ except ImportError:
|
||||
from torch._inductor import config
|
||||
from torch._inductor.runtime.coordinate_descent_tuner import CoordescTuner
|
||||
|
||||
|
||||
config.benchmark_kernel = True
|
||||
config.coordinate_descent_tuning = True
|
||||
|
||||
|
@ -1,7 +1,9 @@
|
||||
# Owner(s): ["module: inductor"]
|
||||
import torch
|
||||
|
||||
from torch._inductor.codegen.aoti_hipify_utils import maybe_hipify_code_wrapper
|
||||
from torch._inductor.codegen.codegen_device_driver import cuda_kernel_driver
|
||||
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
|
||||
|
||||
|
@ -45,7 +45,6 @@ from torch.testing._internal.common_utils import (
|
||||
)
|
||||
from torch.utils._python_dispatch import TorchDispatchMode
|
||||
|
||||
|
||||
try:
|
||||
try:
|
||||
from . import test_torchinductor
|
||||
|
@ -1,6 +1,7 @@
|
||||
# Owner(s): ["oncall: cpu inductor"]
|
||||
import contextlib
|
||||
import functools
|
||||
|
||||
import sys
|
||||
import unittest
|
||||
from typing import Optional
|
||||
@ -19,8 +20,8 @@ from torch.testing._internal.common_device_type import (
|
||||
instantiate_device_type_tests,
|
||||
)
|
||||
from torch.testing._internal.common_quantization import _generate_qdq_quantized_model
|
||||
from torch.testing._internal.common_utils import IS_MACOS, parametrize, TEST_MKL
|
||||
|
||||
from torch.testing._internal.common_utils import IS_MACOS, parametrize, TEST_MKL
|
||||
|
||||
try:
|
||||
try:
|
||||
|
@ -29,8 +29,8 @@ from torch.testing._internal.common_utils import (
|
||||
skipIfRocm,
|
||||
TEST_WITH_ASAN,
|
||||
)
|
||||
from torch.testing._internal.inductor_utils import skipCUDAIf
|
||||
|
||||
from torch.testing._internal.inductor_utils import skipCUDAIf
|
||||
|
||||
try:
|
||||
try:
|
||||
|
@ -4,6 +4,7 @@ import ctypes
|
||||
import unittest
|
||||
|
||||
import torch
|
||||
|
||||
from torch._inductor import config
|
||||
from torch._inductor.async_compile import AsyncCompile
|
||||
from torch._inductor.codecache import CUDACodeCache
|
||||
@ -11,7 +12,6 @@ from torch._inductor.codegen.cuda.cuda_env import nvcc_exist
|
||||
from torch._inductor.exc import CUDACompileError
|
||||
from torch._inductor.test_case import TestCase as InductorTestCase
|
||||
|
||||
|
||||
_SOURCE_CODE = r"""
|
||||
|
||||
#include <stdio.h>
|
||||
|
@ -8,6 +8,7 @@ import unittest
|
||||
import warnings
|
||||
|
||||
import torch
|
||||
|
||||
import torch._dynamo.config as dynamo_config
|
||||
import torch.nn as nn
|
||||
from torch._dynamo.utils import counters
|
||||
@ -18,6 +19,7 @@ from torch._inductor.cudagraph_utils import FunctionID
|
||||
from torch._inductor.test_case import TestCase as InductorTestCase
|
||||
from torch.fx.experimental.proxy_tensor import make_fx
|
||||
from torch.testing import FileCheck
|
||||
|
||||
from torch.testing._internal.common_cuda import TEST_MULTIGPU
|
||||
from torch.testing._internal.common_utils import (
|
||||
instantiate_parametrized_tests,
|
||||
@ -31,7 +33,6 @@ from torch.testing._internal.common_utils import (
|
||||
)
|
||||
from torch.utils._python_dispatch import TorchDispatchMode
|
||||
|
||||
|
||||
if IS_WINDOWS and IS_CI:
|
||||
sys.stderr.write(
|
||||
"Windows CI does not have necessary dependencies for test_torchinductor yet\n"
|
||||
@ -45,7 +46,6 @@ importlib.import_module("filelock")
|
||||
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
|
||||
|
||||
|
||||
aten = torch.ops.aten
|
||||
requires_cuda = unittest.skipUnless(HAS_CUDA, "requires cuda")
|
||||
requires_multigpu = functools.partial(
|
||||
|
@ -6,6 +6,7 @@ import pathlib
|
||||
import sys
|
||||
|
||||
import torch
|
||||
|
||||
from torch.testing._internal.common_cuda import IS_JETSON, IS_WINDOWS
|
||||
from torch.testing._internal.common_utils import (
|
||||
run_tests,
|
||||
@ -14,7 +15,6 @@ from torch.testing._internal.common_utils import (
|
||||
)
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
|
||||
|
||||
pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||
sys.path.append(pytorch_test_dir)
|
||||
|
||||
@ -26,7 +26,6 @@ REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent.parent
|
||||
sys.path.insert(0, str(REPO_ROOT))
|
||||
from tools.stats.import_test_stats import get_disabled_tests
|
||||
|
||||
|
||||
# Make sure to remove REPO_ROOT after import is done
|
||||
sys.path.remove(str(REPO_ROOT))
|
||||
|
||||
|
@ -4,11 +4,13 @@ import unittest
|
||||
from functools import partial
|
||||
|
||||
import torch
|
||||
|
||||
from torch._inductor.ir import Pointwise
|
||||
from torch._inductor.lowering import make_pointwise, register_lowering
|
||||
from torch._inductor.test_case import TestCase as InductorTestCase
|
||||
from torch._inductor.virtualized import ops
|
||||
from torch.testing._internal.common_utils import skipIfRocm
|
||||
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
|
||||
|
||||
|
||||
|
@ -7,10 +7,13 @@ import torch
|
||||
import torch._inductor.pattern_matcher as pattern_matcher
|
||||
import torch.fx as fx
|
||||
from torch._dynamo.utils import counters
|
||||
|
||||
from torch._inductor import config
|
||||
from torch._inductor.lowering import lowerings as L
|
||||
from torch._inductor.pattern_matcher import Arg, CallFunction, PatternMatcherPass
|
||||
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
|
||||
from torch.testing._internal.common_utils import IS_LINUX
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU
|
||||
|
||||
|
@ -19,8 +19,8 @@ from torch.testing._internal.common_utils import (
|
||||
instantiate_parametrized_tests,
|
||||
parametrize,
|
||||
)
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
|
||||
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
|
||||
|
||||
torch.set_float32_matmul_precision("high")
|
||||
if HAS_CUDA:
|
||||
|
@ -9,13 +9,13 @@ from torch._dynamo.utils import counters
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
from torch._inductor.utils import run_and_get_code
|
||||
from torch.testing import FileCheck
|
||||
|
||||
from torch.testing._internal.common_utils import (
|
||||
instantiate_parametrized_tests,
|
||||
parametrize,
|
||||
)
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
|
||||
|
||||
requires_cuda = unittest.skipUnless(HAS_CUDA, "requires cuda")
|
||||
|
||||
|
||||
|
@ -3,11 +3,13 @@ import contextlib
|
||||
|
||||
import torch
|
||||
from torch._inductor.dependencies import MemoryDep
|
||||
|
||||
from torch._inductor.graph import GraphLowering
|
||||
from torch._inductor.ir import Buffer, FixedLayout, Pointwise
|
||||
from torch._inductor.test_case import TestCase as InductorTestCase
|
||||
from torch._inductor.utils import sympy_index_symbol
|
||||
from torch._inductor.virtualized import ops, V
|
||||
|
||||
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CPU, HAS_GPU
|
||||
|
||||
|
||||
|
@ -10,7 +10,6 @@ from torch._dynamo.testing import CompileCounter
|
||||
from torch.testing._internal.common_utils import IS_MACOS, skipIfRocm, skipIfXpu
|
||||
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CPU, requires_gpu
|
||||
|
||||
|
||||
# Fake distributed
|
||||
WORLD_SIZE = 2
|
||||
|
||||
|
@ -9,7 +9,6 @@ import unittest
|
||||
import torch
|
||||
from torch import nn
|
||||
|
||||
|
||||
# Make the helper files in test/ importable
|
||||
pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||
sys.path.append(pytorch_test_dir)
|
||||
@ -17,9 +16,10 @@ sys.path.append(pytorch_test_dir)
|
||||
from torch._dynamo.utils import counters
|
||||
from torch._inductor import config as inductor_config
|
||||
from torch._inductor.test_case import TestCase
|
||||
from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, TEST_WITH_ASAN
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
|
||||
|
||||
from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, TEST_WITH_ASAN
|
||||
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
|
||||
|
||||
if IS_WINDOWS and IS_CI:
|
||||
sys.stderr.write(
|
||||
|
@ -9,7 +9,6 @@ import torch._dynamo
|
||||
import torch.utils.cpp_extension
|
||||
from torch._C import FileCheck
|
||||
|
||||
|
||||
try:
|
||||
from extension_backends.cpp.extension_codegen_backend import (
|
||||
ExtensionCppWrapperCodegen,
|
||||
@ -33,7 +32,6 @@ from torch._inductor.codegen.common import (
|
||||
)
|
||||
from torch.testing._internal.common_utils import IS_FBCODE, IS_MACOS
|
||||
|
||||
|
||||
try:
|
||||
try:
|
||||
from . import test_torchinductor
|
||||
|
@ -5,10 +5,12 @@ import functools
|
||||
import string
|
||||
from collections import namedtuple
|
||||
from typing import Callable, Optional
|
||||
|
||||
from unittest import expectedFailure, skip, skipUnless
|
||||
from unittest.mock import patch
|
||||
|
||||
import torch
|
||||
|
||||
from torch._dynamo.testing import CompileCounterWithBackend, normalize_gm
|
||||
from torch._higher_order_ops.flex_attention import flex_attention as flex_attention_hop
|
||||
from torch._inductor import metrics
|
||||
@ -31,7 +33,6 @@ from torch.testing._internal import common_utils
|
||||
from torch.testing._internal.common_cuda import PLATFORM_SUPPORTS_BF16
|
||||
from torch.utils._triton import has_triton
|
||||
|
||||
|
||||
# Skip tests if Triton is not available
|
||||
supported_platform = skipUnless(
|
||||
torch.cuda.is_available()
|
||||
|
@ -4,10 +4,12 @@
|
||||
import functools
|
||||
from collections import namedtuple
|
||||
from typing import Callable, Optional
|
||||
|
||||
from unittest import expectedFailure, skip, skipUnless
|
||||
from unittest.mock import patch
|
||||
|
||||
import torch
|
||||
|
||||
from torch._higher_order_ops.flex_attention import flex_attention as flex_attention_hop
|
||||
from torch._inductor.test_case import TestCase as InductorTestCase
|
||||
from torch._inductor.utils import run_and_get_code
|
||||
@ -26,7 +28,6 @@ from torch.testing._internal import common_utils
|
||||
from torch.testing._internal.common_cuda import PLATFORM_SUPPORTS_BF16
|
||||
from torch.utils._triton import has_triton
|
||||
|
||||
|
||||
# Skip tests if Triton is not available
|
||||
supported_platform = skipUnless(
|
||||
torch.cuda.is_available()
|
||||
|
@ -4,17 +4,19 @@ import sys
|
||||
import unittest
|
||||
|
||||
import torch
|
||||
|
||||
import torch._inductor
|
||||
|
||||
from torch._inductor.test_case import TestCase
|
||||
from torch.testing._internal.common_utils import (
|
||||
instantiate_parametrized_tests,
|
||||
IS_FBCODE,
|
||||
parametrize,
|
||||
)
|
||||
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
|
||||
from torch.testing._internal.triton_utils import requires_cuda
|
||||
|
||||
|
||||
aten = torch.ops.aten
|
||||
|
||||
try:
|
||||
|
@ -15,7 +15,6 @@ from torch.testing._internal.common_utils import (
|
||||
)
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
|
||||
|
||||
torch.set_float32_matmul_precision("high")
|
||||
|
||||
|
||||
|
@ -14,7 +14,6 @@ from torch._inductor.fx_passes.pre_grad import (
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
from torch.fx.passes.shape_prop import ShapeProp
|
||||
|
||||
|
||||
PassFunc = Callable[[torch.fx.GraphModule, Any], torch.fx.GraphModule]
|
||||
|
||||
|
||||
|
@ -13,7 +13,6 @@ from torch.testing._internal.common_cuda import PLATFORM_SUPPORTS_FUSED_ATTENTIO
|
||||
from torch.testing._internal.common_utils import IS_LINUX, skipIfRocm
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
|
||||
|
||||
try:
|
||||
import pydot # noqa: F401
|
||||
|
||||
|
@ -11,7 +11,6 @@ from torch._dynamo.utils import counters, optimus_scuba_log
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
|
||||
|
||||
try:
|
||||
# importing this will register fbgemm lowerings for inductor
|
||||
import deeplearning.fbgemm.fbgemm_gpu.fb.inductor_lowerings # noqa: F401
|
||||
|
@ -13,6 +13,7 @@ from torch._inductor.codecache import HalideCodeCache
|
||||
from torch._inductor.runtime.hints import HalideInputSpec, HalideMeta
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
from torch._inductor.utils import parallel_num_threads
|
||||
|
||||
from torch.testing._internal.common_utils import IS_CI, IS_MACOS, IS_WINDOWS
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU
|
||||
from torch.utils._triton import has_triton
|
||||
|
@ -5,10 +5,12 @@ import unittest
|
||||
import sympy
|
||||
|
||||
import torch
|
||||
|
||||
from torch._inductor.codegen.cpp import cexpr
|
||||
from torch._inductor.codegen.triton import texpr
|
||||
from torch._inductor.codegen.wrapper import pexpr
|
||||
from torch._inductor.runtime.runtime_utils import do_bench_gpu
|
||||
|
||||
from torch._inductor.sizevars import SizeVarAllocator
|
||||
from torch._inductor.test_case import TestCase as InductorTestCase
|
||||
from torch._inductor.utils import run_and_get_triton_code
|
||||
@ -24,7 +26,6 @@ from torch.utils._sympy.functions import (
|
||||
RoundToInt,
|
||||
)
|
||||
|
||||
|
||||
DO_PERF_TEST = os.environ.get("DO_PERF_TEST") == "1"
|
||||
|
||||
|
||||
|
@ -9,6 +9,7 @@ import unittest
|
||||
import weakref
|
||||
|
||||
import torch
|
||||
|
||||
from torch import nn
|
||||
from torch._inductor import config
|
||||
from torch._inductor.test_case import TestCase as InductorTestCase
|
||||
@ -17,7 +18,6 @@ from torch.testing import FileCheck
|
||||
from torch.testing._internal.common_cuda import SM80OrLater
|
||||
from torch.testing._internal.common_utils import skipIfRocm
|
||||
|
||||
|
||||
# Make the helper files in test/ importable
|
||||
pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
|
||||
sys.path.append(pytorch_test_dir)
|
||||
@ -29,7 +29,6 @@ from torch.testing._internal.common_utils import (
|
||||
TEST_WITH_ROCM,
|
||||
)
|
||||
|
||||
|
||||
if IS_WINDOWS and IS_CI:
|
||||
sys.stderr.write(
|
||||
"Windows CI does not have necessary dependencies for test_torchinductor yet\n"
|
||||
@ -40,13 +39,11 @@ if IS_WINDOWS and IS_CI:
|
||||
|
||||
from inductor.test_torchinductor import check_model, check_model_cuda, copy_tests
|
||||
|
||||
|
||||
importlib.import_module("functorch")
|
||||
importlib.import_module("filelock")
|
||||
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
|
||||
|
||||
|
||||
aten = torch.ops.aten
|
||||
prims = torch.ops.prims
|
||||
requires_cuda = unittest.skipUnless(HAS_CUDA, "requires cuda")
|
||||
|
@ -5,9 +5,10 @@ import logging
|
||||
|
||||
import torch
|
||||
from torch._inductor.runtime.runtime_utils import do_bench
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
from torch._inductor.utils import do_bench_using_profiling
|
||||
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
|
||||
from torch._inductor.utils import do_bench_using_profiling
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
@ -5,7 +5,6 @@ from torch._inductor.test_case import run_tests, TestCase
|
||||
from torch.testing._internal.common_utils import IS_LINUX
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
|
||||
|
||||
aten = torch.ops.aten
|
||||
|
||||
|
||||
|
@ -11,7 +11,6 @@ from torch._inductor.test_case import run_tests, TestCase
|
||||
from torch.testing._internal.common_cuda import tf32_off
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
|
||||
|
||||
USE_DDP_WRAPPER = os.environ.get("USE_DDP_WRAPPER", "1") == "1"
|
||||
|
||||
|
||||
|
@ -7,7 +7,6 @@ from torch._inductor import config as inductor_config, metrics
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
|
||||
|
||||
if HAS_CUDA:
|
||||
torch.set_default_device("cuda")
|
||||
|
||||
|
@ -2,6 +2,7 @@
|
||||
import json
|
||||
import os
|
||||
import unittest
|
||||
|
||||
from typing import Callable, List, Optional
|
||||
|
||||
import torch
|
||||
@ -23,6 +24,7 @@ from torch._inductor.select_algorithm import (
|
||||
TritonTemplateCaller,
|
||||
)
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
|
||||
from torch._inductor.utils import fresh_inductor_cache, run_and_get_code
|
||||
from torch._inductor.virtualized import V
|
||||
from torch.fx.experimental.proxy_tensor import make_fx
|
||||
@ -32,8 +34,8 @@ from torch.testing._internal.common_utils import (
|
||||
parametrize,
|
||||
skipIfRocm,
|
||||
)
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
|
||||
|
||||
from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
|
||||
|
||||
torch.set_float32_matmul_precision("high")
|
||||
if HAS_CUDA:
|
||||
|
@ -1,12 +1,12 @@
|
||||
# Owner(s): ["module: inductor"]
|
||||
|
||||
import sys
|
||||
|
||||
import unittest
|
||||
|
||||
from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, skipIfRocm
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
|
||||
|
||||
if IS_WINDOWS and IS_CI:
|
||||
sys.stderr.write(
|
||||
"Windows CI does not have necessary dependencies for test_memory_planning yet\n"
|
||||
|
@ -5,8 +5,8 @@ from torch._inductor.test_case import run_tests, TestCase
|
||||
from torch._inductor.utils import collect_defined_kernels
|
||||
from torch._inductor.wrapper_benchmark import get_kernel_category_by_source_code
|
||||
from torch.testing._internal.common_device_type import largeTensorTest
|
||||
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU
|
||||
|
||||
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU
|
||||
|
||||
example_kernel = """
|
||||
@triton_heuristics.reduction(
|
||||
|
@ -11,7 +11,6 @@ from torch.testing._internal.common_utils import (
|
||||
)
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
|
||||
|
||||
requires_cuda = unittest.skipUnless(HAS_CUDA, "requires cuda")
|
||||
|
||||
|
||||
|
@ -6,6 +6,7 @@ import unittest
|
||||
|
||||
import torch
|
||||
import torch.ao.quantization.quantizer.x86_inductor_quantizer as xiq
|
||||
|
||||
from torch._dynamo import config as dynamo_config
|
||||
from torch._dynamo.utils import counters
|
||||
from torch._inductor import config, metrics
|
||||
|
@ -12,7 +12,6 @@ from torch.testing._internal.common_nn import NNTestCase
|
||||
from torch.testing._internal.common_utils import IS_WINDOWS, parametrize, run_tests
|
||||
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU
|
||||
|
||||
|
||||
default_atol = {
|
||||
torch.float16: 1e-3,
|
||||
torch.bfloat16: float("infinity"),
|
||||
|
@ -11,7 +11,6 @@ from torch.testing._internal.common_cuda import TEST_MULTIGPU
|
||||
from torch.testing._internal.common_utils import IS_LINUX
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
|
||||
|
||||
requires_multigpu = functools.partial(
|
||||
unittest.skipIf, not TEST_MULTIGPU, "requires multiple cuda devices"
|
||||
)
|
||||
|
@ -7,6 +7,7 @@ import unittest
|
||||
import torch
|
||||
from torch import nn
|
||||
from torch._dynamo.testing import reset_rng_state
|
||||
|
||||
from torch._inductor import config, test_operators
|
||||
from torch._inductor.codegen.multi_kernel import MultiKernelCall
|
||||
from torch._inductor.test_case import TestCase
|
||||
|
@ -2,6 +2,7 @@
|
||||
import unittest
|
||||
|
||||
import torch
|
||||
|
||||
import torch._inductor.config as inductor_config
|
||||
from torch._dynamo.testing import rand_strided
|
||||
from torch._inductor.fx_passes.pad_mm import (
|
||||
@ -10,6 +11,7 @@ from torch._inductor.fx_passes.pad_mm import (
|
||||
get_padded_length,
|
||||
should_pad_common,
|
||||
)
|
||||
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
from torch._inductor.utils import fresh_inductor_cache, run_and_get_code
|
||||
from torch.testing import FileCheck
|
||||
|
@ -1,5 +1,6 @@
|
||||
# Owner(s): ["module: inductor"]
|
||||
import copy
|
||||
|
||||
import functools
|
||||
import os
|
||||
import unittest
|
||||
@ -16,7 +17,6 @@ from torch._inductor.utils import run_and_get_code
|
||||
from torch.testing._internal.common_utils import serialTest
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
|
||||
|
||||
DO_PERF_TEST = os.environ.get("DO_PERF_TEST") == "1"
|
||||
DO_ACC_TEST = os.environ.get("DO_ACC_TEST", "1") == "1"
|
||||
WITH_STACK = os.environ.get("WITH_STACK") == "1"
|
||||
|
@ -11,6 +11,7 @@ import torch.nn.functional as F
|
||||
from torch._dynamo.utils import count_calls, counters
|
||||
from torch._higher_order_ops.out_dtype import out_dtype
|
||||
from torch._inductor.fx_passes import joint_graph
|
||||
|
||||
from torch._inductor.pattern_matcher import (
|
||||
Arg,
|
||||
CallFunction,
|
||||
|
@ -3,6 +3,7 @@ import contextlib
|
||||
from unittest.mock import patch
|
||||
|
||||
import functorch
|
||||
|
||||
import torch
|
||||
import torch._inductor.config as config
|
||||
import torch.autograd
|
||||
@ -24,11 +25,10 @@ from torch._inductor.test_case import TestCase as InductorTestCase
|
||||
#
|
||||
# That may still be aceeptable, but be aware that you are likely lowering
|
||||
# performance for that setting.
|
||||
#
|
||||
|
||||
# Defines all the kernels for tests
|
||||
from torch.testing._internal.triton_utils import HAS_CUDA, requires_cuda
|
||||
|
||||
|
||||
if HAS_CUDA:
|
||||
from torch.testing._internal.triton_utils import add_kernel
|
||||
|
||||
|
@ -5,12 +5,14 @@ import unittest
|
||||
import torch
|
||||
import torch._inductor.test_case
|
||||
import torch._inductor.utils
|
||||
|
||||
from torch._inductor import config
|
||||
from torch.profiler import ProfilerActivity
|
||||
|
||||
from torch.testing._internal.common_utils import TemporaryFileName
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
from torch.utils._triton import has_triton
|
||||
|
||||
from torch.utils._triton import has_triton
|
||||
|
||||
HAS_TRITON = has_triton()
|
||||
|
||||
|
@ -12,7 +12,6 @@ from torch._inductor.runtime.runtime_utils import do_bench_gpu as do_bench
|
||||
from torch._inductor.test_case import TestCase
|
||||
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU
|
||||
|
||||
|
||||
DO_PERF_TEST = os.environ.get("DO_PERF_TEST") == "1"
|
||||
|
||||
|
||||
|
@ -11,10 +11,10 @@ from torch._dynamo.testing import expectedFailureDynamicWrapper
|
||||
from torch._dynamo.utils import counters
|
||||
from torch._inductor.autotune_process import TritonBenchmarkRequest
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
|
||||
from torch.testing._internal.common_utils import IS_LINUX, skipIfRocm
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
|
||||
|
||||
aten = torch.ops.aten
|
||||
|
||||
|
||||
|
@ -4,6 +4,7 @@ import unittest
|
||||
|
||||
import torch
|
||||
import torch._logging
|
||||
|
||||
from torch._inductor.test_case import TestCase
|
||||
from torch.testing._internal.common_utils import IS_LINUX
|
||||
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CUDA, HAS_GPU
|
||||
|
@ -5,6 +5,7 @@ from unittest import skipIf
|
||||
|
||||
import torch
|
||||
import torch.distributed as dist
|
||||
|
||||
from torch._inductor import config, metrics
|
||||
from torch._inductor.comm_analysis import estimate_nccl_collective_runtime
|
||||
from torch._inductor.compile_fx import compile_fx, compile_fx_inner
|
||||
@ -12,7 +13,6 @@ from torch._inductor.test_case import TestCase as InductorTestCase
|
||||
from torch._inductor.utils import is_collective
|
||||
from torch.testing._internal.inductor_utils import HAS_CUDA
|
||||
|
||||
|
||||
aten = torch.ops.aten
|
||||
c10d = torch.ops.c10d_functional
|
||||
_c10d = torch.ops._c10d_functional
|
||||
|
@ -6,6 +6,7 @@ import torch._inductor
|
||||
import torch._inductor.decomposition
|
||||
from torch._higher_order_ops.torchbind import enable_torchbind_tracing
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
|
||||
from torch.testing._internal.torchbind_impls import init_torchbind_implementations
|
||||
|
||||
|
||||
|
@ -26,6 +26,7 @@ from unittest.mock import patch
|
||||
import numpy as np
|
||||
|
||||
import torch
|
||||
|
||||
import torch._dynamo.config as dynamo_config
|
||||
import torch._inductor.aoti_eager
|
||||
import torch.nn as nn
|
||||
@ -66,6 +67,7 @@ from torch.testing._internal.common_cuda import (
|
||||
TEST_CUDNN,
|
||||
with_tf32_off,
|
||||
)
|
||||
|
||||
from torch.testing._internal.common_device_type import (
|
||||
_has_sufficient_memory,
|
||||
expectedFailureXPU,
|
||||
@ -93,7 +95,6 @@ from torch.utils._python_dispatch import TorchDispatchMode
|
||||
from torch.utils._pytree import tree_flatten, tree_unflatten
|
||||
from torch.utils.weak import WeakTensorKeyDictionary
|
||||
|
||||
|
||||
DO_PERF_TEST = os.environ.get("DO_PERF_TEST") == "1"
|
||||
|
||||
if IS_WINDOWS and IS_CI:
|
||||
@ -108,12 +109,14 @@ importlib.import_module("functorch")
|
||||
importlib.import_module("filelock")
|
||||
|
||||
from torch._inductor import config, test_operators
|
||||
|
||||
from torch._inductor.compile_fx import (
|
||||
compile_fx,
|
||||
compile_fx_inner,
|
||||
complex_memory_overlap,
|
||||
)
|
||||
from torch._inductor.utils import has_torchvision_roi_align
|
||||
|
||||
from torch.testing._internal.common_utils import slowTest
|
||||
from torch.testing._internal.inductor_utils import (
|
||||
GPU_TYPE,
|
||||
@ -125,7 +128,6 @@ from torch.testing._internal.inductor_utils import (
|
||||
skipCUDAIf,
|
||||
)
|
||||
|
||||
|
||||
HAS_AVX2 = "fbgemm" in torch.backends.quantized.supported_engines
|
||||
|
||||
aten = torch.ops.aten
|
||||
|
@ -20,7 +20,6 @@ from torch.testing._internal.inductor_utils import (
|
||||
HAS_GPU,
|
||||
)
|
||||
|
||||
|
||||
if IS_WINDOWS and IS_CI:
|
||||
sys.stderr.write(
|
||||
"Windows CI does not have necessary dependencies for test_torchinductor_codegen_dynamic_shapes yet\n"
|
||||
|
@ -1,6 +1,7 @@
|
||||
# Owner(s): ["module: inductor"]
|
||||
import contextlib
|
||||
import importlib
|
||||
|
||||
import math
|
||||
import operator
|
||||
import os
|
||||
@ -36,7 +37,6 @@ from torch.testing._internal.common_utils import (
|
||||
)
|
||||
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CPU, HAS_GPU
|
||||
|
||||
|
||||
if IS_WINDOWS and IS_CI:
|
||||
sys.stderr.write(
|
||||
"Windows CI does not have necessary dependencies for test_torchinductor_dynamic_shapes yet\n"
|
||||
@ -56,7 +56,6 @@ from inductor.test_torchinductor import (
|
||||
TestFailure,
|
||||
)
|
||||
|
||||
|
||||
importlib.import_module("filelock")
|
||||
|
||||
# xfail by default, set is_skip=True to skip
|
||||
|
@ -11,6 +11,7 @@ from functools import partial
|
||||
from unittest.mock import patch
|
||||
|
||||
import torch
|
||||
|
||||
from torch._dispatch.python import enable_python_dispatcher
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
from torch._subclasses.fake_tensor import (
|
||||
@ -44,7 +45,6 @@ from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CPU, HAS_CUDA
|
||||
from torch.utils._python_dispatch import TorchDispatchMode
|
||||
from torch.utils._pytree import tree_map
|
||||
|
||||
|
||||
try:
|
||||
try:
|
||||
from .test_torchinductor import check_model, check_model_gpu
|
||||
@ -263,7 +263,6 @@ intentionally_not_handled = {
|
||||
# We should eventually always turn it on
|
||||
import torch._functorch.config as functorch_config
|
||||
|
||||
|
||||
if not functorch_config.view_replay_for_aliased_outputs:
|
||||
intentionally_not_handled['("as_strided", "partial_views")'] = {
|
||||
b8,
|
||||
|
@ -8,7 +8,6 @@ import torch
|
||||
import torch._dynamo
|
||||
import torch.utils.cpp_extension
|
||||
|
||||
|
||||
try:
|
||||
from extension_backends.triton.device_interface import DeviceInterface
|
||||
from extension_backends.triton.extension_codegen_backend import (
|
||||
@ -36,7 +35,6 @@ from torch._inductor.codegen.common import (
|
||||
from torch._inductor.utils import get_triton_code
|
||||
from torch.testing._internal.common_utils import IS_MACOS
|
||||
|
||||
|
||||
try:
|
||||
try:
|
||||
from . import test_torchinductor
|
||||
|
@ -4,10 +4,10 @@ import sys
|
||||
import unittest
|
||||
|
||||
import torch
|
||||
|
||||
from torch.testing._internal.common_utils import IS_LINUX, skipIfXpu
|
||||
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU
|
||||
|
||||
|
||||
try:
|
||||
import triton # noqa: F401
|
||||
except ImportError:
|
||||
|
@ -6,7 +6,9 @@ from unittest.mock import patch
|
||||
|
||||
import torch
|
||||
import torch._dynamo.testing
|
||||
|
||||
import torch._inductor.test_case
|
||||
|
||||
from torch._higher_order_ops.triton_kernel_wrap import (
|
||||
generate_ttir,
|
||||
triton_kernel_wrapper_functional,
|
||||
@ -17,11 +19,10 @@ from torch._inductor.utils import run_and_get_code
|
||||
from torch._library import capture_triton
|
||||
from torch.testing._internal import common_utils
|
||||
from torch.testing._internal.common_utils import skipIfRocm, skipIfXpu, TEST_WITH_ROCM
|
||||
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CUDA, HAS_GPU, HAS_XPU
|
||||
|
||||
# Defines all the kernels for tests
|
||||
from torch.testing._internal.triton_utils import * # noqa: F403
|
||||
|
||||
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CUDA, HAS_GPU, HAS_XPU
|
||||
|
||||
if HAS_GPU:
|
||||
import triton
|
||||
|
@ -7,7 +7,6 @@ import unittest
|
||||
import torch
|
||||
from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS
|
||||
|
||||
|
||||
if IS_WINDOWS and IS_CI:
|
||||
sys.stderr.write(
|
||||
"Windows CI does not have necessary dependencies for test_xpu_basic yet\n"
|
||||
|
@ -43,6 +43,7 @@ ISORT_SKIPLIST = re.compile(
|
||||
"test/dy*/**",
|
||||
# test/[e-h]*/**
|
||||
# test/i*/**
|
||||
"test/i*/**",
|
||||
# test/j*/**
|
||||
"test/j*/**",
|
||||
# test/[k-p]*/**
|
||||
|
Reference in New Issue
Block a user