Revert "[BE][Easy][12/19] enforce style for empty lines in import segments in test/i*/ (#129763)"

This reverts commit aecc746fccc4495313167e3a7f94210daf457e1d. Reverted https://github.com/pytorch/pytorch/pull/129763 on behalf of https://github.com/XuehaiPan due to need reland after rerunning lintrunner on main ([comment](https://github.com/pytorch/pytorch/pull/129763#issuecomment-2235736732))
2025-10-21 05:34:18 +08:00 · 2024-07-18 06:39:58 +00:00
parent 6c2c8ee15b
commit b732b52f1e
73 changed files with 90 additions and 72 deletions
--- a/test/inductor/indirect_assert_helper.py
+++ b/test/inductor/indirect_assert_helper.py
@ -1,6 +1,7 @@
 import sys

 import torch
+
 from torch.testing._internal.inductor_utils import GPU_TYPE


--- a/test/inductor/minifier_smoke.py
+++ b/test/inductor/minifier_smoke.py
@ -4,7 +4,6 @@
 # https://docs.google.com/document/d/18L9e7bZSBpJ7gGbwlUV13LasmjiEX2lree2pl-SdbCU/edit
 import os

-
 os.environ["TORCHDYNAMO_REPRO_AFTER"] = "dynamo"
 import torch
 import torch._dynamo as torchdynamo
--- a/test/inductor/opinfo_harness.py
+++ b/test/inductor/opinfo_harness.py
@ -3,7 +3,6 @@ import subprocess

 from torch.testing._internal.common_methods_invocations import op_db

-
 if __name__ == "__main__":
    i = 0
    while i < len(op_db):
--- a/test/inductor/test_aot_inductor.py
+++ b/test/inductor/test_aot_inductor.py
@ -20,6 +20,7 @@ from torch._inductor import config
 from torch._inductor.exc import CppWrapperCodeGenError
 from torch._inductor.runtime.runtime_utils import cache_dir
 from torch._inductor.test_case import TestCase
+
 from torch.export import Dim, export
 from torch.testing import FileCheck
 from torch.testing._internal import common_utils
@ -36,10 +37,10 @@ from torch.testing._internal.common_utils import (
    skipIfRocm,
    TEST_WITH_ROCM,
 )
+
 from torch.testing._internal.triton_utils import HAS_CUDA, requires_cuda
 from torch.utils import _pytree as pytree

-
 if HAS_CUDA:
    import triton

--- a/test/inductor/test_aot_inductor_utils.py
+++ b/test/inductor/test_aot_inductor_utils.py
@ -5,7 +5,9 @@ import torch._export
 import torch._inductor
 import torch.export._trace
 import torch.fx._pytree as fx_pytree
+
 from torch.testing._internal.common_utils import IS_FBCODE
+
 from torch.utils import _pytree as pytree


--- a/test/inductor/test_autoheuristic.py
+++ b/test/inductor/test_autoheuristic.py
@ -3,7 +3,9 @@ import os
 import unittest

 import torch
+
 import torch._inductor.config as inductor_config
+
 from torch._inductor.autoheuristic.autoheuristic import (
    AHContext,
    AutoHeuristic,
--- a/test/inductor/test_benchmark_fusion.py
+++ b/test/inductor/test_benchmark_fusion.py
@ -13,8 +13,8 @@ from torch.testing._internal.common_utils import (
    slowTest,
    TEST_WITH_ASAN,
 )
-from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

+from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

 # Make the helper files in test/ importable
 pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
--- a/test/inductor/test_binary_folding.py
+++ b/test/inductor/test_binary_folding.py
@ -11,7 +11,6 @@ from torch import nn
 from torch._inductor import config as inductor_config
 from torch.testing._internal.common_cuda import TEST_CUDNN

-
 # Make the helper files in test/ importable
 pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 sys.path.append(pytorch_test_dir)
@ -19,7 +18,6 @@ sys.path.append(pytorch_test_dir)
 from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, TEST_WITH_ASAN
 from torch.testing._internal.inductor_utils import skipCUDAIf

-
 if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_torchinductor yet\n"
@ -31,13 +29,11 @@ if IS_WINDOWS and IS_CI:
 from inductor.test_inductor_freezing import TestCase
 from inductor.test_torchinductor import check_model, check_model_gpu, copy_tests

-
 importlib.import_module("functorch")
 importlib.import_module("filelock")

 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CPU, HAS_GPU

-
 aten = torch.ops.aten


--- a/test/inductor/test_ck_backend.py
+++ b/test/inductor/test_ck_backend.py
@ -6,13 +6,13 @@ import unittest
 import torch
 from torch._inductor import config
 from torch._inductor.test_case import run_tests, TestCase
+
 from torch.testing._internal.common_utils import (
    instantiate_parametrized_tests,
    parametrize,
 )
 from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

-
 torch.set_float32_matmul_precision("high")
 if HAS_CUDA:
    torch.cuda.memory._set_allocator_settings("expandable_segments:False")
--- a/test/inductor/test_codecache.py
+++ b/test/inductor/test_codecache.py
@ -40,7 +40,6 @@ from torch.testing._internal.inductor_utils import (
 )
 from torch.utils._triton import has_triton

-
 HAS_TRITON = has_triton()

 if HAS_TRITON:
--- a/test/inductor/test_codegen_triton.py
+++ b/test/inductor/test_codegen_triton.py
@ -4,12 +4,14 @@ import contextlib
 import sympy

 import torch
+
 import torch._inductor.config as inductor_config
 from torch._inductor.codegen import triton_utils
 from torch._inductor.codegen.common import SizeArg
 from torch._inductor.graph import GraphLowering
 from torch._inductor.test_case import TestCase as InductorTestCase
 from torch._inductor.virtualized import V
+
 from torch.testing._internal.inductor_utils import HAS_CPU, HAS_GPU


--- a/test/inductor/test_compile_worker.py
+++ b/test/inductor/test_compile_worker.py
@ -7,6 +7,7 @@ from torch._inductor.compile_worker.subproc_pool import (
    SubprocException,
    SubprocPool,
 )
+
 from torch._inductor.test_case import TestCase
 from torch.testing._internal.inductor_utils import HAS_CPU

--- a/test/inductor/test_compiled_autograd.py
+++ b/test/inductor/test_compiled_autograd.py
@ -20,7 +20,6 @@ from torch._inductor.test_case import run_tests, TestCase
 from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
 from torch.testing._internal.logging_utils import logs_to_string

-
 # note: these tests are not run on windows due to inductor_utils.HAS_CPU


--- a/test/inductor/test_compiled_optimizers.py
+++ b/test/inductor/test_compiled_optimizers.py
@ -4,15 +4,19 @@ import sys
 import unittest
 import weakref
 from contextlib import ExitStack
+
 from copy import deepcopy
 from typing import NamedTuple

 import torch
+
 import torch._inductor
 import torch._inductor.cudagraph_trees
 import torch.optim.lr_scheduler
 from torch._inductor import config
+
 from torch._inductor.test_case import TestCase
+
 from torch.optim import (
    Adadelta,
    Adagrad,
@ -27,6 +31,7 @@ from torch.optim import (
    SGD,
    SparseAdam,
 )
+
 from torch.optim.lr_scheduler import (
    ChainedScheduler,
    ConstantLR,
@ -43,15 +48,18 @@ from torch.optim.lr_scheduler import (
    ReduceLROnPlateau,
    StepLR,
 )
+
 from torch.testing._internal.common_device_type import (
    instantiate_device_type_tests,
    skipCUDAIf,
 )
+
 from torch.testing._internal.common_optimizers import (
    _get_optim_inputs_including_global_cliquey_kwargs,
    optim_db,
    optims,
 )
+
 from torch.testing._internal.common_utils import parametrize
 from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA, has_triton
 from torch.testing._internal.triton_utils import requires_cuda
--- a/test/inductor/test_config.py
+++ b/test/inductor/test_config.py
@ -3,7 +3,9 @@ import math
 import unittest

 import torch
+
 from torch._inductor import config
+
 from torch._inductor.test_case import run_tests, TestCase
 from torch.testing._internal.inductor_utils import HAS_CPU

--- a/test/inductor/test_control_flow.py
+++ b/test/inductor/test_control_flow.py
@ -3,6 +3,7 @@ import itertools

 import torch
 import torch._dynamo.testing
+
 from torch._inductor.test_case import TestCase
 from torch.testing._internal.common_utils import (
    instantiate_parametrized_tests,
--- a/test/inductor/test_coordinate_descent_tuner.py
+++ b/test/inductor/test_coordinate_descent_tuner.py
@ -6,11 +6,11 @@ from unittest import mock

 import torch
 from torch._inductor.runtime.hints import TRITON_MAX_BLOCK
+
 from torch._inductor.test_case import run_tests, TestCase
 from torch.testing._internal.common_utils import IS_LINUX
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU

-
 try:
    import triton
 except ImportError:
@ -21,7 +21,6 @@ except ImportError:
 from torch._inductor import config
 from torch._inductor.runtime.coordinate_descent_tuner import CoordescTuner

-
 config.benchmark_kernel = True
 config.coordinate_descent_tuning = True

--- a/test/inductor/test_cpp_wrapper_hipify.py
+++ b/test/inductor/test_cpp_wrapper_hipify.py
@ -1,7 +1,9 @@
 # Owner(s): ["module: inductor"]
 import torch
+
 from torch._inductor.codegen.aoti_hipify_utils import maybe_hipify_code_wrapper
 from torch._inductor.codegen.codegen_device_driver import cuda_kernel_driver
+
 from torch._inductor.test_case import run_tests, TestCase


--- a/test/inductor/test_cpu_repro.py
+++ b/test/inductor/test_cpu_repro.py
@ -45,7 +45,6 @@ from torch.testing._internal.common_utils import (
 )
 from torch.utils._python_dispatch import TorchDispatchMode

-
 try:
    try:
        from . import test_torchinductor
--- a/test/inductor/test_cpu_select_algorithm.py
+++ b/test/inductor/test_cpu_select_algorithm.py
@ -1,6 +1,7 @@
 # Owner(s): ["oncall: cpu inductor"]
 import contextlib
 import functools
+
 import sys
 import unittest
 from typing import Optional
@ -19,8 +20,8 @@ from torch.testing._internal.common_device_type import (
    instantiate_device_type_tests,
 )
 from torch.testing._internal.common_quantization import _generate_qdq_quantized_model
-from torch.testing._internal.common_utils import IS_MACOS, parametrize, TEST_MKL

+from torch.testing._internal.common_utils import IS_MACOS, parametrize, TEST_MKL

 try:
    try:
--- a/test/inductor/test_cuda_repro.py
+++ b/test/inductor/test_cuda_repro.py
@ -29,8 +29,8 @@ from torch.testing._internal.common_utils import (
    skipIfRocm,
    TEST_WITH_ASAN,
 )
-from torch.testing._internal.inductor_utils import skipCUDAIf

+from torch.testing._internal.inductor_utils import skipCUDAIf

 try:
    try:
--- a/test/inductor/test_cudacodecache.py
+++ b/test/inductor/test_cudacodecache.py
@ -4,6 +4,7 @@ import ctypes
 import unittest

 import torch
+
 from torch._inductor import config
 from torch._inductor.async_compile import AsyncCompile
 from torch._inductor.codecache import CUDACodeCache
@ -11,7 +12,6 @@ from torch._inductor.codegen.cuda.cuda_env import nvcc_exist
 from torch._inductor.exc import CUDACompileError
 from torch._inductor.test_case import TestCase as InductorTestCase

-
 _SOURCE_CODE = r"""

 #include <stdio.h>
--- a/test/inductor/test_cudagraph_trees.py
+++ b/test/inductor/test_cudagraph_trees.py
@ -8,6 +8,7 @@ import unittest
 import warnings

 import torch
+
 import torch._dynamo.config as dynamo_config
 import torch.nn as nn
 from torch._dynamo.utils import counters
@ -18,6 +19,7 @@ from torch._inductor.cudagraph_utils import FunctionID
 from torch._inductor.test_case import TestCase as InductorTestCase
 from torch.fx.experimental.proxy_tensor import make_fx
 from torch.testing import FileCheck
+
 from torch.testing._internal.common_cuda import TEST_MULTIGPU
 from torch.testing._internal.common_utils import (
    instantiate_parametrized_tests,
@ -31,7 +33,6 @@ from torch.testing._internal.common_utils import (
 )
 from torch.utils._python_dispatch import TorchDispatchMode

-
 if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_torchinductor yet\n"
@ -45,7 +46,6 @@ importlib.import_module("filelock")

 from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

-
 aten = torch.ops.aten
 requires_cuda = unittest.skipUnless(HAS_CUDA, "requires cuda")
 requires_multigpu = functools.partial(
--- a/test/inductor/test_cudagraph_trees_expandable_segments.py
+++ b/test/inductor/test_cudagraph_trees_expandable_segments.py
@ -6,6 +6,7 @@ import pathlib
 import sys

 import torch
+
 from torch.testing._internal.common_cuda import IS_JETSON, IS_WINDOWS
 from torch.testing._internal.common_utils import (
    run_tests,
@ -14,7 +15,6 @@ from torch.testing._internal.common_utils import (
 )
 from torch.testing._internal.inductor_utils import HAS_CUDA

-
 pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 sys.path.append(pytorch_test_dir)

@ -26,7 +26,6 @@ REPO_ROOT = pathlib.Path(__file__).resolve().parent.parent.parent
 sys.path.insert(0, str(REPO_ROOT))
 from tools.stats.import_test_stats import get_disabled_tests

-
 # Make sure to remove REPO_ROOT after import is done
 sys.path.remove(str(REPO_ROOT))

--- a/test/inductor/test_custom_lowering.py
+++ b/test/inductor/test_custom_lowering.py
@ -4,11 +4,13 @@ import unittest
 from functools import partial

 import torch
+
 from torch._inductor.ir import Pointwise
 from torch._inductor.lowering import make_pointwise, register_lowering
 from torch._inductor.test_case import TestCase as InductorTestCase
 from torch._inductor.virtualized import ops
 from torch.testing._internal.common_utils import skipIfRocm
+
 from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA


--- a/test/inductor/test_custom_post_grad_passes.py
+++ b/test/inductor/test_custom_post_grad_passes.py
@ -7,10 +7,13 @@ import torch
 import torch._inductor.pattern_matcher as pattern_matcher
 import torch.fx as fx
 from torch._dynamo.utils import counters
+
 from torch._inductor import config
 from torch._inductor.lowering import lowerings as L
 from torch._inductor.pattern_matcher import Arg, CallFunction, PatternMatcherPass
+
 from torch._inductor.test_case import run_tests, TestCase
+
 from torch.testing._internal.common_utils import IS_LINUX
 from torch.testing._internal.inductor_utils import HAS_CPU

--- a/test/inductor/test_cutlass_backend.py
+++ b/test/inductor/test_cutlass_backend.py
@ -19,8 +19,8 @@ from torch.testing._internal.common_utils import (
    instantiate_parametrized_tests,
    parametrize,
 )
-from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

+from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

 torch.set_float32_matmul_precision("high")
 if HAS_CUDA:
--- a/test/inductor/test_decompose_mem_bound_mm.py
+++ b/test/inductor/test_decompose_mem_bound_mm.py
@ -9,13 +9,13 @@ from torch._dynamo.utils import counters
 from torch._inductor.test_case import run_tests, TestCase
 from torch._inductor.utils import run_and_get_code
 from torch.testing import FileCheck
+
 from torch.testing._internal.common_utils import (
    instantiate_parametrized_tests,
    parametrize,
 )
 from torch.testing._internal.inductor_utils import HAS_CUDA

-
 requires_cuda = unittest.skipUnless(HAS_CUDA, "requires cuda")


--- a/test/inductor/test_dependencies.py
+++ b/test/inductor/test_dependencies.py
@ -3,11 +3,13 @@ import contextlib

 import torch
 from torch._inductor.dependencies import MemoryDep
+
 from torch._inductor.graph import GraphLowering
 from torch._inductor.ir import Buffer, FixedLayout, Pointwise
 from torch._inductor.test_case import TestCase as InductorTestCase
 from torch._inductor.utils import sympy_index_symbol
 from torch._inductor.virtualized import ops, V
+
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CPU, HAS_GPU


--- a/test/inductor/test_distributed_patterns.py
+++ b/test/inductor/test_distributed_patterns.py
@ -10,7 +10,6 @@ from torch._dynamo.testing import CompileCounter
 from torch.testing._internal.common_utils import IS_MACOS, skipIfRocm, skipIfXpu
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CPU, requires_gpu

-
 # Fake distributed
 WORLD_SIZE = 2

--- a/test/inductor/test_efficient_conv_bn_eval.py
+++ b/test/inductor/test_efficient_conv_bn_eval.py
@ -9,7 +9,6 @@ import unittest
 import torch
 from torch import nn

-
 # Make the helper files in test/ importable
 pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 sys.path.append(pytorch_test_dir)
@ -17,9 +16,10 @@ sys.path.append(pytorch_test_dir)
 from torch._dynamo.utils import counters
 from torch._inductor import config as inductor_config
 from torch._inductor.test_case import TestCase
-from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, TEST_WITH_ASAN
-from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

+from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, TEST_WITH_ASAN
+
+from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

 if IS_WINDOWS and IS_CI:
    sys.stderr.write(
--- a/test/inductor/test_extension_backend.py
+++ b/test/inductor/test_extension_backend.py
@ -9,7 +9,6 @@ import torch._dynamo
 import torch.utils.cpp_extension
 from torch._C import FileCheck

-
 try:
    from extension_backends.cpp.extension_codegen_backend import (
        ExtensionCppWrapperCodegen,
@ -33,7 +32,6 @@ from torch._inductor.codegen.common import (
 )
 from torch.testing._internal.common_utils import IS_FBCODE, IS_MACOS

-
 try:
    try:
        from . import test_torchinductor
--- a/test/inductor/test_flex_attention.py
+++ b/test/inductor/test_flex_attention.py
@ -5,10 +5,12 @@ import functools
 import string
 from collections import namedtuple
 from typing import Callable, Optional
+
 from unittest import expectedFailure, skip, skipUnless
 from unittest.mock import patch

 import torch
+
 from torch._dynamo.testing import CompileCounterWithBackend, normalize_gm
 from torch._higher_order_ops.flex_attention import flex_attention as flex_attention_hop
 from torch._inductor import metrics
@ -31,7 +33,6 @@ from torch.testing._internal import common_utils
 from torch.testing._internal.common_cuda import PLATFORM_SUPPORTS_BF16
 from torch.utils._triton import has_triton

-
 # Skip tests if Triton is not available
 supported_platform = skipUnless(
    torch.cuda.is_available()
--- a/test/inductor/test_flex_decoding.py
+++ b/test/inductor/test_flex_decoding.py
@ -4,10 +4,12 @@
 import functools
 from collections import namedtuple
 from typing import Callable, Optional
+
 from unittest import expectedFailure, skip, skipUnless
 from unittest.mock import patch

 import torch
+
 from torch._higher_order_ops.flex_attention import flex_attention as flex_attention_hop
 from torch._inductor.test_case import TestCase as InductorTestCase
 from torch._inductor.utils import run_and_get_code
@ -26,7 +28,6 @@ from torch.testing._internal import common_utils
 from torch.testing._internal.common_cuda import PLATFORM_SUPPORTS_BF16
 from torch.utils._triton import has_triton

-
 # Skip tests if Triton is not available
 supported_platform = skipUnless(
    torch.cuda.is_available()
--- a/test/inductor/test_foreach.py
+++ b/test/inductor/test_foreach.py
@ -4,17 +4,19 @@ import sys
 import unittest

 import torch
+
 import torch._inductor
+
 from torch._inductor.test_case import TestCase
 from torch.testing._internal.common_utils import (
    instantiate_parametrized_tests,
    IS_FBCODE,
    parametrize,
 )
+
 from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA
 from torch.testing._internal.triton_utils import requires_cuda

-
 aten = torch.ops.aten

 try:
--- a/test/inductor/test_fp8.py
+++ b/test/inductor/test_fp8.py
@ -15,7 +15,6 @@ from torch.testing._internal.common_utils import (
 )
 from torch.testing._internal.inductor_utils import HAS_CUDA

-
 torch.set_float32_matmul_precision("high")


--- a/test/inductor/test_fx_fusion.py
+++ b/test/inductor/test_fx_fusion.py
@ -14,7 +14,6 @@ from torch._inductor.fx_passes.pre_grad import (
 from torch._inductor.test_case import run_tests, TestCase
 from torch.fx.passes.shape_prop import ShapeProp

-
 PassFunc = Callable[[torch.fx.GraphModule, Any], torch.fx.GraphModule]


--- a/test/inductor/test_graph_transform_observer.py
+++ b/test/inductor/test_graph_transform_observer.py
@ -13,7 +13,6 @@ from torch.testing._internal.common_cuda import PLATFORM_SUPPORTS_FUSED_ATTENTIO
 from torch.testing._internal.common_utils import IS_LINUX, skipIfRocm
 from torch.testing._internal.inductor_utils import HAS_CUDA

-
 try:
    import pydot  # noqa: F401

--- a/test/inductor/test_group_batch_fusion.py
+++ b/test/inductor/test_group_batch_fusion.py
@ -11,7 +11,6 @@ from torch._dynamo.utils import counters, optimus_scuba_log
 from torch._inductor.test_case import run_tests, TestCase
 from torch.testing._internal.inductor_utils import HAS_CUDA

-
 try:
    # importing this will register fbgemm lowerings for inductor
    import deeplearning.fbgemm.fbgemm_gpu.fb.inductor_lowerings  # noqa: F401
--- a/test/inductor/test_halide.py
+++ b/test/inductor/test_halide.py
@ -13,6 +13,7 @@ from torch._inductor.codecache import HalideCodeCache
 from torch._inductor.runtime.hints import HalideInputSpec, HalideMeta
 from torch._inductor.test_case import run_tests, TestCase
 from torch._inductor.utils import parallel_num_threads
+
 from torch.testing._internal.common_utils import IS_CI, IS_MACOS, IS_WINDOWS
 from torch.testing._internal.inductor_utils import HAS_CPU
 from torch.utils._triton import has_triton
--- a/test/inductor/test_indexing.py
+++ b/test/inductor/test_indexing.py
@ -5,10 +5,12 @@ import unittest
 import sympy

 import torch
+
 from torch._inductor.codegen.cpp import cexpr
 from torch._inductor.codegen.triton import texpr
 from torch._inductor.codegen.wrapper import pexpr
 from torch._inductor.runtime.runtime_utils import do_bench_gpu
+
 from torch._inductor.sizevars import SizeVarAllocator
 from torch._inductor.test_case import TestCase as InductorTestCase
 from torch._inductor.utils import run_and_get_triton_code
@ -24,7 +26,6 @@ from torch.utils._sympy.functions import (
    RoundToInt,
 )

-
 DO_PERF_TEST = os.environ.get("DO_PERF_TEST") == "1"


--- a/test/inductor/test_inductor_freezing.py
+++ b/test/inductor/test_inductor_freezing.py
@ -9,6 +9,7 @@ import unittest
 import weakref

 import torch
+
 from torch import nn
 from torch._inductor import config
 from torch._inductor.test_case import TestCase as InductorTestCase
@ -17,7 +18,6 @@ from torch.testing import FileCheck
 from torch.testing._internal.common_cuda import SM80OrLater
 from torch.testing._internal.common_utils import skipIfRocm

-
 # Make the helper files in test/ importable
 pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 sys.path.append(pytorch_test_dir)
@ -29,7 +29,6 @@ from torch.testing._internal.common_utils import (
    TEST_WITH_ROCM,
 )

-
 if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_torchinductor yet\n"
@ -40,13 +39,11 @@ if IS_WINDOWS and IS_CI:

 from inductor.test_torchinductor import check_model, check_model_cuda, copy_tests

-
 importlib.import_module("functorch")
 importlib.import_module("filelock")

 from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

-
 aten = torch.ops.aten
 prims = torch.ops.prims
 requires_cuda = unittest.skipUnless(HAS_CUDA, "requires cuda")
--- a/test/inductor/test_inductor_utils.py
+++ b/test/inductor/test_inductor_utils.py
@ -5,9 +5,10 @@ import logging

 import torch
 from torch._inductor.runtime.runtime_utils import do_bench
-from torch._inductor.test_case import run_tests, TestCase
-from torch._inductor.utils import do_bench_using_profiling

+from torch._inductor.test_case import run_tests, TestCase
+
+from torch._inductor.utils import do_bench_using_profiling

 log = logging.getLogger(__name__)

--- a/test/inductor/test_inplacing_pass.py
+++ b/test/inductor/test_inplacing_pass.py
@ -5,7 +5,6 @@ from torch._inductor.test_case import run_tests, TestCase
 from torch.testing._internal.common_utils import IS_LINUX
 from torch.testing._internal.inductor_utils import HAS_CUDA

-
 aten = torch.ops.aten


--- a/test/inductor/test_layout_optim.py
+++ b/test/inductor/test_layout_optim.py
@ -11,7 +11,6 @@ from torch._inductor.test_case import run_tests, TestCase
 from torch.testing._internal.common_cuda import tf32_off
 from torch.testing._internal.inductor_utils import HAS_CUDA

-
 USE_DDP_WRAPPER = os.environ.get("USE_DDP_WRAPPER", "1") == "1"


--- a/test/inductor/test_loop_ordering.py
+++ b/test/inductor/test_loop_ordering.py
@ -7,7 +7,6 @@ from torch._inductor import config as inductor_config, metrics
 from torch._inductor.test_case import run_tests, TestCase
 from torch.testing._internal.inductor_utils import HAS_CUDA

-
 if HAS_CUDA:
    torch.set_default_device("cuda")

--- a/test/inductor/test_max_autotune.py
+++ b/test/inductor/test_max_autotune.py
@ -2,6 +2,7 @@
 import json
 import os
 import unittest
+
 from typing import Callable, List, Optional

 import torch
@ -23,6 +24,7 @@ from torch._inductor.select_algorithm import (
    TritonTemplateCaller,
 )
 from torch._inductor.test_case import run_tests, TestCase
+
 from torch._inductor.utils import fresh_inductor_cache, run_and_get_code
 from torch._inductor.virtualized import V
 from torch.fx.experimental.proxy_tensor import make_fx
@ -32,8 +34,8 @@ from torch.testing._internal.common_utils import (
    parametrize,
    skipIfRocm,
 )
-from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

+from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA

 torch.set_float32_matmul_precision("high")
 if HAS_CUDA:
--- a/test/inductor/test_memory_planning.py
+++ b/test/inductor/test_memory_planning.py
@ -1,12 +1,12 @@
 # Owner(s): ["module: inductor"]

 import sys
+
 import unittest

 from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS, skipIfRocm
 from torch.testing._internal.inductor_utils import HAS_CUDA

-
 if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_memory_planning yet\n"
--- a/test/inductor/test_metrics.py
+++ b/test/inductor/test_metrics.py
@ -5,8 +5,8 @@ from torch._inductor.test_case import run_tests, TestCase
 from torch._inductor.utils import collect_defined_kernels
 from torch._inductor.wrapper_benchmark import get_kernel_category_by_source_code
 from torch.testing._internal.common_device_type import largeTensorTest
-from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU

+from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU

 example_kernel = """
@triton_heuristics.reduction(
--- a/test/inductor/test_minifier_isolate.py
+++ b/test/inductor/test_minifier_isolate.py
@ -11,7 +11,6 @@ from torch.testing._internal.common_utils import (
 )
 from torch.testing._internal.inductor_utils import HAS_CUDA

-
 requires_cuda = unittest.skipUnless(HAS_CUDA, "requires cuda")


--- a/test/inductor/test_mkldnn_pattern_matcher.py
+++ b/test/inductor/test_mkldnn_pattern_matcher.py
@ -6,6 +6,7 @@ import unittest

 import torch
 import torch.ao.quantization.quantizer.x86_inductor_quantizer as xiq
+
 from torch._dynamo import config as dynamo_config
 from torch._dynamo.utils import counters
 from torch._inductor import config, metrics
--- a/test/inductor/test_mmdecomp.py
+++ b/test/inductor/test_mmdecomp.py
@ -12,7 +12,6 @@ from torch.testing._internal.common_nn import NNTestCase
 from torch.testing._internal.common_utils import IS_WINDOWS, parametrize, run_tests
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU

-
 default_atol = {
    torch.float16: 1e-3,
    torch.bfloat16: float("infinity"),
--- a/test/inductor/test_move_constructors_to_cuda.py
+++ b/test/inductor/test_move_constructors_to_cuda.py
@ -11,7 +11,6 @@ from torch.testing._internal.common_cuda import TEST_MULTIGPU
 from torch.testing._internal.common_utils import IS_LINUX
 from torch.testing._internal.inductor_utils import HAS_CUDA

-
 requires_multigpu = functools.partial(
    unittest.skipIf, not TEST_MULTIGPU, "requires multiple cuda devices"
 )
--- a/test/inductor/test_multi_kernel.py
+++ b/test/inductor/test_multi_kernel.py
@ -7,6 +7,7 @@ import unittest
 import torch
 from torch import nn
 from torch._dynamo.testing import reset_rng_state
+
 from torch._inductor import config, test_operators
 from torch._inductor.codegen.multi_kernel import MultiKernelCall
 from torch._inductor.test_case import TestCase
--- a/test/inductor/test_pad_mm.py
+++ b/test/inductor/test_pad_mm.py
@ -2,6 +2,7 @@
 import unittest

 import torch
+
 import torch._inductor.config as inductor_config
 from torch._dynamo.testing import rand_strided
 from torch._inductor.fx_passes.pad_mm import (
@ -10,6 +11,7 @@ from torch._inductor.fx_passes.pad_mm import (
    get_padded_length,
    should_pad_common,
 )
+
 from torch._inductor.test_case import run_tests, TestCase
 from torch._inductor.utils import fresh_inductor_cache, run_and_get_code
 from torch.testing import FileCheck
--- a/test/inductor/test_padding.py
+++ b/test/inductor/test_padding.py
@ -1,5 +1,6 @@
 # Owner(s): ["module: inductor"]
 import copy
+
 import functools
 import os
 import unittest
@ -16,7 +17,6 @@ from torch._inductor.utils import run_and_get_code
 from torch.testing._internal.common_utils import serialTest
 from torch.testing._internal.inductor_utils import HAS_CUDA

-
 DO_PERF_TEST = os.environ.get("DO_PERF_TEST") == "1"
 DO_ACC_TEST = os.environ.get("DO_ACC_TEST", "1") == "1"
 WITH_STACK = os.environ.get("WITH_STACK") == "1"
--- a/test/inductor/test_pattern_matcher.py
+++ b/test/inductor/test_pattern_matcher.py
@ -11,6 +11,7 @@ import torch.nn.functional as F
 from torch._dynamo.utils import count_calls, counters
 from torch._higher_order_ops.out_dtype import out_dtype
 from torch._inductor.fx_passes import joint_graph
+
 from torch._inductor.pattern_matcher import (
    Arg,
    CallFunction,
--- a/test/inductor/test_perf.py
+++ b/test/inductor/test_perf.py
@ -3,6 +3,7 @@ import contextlib
 from unittest.mock import patch

 import functorch
+
 import torch
 import torch._inductor.config as config
 import torch.autograd
@ -24,11 +25,10 @@ from torch._inductor.test_case import TestCase as InductorTestCase
 #
 # That may still be aceeptable, but be aware that you are likely lowering
 # performance for that setting.
-#
+
 # Defines all the kernels for tests
 from torch.testing._internal.triton_utils import HAS_CUDA, requires_cuda

-
 if HAS_CUDA:
    from torch.testing._internal.triton_utils import add_kernel

--- a/test/inductor/test_profiler.py
+++ b/test/inductor/test_profiler.py
@ -5,12 +5,14 @@ import unittest
 import torch
 import torch._inductor.test_case
 import torch._inductor.utils
+
 from torch._inductor import config
 from torch.profiler import ProfilerActivity
+
 from torch.testing._internal.common_utils import TemporaryFileName
 from torch.testing._internal.inductor_utils import HAS_CUDA
-from torch.utils._triton import has_triton

+from torch.utils._triton import has_triton

 HAS_TRITON = has_triton()

--- a/test/inductor/test_scatter_optimization.py
+++ b/test/inductor/test_scatter_optimization.py
@ -12,7 +12,6 @@ from torch._inductor.runtime.runtime_utils import do_bench_gpu as do_bench
 from torch._inductor.test_case import TestCase
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU

-
 DO_PERF_TEST = os.environ.get("DO_PERF_TEST") == "1"


--- a/test/inductor/test_select_algorithm.py
+++ b/test/inductor/test_select_algorithm.py
@ -11,10 +11,10 @@ from torch._dynamo.testing import expectedFailureDynamicWrapper
 from torch._dynamo.utils import counters
 from torch._inductor.autotune_process import TritonBenchmarkRequest
 from torch._inductor.test_case import run_tests, TestCase
+
 from torch.testing._internal.common_utils import IS_LINUX, skipIfRocm
 from torch.testing._internal.inductor_utils import HAS_CUDA

-
 aten = torch.ops.aten


--- a/test/inductor/test_smoke.py
+++ b/test/inductor/test_smoke.py
@ -4,6 +4,7 @@ import unittest

 import torch
 import torch._logging
+
 from torch._inductor.test_case import TestCase
 from torch.testing._internal.common_utils import IS_LINUX
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CUDA, HAS_GPU
--- a/test/inductor/test_snode_runtime.py
+++ b/test/inductor/test_snode_runtime.py
@ -5,6 +5,7 @@ from unittest import skipIf

 import torch
 import torch.distributed as dist
+
 from torch._inductor import config, metrics
 from torch._inductor.comm_analysis import estimate_nccl_collective_runtime
 from torch._inductor.compile_fx import compile_fx, compile_fx_inner
@ -12,7 +13,6 @@ from torch._inductor.test_case import TestCase as InductorTestCase
 from torch._inductor.utils import is_collective
 from torch.testing._internal.inductor_utils import HAS_CUDA

-
 aten = torch.ops.aten
 c10d = torch.ops.c10d_functional
 _c10d = torch.ops._c10d_functional
--- a/test/inductor/test_torchbind.py
+++ b/test/inductor/test_torchbind.py
@ -6,6 +6,7 @@ import torch._inductor
 import torch._inductor.decomposition
 from torch._higher_order_ops.torchbind import enable_torchbind_tracing
 from torch._inductor.test_case import run_tests, TestCase
+
 from torch.testing._internal.torchbind_impls import init_torchbind_implementations


--- a/test/inductor/test_torchinductor.py
+++ b/test/inductor/test_torchinductor.py
@ -26,6 +26,7 @@ from unittest.mock import patch
 import numpy as np

 import torch
+
 import torch._dynamo.config as dynamo_config
 import torch._inductor.aoti_eager
 import torch.nn as nn
@ -66,6 +67,7 @@ from torch.testing._internal.common_cuda import (
    TEST_CUDNN,
    with_tf32_off,
 )
+
 from torch.testing._internal.common_device_type import (
    _has_sufficient_memory,
    expectedFailureXPU,
@ -93,7 +95,6 @@ from torch.utils._python_dispatch import TorchDispatchMode
 from torch.utils._pytree import tree_flatten, tree_unflatten
 from torch.utils.weak import WeakTensorKeyDictionary

-
 DO_PERF_TEST = os.environ.get("DO_PERF_TEST") == "1"

 if IS_WINDOWS and IS_CI:
@ -108,12 +109,14 @@ importlib.import_module("functorch")
 importlib.import_module("filelock")

 from torch._inductor import config, test_operators
+
 from torch._inductor.compile_fx import (
    compile_fx,
    compile_fx_inner,
    complex_memory_overlap,
 )
 from torch._inductor.utils import has_torchvision_roi_align
+
 from torch.testing._internal.common_utils import slowTest
 from torch.testing._internal.inductor_utils import (
    GPU_TYPE,
@ -125,7 +128,6 @@ from torch.testing._internal.inductor_utils import (
    skipCUDAIf,
 )

-
 HAS_AVX2 = "fbgemm" in torch.backends.quantized.supported_engines

 aten = torch.ops.aten
--- a/test/inductor/test_torchinductor_codegen_dynamic_shapes.py
+++ b/test/inductor/test_torchinductor_codegen_dynamic_shapes.py
@ -20,7 +20,6 @@ from torch.testing._internal.inductor_utils import (
    HAS_GPU,
 )

-
 if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_torchinductor_codegen_dynamic_shapes yet\n"
--- a/test/inductor/test_torchinductor_dynamic_shapes.py
+++ b/test/inductor/test_torchinductor_dynamic_shapes.py
@ -1,6 +1,7 @@
 # Owner(s): ["module: inductor"]
 import contextlib
 import importlib
+
 import math
 import operator
 import os
@ -36,7 +37,6 @@ from torch.testing._internal.common_utils import (
 )
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CPU, HAS_GPU

-
 if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_torchinductor_dynamic_shapes yet\n"
@ -56,7 +56,6 @@ from inductor.test_torchinductor import (
    TestFailure,
 )

-
 importlib.import_module("filelock")

 # xfail by default, set is_skip=True to skip
--- a/test/inductor/test_torchinductor_opinfo.py
+++ b/test/inductor/test_torchinductor_opinfo.py
@ -11,6 +11,7 @@ from functools import partial
 from unittest.mock import patch

 import torch
+
 from torch._dispatch.python import enable_python_dispatcher
 from torch._inductor.test_case import run_tests, TestCase
 from torch._subclasses.fake_tensor import (
@ -44,7 +45,6 @@ from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CPU, HAS_CUDA
 from torch.utils._python_dispatch import TorchDispatchMode
 from torch.utils._pytree import tree_map

-
 try:
    try:
        from .test_torchinductor import check_model, check_model_gpu
@ -263,7 +263,6 @@ intentionally_not_handled = {
 # We should eventually always turn it on
 import torch._functorch.config as functorch_config

-
 if not functorch_config.view_replay_for_aliased_outputs:
    intentionally_not_handled['("as_strided", "partial_views")'] = {
        b8,
--- a/test/inductor/test_triton_extension_backend.py
+++ b/test/inductor/test_triton_extension_backend.py
@ -8,7 +8,6 @@ import torch
 import torch._dynamo
 import torch.utils.cpp_extension

-
 try:
    from extension_backends.triton.device_interface import DeviceInterface
    from extension_backends.triton.extension_codegen_backend import (
@ -36,7 +35,6 @@ from torch._inductor.codegen.common import (
 from torch._inductor.utils import get_triton_code
 from torch.testing._internal.common_utils import IS_MACOS

-
 try:
    try:
        from . import test_torchinductor
--- a/test/inductor/test_triton_heuristics.py
+++ b/test/inductor/test_triton_heuristics.py
@ -4,10 +4,10 @@ import sys
 import unittest

 import torch
+
 from torch.testing._internal.common_utils import IS_LINUX, skipIfXpu
 from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU

-
 try:
    import triton  # noqa: F401
 except ImportError:
--- a/test/inductor/test_triton_kernels.py
+++ b/test/inductor/test_triton_kernels.py
@ -6,7 +6,9 @@ from unittest.mock import patch

 import torch
 import torch._dynamo.testing
+
 import torch._inductor.test_case
+
 from torch._higher_order_ops.triton_kernel_wrap import (
    generate_ttir,
    triton_kernel_wrapper_functional,
@ -17,11 +19,10 @@ from torch._inductor.utils import run_and_get_code
 from torch._library import capture_triton
 from torch.testing._internal import common_utils
 from torch.testing._internal.common_utils import skipIfRocm, skipIfXpu, TEST_WITH_ROCM
-from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CUDA, HAS_GPU, HAS_XPU

 # Defines all the kernels for tests
 from torch.testing._internal.triton_utils import *  # noqa: F403
-
+from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_CUDA, HAS_GPU, HAS_XPU

 if HAS_GPU:
    import triton
--- a/test/inductor/test_xpu_basic.py
+++ b/test/inductor/test_xpu_basic.py
@ -7,7 +7,6 @@ import unittest
 import torch
 from torch.testing._internal.common_utils import IS_CI, IS_WINDOWS

-
 if IS_WINDOWS and IS_CI:
    sys.stderr.write(
        "Windows CI does not have necessary dependencies for test_xpu_basic yet\n"
--- a/tools/linter/adapters/ufmt_linter.py
+++ b/tools/linter/adapters/ufmt_linter.py
@ -43,6 +43,7 @@ ISORT_SKIPLIST = re.compile(
                    "test/dy*/**",
                    # test/[e-h]*/**
                    # test/i*/**
+                    "test/i*/**",
                    # test/j*/**
                    "test/j*/**",
                    # test/[k-p]*/**