[inductor] slow test some Windows UTs. (#160267)

After we enabled the Windows inductor UTs in https://github.com/pytorch/pytorch/pull/160161/, the main-branch CI started hitting timeouts.
This PR moves some of those UTs to the slow-test set.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/160267
Approved by: https://github.com/ezyang
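For reference, here is a minimal sketch (not part of the actual diff) of the pattern this PR applies: import IS_WINDOWS and slowTestIf from torch.testing._internal.common_utils and decorate the heavy test methods so that, on Windows, they are treated as slow tests and only run when slow tests are enabled (PYTORCH_TEST_WITH_SLOW=1). The TestExample class and test_sum method below are illustrative names only, not code from the PR.

    import torch
    from torch.testing._internal.common_utils import (
        IS_WINDOWS,
        TestCase,
        run_tests,
        slowTestIf,
    )


    class TestExample(TestCase):
        # On Windows the decorator marks this test as slow, so it is skipped
        # unless slow tests are enabled; on other platforms it is a no-op and
        # the test runs as usual.
        @slowTestIf(IS_WINDOWS)
        def test_sum(self):
            self.assertEqual(torch.ones(3).sum().item(), 3.0)


    if __name__ == "__main__":
        run_tests()

Because slowTestIf(IS_WINDOWS) is a no-op off Windows, coverage is only deferred on Windows to jobs that enable slow tests, not removed.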
Xu Han
2025-08-10 18:35:42 +00:00
committed by PyTorch MergeBot
parent 7ae0629d64
commit d6786741a7
3 changed files with 34 additions and 1 deletion


@@ -14,9 +14,12 @@ from torch.utils._python_dispatch import TorchDispatchMode
from torch.testing._internal.common_methods_invocations import op_db
from torch.testing._internal.jit_utils import JitTestCase
from torch.testing._internal.common_device_type import ops, OpDTypes, instantiate_device_type_tests
from torch.testing._internal.common_utils import IS_WINDOWS, slowTestIf
pytorch_test_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
sys.path.append(pytorch_test_dir)
def secretly_aliasing(x):
return x.view(-1)
@@ -493,9 +496,9 @@ class TestSchemaCheck(JitTestCase):
with SchemaInfoBindTestMode(self) as schemaInfoCheck:
x.add(x)
class TestSchemaCheckModeOpInfo(JitTestCase):
@ops(op_db, dtypes=OpDTypes.supported)
@slowTestIf(IS_WINDOWS)
def test_schema_correctness(self, device, dtype, op):
# Currently torch.equal isn't supported with torch.complex32
# There's also errors with complex64 and complex128


@@ -66,6 +66,7 @@ from torch.testing._internal.common_dtype import (
get_all_qint_dtypes, all_types_complex_float8_and,
)
from torch.testing._internal.two_tensor import TwoTensor
from torch.testing._internal.common_utils import IS_WINDOWS
if TEST_WITH_TORCHINDUCTOR:
from torch._inductor.test_case import TestCase
@@ -158,6 +159,7 @@ class TestTorchDeviceType(TestCase):
self.assertEqual(torch.inf, math.inf)
@onlyNativeDeviceTypes
@slowTestIf(IS_WINDOWS)
@dtypes(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64,
torch.bool, torch.float32, torch.complex64, torch.float64,
torch.complex128, torch.uint16, torch.uint32, torch.uint64)
@@ -190,6 +192,7 @@ class TestTorchDeviceType(TestCase):
@dtypes(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64,
torch.bool, torch.float32, torch.complex64, torch.float64,
torch.complex128, torch.uint16, torch.uint32, torch.uint64)
@slowTestIf(IS_WINDOWS)
def test_storage(self, device, dtype):
v = make_tensor((3, 5), dtype=dtype, device=device, low=-9, high=9)
self.assertEqual(v.storage()[0], v[0][0])
@@ -220,6 +223,7 @@ class TestTorchDeviceType(TestCase):
torch.bool, torch.float32, torch.complex64, torch.float64,
torch.complex128, torch.quint8, torch.qint8, torch.qint32,
torch.quint4x2)
@slowTestIf(IS_WINDOWS)
def test_storage_setitem(self, device, dtype):
# Skip quantized dtypes for CUDA, since they're not supported
if torch.device(device).type == 'cuda':
@@ -251,6 +255,7 @@ class TestTorchDeviceType(TestCase):
@skipIfTorchDynamo("Not a suitable test for TorchDynamo")
@onlyNativeDeviceTypes
@slowTestIf(IS_WINDOWS)
def test_storage_use_count(self, device):
a = torch.randn(10, device=device)
prev_cf = torch._C._storage_Use_Count(a.untyped_storage()._cdata)
@@ -261,6 +266,7 @@ class TestTorchDeviceType(TestCase):
@xfailIfTorchDynamo
@onlyNativeDeviceTypes
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
@slowTestIf(IS_WINDOWS)
def test_tensor_storage_type(self, device, dtype):
a = make_tensor((10,), dtype=dtype, device=device, low=-9, high=9)
@@ -271,6 +277,7 @@ class TestTorchDeviceType(TestCase):
@onlyNativeDeviceTypes
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16, torch.uint16, torch.uint32, torch.uint64))
@slowTestIf(IS_WINDOWS)
def test_tensor_from_storage(self, device, dtype):
a = make_tensor((4, 5, 3), dtype=dtype, device=device, low=-9, high=9)
a_s = a.storage()
@@ -288,6 +295,7 @@ class TestTorchDeviceType(TestCase):
@onlyNativeDeviceTypes
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
@slowTestIf(IS_WINDOWS)
def test_set_storage(self, device, dtype):
a = make_tensor((4, 5, 3), dtype=dtype, device=device, low=-9, high=9)
a_s = a.storage()
@@ -326,6 +334,7 @@ class TestTorchDeviceType(TestCase):
@onlyNativeDeviceTypes
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
@slowTestIf(IS_WINDOWS)
def test_typed_storage_meta(self, device, dtype):
args_list = [
[],
@@ -339,6 +348,7 @@ class TestTorchDeviceType(TestCase):
self._check_storage_meta(s, s_check)
@onlyNativeDeviceTypes
@slowTestIf(IS_WINDOWS)
def test_untyped_storage_meta(self, device):
args_list = [
[],
@@ -353,6 +363,7 @@ class TestTorchDeviceType(TestCase):
@onlyNativeDeviceTypes
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
@slowTestIf(IS_WINDOWS)
def test_storage_meta_from_tensor(self, device, dtype):
t_check = make_tensor((4, 5, 3), dtype=dtype, device=device, low=-9, high=9)
t = t_check.to('meta')
@@ -362,6 +373,7 @@ class TestTorchDeviceType(TestCase):
self._check_storage_meta(s, s_check)
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
@slowTestIf(IS_WINDOWS)
def test_storage_meta_errors(self, device, dtype):
s0 = torch.TypedStorage([1, 2, 3, 4], device='meta', dtype=dtype)
@@ -402,6 +414,7 @@ class TestTorchDeviceType(TestCase):
@onlyCPU
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
@slowTestIf(IS_WINDOWS)
def test_storage_meta_ok(self, device, dtype):
s0 = torch.TypedStorage([1, 2, 3, 4], device='meta', dtype=dtype)
@@ -417,6 +430,7 @@ class TestTorchDeviceType(TestCase):
model.share_memory()
@dtypes(torch.float32, torch.complex64)
@slowTestIf(IS_WINDOWS)
def test_deepcopy(self, device, dtype):
from copy import deepcopy
a = torch.randn(5, 5, dtype=dtype, device=device)
@@ -444,6 +458,7 @@ class TestTorchDeviceType(TestCase):
self.assertEqual(deepcopy(a).foo, 3)
@dtypes(torch.float32, torch.complex64)
@slowTestIf(IS_WINDOWS)
def test_deepcopy_scalar(self, device, dtype):
from copy import deepcopy
a = torch.tensor(5, dtype=dtype, device=device)
@@ -3696,6 +3711,7 @@ else:
# FIXME: find a test suite for the take operator
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
@slowTestIf(IS_WINDOWS)
def test_take(self, device, dtype):
idx_size = (4,)


@@ -54,6 +54,8 @@ from torch.testing._internal.common_utils import (
)
from torch.utils import _pytree as pytree
from torch.testing._internal.common_utils import IS_WINDOWS, slowTestIf
if TEST_SCIPY:
import scipy
@@ -271,6 +273,7 @@ class TestUnaryUfuncs(TestCase):
# and noncontiguities.
@suppress_warnings
@ops(reference_filtered_ops)
@slowTestIf(IS_WINDOWS)
def test_reference_numerics_normal(self, device, dtype, op):
tensors = generate_elementwise_unary_tensors(
op, device=device, dtype=dtype, requires_grad=False
@@ -279,6 +282,7 @@ class TestUnaryUfuncs(TestCase):
@suppress_warnings
@ops(reference_filtered_ops)
@slowTestIf(IS_WINDOWS)
def test_reference_numerics_small(self, device, dtype, op):
if dtype in (torch.bool,):
raise self.skipTest("bool has no small values")
@@ -290,6 +294,7 @@ class TestUnaryUfuncs(TestCase):
@suppress_warnings
@ops(reference_filtered_ops)
@slowTestIf(IS_WINDOWS)
def test_reference_numerics_large(self, device, dtype, op):
if dtype in (torch.bool, torch.uint8, torch.int8):
raise self.skipTest("bool, uint8, and int8 dtypes have no large values")
@@ -304,6 +309,7 @@ class TestUnaryUfuncs(TestCase):
reference_filtered_ops,
allowed_dtypes=floating_and_complex_types_and(torch.bfloat16, torch.half),
)
@slowTestIf(IS_WINDOWS)
def test_reference_numerics_extremal(self, device, dtype, op):
tensors = generate_elementwise_unary_extremal_value_tensors(
op, device=device, dtype=dtype, requires_grad=False
@@ -312,6 +318,7 @@ class TestUnaryUfuncs(TestCase):
# Tests for testing (non)contiguity consistency
@ops(unary_ufuncs)
@slowTestIf(IS_WINDOWS)
def test_contig_vs_every_other(self, device, dtype, op):
contig = make_tensor(
(1026,), device=device, dtype=dtype, low=op.domain[0], high=op.domain[1]
@@ -328,6 +335,7 @@ class TestUnaryUfuncs(TestCase):
self.assertEqual(result, expected)
@ops(unary_ufuncs)
@slowTestIf(IS_WINDOWS)
def test_contig_vs_transposed(self, device, dtype, op):
contig = make_tensor(
(789, 357), device=device, dtype=dtype, low=op.domain[0], high=op.domain[1]
@@ -344,6 +352,7 @@ class TestUnaryUfuncs(TestCase):
self.assertEqual(result, expected)
@ops(unary_ufuncs)
@slowTestIf(IS_WINDOWS)
def test_non_contig(self, device, dtype, op):
shapes = [(5, 7), (1024,)]
for shape in shapes:
@@ -360,6 +369,7 @@ class TestUnaryUfuncs(TestCase):
self.assertEqual(op(contig, **torch_kwargs), op(non_contig, **torch_kwargs))
@ops(unary_ufuncs)
@slowTestIf(IS_WINDOWS)
def test_non_contig_index(self, device, dtype, op):
contig = make_tensor(
(2, 2, 1, 2),
@@ -378,6 +388,7 @@ class TestUnaryUfuncs(TestCase):
self.assertEqual(op(contig, **torch_kwargs), op(non_contig, **torch_kwargs))
@ops(unary_ufuncs)
@slowTestIf(IS_WINDOWS)
def test_non_contig_expand(self, device, dtype, op):
shapes = [(1, 3), (1, 7), (5, 7)]
for shape in shapes:
@@ -399,6 +410,7 @@ class TestUnaryUfuncs(TestCase):
)
@ops(unary_ufuncs)
@slowTestIf(IS_WINDOWS)
def test_contig_size1(self, device, dtype, op):
contig = make_tensor(
(5, 100), dtype=dtype, device=device, low=op.domain[0], high=op.domain[1]
@@ -414,6 +426,7 @@ class TestUnaryUfuncs(TestCase):
self.assertEqual(op(contig, **torch_kwargs), op(contig2, **torch_kwargs))
@ops(unary_ufuncs)
@slowTestIf(IS_WINDOWS)
def test_contig_size1_large_dim(self, device, dtype, op):
contig = make_tensor(
(5, 2, 3, 1, 4, 5, 3, 2, 1, 2, 3, 4),
@@ -435,6 +448,7 @@ class TestUnaryUfuncs(TestCase):
# Tests that computation on a multiple batches is the same as
# per-batch computation.
@ops(unary_ufuncs)
@slowTestIf(IS_WINDOWS)
def test_batch_vs_slicing(self, device, dtype, op):
input = make_tensor(
(1024, 512), dtype=dtype, device=device, low=op.domain[0], high=op.domain[1]