[CI] Disable some tests that are failing in periodic (#150059)
Disabling some tests to restore periodic

nogpu avx512 timeout: 59f14d19ae (38492953496-box)
profiler failure: 7ae0ce6360 (38461255009-box)
test_accelerator failure: 87bfd66c3c (39476723746-box) origin: 146098
test_overrides failure: bf752c36da (39484562957-box) origin: 146098
inductor cpu repro: bb9c426024 (38447525659-box)
functorch eager transforms: 8f858e226b (39488068620-box) f2cea01f71 (39555064878) b5281a4a18 (39599355600) either 148288 or 148261? 2ec9aceaeb/1

Pull Request resolved: https://github.com/pytorch/pytorch/pull/150059
Approved by: https://github.com/ZainRizvi, https://github.com/atalman, https://github.com/malfet
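All of the test-file changes below apply the same fix: a conditional skip that disables the affected test only under the failing CI configuration and leaves it active everywhere else. A minimal runnable sketch of that pattern, with a hypothetical flag and test standing in for the real PyTorch ones:

import os
import unittest

# Hypothetical stand-in for a build/CI flag such as TEST_CUDA_MEM_LEAK_CHECK;
# the real flags come from torch.testing._internal.common_utils.
MEM_LEAK_CHECK = os.getenv("PYTORCH_TEST_CUDA_MEM_LEAK_CHECK", "0") == "1"


class ExampleTests(unittest.TestCase):
    @unittest.skipIf(
        MEM_LEAK_CHECK,
        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
    )
    def test_flaky_under_leak_check(self):
        # Runs everywhere except in the mem-leak-check configuration.
        self.assertEqual(2 + 2, 4)


if __name__ == "__main__":
    unittest.main()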
This commit is contained in:
committed by PyTorch MergeBot
parent 2bd5bfa3ce
commit 9092dd2e82
.github/workflows/periodic.yml (vendored, 5 lines changed)
@@ -59,8 +59,9 @@ jobs:
       docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc11
       test-matrix: |
         { include: [
-          { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-          { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+          { config: "nogpu_AVX512", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+          { config: "nogpu_AVX512", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+          { config: "nogpu_AVX512", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
           { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
           { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
           { config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },
test/functorch/test_eager_transforms.py

@@ -74,6 +74,7 @@ from torch.testing._internal.common_utils import (
     skipIfRocm,
     skipIfTorchDynamo,
     subtest,
+    TEST_CUDA_MEM_LEAK_CHECK,
     TEST_WITH_TORCHDYNAMO,
     TestCase,
     xfailIfTorchDynamo,
@@ -2865,6 +2866,10 @@ class TestLinearize(TestCase):
         self.assertEqual(actual_jvp, expected_jvp)

     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_return(self, device, dtype):
         x_p = make_tensor((3, 1), device=device, dtype=dtype)
         x_t = make_tensor((3, 1), device=device, dtype=dtype)
@@ -2879,6 +2884,10 @@ class TestLinearize(TestCase):
         self.assertEqual(actual_jvp, expected_jvp)

     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_composition_vmap(self, device, dtype):
         x_p = make_tensor((3, 1), device=device, dtype=dtype)
         x_t = make_tensor((3, 3, 1), device=device, dtype=dtype)
@@ -2897,6 +2906,10 @@ class TestLinearize(TestCase):
         self.assertEqual(actual_batched_jvp, expected_batched_jvp)

     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_composition_grad(self, device, dtype):
         x_p = make_tensor((3,), device=device, dtype=dtype)
         x_t = make_tensor((3,), device=device, dtype=dtype)
@@ -2916,6 +2929,10 @@ class TestLinearize(TestCase):
         self.assertEqual(actual_batched_jvp, expected_batched_jvp)

     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_nested_input_nested_output(self, device, dtype):
         x_p = make_tensor((3, 1), device=device, dtype=dtype)
         x_t = make_tensor((3, 1), device=device, dtype=dtype)
@@ -5151,6 +5168,10 @@ class TestCompileTransforms(TestCase):
     # torch.compile is not supported on Windows CUDA.
     # Triton only supports GPU with SM70 or later.
     @expectedFailureIf((IS_WINDOWS and TEST_CUDA) or (TEST_CUDA and not SM70OrLater))
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_compile_vmap_hessian(self, device):
         # The model and inputs are a smaller version
         # of code at benchmark repo:
test/inductor/test_cpu_repro.py

@@ -4149,6 +4149,10 @@ class CPUReproTests(TestCase):
             "__at_align__ std::array", 0, exactly=True
         ).run(code)

+    @unittest.skipIf(
+        os.getenv("ATEN_CPU_CAPABILITY") == "default",
+        "Failing in periodic nogpu_NO_AVX2, see #150059 for example",
+    )
     def test_group_norm_large_input(self):
         class M(torch.nn.Module):
             def __init__(self) -> None:
@@ -4178,6 +4182,10 @@ class CPUReproTests(TestCase):
         # check for parallel reduction.
         self.assertEqual(metrics.parallel_reduction_count, 1)

+    @unittest.skipIf(
+        os.getenv("ATEN_CPU_CAPABILITY") == "default",
+        "Failing in periodic nogpu_NO_AVX2, see #150059 for example",
+    )
     def test_group_norm_large_size(self):
         # https://github.com/pytorch/pytorch/issues/141541
         # We are using the chunk size of 4096 for cascade summation,
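A note on the condition used in both skips above: the periodic nogpu_NO_AVX2 job is assumed to export ATEN_CPU_CAPABILITY=default, which pins ATen to its baseline, pre-AVX2 CPU kernels, so the decorator skips exactly that job. The gate can also be built once and reused; a small sketch:

import os
import unittest

# Reusable decorator matching the condition in the diff above; "default"
# is assumed to be the capability the nogpu_NO_AVX2 job exports.
skip_on_baseline_cpu = unittest.skipIf(
    os.getenv("ATEN_CPU_CAPABILITY") == "default",
    "Failing in periodic nogpu_NO_AVX2, see #150059 for example",
)


class ExampleCpuTests(unittest.TestCase):
    @skip_on_baseline_cpu
    def test_needs_vectorized_kernels(self):
        self.assertTrue(True)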
test/profiler/test_profiler.py

@@ -2169,6 +2169,10 @@ assert KinetoStepTracker.current_step() == initial_step + 2 * niters
     @skipIfTorchDynamo("profiler gets ignored if dynamo activated")
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA is required")
     @unittest.skipIf(not kineto_available(), "Kineto is required")
+    @unittest.skipIf(
+        "RelWithAssert" in torch.__config__.show(),
+        "failing in debug build, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_profile_all_threads(self):
         profiling_started = threading.Event()
         profiling_ended = threading.Event()
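The profiler skip above keys off the build type rather than an environment variable: torch.__config__.show() returns a human-readable string describing how the running torch binary was configured, so substring-matching it is a cheap way to detect a RelWithAssert (assertions-enabled) build. A minimal sketch:

import torch

# torch.__config__.show() reports build type, compiler, and feature flags
# for the current binary as one string.
build_config = torch.__config__.show()
IS_RELWITHASSERT_BUILD = "RelWithAssert" in build_config

print("RelWithAssert build:", IS_RELWITHASSERT_BUILD)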
test/test_accelerator.py

@@ -10,6 +10,8 @@ from torch.testing._internal.common_utils import NoTest, run_tests, TEST_MPS, TestCase
 if not torch.accelerator.is_available():
     print("No available accelerator detected, skipping tests", file=sys.stderr)
     TestCase = NoTest  # noqa: F811
+    # Skip because failing when run on cuda build with no GPU, see #150059 for example
+    sys.exit()

 TEST_MULTIACCELERATOR = torch.accelerator.device_count() > 1
test/test_overrides.py

@@ -1,5 +1,6 @@
 # Owner(s): ["module: __torch_function__"]

+import sys
 import torch
 import numpy as np
 import inspect
@@ -9,6 +10,7 @@ import pickle
 import collections
 import unittest
 import contextlib
+import os

 from torch.testing._internal.common_utils import TestCase, run_tests, TEST_WITH_CROSSREF, TEST_WITH_TORCHDYNAMO
 from torch.overrides import (
@@ -29,6 +31,14 @@ from torch.utils._pytree import tree_map

 Tensor = torch.Tensor

+if os.getenv("ATEN_CPU_CAPABILITY") in ("default", "avx2"):
+    # This test is not supported on ARM
+    print(
+        "Skipping due to failing when cuda build runs on non cuda machine, "
+        + "see https://github.com/pytorch/pytorch/pull/150059 for example"
+    )
+    sys.exit()
+
 # The functions below simulate the pure-python torch functions in the
 # torch.functional namespace. We use examples local to this file rather
 # than any of the real examples implemented in Python since in the
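Unlike the decorator-based skips, test_accelerator.py and test_overrides.py bail out at import time: they print a reason and call sys.exit() before any tests are defined (test_accelerator.py also rebinds TestCase to NoTest so nothing would be collected). A self-contained sketch of that module-level pattern, using a hypothetical gate condition:

import os
import sys
import unittest

# Hypothetical gate; the real files key off torch.accelerator availability
# or the ATEN_CPU_CAPABILITY environment variable.
if os.getenv("EXAMPLE_UNSUPPORTED_PLATFORM") == "1":
    print(
        "Skipping entire module, "
        "see https://github.com/pytorch/pytorch/pull/150059 for example",
        file=sys.stderr,
    )
    sys.exit()


class ExampleTests(unittest.TestCase):
    def test_runs_only_on_supported_platforms(self):
        self.assertTrue(True)


if __name__ == "__main__":
    unittest.main()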