[CI] Disable some tests that are failing in periodic (#150059)

Disabling some tests to get the periodic workflow back to green

nogpu_AVX512 timeout:
59f14d19ae (38492953496-box)

profiler failure: 7ae0ce6360 (38461255009-box)

test_accelerator failure:
87bfd66c3c (39476723746-box)
origin: 146098

test_overrides failure:
bf752c36da (39484562957-box)
origin: 146098

inductor cpu repro:
bb9c426024 (38447525659-box)

functorch eager transforms:
8f858e226b (39488068620-box)
f2cea01f71 (39555064878)
b5281a4a18 (39599355600)
either 148288 or 148261?

2ec9aceaeb/1

Pull Request resolved: https://github.com/pytorch/pytorch/pull/150059
Approved by: https://github.com/ZainRizvi, https://github.com/atalman, https://github.com/malfet
Author: Catherine Lee
Date: 2025-03-28 20:31:32 +00:00
Committed by: PyTorch MergeBot
Parent: 2bd5bfa3ce
Commit: 9092dd2e82
6 changed files with 48 additions and 2 deletions
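
Apart from the resharding in the first hunk, the disables below all follow the same pattern: keep the test in tree but guard it with unittest.skipIf (or a module-level early exit) on a condition that identifies the failing CI configuration, with a message pointing back at this PR so the disable is easy to find and revert later. A minimal sketch of that pattern; the flag name here is made up for illustration, while the real diffs key off TEST_CUDA_MEM_LEAK_CHECK, ATEN_CPU_CAPABILITY, and the build config string:

    import os
    import unittest

    # Hypothetical condition standing in for "this is the CI config that fails".
    RUNNING_IN_FAILING_CONFIG = os.getenv("SOME_CI_FLAG") == "1"

    class ExampleTests(unittest.TestCase):
        @unittest.skipIf(
            RUNNING_IN_FAILING_CONFIG,
            "Failing in periodic, see https://github.com/pytorch/pytorch/pull/150059 for example",
        )
        def test_something(self):
            self.assertTrue(True)

    if __name__ == "__main__":
        unittest.main()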


@@ -59,8 +59,9 @@ jobs:
       docker-image-name: pytorch-linux-focal-cuda12.6-cudnn9-py3-gcc11
       test-matrix: |
         { include: [
-          { config: "nogpu_AVX512", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
-          { config: "nogpu_AVX512", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+          { config: "nogpu_AVX512", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+          { config: "nogpu_AVX512", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
+          { config: "nogpu_AVX512", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
           { config: "nogpu_NO_AVX2", shard: 1, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
           { config: "nogpu_NO_AVX2", shard: 2, num_shards: 2, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge" },
           { config: "jit_legacy", shard: 1, num_shards: 1, runner: "${{ needs.get-label-type.outputs.label-type }}linux.4xlarge.nvidia.gpu" },


@@ -74,6 +74,7 @@ from torch.testing._internal.common_utils import (
     skipIfRocm,
     skipIfTorchDynamo,
     subtest,
+    TEST_CUDA_MEM_LEAK_CHECK,
     TEST_WITH_TORCHDYNAMO,
     TestCase,
     xfailIfTorchDynamo,
@@ -2865,6 +2866,10 @@ class TestLinearize(TestCase):
         self.assertEqual(actual_jvp, expected_jvp)
 
     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_return(self, device, dtype):
         x_p = make_tensor((3, 1), device=device, dtype=dtype)
         x_t = make_tensor((3, 1), device=device, dtype=dtype)
@@ -2879,6 +2884,10 @@ class TestLinearize(TestCase):
         self.assertEqual(actual_jvp, expected_jvp)
 
     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_composition_vmap(self, device, dtype):
         x_p = make_tensor((3, 1), device=device, dtype=dtype)
         x_t = make_tensor((3, 3, 1), device=device, dtype=dtype)
@@ -2897,6 +2906,10 @@ class TestLinearize(TestCase):
         self.assertEqual(actual_batched_jvp, expected_batched_jvp)
 
     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_composition_grad(self, device, dtype):
         x_p = make_tensor((3,), device=device, dtype=dtype)
         x_t = make_tensor((3,), device=device, dtype=dtype)
@@ -2916,6 +2929,10 @@ class TestLinearize(TestCase):
         self.assertEqual(actual_batched_jvp, expected_batched_jvp)
 
     @dtypes(torch.float)
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_linearize_nested_input_nested_output(self, device, dtype):
         x_p = make_tensor((3, 1), device=device, dtype=dtype)
         x_t = make_tensor((3, 1), device=device, dtype=dtype)
@@ -5151,6 +5168,10 @@ class TestCompileTransforms(TestCase):
     # torch.compile is not supported on Windows CUDA.
     # Triton only supports GPU with SM70 or later.
     @expectedFailureIf((IS_WINDOWS and TEST_CUDA) or (TEST_CUDA and not SM70OrLater))
+    @unittest.skipIf(
+        TEST_CUDA_MEM_LEAK_CHECK,
+        "Leaking memory, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_compile_vmap_hessian(self, device):
         # The model and inputs are a smaller version
         # of code at benchmark repo:
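
TEST_CUDA_MEM_LEAK_CHECK is the common_utils flag that is only true in the CUDA memory-leak-check CI jobs, so these linearize/compile tests keep running everywhere else. Assuming the flag is driven by the PYTORCH_TEST_CUDA_MEM_LEAK_CHECK environment variable (as in common_utils), the skip condition can be checked locally with something like:

    import os

    # Assumed env var name; set it before the torch test utilities are imported,
    # since the flag is computed at import time.
    os.environ.setdefault("PYTORCH_TEST_CUDA_MEM_LEAK_CHECK", "1")

    from torch.testing._internal.common_utils import TEST_CUDA_MEM_LEAK_CHECK

    print(TEST_CUDA_MEM_LEAK_CHECK)  # True -> the decorated tests above are skipped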


@@ -4149,6 +4149,10 @@ class CPUReproTests(TestCase):
                 "__at_align__ std::array", 0, exactly=True
             ).run(code)
 
+    @unittest.skipIf(
+        os.getenv("ATEN_CPU_CAPABILITY") == "default",
+        "Failing in periodic nogpu_NO_AVX2, see #150059 for example",
+    )
     def test_group_norm_large_input(self):
         class M(torch.nn.Module):
             def __init__(self) -> None:
@@ -4178,6 +4182,10 @@ class CPUReproTests(TestCase):
         # check for parallel reduction.
         self.assertEqual(metrics.parallel_reduction_count, 1)
 
+    @unittest.skipIf(
+        os.getenv("ATEN_CPU_CAPABILITY") == "default",
+        "Failing in periodic nogpu_NO_AVX2, see #150059 for example",
+    )
     def test_group_norm_large_size(self):
         # https://github.com/pytorch/pytorch/issues/141541
         # We are using the chunk size of 4096 for cascade summation,
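
The two group_norm tests are only skipped when ATEN_CPU_CAPABILITY=default, which the periodic nogpu_NO_AVX2 job is assumed to export to force ATen onto its scalar (non-vectorized) CPU kernels. A small sketch of what that setting does, under that assumption:

    import os

    # Assumed to mirror the nogpu_NO_AVX2 job; must be set before torch initializes.
    os.environ["ATEN_CPU_CAPABILITY"] = "default"

    import torch

    # Reports the capability ATen actually dispatches to; with the override above
    # it should say DEFAULT even on AVX2/AVX512-capable hardware.
    print(torch.backends.cpu.get_cpu_capability())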


@@ -2169,6 +2169,10 @@ assert KinetoStepTracker.current_step() == initial_step + 2 * niters
     @skipIfTorchDynamo("profiler gets ignored if dynamo activated")
     @unittest.skipIf(not torch.cuda.is_available(), "CUDA is required")
     @unittest.skipIf(not kineto_available(), "Kineto is required")
+    @unittest.skipIf(
+        "RelWithAssert" in torch.__config__.show(),
+        "failing in debug build, see https://github.com/pytorch/pytorch/pull/150059 for example",
+    )
     def test_profile_all_threads(self):
         profiling_started = threading.Event()
         profiling_ended = threading.Event()
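
torch.__config__.show() returns a human-readable summary of how the wheel was built, so the substring test above is a cheap way to detect the RelWithAssert (debug-assert) CI builds without adding a dedicated flag:

    import torch

    # True only on builds whose config string mentions RelWithAssert,
    # i.e. the debug/assert CI builds the profiler test fails on.
    print("RelWithAssert" in torch.__config__.show())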


@@ -10,6 +10,8 @@ from torch.testing._internal.common_utils import NoTest, run_tests, TEST_MPS, Te
 
 if not torch.accelerator.is_available():
     print("No available accelerator detected, skipping tests", file=sys.stderr)
     TestCase = NoTest  # noqa: F811
+    # Skip because failing when run on cuda build with no GPU, see #150059 for example
+    sys.exit()
 
 TEST_MULTIACCELERATOR = torch.accelerator.device_count() > 1


@@ -1,5 +1,6 @@
 # Owner(s): ["module: __torch_function__"]
+import sys
 
 import torch
 import numpy as np
 import inspect
@@ -9,6 +10,7 @@ import pickle
 import collections
 import unittest
 import contextlib
+import os
 
 from torch.testing._internal.common_utils import TestCase, run_tests, TEST_WITH_CROSSREF, TEST_WITH_TORCHDYNAMO
 from torch.overrides import (
@@ -29,6 +31,14 @@ from torch.utils._pytree import tree_map
 
 Tensor = torch.Tensor
 
+if os.getenv("ATEN_CPU_CAPABILITY") in ("default", "avx2"):
+    # This test is not supported on ARM
+    print(
+        "Skipping due to failing when cuda build runs on non cuda machine, "
+        + "see https://github.com/pytorch/pytorch/pull/150059 for example"
+    )
+    sys.exit()
+
 # The functions below simulate the pure-python torch functions in the
 # torch.functional namespace. We use examples local to this file rather
 # than any of the real examples implemented in Python since in the