[CI] Fix XPU CI failure (#138548)

# Motivation
Fix https://github.com/pytorch/pytorch/issues/138577.

# Solution
1. All UTs in `test/inductor/test_compiled_optimizers.py` are fixed by https://github.com/pytorch/pytorch/pull/134170
2. The UT in `test/inductor/test_pattern_matcher.py` was introduced by https://github.com/pytorch/pytorch/pull/138089; we skip this UT because the feature `max_autotune_gemm_backends:Triton` is unsupported on XPU.
3. We have a new impl related to `histc`, so we remove the expected failure from `test/inductor/test_torchinductor_opinfo.py`
4. We support `avg_pool3d` for `fp16` data type, so we remove the expected failure from `test/inductor/test_torchinductor_opinfo.py`
5. CUDA-biased code was introduced by https://github.com/pytorch/pytorch/issues/138472; we generalize it to `GPU_TYPE`.

# Additional Context
> Why update torch-xpu-ops commit pin here?

We have to update commit pin to avoid the build failure raised by the code change [C10_UNUSED](https://github.com/pytorch/pytorch/pull/138364).

> What features does the torch-xpu-ops update bring in?

1. Add some foreach ops, such as the foreach unary ops and `foreach_clamp_max`;
2. Add forward and backward for some pooling ops, such as `avg_pool3d` and `max_pool3d`;
3. Add some other ops, like `log_normal_`, `index_copy`, and `mode` etc;
4. fix build failure related to `C10_UNUSED`;

Pull Request resolved: https://github.com/pytorch/pytorch/pull/138548
Approved by: https://github.com/malfet, https://github.com/EikanWang
This commit is contained in:
Yu, Guangye
2024-10-24 11:39:01 +00:00
committed by PyTorch MergeBot
parent dbf0fa811a
commit 0efa590d43
5 changed files with 4 additions and 8 deletions

View File

@ -121,6 +121,7 @@ KERNEL_COUNT_OVERRIDES = {
"test_adamw_amsgrad_capturable_cuda": 6,
"test_adamw_amsgrad_capturable_xpu": 6,
"test_adamw_tensor_lr_tensor_betas_amsgrad_capturable_cuda": 6,
"test_adamw_tensor_lr_tensor_betas_amsgrad_capturable_xpu": 6,
"test_adamw_tensor_lr_amsgrad_capturable_cuda": 6,
"test_adamw_tensor_lr_amsgrad_capturable_xpu": 6,
"test_adam_tensor_lr_amsgrad_capturable_cuda": 6,
@ -153,7 +154,6 @@ KERNEL_COUNT_OVERRIDES = {
"test_sgd_cuda": 4,
"test_sgd_cpu": 4,
"test_sgd_xpu": 4,
"test_rmsprop_tensor_lr_capturable_foreach_xpu": 4,
"test_adagrad_initial_accumulator_value_weight_decay_foreach_xpu": 2,
"test_adagrad_lr_decay_weight_decay_foreach_xpu": 2,
"test_adagrad_weight_decay_foreach_xpu": 2,
@ -167,14 +167,11 @@ KERNEL_COUNT_OVERRIDES = {
"test_asgd_tensor_lr_weight_decay_maximize_capturable_xpu": 8,
"test_nadam_tensor_lr_weight_decay_momentum_decay_decoupled_weight_decay_capturable_cuda": 6,
"test_nadam_tensor_lr_weight_decay_momentum_decay_decoupled_weight_decay_capturable_xpu": 9,
"test_nadam_tensor_lr_weight_decay_momentum_decay_decoupled_weight_decay_capturable_foreach_xpu": 3,
"test_radam_tensor_lr_capturable_weight_decay_decoupled_weight_decay_cuda": 6,
"test_radam_tensor_lr_capturable_weight_decay_decoupled_weight_decay_xpu": 6,
"test_radam_tensor_lr_capturable_weight_decay_decoupled_weight_decay_foreach_xpu": 3,
"test_sgd_tensor_lr_cpu": 2,
"test_sgd_tensor_lr_cuda": 2,
"test_sgd_tensor_lr_xpu": 2,
"test_sgd_tensor_lr_foreach_xpu": 2,
}
# also tracks currently supported optimizers

View File

@ -1234,6 +1234,7 @@ class TestPatternMatcher(TestCase):
# of search_fn).
self.assertTrue(pattern.pattern_eq(search_fn_pattern))
@skipIfXpu
@inductor_config.patch(
{
"triton.unique_kernel_names": "original_aten",

View File

@ -351,11 +351,9 @@ inductor_expected_failures_single_sample["xpu"] = {
"nn.functional.conv_transpose3d": {f32, f64},
# rrelu not supported on XPU now
"nn.functional.rrelu": {f16, f32, f64},
"histc": {i32, i64},
# not implemented for 'Half'
"nn.functional.multilabel_margin_loss": {f16},
"nn.functional.multi_margin_loss": {f16},
"nn.functional.avg_pool3d": {f16},
}

View File

@ -2158,7 +2158,7 @@ def forward(self, arg0_1, arg1_1):
n_elements = out.numel()
sin_kernel[(n_elements,)](x, out, n_elements)
x = torch.randn(65, device="cuda")
x = torch.randn(65, device=GPU_TYPE)
out = torch.empty_like(x)
out_compiled = torch.empty_like(x)
sin_triton_compiled = torch.compile(fullgraph=True)(sin_triton)

2
third_party/xpu.txt vendored
View File

@ -1 +1 @@
1d217ae491669b550b136ca16e91b85c4597cd66
b3d5d78c72eadc5140aef1f8e06844385e9a2d45