Mirror of https://github.com/pytorch/pytorch.git, synced 2025-10-20 21:14:14 +08:00
[CI] Fix XPU CI failure (#138548)
# Motivation
Fix https://github.com/pytorch/pytorch/issues/138577.

# Solution
1. All UTs in `test/inductor/test_compiled_optimizers.py` are fixed by https://github.com/pytorch/pytorch/pull/134170.
2. The UT in `test/inductor/test_pattern_matcher.py` was introduced by https://github.com/pytorch/pytorch/pull/138089; we skip it because the feature `max_autotune_gemm_backends:Triton` is not supported on XPU.
3. We have a new implementation of `histc`, so we remove its expected failure from `test/inductor/test_torchinductor_opinfo.py`.
4. We now support `avg_pool3d` for the `fp16` data type, so we remove its expected failure from `test/inductor/test_torchinductor_opinfo.py`.
5. CUDA-biased code was introduced by https://github.com/pytorch/pytorch/issues/138472; we generalize it to `GPU_TYPE`.

# Additional Context
> Why update the torch-xpu-ops commit pin here?

We have to update the commit pin to avoid the build failure caused by the [C10_UNUSED](https://github.com/pytorch/pytorch/pull/138364) code change.

> What does the torch-xpu-ops update bring?

1. Add some foreach ops, such as the unary ops and `foreach_clamp_max`.
2. Add forward and backward for some pooling ops, such as `avg_pool3d` and `max_pool3d`.
3. Add some other ops, such as `log_normal_`, `index_copy`, and `mode`.
4. Fix the build failure related to `C10_UNUSED`.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/138548
Approved by: https://github.com/malfet, https://github.com/EikanWang
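As a companion to Solution step 2, here is a minimal sketch (not the PR's actual test) of how an inductor unit test can be skipped on XPU while pinning the autotune GEMM backend. `skipIfXpu` is assumed to come from `torch.testing._internal.common_utils`, and `ExampleAutotuneTest` is an illustrative name, not a test in the repository.

```python
# Minimal sketch: skip a single unit test on XPU, mirroring the pattern-matcher UT.
import torch
import torch._inductor.config as inductor_config
from torch.testing._internal.common_utils import TestCase, run_tests, skipIfXpu


class ExampleAutotuneTest(TestCase):
    @skipIfXpu  # max_autotune_gemm_backends:Triton is not supported on XPU yet
    @inductor_config.patch({"max_autotune_gemm_backends": "TRITON"})
    def test_triton_only_autotune_backend(self):
        # Placeholder body; the real UT lives in test/inductor/test_pattern_matcher.py.
        compiled_mm = torch.compile(lambda a, b: a @ b)
        a = torch.randn(32, 32)
        b = torch.randn(32, 32)
        self.assertEqual(compiled_mm(a, b).shape, (32, 32))


if __name__ == "__main__":
    run_tests()
```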
Committed by: PyTorch MergeBot
Parent commit: dbf0fa811a
Commit: 0efa590d43
test/inductor/test_compiled_optimizers.py

@@ -121,6 +121,7 @@ KERNEL_COUNT_OVERRIDES = {
"test_adamw_amsgrad_capturable_cuda": 6,
"test_adamw_amsgrad_capturable_xpu": 6,
"test_adamw_tensor_lr_tensor_betas_amsgrad_capturable_cuda": 6,
"test_adamw_tensor_lr_tensor_betas_amsgrad_capturable_xpu": 6,
"test_adamw_tensor_lr_amsgrad_capturable_cuda": 6,
"test_adamw_tensor_lr_amsgrad_capturable_xpu": 6,
"test_adam_tensor_lr_amsgrad_capturable_cuda": 6,
@@ -153,7 +154,6 @@ KERNEL_COUNT_OVERRIDES = {
"test_sgd_cuda": 4,
"test_sgd_cpu": 4,
"test_sgd_xpu": 4,
"test_rmsprop_tensor_lr_capturable_foreach_xpu": 4,
"test_adagrad_initial_accumulator_value_weight_decay_foreach_xpu": 2,
"test_adagrad_lr_decay_weight_decay_foreach_xpu": 2,
"test_adagrad_weight_decay_foreach_xpu": 2,
@@ -167,14 +167,11 @@ KERNEL_COUNT_OVERRIDES = {
"test_asgd_tensor_lr_weight_decay_maximize_capturable_xpu": 8,
"test_nadam_tensor_lr_weight_decay_momentum_decay_decoupled_weight_decay_capturable_cuda": 6,
"test_nadam_tensor_lr_weight_decay_momentum_decay_decoupled_weight_decay_capturable_xpu": 9,
"test_nadam_tensor_lr_weight_decay_momentum_decay_decoupled_weight_decay_capturable_foreach_xpu": 3,
"test_radam_tensor_lr_capturable_weight_decay_decoupled_weight_decay_cuda": 6,
"test_radam_tensor_lr_capturable_weight_decay_decoupled_weight_decay_xpu": 6,
"test_radam_tensor_lr_capturable_weight_decay_decoupled_weight_decay_foreach_xpu": 3,
"test_sgd_tensor_lr_cpu": 2,
"test_sgd_tensor_lr_cuda": 2,
"test_sgd_tensor_lr_xpu": 2,
"test_sgd_tensor_lr_foreach_xpu": 2,
}

# also tracks currently supported optimizers
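For context, a rough sketch (not the actual test harness) of how a per-test table like `KERNEL_COUNT_OVERRIDES` might be consulted; `expected_kernel_count` and `default_count` are made-up names for this illustration, and the dict below is a tiny subset of the real one.

```python
# Illustrative subset of the override table: test name -> expected kernel count.
KERNEL_COUNT_OVERRIDES = {
    "test_adamw_amsgrad_capturable_xpu": 6,
    "test_sgd_xpu": 4,
}


def expected_kernel_count(test_name: str, default_count: int) -> int:
    """Return the per-test override if one exists, else the default count."""
    return KERNEL_COUNT_OVERRIDES.get(test_name, default_count)


# Usage: overridden tests get their own count, everything else the default.
assert expected_kernel_count("test_sgd_xpu", 5) == 4
assert expected_kernel_count("test_some_other_optimizer", 5) == 5
```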
test/inductor/test_pattern_matcher.py

@@ -1234,6 +1234,7 @@ class TestPatternMatcher(TestCase):
# of search_fn).
self.assertTrue(pattern.pattern_eq(search_fn_pattern))

+ @skipIfXpu
@inductor_config.patch(
{
"triton.unique_kernel_names": "original_aten",
test/inductor/test_torchinductor_opinfo.py

@@ -351,11 +351,9 @@ inductor_expected_failures_single_sample["xpu"] = {
"nn.functional.conv_transpose3d": {f32, f64},
# rrelu not supported on XPU now
"nn.functional.rrelu": {f16, f32, f64},
- "histc": {i32, i64},
# not implemented for 'Half'
"nn.functional.multilabel_margin_loss": {f16},
"nn.functional.multi_margin_loss": {f16},
- "nn.functional.avg_pool3d": {f16},
}
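As a rough sanity check of why these two expected failures can be dropped (a new `histc` implementation and fp16 `avg_pool3d` support on XPU), the hedged snippet below compares compiled and eager results on an XPU build. It is illustrative only, not the opinfo test itself, and assumes the newly added XPU kernels are present.

```python
# Rough check: with the entries above removed, histc and fp16 avg_pool3d are
# expected to match eager under torch.compile on the XPU device.
import torch
import torch.nn.functional as F

if hasattr(torch, "xpu") and torch.xpu.is_available():
    device = "xpu"

    compiled_histc = torch.compile(lambda t: torch.histc(t, bins=10, min=-3, max=3))
    x = torch.randn(1024, device=device)
    torch.testing.assert_close(
        compiled_histc(x), torch.histc(x, bins=10, min=-3, max=3)
    )

    compiled_pool = torch.compile(lambda t: F.avg_pool3d(t, kernel_size=2))
    y = torch.randn(2, 3, 8, 8, 8, device=device, dtype=torch.float16)
    torch.testing.assert_close(compiled_pool(y), F.avg_pool3d(y, kernel_size=2))
```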
@@ -2158,7 +2158,7 @@ def forward(self, arg0_1, arg1_1):
n_elements = out.numel()
sin_kernel[(n_elements,)](x, out, n_elements)

- x = torch.randn(65, device="cuda")
+ x = torch.randn(65, device=GPU_TYPE)
out = torch.empty_like(x)
out_compiled = torch.empty_like(x)
sin_triton_compiled = torch.compile(fullgraph=True)(sin_triton)
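For reference, a minimal sketch of the `GPU_TYPE` generalization applied above. The constant is assumed to come from `torch.testing._internal.inductor_utils`, where it resolves to the available GPU backend (`"cuda"` or `"xpu"`), so the same test body can run on either; the `torch.sin` call below is only a stand-in for the Triton `sin_kernel` in the hunk.

```python
# Device-agnostic test pattern: use GPU_TYPE instead of hard-coding "cuda".
import torch
from torch.testing._internal.inductor_utils import GPU_TYPE, HAS_GPU

if HAS_GPU:
    x = torch.randn(65, device=GPU_TYPE)  # was device="cuda" before this PR
    out = torch.empty_like(x)
    torch.sin(x, out=out)  # stand-in for sin_kernel[(n_elements,)](x, out, n_elements)
```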
third_party/xpu.txt (vendored, 2 lines changed)

@@ -1 +1 @@
- 1d217ae491669b550b136ca16e91b85c4597cd66
+ b3d5d78c72eadc5140aef1f8e06844385e9a2d45