From d5b1d99f78b497ea615eb043d7df035ff8009acf Mon Sep 17 00:00:00 2001 From: Aleksei Nikiforov Date: Tue, 18 Mar 2025 16:09:35 +0000 Subject: [PATCH] Enable more nightly tests on s390x (#148452) Also enable some tests which probably were accidentally disabled. Pull Request resolved: https://github.com/pytorch/pytorch/pull/148452 Approved by: https://github.com/seemethere, https://github.com/malfet --- .ci/docker/requirements-ci.txt | 5 +- .github/scripts/s390x-ci/tests_list.py | 97 ++++++ test/dynamo/test_logging.py | 3 + test/export/test_converter.py | 6 +- test/functorch/test_aotdispatch.py | 4 - test/functorch/test_ops.py | 7 - test/inductor/test_torchinductor.py | 7 +- test/run_test.py | 308 +++++++++++++++--- test/test_dataloader.py | 4 + test/test_ops_gradients.py | 2 - test/test_optim.py | 2 - torch/onnx/_internal/onnxruntime.py | 2 + .../_internal/common_methods_invocations.py | 3 +- 13 files changed, 391 insertions(+), 59 deletions(-) create mode 100755 .github/scripts/s390x-ci/tests_list.py diff --git a/.ci/docker/requirements-ci.txt b/.ci/docker/requirements-ci.txt index d0680f687794..80d7f97b1d89 100644 --- a/.ci/docker/requirements-ci.txt +++ b/.ci/docker/requirements-ci.txt @@ -41,11 +41,14 @@ fbscribelogger==0.1.7 #Pinned versions: 0.1.6 #test that import: -flatbuffers==2.0 +flatbuffers==2.0 ; platform_machine != "s390x" #Description: cross platform serialization library #Pinned versions: 2.0 #test that import: +flatbuffers ; platform_machine == "s390x" +#Description: cross platform serialization library; Newer version is required on s390x for new python version + hypothesis==5.35.1 # Pin hypothesis to avoid flakiness: https://github.com/pytorch/pytorch/issues/31136 #Description: advanced library for generating parametrized tests diff --git a/.github/scripts/s390x-ci/tests_list.py b/.github/scripts/s390x-ci/tests_list.py new file mode 100755 index 000000000000..18e78f40e5a3 --- /dev/null +++ b/.github/scripts/s390x-ci/tests_list.py @@ -0,0 +1,97 @@ +#!/usr/bin/env python3 + +import os +import re +import sys + + +sys.path.insert(1, os.path.join(sys.path[0], "..", "..", "..")) + +from tools.testing.discover_tests import TESTS + + +skip_list = [ + # these tests fail due to various reasons + "dynamo/test_misc", + "inductor/test_aot_inductor", + "inductor/test_cpu_repro", + "inductor/test_cpu_select_algorithm", + "inductor/test_aot_inductor_arrayref", + "inductor/test_torchinductor_codegen_dynamic_shapes", + "lazy/test_meta_kernel", + "onnx/test_utility_funs", + "profiler/test_profiler", + "test_ao_sparsity", + "test_cpp_extensions_open_device_registration", + "test_jit", + "test_metal", + "test_mps", + "dynamo/test_torchrec", + "inductor/test_aot_inductor_utils", + "inductor/test_coordinate_descent_tuner", + "test_jiterator", + # these tests run long and fail in addition to that + "dynamo/test_dynamic_shapes", + "test_quantization", + "inductor/test_torchinductor", + "inductor/test_torchinductor_dynamic_shapes", + "inductor/test_torchinductor_opinfo", + "test_binary_ufuncs", + "test_unary_ufuncs", + # these tests fail when cuda is not available + "inductor/test_cudacodecache", + "inductor/test_inductor_utils", + "inductor/test_inplacing_pass", + "inductor/test_kernel_benchmark", + "inductor/test_max_autotune", + "inductor/test_move_constructors_to_cuda", + "inductor/test_multi_kernel", + "inductor/test_pattern_matcher", + "inductor/test_perf", + "inductor/test_select_algorithm", + "inductor/test_snode_runtime", + "inductor/test_triton_wrapper", + # these tests fail when mkldnn is not available + "inductor/test_custom_post_grad_passes", + "inductor/test_mkldnn_pattern_matcher", + # lacks quantization support + "onnx/test_models_quantized_onnxruntime", + "onnx/test_pytorch_onnx_onnxruntime", + # https://github.com/pytorch/pytorch/issues/102078 + "test_decomp", + # https://github.com/pytorch/pytorch/issues/146698 + "test_model_exports_to_core_aten", + # runs very long, skip for now + "inductor/test_layout_optim", + "test_fx", + # some false errors + "doctests", +] + +skip_list_regex = [ + # distributed tests fail randomly + "distributed/.*", +] + +all_testfiles = sorted(TESTS) + +filtered_testfiles = [] + +for filename in all_testfiles: + if filename in skip_list: + continue + + regex_filtered = False + + for regex_string in skip_list_regex: + if re.fullmatch(regex_string, filename): + regex_filtered = True + break + + if regex_filtered: + continue + + filtered_testfiles.append(filename) + +for filename in filtered_testfiles: + print(' "' + filename + '",') diff --git a/test/dynamo/test_logging.py b/test/dynamo/test_logging.py index f97ef5afaed5..43e5f3308a31 100644 --- a/test/dynamo/test_logging.py +++ b/test/dynamo/test_logging.py @@ -23,6 +23,7 @@ from torch.testing._internal.common_utils import ( find_free_port, munge_exc, skipIfTorchDynamo, + xfailIfS390X, ) from torch.testing._internal.inductor_utils import HAS_CUDA from torch.testing._internal.logging_utils import ( @@ -817,6 +818,8 @@ TRACE FX call mul from test_logging.py:N in fn (LoggingTests.test_trace_call_pre len([r for r in records if "return a + 1" in r.getMessage()]), 0 ) + # there are some additional deprecation warnings in stderr, probably due to newer dependencies used on s390x + @xfailIfS390X def test_logs_out(self): import tempfile diff --git a/test/export/test_converter.py b/test/export/test_converter.py index 83d8aa4e081e..953246be7a7b 100644 --- a/test/export/test_converter.py +++ b/test/export/test_converter.py @@ -10,7 +10,7 @@ from torch._dynamo.test_case import TestCase from torch._export.converter import TS2EPConverter from torch.export import ExportedProgram from torch.testing._internal.common_quantized import override_quantized_engine -from torch.testing._internal.common_utils import IS_WINDOWS, run_tests +from torch.testing._internal.common_utils import IS_WINDOWS, run_tests, xfailIfS390X from torch.testing._internal.torchbind_impls import ( _empty_tensor_queue, init_torchbind_implementations, @@ -1403,6 +1403,8 @@ class TestConverter(TestCase): IS_WINDOWS, "Windows does not support qnnpack", ) + # qnnpack not supported on s390x + @xfailIfS390X def test_ts2ep_convert_quantized_model(self): class Standalone(torch.nn.Module): def __init__(self): @@ -1446,6 +1448,8 @@ class TestConverter(TestCase): ep_out, _ = pytree.tree_flatten(ep.module()(*inp)) self._check_tensor_list_equal(orig_out, ep_out) + # qnnpack not supported on s390x + @xfailIfS390X def test_ts2ep_convert_quantized_model_with_opcontext(self): class M(torch.nn.Module): def __init__(self, linear_op): diff --git a/test/functorch/test_aotdispatch.py b/test/functorch/test_aotdispatch.py index bcf99f6a6635..0b9347b53919 100644 --- a/test/functorch/test_aotdispatch.py +++ b/test/functorch/test_aotdispatch.py @@ -74,7 +74,6 @@ from torch.testing._internal.common_utils import ( skipIfRocm, TestCase, xfail_inherited_tests, - xfailIfS390X, xfailIfTorchDynamo, ) from torch.testing._internal.custom_tensor import ConstantExtraMetadataTensor @@ -6681,7 +6680,6 @@ class TestEagerFusionOpInfo(AOTTestCase): def test_aot_autograd_exhaustive(self, device, dtype, op): _test_aot_autograd_helper(self, device, dtype, op) - @xfailIfS390X @ops(op_db + hop_db, allowed_dtypes=(torch.float,)) @patch("functorch.compile.config.debug_assert", True) @skipOps( @@ -6728,13 +6726,11 @@ symbolic_aot_autograd_module_failures = { class TestEagerFusionModuleInfo(AOTTestCase): - @xfailIfS390X @modules(module_db, allowed_dtypes=(torch.float,)) @decorateForModules(unittest.expectedFailure, aot_autograd_module_failures) def test_aot_autograd_module_exhaustive(self, device, dtype, training, module_info): _test_aot_autograd_module_helper(self, device, dtype, training, module_info) - @xfailIfS390X @modules(module_db, allowed_dtypes=(torch.float,)) @decorateForModules( unittest.expectedFailure, diff --git a/test/functorch/test_ops.py b/test/functorch/test_ops.py index 145905f08f22..8a0bf6ad40f5 100644 --- a/test/functorch/test_ops.py +++ b/test/functorch/test_ops.py @@ -55,7 +55,6 @@ from torch.testing._internal.common_utils import ( TEST_WITH_ROCM, TestCase, unMarkDynamoStrictTest, - xfailIfS390X, ) from torch.testing._internal.opinfo.core import SampleInput from torch.utils import _pytree as pytree @@ -1031,12 +1030,6 @@ class TestOperators(TestCase): xfail( "unbind_copy" ), # Batching rule not implemented for aten::unbind_copy.int. - decorate("linalg.tensorsolve", decorator=xfailIfS390X), - decorate("nn.functional.max_pool1d", decorator=xfailIfS390X), - decorate("nn.functional.max_unpool2d", decorator=xfailIfS390X), - decorate( - "nn.functional.multilabel_margin_loss", decorator=xfailIfS390X - ), } ), ) diff --git a/test/inductor/test_torchinductor.py b/test/inductor/test_torchinductor.py index 6eb8da343357..fde903c9fd42 100644 --- a/test/inductor/test_torchinductor.py +++ b/test/inductor/test_torchinductor.py @@ -1686,6 +1686,7 @@ class CommonTemplate: self.common(fn, (torch.randn(1024),)) + @xfailIfS390X @config.patch(debug_index_asserts=False) @config.patch("cpp.enable_tiling_heuristics", False) def test_neg_index(self): @@ -1995,7 +1996,6 @@ class CommonTemplate: @skip_if_gpu_halide @skipCPUIf(IS_MACOS, "fails on macos") - @xfailIfS390X def test_multilayer_var(self): def fn(a): return torch.var(a) @@ -2015,7 +2015,7 @@ class CommonTemplate: @skipCPUIf(IS_MACOS, "fails on macos") @skip_if_halide # accuracy 4.7% off - @xfailIfS390X + @xfailIfS390X # accuracy failure def test_multilayer_var_lowp(self): def fn(a): return torch.var(a) @@ -9695,7 +9695,6 @@ class CommonTemplate: "TODO: debug this with asan", ) @skip_if_gpu_halide - @xfailIfS390X def test_tmp_not_defined_issue2(self): def forward(arg38_1, arg81_1, getitem_17, new_zeros_default_4): div_tensor_7 = torch.ops.aten.div.Tensor(getitem_17, arg81_1) @@ -10904,8 +10903,8 @@ class CommonTemplate: # Calling div only torch.SymInt arguments is not yet supported. # To support this behavior, we need to allow const-propping tensors that store symint data. # For now, dynamo will explicitly graph break when it encounters user code with this behavior. - @xfailIfS390X @expectedFailureCodegenDynamic + @xfailIfS390X @skip_if_gpu_halide # accuracy error def test_AllenaiLongformerBase_repro(self): def fn(query, scores, window_overlap): diff --git a/test/run_test.py b/test/run_test.py index 974e54004519..b342ffa6f531 100755 --- a/test/run_test.py +++ b/test/run_test.py @@ -189,30 +189,130 @@ ROCM_BLOCKLIST = [ # whitelist of tests for s390x S390X_TESTLIST = [ - "backends/xeon/test_launch.py", - "benchmark_utils/test_benchmark_utils.py", - "cpp/apply_utils_test", - "cpp/atest", - "cpp/basic", - "cpp/broadcast_test", - "cpp/cpu_generator_test", + "backends/xeon/test_launch", + "benchmark_utils/test_benchmark_utils", + "cpp/BackoffTest", + "cpp/CppSignature_test", "cpp/Dict_test", "cpp/Dimname_test", + "cpp/FileStoreTest", + "cpp/HashStoreTest", + "cpp/IListRef_test", + "cpp/KernelFunction_test", + "cpp/List_test", + "cpp/MaybeOwned_test", + "cpp/NamedTensor_test", + "cpp/ProcessGroupGlooTest", + "cpp/StorageUtils_test", + "cpp/TCPStoreTest", + "cpp/apply_utils_test", + "cpp/atest", + "cpp/backend_fallback_test", + "cpp/basic", + "cpp/broadcast_test", + "cpp/c10_ArrayRef_test", + "cpp/c10_Bitset_test", + "cpp/c10_CompileTimeFunctionPointer_test", + "cpp/c10_ConstexprCrc_test", + "cpp/c10_DeadlockDetection_test", + "cpp/c10_DeviceGuard_test", + "cpp/c10_Device_test", + "cpp/c10_DispatchKeySet_test", + "cpp/c10_Half_test", + "cpp/c10_InlineDeviceGuard_test", + "cpp/c10_InlineStreamGuard_test", + "cpp/c10_LeftRight_test", + "cpp/c10_Metaprogramming_test", + "cpp/c10_NetworkFlow_test", + "cpp/c10_Scalar_test", + "cpp/c10_SizesAndStrides_test", + "cpp/c10_StreamGuard_test", + "cpp/c10_SymInt_test", + "cpp/c10_Synchronized_test", + "cpp/c10_ThreadLocal_test", + "cpp/c10_TypeIndex_test", + "cpp/c10_TypeList_test", + "cpp/c10_TypeTraits_test", + "cpp/c10_accumulate_test", + "cpp/c10_bfloat16_test", + "cpp/c10_bit_cast_test", + "cpp/c10_complex_math_test", + "cpp/c10_complex_test", + "cpp/c10_cow_test", + "cpp/c10_error_test", + "cpp/c10_exception_test", + "cpp/c10_flags_test", + "cpp/c10_generic_math_test", + "cpp/c10_intrusive_ptr_test", + "cpp/c10_irange_test", + "cpp/c10_lazy_test", + "cpp/c10_logging_test", + "cpp/c10_optional_test", + "cpp/c10_ordered_preserving_dict_test", + "cpp/c10_registry_test", + "cpp/c10_small_vector_test", + "cpp/c10_ssize_test", + "cpp/c10_string_util_test", + "cpp/c10_string_view_test", + "cpp/c10_tempfile_test", + "cpp/c10_typeid_test", + "cpp/cpu_allocator_test", + "cpp/cpu_generator_test", + "cpp/cpu_profiling_allocator_test", + "cpp/cpu_rng_test", + "cpp/dispatch_key_set_test", "cpp/dlconvertor_test", "cpp/extension_backend_test", + "cpp/half_test", + "cpp/inline_container_test", + "cpp/ivalue_test", + "cpp/kernel_function_legacy_test", + "cpp/kernel_function_test", + "cpp/kernel_lambda_legacy_test", + "cpp/kernel_lambda_test", + "cpp/kernel_stackbased_test", "cpp/lazy_tensor_test", "cpp/legacy_vmap_test", - "cpp/NamedTensor_test", + "cpp/make_boxed_from_unboxed_functor_test", + "cpp/math_kernel_test", + "cpp/memory_format_test", + "cpp/memory_overlapping_test", + "cpp/mobile_memory_cleanup", "cpp/native_test", + "cpp/op_allowlist_test", + "cpp/op_registration_test", + "cpp/operator_name_test", "cpp/operators_test", + "cpp/packedtensoraccessor_test", + "cpp/pow_test", + "cpp/protoc-3", + "cpp/quantized_test", + "cpp/reduce_ops_test", + "cpp/reportMemoryUsage_test", "cpp/scalar_tensor_test", "cpp/scalar_test", + "cpp/stride_properties_test", "cpp/tensor_iterator_test", "cpp/test_api", + "cpp/test_cpp_rpc", + "cpp/test_dist_autograd", + "cpp/test_edge_op_registration", + "cpp/test_jit", + "cpp/test_lazy", + "cpp/test_parallel", + "cpp/test_tensorexpr", + "cpp/thread_init_test", + "cpp/type_ptr_test", + "cpp/type_test", "cpp/undefined_tensor_test", + "cpp/vec_test_all_types_DEFAULT", + "cpp/verify_api_visibility", + "cpp/weakref_test", "cpp/wrapdim_test", + "cpp/xla_tensor_test", + "cpp_extensions/open_registration_extension/test/test_openreg", "distributions/test_constraints", - "doctests", + "distributions/test_distributions", "dynamo/test_activation_checkpointing", "dynamo/test_after_aot", "dynamo/test_aot_autograd", @@ -220,10 +320,12 @@ S390X_TESTLIST = [ "dynamo/test_autograd_function", "dynamo/test_backends", "dynamo/test_backward_higher_order_ops", + "dynamo/test_base_hop", "dynamo/test_base_output", "dynamo/test_bytecode_utils", "dynamo/test_callback", "dynamo/test_compile", + "dynamo/test_compiler_bisector", "dynamo/test_comptime", "dynamo/test_config", "dynamo/test_ctx_manager", @@ -232,53 +334,71 @@ S390X_TESTLIST = [ "dynamo/test_debug_utils", "dynamo/test_decorators", "dynamo/test_deviceguard", + "dynamo/test_dicts", + "dynamo/test_error_messages", + "dynamo/test_exc", + "dynamo/test_exceptions", "dynamo/test_export", "dynamo/test_export_mutations", + "dynamo/test_flat_apply", "dynamo/test_frame_init", + "dynamo/test_functions", "dynamo/test_fx_passes_pre_grad", + "dynamo/test_generator", "dynamo/test_global", + "dynamo/test_graph_deduplication", + "dynamo/test_graph_region_tracker", "dynamo/test_guard_manager", "dynamo/test_higher_order_ops", "dynamo/test_hooks", "dynamo/test_input_attr_tracking", "dynamo/test_interop", "dynamo/test_logging", + "dynamo/test_metrics_context", "dynamo/test_minifier", "dynamo/test_model_output", "dynamo/test_modes", "dynamo/test_modules", "dynamo/test_nops", "dynamo/test_optimizers", + "dynamo/test_pgo", "dynamo/test_pre_dispatch", "dynamo/test_profiler", "dynamo/test_python_autograd", - "dynamo/test_recompiles", + "dynamo/test_python_dispatcher", "dynamo/test_recompile_ux", + "dynamo/test_recompiles", "dynamo/test_reconstruct", "dynamo/test_reorder_logs", "dynamo/test_repros", "dynamo/test_resume", "dynamo/test_sdpa", + "dynamo/test_skip_guard_eval_unsafe", "dynamo/test_skip_non_tensor", "dynamo/test_sources", "dynamo/test_structured_trace", "dynamo/test_subclasses", "dynamo/test_subgraphs", - "dynamo/test_torchrec", + "dynamo/test_sys", + "dynamo/test_trace_rules", "dynamo/test_unspec", "dynamo/test_utils", "dynamo/test_verify_correctness", "dynamo/test_view", + "export/test_converter", + "export/test_cpp_serdes", "export/test_db", + "export/test_draft_export", "export/test_experimental", "export/test_export", + "export/test_export_legacy", "export/test_export_nonstrict", "export/test_export_training_ir_to_run_decomp", "export/test_functionalized_assertions", "export/test_hop", "export/test_lift_unlift", - "export/test_passes", "export/test_pass_infra", + "export/test_passes", "export/test_retraceability", "export/test_schema", "export/test_serdes", @@ -292,30 +412,49 @@ S390X_TESTLIST = [ "export/test_unflatten_training_ir", "export/test_verifier", "functorch/test_ac", + "functorch/test_ac_knapsack", + "functorch/test_ac_logging", + "functorch/test_aotdispatch", "functorch/test_control_flow", + "functorch/test_dims", "functorch/test_eager_transforms", "functorch/test_logging", + "functorch/test_memory_efficient_fusion", "functorch/test_minifier", - "higher_order_ops/test_with_effects.py", + "functorch/test_ops", + "functorch/test_parsing", + "functorch/test_rearrange", + "functorch/test_vmap", + "functorch/test_vmap_registrations", + "higher_order_ops/test_invoke_quant", + "higher_order_ops/test_invoke_subgraph", + "higher_order_ops/test_with_effects", + "inductor/test_aot_inductor_custom_ops", + "inductor/test_aot_inductor_package", + "inductor/test_async_compile", "inductor/test_auto_functionalize", "inductor/test_autoheuristic", "inductor/test_b2b_gemm", + "inductor/test_benchmark_fusion", "inductor/test_benchmarking", + "inductor/test_binary_folding", + "inductor/test_block_analysis", "inductor/test_ck_backend", "inductor/test_codecache", "inductor/test_codegen_triton", "inductor/test_combo_kernels", + "inductor/test_compile_subprocess", + "inductor/test_compile_worker", "inductor/test_compiled_autograd", "inductor/test_compiled_optimizers", - "inductor/test_compile_worker", "inductor/test_config", "inductor/test_control_flow", - "inductor/test_coordinate_descent_tuner", + "inductor/test_cooperative_reductions", "inductor/test_cpp_wrapper_hipify", "inductor/test_cpu_cpp_wrapper", + "inductor/test_cuda_repro", "inductor/test_cudagraph_trees", "inductor/test_cudagraph_trees_expandable_segments", - "inductor/test_cuda_repro", "inductor/test_custom_lowering", "inductor/test_cutlass_backend", "inductor/test_debug_trace", @@ -329,31 +468,48 @@ S390X_TESTLIST = [ "inductor/test_flex_decoding", "inductor/test_foreach", "inductor/test_fp8", + "inductor/test_fused_attention", + "inductor/test_fuzzer", "inductor/test_fx_fusion", + "inductor/test_gpu_cpp_wrapper", "inductor/test_graph_transform_observer", "inductor/test_group_batch_fusion", - "inductor/test_gpu_cpp_wrapper", "inductor/test_halide", "inductor/test_indexing", + "inductor/test_inductor_annotations", "inductor/test_inductor_freezing", + "inductor/test_inplace_padding", "inductor/test_loop_ordering", "inductor/test_memory", "inductor/test_memory_planning", "inductor/test_metrics", "inductor/test_minifier", "inductor/test_minifier_isolate", + "inductor/test_minifier_utils", "inductor/test_mmdecomp", - "inductor/test_padding", + "inductor/test_mps_basic", + "inductor/test_online_softmax", + "inductor/test_op_completeness", + "inductor/test_op_dtype_prop", + "inductor/test_ordered_set", "inductor/test_pad_mm", + "inductor/test_padding", "inductor/test_profiler", + "inductor/test_provenance_tracing", "inductor/test_scatter_optimization", "inductor/test_smoke", + "inductor/test_split_cat_fx_aten_passes", + "inductor/test_split_cat_fx_passes", "inductor/test_standalone_compile", "inductor/test_torchbind", + "inductor/test_torchinductor_codegen_config_overrides", + "inductor/test_torchinductor_strided_blocks", "inductor/test_triton_cpu_backend", "inductor/test_triton_extension_backend", "inductor/test_triton_heuristics", "inductor/test_triton_kernels", + "inductor/test_triton_syntax", + "inductor/test_unbacked_symints", "inductor/test_utils", "inductor/test_xpu_basic", "lazy/test_bindings", @@ -364,25 +520,59 @@ S390X_TESTLIST = [ "lazy/test_reuse_ir", "lazy/test_step_closures", "lazy/test_ts_opinfo", - "nn/test_convolution.py", - "nn/test_dropout.py", - "nn/test_embedding.py", - "nn/test_init.py", - "nn/test_lazy_modules.py", - "nn/test_load_state_dict.py", - "nn/test_module_hooks.py", - "nn/test_multihead_attention.py", - "nn/test_packed_sequence.py", - "nn/test_parametrization.py", - "nn/test_pooling.py", - "nn/test_pruning.py", + "nn/test_convolution", + "nn/test_dropout", + "nn/test_embedding", + "nn/test_init", + "nn/test_lazy_modules", + "nn/test_load_state_dict", + "nn/test_module_hooks", + "nn/test_multihead_attention", + "nn/test_packed_sequence", + "nn/test_parametrization", + "nn/test_pooling", + "nn/test_pruning", + "onnx/dynamo/test_dynamo_with_onnxruntime_backend", + "onnx/exporter/test_api", + "onnx/exporter/test_building", + "onnx/exporter/test_capture_strategies", + "onnx/exporter/test_core", + "onnx/exporter/test_dynamic_shapes", + "onnx/exporter/test_hf_models_e2e", + "onnx/exporter/test_ir_passes", + "onnx/exporter/test_small_models_e2e", + "onnx/exporter/test_tensors", + "onnx/exporter/test_verification", + "onnx/internal/test_diagnostics", + "onnx/internal/test_registraion", + "onnx/test_autograd_funs", + "onnx/test_custom_ops", + "onnx/test_fx_passes", + "onnx/test_fx_type_promotion", + "onnx/test_lazy_import", + "onnx/test_models_onnxruntime", + "onnx/test_onnx_opset", + "onnx/test_onnxscript_no_runtime", + "onnx/test_onnxscript_runtime", + "onnx/test_op_consistency", + "onnx/test_pytorch_jit_onnx", + "onnx/test_pytorch_onnx_no_runtime", + "onnx/test_pytorch_onnx_shape_inference", + "onnx/test_symbolic_helper", + "onnx/test_verification", + "onnx/torchlib/test_ops", "optim/test_lrscheduler", + "optim/test_optim", "optim/test_swa_utils", "profiler/test_cpp_thread", "profiler/test_execution_trace", + "profiler/test_kineto", "profiler/test_memory_profiler", + "profiler/test_profiler_tree", "profiler/test_record_function", "profiler/test_torch_tidy", + "test_accelerator", + "test_appending_byte_serializer", "test_autocast", "test_autograd", "test_autograd_fallback", @@ -390,6 +580,7 @@ S390X_TESTLIST = [ "test_autoload_disable", "test_autoload_enable", "test_bundled_inputs", + "test_ci_sanity_check_fail", "test_comparison_utils", "test_compile_benchmark_util", "test_complex", @@ -408,63 +599,102 @@ S390X_TESTLIST = [ "test_cuda_sanitizer", "test_cuda_trace", "test_custom_ops", + "test_dataloader", "test_datapipe", "test_deploy", "test_dispatch", "test_dlpack", "test_dynamic_shapes", "test_expanded_weights", + "test_extension_utils", "test_fake_tensor", "test_file_check", "test_flop_counter", + "test_foreach", + "test_function_schema", + "test_functional_autograd_benchmark", + "test_functional_optim", "test_functionalization", "test_functionalization_of_rng_ops", - "test_functional_optim", - "test_function_schema", "test_futures", + "test_fx_experimental", + "test_fx_passes", + "test_fx_reinplace_pass", + "test_hop_infra", "test_hub", "test_import_stats", "test_indexing", "test_itt", + "test_jit_autocast", + "test_jit_disabled", + "test_jit_fuser_legacy", + "test_jit_fuser_te", + "test_jit_legacy", + "test_jit_llga_fuser", + "test_jit_profiling", "test_legacy_vmap", + "test_license", + "test_linalg", "test_logging", "test_masked", "test_maskedtensor", "test_matmul_cuda", + "test_meta", + "test_mkl_verbose", "test_mkldnn", "test_mkldnn_fusion", "test_mkldnn_verbose", - "test_mkl_verbose", "test_mobile_optimizer", "test_module_tracker", + "test_modules", "test_monitor", + "test_multiprocessing", + "test_multiprocessing_spawn", + "test_namedtensor", "test_namedtuple_return_api", + "test_native_functions", "test_native_mha", "test_nestedtensor", + "test_nn", "test_numba_integration", "test_numpy_interop", "test_openmp", + "test_ops", + "test_ops_fwd_gradients", + "test_ops_gradients", + "test_ops_jit", + "test_optim", "test_out_dtype_op", "test_overrides", "test_package", "test_per_overload_api", "test_prims", + "test_proxy_tensor", "test_pruning_op", + "test_public_bindings", "test_python_dispatch", + "test_pytree", + "test_reductions", "test_scatter_gather_ops", + "test_schema_check", "test_segment_reductions", "test_serialization", "test_set_default_mobile_cpu_allocator", "test_shape_ops", "test_show_pickle", "test_sort_and_select", + "test_sparse", + "test_sparse_csr", + "test_sparse_semi_structured", "test_spectral_ops", "test_stateless", "test_subclass", - "test_tensorboard", + "test_sympy_utils", "test_tensor_creation_ops", + "test_tensorboard", "test_tensorexpr", "test_tensorexpr_pybind", + "test_testing", "test_torch", "test_transformers", "test_transformers_privateuse1", @@ -473,21 +703,25 @@ S390X_TESTLIST = [ "test_type_promotion", "test_typing", "test_utils", + "test_utils_config_module", + "test_utils_filelock", "test_view_ops", "test_vulkan", "test_weak", "test_xnnpack_integration", + "test_xpu", "torch_np/numpy_tests/core/test_dlpack", "torch_np/numpy_tests/core/test_dtype", "torch_np/numpy_tests/core/test_einsum", "torch_np/numpy_tests/core/test_getlimits", "torch_np/numpy_tests/core/test_indexing", + "torch_np/numpy_tests/core/test_multiarray", "torch_np/numpy_tests/core/test_numeric", "torch_np/numpy_tests/core/test_numerictypes", "torch_np/numpy_tests/core/test_scalar_ctors", + "torch_np/numpy_tests/core/test_scalar_methods", "torch_np/numpy_tests/core/test_scalarinherit", "torch_np/numpy_tests/core/test_scalarmath", - "torch_np/numpy_tests/core/test_scalar_methods", "torch_np/numpy_tests/core/test_shape_base", "torch_np/numpy_tests/fft/test_helper", "torch_np/numpy_tests/fft/test_pocketfft", @@ -511,8 +745,8 @@ S390X_TESTLIST = [ "torch_np/test_scalars_0D_arrays", "torch_np/test_ufuncs_basic", "torch_np/test_unary_ufuncs", - "xpu/test_conv.py", - "xpu/test_gemm.py", + "xpu/test_conv", + "xpu/test_gemm", ] XPU_BLOCKLIST = [ diff --git a/test/test_dataloader.py b/test/test_dataloader.py index 5c0708893579..ef92b4f1b82d 100644 --- a/test/test_dataloader.py +++ b/test/test_dataloader.py @@ -25,6 +25,7 @@ from torch.testing._internal.common_device_type import instantiate_device_type_t from torch.testing._internal.common_utils import ( IS_CI, IS_JETSON, + IS_S390X, IS_SANDCASTLE, IS_WINDOWS, load_tests, @@ -1384,6 +1385,9 @@ except RuntimeError as e: # This case pass on Intel GPU, but currently expected failure on other device, # please don't forget to remove this skip when remove the xfailIfLinux. @skipIfXpu + # This case passes on s390x too. + # please don't forget to remove this skip when remove the xfailIfLinux. + @unittest.skipIf(IS_S390X, "Unexpectedly succeeds on s390x") # https://github.com/pytorch/pytorch/issues/128551 @xfailIfLinux def test_segfault(self): diff --git a/test/test_ops_gradients.py b/test/test_ops_gradients.py index a0adc4ce3972..4dfedc458520 100644 --- a/test/test_ops_gradients.py +++ b/test/test_ops_gradients.py @@ -14,7 +14,6 @@ from torch.testing._internal.common_utils import ( TestCase, TestGradients, unMarkDynamoStrictTest, - xfailIfS390X, ) from torch.testing._internal.custom_op_db import custom_op_db from torch.testing._internal.hop_db import hop_db @@ -29,7 +28,6 @@ _gradcheck_ops = partial( @unMarkDynamoStrictTest class TestBwdGradients(TestGradients): # Tests that gradients are computed correctly - @xfailIfS390X @_gradcheck_ops(op_db + hop_db + custom_op_db) def test_fn_grad(self, device, dtype, op): # This is verified by test_dtypes in test_ops.py diff --git a/test/test_optim.py b/test/test_optim.py index 88b0ab78e4ac..9e2556683562 100644 --- a/test/test_optim.py +++ b/test/test_optim.py @@ -45,7 +45,6 @@ from torch.testing._internal.common_utils import ( run_tests, TEST_WITH_TORCHDYNAMO, TestCase, - xfailIfS390X, ) @@ -591,7 +590,6 @@ class TestOptimRenewed(TestCase): self.assertEqual(complex_steps, real_steps) @skipMPS - @xfailIfS390X @optims([o for o in optim_db if o.supports_complex], dtypes=[torch.complex64]) def test_complex_2d(self, device, dtype, optim_info): optim_cls = optim_info.optim_cls diff --git a/torch/onnx/_internal/onnxruntime.py b/torch/onnx/_internal/onnxruntime.py index 85f0cfe0d31f..1eb37f361203 100644 --- a/torch/onnx/_internal/onnxruntime.py +++ b/torch/onnx/_internal/onnxruntime.py @@ -496,6 +496,8 @@ def _run_onnx_session_with_ortvaluevector( _nvtx_range_pop() return pth_outputs else: + import onnxruntime.training + # Profile the two ORT-to-PyTorch type casts below _nvtx_range_push("after run_with_ortvaluevector") # Map ORTValue to torch.Tensor. diff --git a/torch/testing/_internal/common_methods_invocations.py b/torch/testing/_internal/common_methods_invocations.py index ec0775cad3f0..95d717b3be36 100644 --- a/torch/testing/_internal/common_methods_invocations.py +++ b/torch/testing/_internal/common_methods_invocations.py @@ -36,7 +36,7 @@ from torch.testing._internal.common_cuda import ( ) from torch.testing._internal.common_utils import ( make_fullrank_matrices_with_distinct_singular_values, - TEST_WITH_ROCM, IS_FBCODE, IS_WINDOWS, IS_MACOS, TEST_SCIPY, + TEST_WITH_ROCM, IS_FBCODE, IS_WINDOWS, IS_MACOS, IS_S390X, TEST_SCIPY, torch_to_numpy_dtype_dict, numpy_to_torch_dtype, TEST_WITH_ASAN, GRADCHECK_NONDET_TOL, slowTest, TEST_WITH_SLOW, TEST_WITH_TORCHINDUCTOR @@ -23172,6 +23172,7 @@ python_ref_db = [ "test_python_ref", dtypes=(torch.bfloat16,), device_type="cpu", + active_if=not IS_S390X, ), ), ),