[Inductor] Allocate another shard for testing cpp-wrapper JIT (#121310)

Summary: ABI-compatible mode for the cpp wrapper has not yet been turned on by default, so test it in a separate shard. More tests are expected to be added to this shard.

Differential Revision: [D54617287](https://our.internmc.facebook.com/intern/diff/D54617287)
Pull Request resolved: https://github.com/pytorch/pytorch/pull/121310
Approved by: https://github.com/chenyang78
ghstack dependencies: #121309
Author: Bin Bao
Date: 2024-03-06 17:57:15 -08:00
Committed by: PyTorch MergeBot
Parent: 7e598c0053
Commit: 0339f1ca82
5 changed files with 94 additions and 2 deletions
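
For a local reproduction of what the new shard runs, the following sketch mirrors the CI function added in this diff (run from a PyTorch source checkout; a CUDA-capable machine is assumed for the second test file):

export TORCHINDUCTOR_ABI_COMPATIBLE=1
# Stack allocation currently segfaults on CPU, so it is disabled for that run.
TORCHINDUCTOR_STACK_ALLOCATION=0 python test/run_test.py --include inductor/test_cpu_cpp_wrapper
python test/run_test.py --include inductor/test_cuda_cpp_wrapper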


@@ -323,6 +323,14 @@ test_inductor() {
  fi
}

test_inductor_cpp_wrapper_abi_compatible() {
  export TORCHINDUCTOR_ABI_COMPATIBLE=1
  echo "Testing Inductor cpp wrapper mode with TORCHINDUCTOR_ABI_COMPATIBLE=1"
  # cpu stack allocation causes segfault and needs more investigation
  TORCHINDUCTOR_STACK_ALLOCATION=0 python test/run_test.py --include inductor/test_cpu_cpp_wrapper
  python test/run_test.py --include inductor/test_cuda_cpp_wrapper
}

# "Global" flags for inductor benchmarking controlled by TEST_CONFIG
# For example 'dynamic_aot_eager_torchbench' TEST_CONFIG means we run
# the benchmark script with '--dynamic-shapes --backend aot_eager --device cuda'

@@ -1173,6 +1181,9 @@ elif [[ "${TEST_CONFIG}" == *torchbench* ]]; then
    fi
    PYTHONPATH=$(pwd)/torchbench test_dynamo_benchmark torchbench "$id"
  fi
elif [[ "${TEST_CONFIG}" == *inductor_cpp_wrapper_abi_compatible* ]]; then
  install_torchvision
  test_inductor_cpp_wrapper_abi_compatible
elif [[ "${TEST_CONFIG}" == *inductor* && "${SHARD_NUMBER}" == 1 ]]; then
  install_torchvision
  test_inductor


@@ -65,6 +65,7 @@ jobs:
{ config: "aot_inductor_timm", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "aot_inductor_torchbench", shard: 1, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "aot_inductor_torchbench", shard: 2, num_shards: 2, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "inductor_cpp_wrapper_abi_compatible", shard: 1, num_shards: 1, runner: "linux.g5.4xlarge.nvidia.gpu" },
]}
secrets:
HUGGING_FACE_HUB_TOKEN: ${{ secrets.HUGGING_FACE_HUB_TOKEN }}


@@ -72,6 +72,53 @@ test_failures_cpp_wrapper = {
    ),
}

if config.abi_compatible:
    xfail_list = [
        "test_bernoulli1_cpu",  # cpp fallback op naming issue
        "test_conv2d_binary_inplace_fusion_failed_cpu",
        "test_conv2d_binary_inplace_fusion_pass_cpu",
        "test_cumsum_cpu",
        "test_custom_op_cpu",  # needs custom op support
        "test_dtype_sympy_expr_cpu",
        "test_dynamic_qlinear_cpu",
        "test_dynamic_qlinear_qat_cpu",
        "test_index_put_deterministic_fallback_cpu",
        "test_lstm_packed_change_input_sizes_cpu",
        "test_profiler_mark_wrapper_call_cpu",
        "test_qconv2d_add_cpu",
        "test_qconv2d_add_relu_cpu",
        "test_qconv2d_cpu",
        "test_qconv2d_dequant_promotion_cpu",
        "test_qconv2d_maxpool2d_linear_dynamic_cpu",
        "test_qconv2d_relu_cpu",
        "test_qlinear_cpu",
        "test_qlinear_dequant_promotion_cpu",
        "test_qlinear_relu_cpu",
        "test_randint_cpu",
        "test_randn_with_dtype_and_device_cpu",
        "test_scatter5_cpu",
        "test_scatter6_cpu",
        "test_tensor2_cpu",
    ]
    for test_name in xfail_list:
        test_failures_cpp_wrapper[test_name] = test_torchinductor.TestFailure(
            ("cpp_wrapper",), is_skip=False
        )
        test_failures_cpp_wrapper[
            f"{test_name}_dynamic_shapes"
        ] = test_torchinductor.TestFailure(("cpp_wrapper",), is_skip=False)
    skip_list = [
        "test_linear1_cpu",  # segfault from double free
        "test_multihead_attention_cpu",
    ]
    for test_name in skip_list:
        test_failures_cpp_wrapper[test_name] = test_torchinductor.TestFailure(
            ("cpp_wrapper",), is_skip=True
        )
        test_failures_cpp_wrapper[
            f"{test_name}_dynamic_shapes"
        ] = test_torchinductor.TestFailure(("cpp_wrapper",), is_skip=True)


def make_test_case(
    name,

@@ -306,7 +353,12 @@ if RUN_CPU:
            item.code_string_count,
        )

    test_torchinductor.copy_tests(CppWrapperTemplate, TestCppWrapper, "cpp_wrapper")
    test_torchinductor.copy_tests(
        CppWrapperTemplate,
        TestCppWrapper,
        "cpp_wrapper",
        test_failures_cpp_wrapper,
    )

    DynamicShapesCppWrapperTemplate = (
        test_torchinductor_dynamic_shapes.make_dynamic_cls(CppWrapperTemplate)
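
The lists above register known ABI-compatible failures as expected failures (is_skip=False) or skips (is_skip=True), for both the static- and dynamic-shapes variant of each test. To check the status of an individual entry locally, something like the following should work, assuming the test file can be run directly the way other inductor test files are (the chosen test name is just one entry from the xfail list):

TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_STACK_ALLOCATION=0 \
  python test/inductor/test_cpu_cpp_wrapper.py -k test_custom_op_cpu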


@@ -93,6 +93,34 @@ if TEST_WITH_ROCM:
            dynamic_shapes_test_name
        ] = test_torchinductor.TestFailure(("cuda_wrapper",), is_skip=True)

if config.abi_compatible:
    xfail_list = [
        "test_bernoulli1_cuda",  # cpp fallback op naming issue
        "test_conv_backward_cuda",
        "test_custom_op_cuda",  # needs custom op support
        "test_index_put_deterministic_fallback_cuda",
        "test_profiler_mark_wrapper_call_cuda",
        "test_scaled_dot_product_attention_cuda_dynamic_shapes",
    ]
    for test_name in xfail_list:
        test_failures_cuda_wrapper[test_name] = test_torchinductor.TestFailure(
            ("cuda_wrapper",), is_skip=False
        )
        test_failures_cuda_wrapper[
            f"{test_name}_dynamic_shapes"
        ] = test_torchinductor.TestFailure(("cuda_wrapper",), is_skip=False)
    skip_list = [
        "test_multi_device_cuda",
        "test_linear1_cuda",  # segfault from double free
    ]
    for test_name in skip_list:
        test_failures_cuda_wrapper[test_name] = test_torchinductor.TestFailure(
            ("cuda_wrapper",), is_skip=True
        )
        test_failures_cuda_wrapper[
            f"{test_name}_dynamic_shapes"
        ] = test_torchinductor.TestFailure(("cuda_wrapper",), is_skip=True)


def make_test_case(
    name,


@@ -784,7 +784,7 @@ class CommonTemplate:
        _, code = run_and_get_code(fn, x, y)
        self.assertEqual(
            " ".join(code).count(
                "::view_dtype" if config.cpp_wrapper else "aten.view"
                "view_dtype" if config.cpp_wrapper else "aten.view"
            ),
            3,
        )
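
Relaxing the expected substring from "::view_dtype" to "view_dtype" lets the count match in both the regular and ABI-compatible cpp-wrapper code paths, which presumably emit the op under differently qualified names (the exact emitted form is not shown in this diff). To eyeball the generated wrapper code for this case under the new configuration, a sketch like the following could be used; the -k filter is a guess at the relevant test name, not taken from this diff:

TORCHINDUCTOR_ABI_COMPATIBLE=1 TORCHINDUCTOR_STACK_ALLOCATION=0 \
  python test/inductor/test_cpu_cpp_wrapper.py -k view_dtype -v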