From 4946638f06e5916ea9bd0f790ff620bdb78a92a3 Mon Sep 17 00:00:00 2001 From: Bin Bao Date: Thu, 11 Apr 2024 19:55:18 -0700 Subject: [PATCH] [AOTI] Add ABI-compatibility tests (#123848) Summary: In AOTInductor generated CPU model code, there can be direct references to some aten/c10 utility functions and data structures, e.g. at::vec and c10::Half. These are performance critical and thus it doesn't make sense to create C shim for them. Instead, we make sure they are implemented in a header-only way, and use this set of tests to guard future changes. There are more header files to be updated, but we will do it in other followup PRs. Pull Request resolved: https://github.com/pytorch/pytorch/pull/123848 Approved by: https://github.com/jansel ghstack dependencies: #123847 --- .ci/pytorch/test.sh | 2 +- .github/labeler.yml | 3 +- caffe2/CMakeLists.txt | 7 +- test/cpp/aoti_abi_check/CMakeLists.txt | 27 +++++ test/cpp/aoti_abi_check/README.md | 1 + test/cpp/aoti_abi_check/main.cpp | 6 ++ test/cpp/aoti_abi_check/test_dtype.cpp | 99 +++++++++++++++++++ .../CMakeLists.txt | 20 ++-- .../aoti_custom_class.cpp | 0 .../aoti_custom_class.h | 0 .../compile_model.py | 0 .../{aot_inductor => aoti_inference}/test.cpp | 4 +- .../{aot_inductor => aoti_inference}/test.py | 0 test/cpp/common/main.cpp | 1 + torch/_inductor/codegen/cpp_prefix.h | 6 ++ 15 files changed, 160 insertions(+), 16 deletions(-) create mode 100644 test/cpp/aoti_abi_check/CMakeLists.txt create mode 100644 test/cpp/aoti_abi_check/README.md create mode 100644 test/cpp/aoti_abi_check/main.cpp create mode 100644 test/cpp/aoti_abi_check/test_dtype.cpp rename test/cpp/{aot_inductor => aoti_inference}/CMakeLists.txt (73%) rename test/cpp/{aot_inductor => aoti_inference}/aoti_custom_class.cpp (100%) rename test/cpp/{aot_inductor => aoti_inference}/aoti_custom_class.h (100%) rename test/cpp/{aot_inductor => aoti_inference}/compile_model.py (100%) rename test/cpp/{aot_inductor => aoti_inference}/test.cpp (99%) rename
test/cpp/{aot_inductor => aoti_inference}/test.py (100%) diff --git a/.ci/pytorch/test.sh b/.ci/pytorch/test.sh index 5408e0f596e9..23eaf8a2dda9 100755 --- a/.ci/pytorch/test.sh +++ b/.ci/pytorch/test.sh @@ -334,7 +334,7 @@ test_inductor() { # TODO: need a faster way to build if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop - CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aot_inductor + CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference fi } diff --git a/.github/labeler.yml b/.github/labeler.yml index 74ea0abd43bd..8a572bfcbafe 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -36,7 +36,8 @@ - torch/distributed/_tensor/** - torch/distributed/fsdp/** - torch/csrc/inductor/** -- test/cpp/aot_inductor/** +- test/cpp/aoti_abi_check/** +- test/cpp/aoti_inference/** "module: cpu": - aten/src/ATen/cpu/** diff --git a/caffe2/CMakeLists.txt b/caffe2/CMakeLists.txt index d080ef6ce047..83bf15893ca2 100644 --- a/caffe2/CMakeLists.txt +++ b/caffe2/CMakeLists.txt @@ -1336,8 +1336,11 @@ if(BUILD_TEST) endif() if(BUILD_AOT_INDUCTOR_TEST) add_subdirectory( - ${TORCH_ROOT}/test/cpp/aot_inductor - ${CMAKE_BINARY_DIR}/test_aot_inductor) + ${TORCH_ROOT}/test/cpp/aoti_abi_check + ${CMAKE_BINARY_DIR}/test_aoti_abi_check) + add_subdirectory( + ${TORCH_ROOT}/test/cpp/aoti_inference + ${CMAKE_BINARY_DIR}/test_aoti_inference) endif() endif() diff --git a/test/cpp/aoti_abi_check/CMakeLists.txt b/test/cpp/aoti_abi_check/CMakeLists.txt new file mode 100644 index 000000000000..8ae688ebbb80 --- /dev/null +++ b/test/cpp/aoti_abi_check/CMakeLists.txt @@ -0,0 +1,27 @@ +set(AOTI_ABI_CHECK_TEST_ROOT ${TORCH_ROOT}/test/cpp/aoti_abi_check) + +# Build the cpp gtest binary containing the cpp-only tests. 
+set(AOTI_ABI_CHECK_TEST_SRCS + ${AOTI_ABI_CHECK_TEST_ROOT}/main.cpp + ${AOTI_ABI_CHECK_TEST_ROOT}/test_dtype.cpp +) + +add_executable(test_aoti_abi_check + ${AOTI_ABI_CHECK_TEST_SRCS} +) + +# TODO temporary until we can delete the old gtest polyfills. +target_compile_definitions(test_aoti_abi_check PRIVATE USE_GTEST) + +# WARNING: DO NOT LINK torch!!! +# The purpose is to check if the used aten/c10 headers are written in a header-only way +target_link_libraries(test_aoti_abi_check PRIVATE gtest) +target_include_directories(test_aoti_abi_check PRIVATE ${ATen_CPU_INCLUDE}) + +if(INSTALL_TEST) + install(TARGETS test_aoti_abi_check DESTINATION bin) + # Install PDB files for MSVC builds + if(MSVC AND BUILD_SHARED_LIBS) + install(FILES $ DESTINATION bin OPTIONAL) + endif() +endif() diff --git a/test/cpp/aoti_abi_check/README.md b/test/cpp/aoti_abi_check/README.md new file mode 100644 index 000000000000..7a35838a4f67 --- /dev/null +++ b/test/cpp/aoti_abi_check/README.md @@ -0,0 +1 @@ +Tests in this directory are meant to verify that certain ATen/c10 util functions and data structures are implemented in a header-only fashion, to make sure AOTInductor generated CPU model code is ABI backward-compatible.
diff --git a/test/cpp/aoti_abi_check/main.cpp b/test/cpp/aoti_abi_check/main.cpp new file mode 100644 index 000000000000..d81071339fc2 --- /dev/null +++ b/test/cpp/aoti_abi_check/main.cpp @@ -0,0 +1,6 @@ +#include + +int main(int argc, char* argv[]) { + ::testing::InitGoogleTest(&argc, argv); + return RUN_ALL_TESTS(); +} diff --git a/test/cpp/aoti_abi_check/test_dtype.cpp b/test/cpp/aoti_abi_check/test_dtype.cpp new file mode 100644 index 000000000000..bf0081575b06 --- /dev/null +++ b/test/cpp/aoti_abi_check/test_dtype.cpp @@ -0,0 +1,99 @@ +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace torch { +namespace aot_inductor { + +TEST(TestDtype, TestBFloat16) { + c10::BFloat16 a = 1.0f; + c10::BFloat16 b = 2.0f; + c10::BFloat16 add = 3.0f; + c10::BFloat16 sub = -1.0f; + c10::BFloat16 mul = 2.0f; + c10::BFloat16 div = 0.5f; + + EXPECT_EQ(a + b, add); + EXPECT_EQ(a - b, sub); + EXPECT_EQ(a * b, mul); + EXPECT_EQ(a / b, div); +} + +TEST(TestDtype, TestFloat8_e4m3fn) { + c10::Float8_e4m3fn a = 1.0f; + c10::Float8_e4m3fn b = 2.0f; + c10::Float8_e4m3fn add = 3.0f; + c10::Float8_e4m3fn sub = -1.0f; + c10::Float8_e4m3fn mul = 2.0f; + c10::Float8_e4m3fn div = 0.5f; + + EXPECT_EQ(a + b, add); + EXPECT_EQ(a - b, sub); + EXPECT_EQ(a * b, mul); + EXPECT_EQ(a / b, div); +} + +TEST(TestDtype, TestFloat8_e4m3fuz) { + c10::Float8_e4m3fnuz a = 1.0f; + c10::Float8_e4m3fnuz b = 2.0f; + c10::Float8_e4m3fnuz add = 3.0f; + c10::Float8_e4m3fnuz sub = -1.0f; + c10::Float8_e4m3fnuz mul = 2.0f; + c10::Float8_e4m3fnuz div = 0.5f; + + EXPECT_EQ(a + b, add); + EXPECT_EQ(a - b, sub); + EXPECT_EQ(a * b, mul); + EXPECT_EQ(a / b, div); +} + +TEST(TestDtype, TestFloat8_e5m2) { + c10::Float8_e5m2 a = 1.0f; + c10::Float8_e5m2 b = 2.0f; + c10::Float8_e5m2 add = 3.0f; + c10::Float8_e5m2 sub = -1.0f; + c10::Float8_e5m2 mul = 2.0f; + c10::Float8_e5m2 div = 0.5f; + + EXPECT_EQ(a + b, add); + EXPECT_EQ(a - b, sub); + EXPECT_EQ(a * b, mul); + EXPECT_EQ(a / b, div); +} 
+ +TEST(TestDtype, TestFloat8_e5m2fnuz) { + c10::Float8_e5m2fnuz a = 1.0f; + c10::Float8_e5m2fnuz b = 2.0f; + c10::Float8_e5m2fnuz add = 3.0f; + c10::Float8_e5m2fnuz sub = -1.0f; + c10::Float8_e5m2fnuz mul = 2.0f; + c10::Float8_e5m2fnuz div = 0.5f; + + EXPECT_EQ(a + b, add); + EXPECT_EQ(a - b, sub); + EXPECT_EQ(a * b, mul); + EXPECT_EQ(a / b, div); +} + +TEST(TestDtype, TestHalf) { + c10::Half a = 1.0f; + c10::Half b = 2.0f; + c10::Half add = 3.0f; + c10::Half sub = -1.0f; + c10::Half mul = 2.0f; + c10::Half div = 0.5f; + + EXPECT_EQ(a + b, add); + EXPECT_EQ(a - b, sub); + EXPECT_EQ(a * b, mul); + EXPECT_EQ(a / b, div); +} + +} // namespace aot_inductor +} // namespace torch diff --git a/test/cpp/aot_inductor/CMakeLists.txt b/test/cpp/aoti_inference/CMakeLists.txt similarity index 73% rename from test/cpp/aot_inductor/CMakeLists.txt rename to test/cpp/aoti_inference/CMakeLists.txt index 8ec065ee08f6..70c91fbd419d 100644 --- a/test/cpp/aot_inductor/CMakeLists.txt +++ b/test/cpp/aoti_inference/CMakeLists.txt @@ -1,5 +1,5 @@ -set(AOT_INDUCTOR_TEST_ROOT ${TORCH_ROOT}/test/cpp/aot_inductor) +set(AOT_INDUCTOR_TEST_ROOT ${TORCH_ROOT}/test/cpp/aoti_inference) # Build custom TorchScript op for AOTInductor add_library(aoti_custom_class SHARED aoti_custom_class.cpp) @@ -31,7 +31,7 @@ set(INDUCTOR_TEST_SRCS ${AOT_INDUCTOR_TEST_ROOT}/test.cpp ) -add_executable(test_aot_inductor +add_executable(test_aoti_inference ${TORCH_ROOT}/test/cpp/common/main.cpp ${INDUCTOR_TEST_SRCS} data.pt @@ -39,10 +39,10 @@ add_executable(test_aot_inductor script_model_cpu.pt script_model_cuda.pt ) -add_dependencies(test_aot_inductor aoti_custom_class aoti_script_model) +add_dependencies(test_aoti_inference aoti_custom_class aoti_script_model) # TODO temporary until we can delete the old gtest polyfills. 
-target_compile_definitions(test_aot_inductor PRIVATE USE_GTEST) +target_compile_definitions(test_aoti_inference PRIVATE USE_GTEST) # Define a custom command to generate the library add_custom_command( @@ -51,24 +51,24 @@ add_custom_command( DEPENDS ${AOT_INDUCTOR_TEST_ROOT}/test.py ) -target_link_libraries(test_aot_inductor PRIVATE +target_link_libraries(test_aoti_inference PRIVATE torch gtest -Wl,--no-as-needed aoti_custom_class ) if(USE_CUDA) - target_include_directories(test_aot_inductor PRIVATE ${ATen_CUDA_INCLUDE}) - target_compile_definitions(test_aot_inductor PRIVATE USE_CUDA) + target_include_directories(test_aoti_inference PRIVATE ${ATen_CUDA_INCLUDE}) + target_compile_definitions(test_aoti_inference PRIVATE USE_CUDA) endif() -target_compile_definitions(test_aot_inductor PRIVATE +target_compile_definitions(test_aoti_inference PRIVATE CMAKE_CURRENT_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR} ) if(INSTALL_TEST) - install(TARGETS test_aot_inductor DESTINATION bin) + install(TARGETS test_aoti_inference DESTINATION bin) # Install PDB files for MSVC builds if(MSVC AND BUILD_SHARED_LIBS) - install(FILES $ DESTINATION bin OPTIONAL) + install(FILES $ DESTINATION bin OPTIONAL) endif() endif() diff --git a/test/cpp/aot_inductor/aoti_custom_class.cpp b/test/cpp/aoti_inference/aoti_custom_class.cpp similarity index 100% rename from test/cpp/aot_inductor/aoti_custom_class.cpp rename to test/cpp/aoti_inference/aoti_custom_class.cpp diff --git a/test/cpp/aot_inductor/aoti_custom_class.h b/test/cpp/aoti_inference/aoti_custom_class.h similarity index 100% rename from test/cpp/aot_inductor/aoti_custom_class.h rename to test/cpp/aoti_inference/aoti_custom_class.h diff --git a/test/cpp/aot_inductor/compile_model.py b/test/cpp/aoti_inference/compile_model.py similarity index 100% rename from test/cpp/aot_inductor/compile_model.py rename to test/cpp/aoti_inference/compile_model.py diff --git a/test/cpp/aot_inductor/test.cpp b/test/cpp/aoti_inference/test.cpp similarity index 99% 
rename from test/cpp/aot_inductor/test.cpp rename to test/cpp/aoti_inference/test.cpp index bfb5e410a997..fde2a372b54a 100644 --- a/test/cpp/aot_inductor/test.cpp +++ b/test/cpp/aoti_inference/test.cpp @@ -283,7 +283,7 @@ void test_aoti_double_buffering_with_tensor_constants() { } // namespace namespace torch { -namespace inductor { +namespace aot_inductor { TEST(AotInductorTest, BasicTestCpu) { test_aoti("cpu", false); @@ -324,5 +324,5 @@ TEST(AotInductorTest, UpdateInactiveConstantsWithTensorConstantsCuda) { } #endif -} // namespace inductor +} // namespace aot_inductor } // namespace torch diff --git a/test/cpp/aot_inductor/test.py b/test/cpp/aoti_inference/test.py similarity index 100% rename from test/cpp/aot_inductor/test.py rename to test/cpp/aoti_inference/test.py diff --git a/test/cpp/common/main.cpp b/test/cpp/common/main.cpp index 632aa8ee8af2..7feb2a8fb7bf 100644 --- a/test/cpp/common/main.cpp +++ b/test/cpp/common/main.cpp @@ -18,6 +18,7 @@ std::string add_negative_flag(const std::string& flag) { int main(int argc, char* argv[]) { ::testing::InitGoogleTest(&argc, argv); + if (!torch::cuda::is_available()) { std::cout << "CUDA not available. Disabling CUDA and MultiCUDA tests" << std::endl; diff --git a/torch/_inductor/codegen/cpp_prefix.h b/torch/_inductor/codegen/cpp_prefix.h index 5afb6195d48f..a05a9e2b8ae8 100644 --- a/torch/_inductor/codegen/cpp_prefix.h +++ b/torch/_inductor/codegen/cpp_prefix.h @@ -7,6 +7,12 @@ #include #include +// WARNING: be extra careful when including more ATen/c10 header files here! +// Because AOTInductor generated code will copy-paste this cpp_prefix.h for +// the CPU backend, we have to make sure the used headers are implemented +// in a header-only way, i.e. all the function and class definitions are +// in .h files instead of .cpp files, to avoid ABI backward-compatibility breakage. + #include #include #include