[AOTI] Add ABI-compatibility tests (#123848)

Summary: In AOTInductor-generated CPU model code, there can be direct references to some aten/c10 utility functions and data structures, e.g. at::vec and c10::Half. These are performance-critical, so it doesn't make sense to create a C shim for them. Instead, we make sure they are implemented in a header-only way, and we use this set of tests to guard future changes.

There are more header files to be updated, but we will do that in follow-up PRs.
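As background for why header-only matters, the sketch below is hypothetical and not taken from this commit (the kernel names and signatures are made up for illustration), but it shows the flavor of code AOTInductor generates for CPU models: it includes aten/c10 headers such as ATen/cpu/vec/vec.h and c10/util/Half.h directly, with no C shim in between, so everything those headers provide must be defined inline in headers for the compiled model to remain ABI-compatible.

// Hypothetical sketch (not part of this commit): the kind of direct aten/c10
// usage that AOTInductor-generated CPU code contains.
#include <ATen/cpu/vec/vec.h>
#include <c10/util/Half.h>
#include <cstdint>

// Vectorized elementwise add, using at::vec::Vectorized directly.
void generated_add_kernel(const float* a, const float* b, float* out, int64_t n) {
  using Vec = at::vec::Vectorized<float>;
  int64_t i = 0;
  for (; i + Vec::size() <= n; i += Vec::size()) {
    (Vec::loadu(a + i) + Vec::loadu(b + i)).store(out + i);
  }
  for (; i < n; ++i) {
    out[i] = a[i] + b[i];  // scalar tail
  }
}

// Half-precision cast, relying on c10::Half's header-only conversions.
void generated_half_cast_kernel(const c10::Half* in, float* out, int64_t n) {
  for (int64_t i = 0; i < n; ++i) {
    out[i] = static_cast<float>(in[i]) * 2.0f;
  }
}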

Pull Request resolved: https://github.com/pytorch/pytorch/pull/123848
Approved by: https://github.com/jansel
ghstack dependencies: #123847
Bin Bao
2024-04-11 19:55:18 -07:00
committed by PyTorch MergeBot
parent cbefaf2a37
commit 4946638f06
15 changed files with 160 additions and 16 deletions

View File

@@ -334,7 +334,7 @@ test_inductor() {
   # TODO: need a faster way to build
   if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
     BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop
-    CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aot_inductor
+    CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference
   fi
 }

.github/labeler.yml
View File

@@ -36,7 +36,8 @@
 - torch/distributed/_tensor/**
 - torch/distributed/fsdp/**
 - torch/csrc/inductor/**
-- test/cpp/aot_inductor/**
+- test/cpp/aoti_abi_check/**
+- test/cpp/aoti_inference/**
 "module: cpu":
 - aten/src/ATen/cpu/**

View File

@@ -1336,8 +1336,11 @@ if(BUILD_TEST)
   endif()
   if(BUILD_AOT_INDUCTOR_TEST)
     add_subdirectory(
-      ${TORCH_ROOT}/test/cpp/aot_inductor
-      ${CMAKE_BINARY_DIR}/test_aot_inductor)
+      ${TORCH_ROOT}/test/cpp/aoti_abi_check
+      ${CMAKE_BINARY_DIR}/test_aoti_abi_check)
+    add_subdirectory(
+      ${TORCH_ROOT}/test/cpp/aoti_inference
+      ${CMAKE_BINARY_DIR}/test_aoti_inference)
   endif()
 endif()

View File

@@ -0,0 +1,27 @@
set(AOTI_ABI_CHECK_TEST_ROOT ${TORCH_ROOT}/test/cpp/aoti_abi_check)
# Build the cpp gtest binary containing the cpp-only tests.
set(AOTI_ABI_CHECK_TEST_SRCS
${AOTI_ABI_CHECK_TEST_ROOT}/main.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_dtype.cpp
)
add_executable(test_aoti_abi_check
${AOTI_ABI_CHECK_TEST_SRCS}
)
# TODO temporary until we can delete the old gtest polyfills.
target_compile_definitions(test_aoti_abi_check PRIVATE USE_GTEST)
# WARNING: DO NOT LINK torch!!!
# The purpose is to check if the used aten/c10 headers are written in a header-only way
target_link_libraries(test_aoti_abi_check PRIVATE gtest)
target_include_directories(test_aoti_abi_check PRIVATE ${ATen_CPU_INCLUDE})
if(INSTALL_TEST)
install(TARGETS test_aoti_abi_check DESTINATION bin)
# Install PDB files for MSVC builds
if(MSVC AND BUILD_SHARED_LIBS)
install(FILES $<TARGET_PDB_FILE:test_aoti_abi_check> DESTINATION bin OPTIONAL)
endif()
endif()

View File

@@ -0,0 +1 @@
Tests in this directory are meant to guard that certain ATen/c10 util functions and data structures are implemented in a header-only fashion, to make sure AOTInductor-generated CPU model code is ABI backward-compatible.

View File

@@ -0,0 +1,6 @@
#include <gtest/gtest.h>
int main(int argc, char* argv[]) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View File

@@ -0,0 +1,99 @@
#include <gtest/gtest.h>
#include <c10/util/BFloat16-math.h>
#include <c10/util/BFloat16.h>
#include <c10/util/Float8_e4m3fn.h>
#include <c10/util/Float8_e4m3fnuz.h>
#include <c10/util/Float8_e5m2.h>
#include <c10/util/Float8_e5m2fnuz.h>
#include <c10/util/Half.h>
namespace torch {
namespace aot_inductor {
TEST(TestDtype, TestBFloat16) {
c10::BFloat16 a = 1.0f;
c10::BFloat16 b = 2.0f;
c10::BFloat16 add = 3.0f;
c10::BFloat16 sub = -1.0f;
c10::BFloat16 mul = 2.0f;
c10::BFloat16 div = 0.5f;
EXPECT_EQ(a + b, add);
EXPECT_EQ(a - b, sub);
EXPECT_EQ(a * b, mul);
EXPECT_EQ(a / b, div);
}
TEST(TestDtype, TestFloat8_e4m3fn) {
c10::Float8_e4m3fn a = 1.0f;
c10::Float8_e4m3fn b = 2.0f;
c10::Float8_e4m3fn add = 3.0f;
c10::Float8_e4m3fn sub = -1.0f;
c10::Float8_e4m3fn mul = 2.0f;
c10::Float8_e4m3fn div = 0.5f;
EXPECT_EQ(a + b, add);
EXPECT_EQ(a - b, sub);
EXPECT_EQ(a * b, mul);
EXPECT_EQ(a / b, div);
}
TEST(TestDtype, TestFloat8_e4m3fnuz) {
c10::Float8_e4m3fnuz a = 1.0f;
c10::Float8_e4m3fnuz b = 2.0f;
c10::Float8_e4m3fnuz add = 3.0f;
c10::Float8_e4m3fnuz sub = -1.0f;
c10::Float8_e4m3fnuz mul = 2.0f;
c10::Float8_e4m3fnuz div = 0.5f;
EXPECT_EQ(a + b, add);
EXPECT_EQ(a - b, sub);
EXPECT_EQ(a * b, mul);
EXPECT_EQ(a / b, div);
}
TEST(TestDtype, TestFloat8_e5m2) {
c10::Float8_e5m2 a = 1.0f;
c10::Float8_e5m2 b = 2.0f;
c10::Float8_e5m2 add = 3.0f;
c10::Float8_e5m2 sub = -1.0f;
c10::Float8_e5m2 mul = 2.0f;
c10::Float8_e5m2 div = 0.5f;
EXPECT_EQ(a + b, add);
EXPECT_EQ(a - b, sub);
EXPECT_EQ(a * b, mul);
EXPECT_EQ(a / b, div);
}
TEST(TestDtype, TestFloat8_e5m2fnuz) {
c10::Float8_e5m2fnuz a = 1.0f;
c10::Float8_e5m2fnuz b = 2.0f;
c10::Float8_e5m2fnuz add = 3.0f;
c10::Float8_e5m2fnuz sub = -1.0f;
c10::Float8_e5m2fnuz mul = 2.0f;
c10::Float8_e5m2fnuz div = 0.5f;
EXPECT_EQ(a + b, add);
EXPECT_EQ(a - b, sub);
EXPECT_EQ(a * b, mul);
EXPECT_EQ(a / b, div);
}
TEST(TestDtype, TestHalf) {
c10::Half a = 1.0f;
c10::Half b = 2.0f;
c10::Half add = 3.0f;
c10::Half sub = -1.0f;
c10::Half mul = 2.0f;
c10::Half div = 0.5f;
EXPECT_EQ(a + b, add);
EXPECT_EQ(a - b, sub);
EXPECT_EQ(a * b, mul);
EXPECT_EQ(a / b, div);
}
} // namespace aot_inductor
} // namespace torch

View File

@@ -1,5 +1,5 @@
-set(AOT_INDUCTOR_TEST_ROOT ${TORCH_ROOT}/test/cpp/aot_inductor)
+set(AOT_INDUCTOR_TEST_ROOT ${TORCH_ROOT}/test/cpp/aoti_inference)
 # Build custom TorchScript op for AOTInductor
 add_library(aoti_custom_class SHARED aoti_custom_class.cpp)
@@ -31,7 +31,7 @@ set(INDUCTOR_TEST_SRCS
   ${AOT_INDUCTOR_TEST_ROOT}/test.cpp
 )
-add_executable(test_aot_inductor
+add_executable(test_aoti_inference
   ${TORCH_ROOT}/test/cpp/common/main.cpp
   ${INDUCTOR_TEST_SRCS}
   data.pt
@@ -39,10 +39,10 @@ add_executable(test_aot_inductor
   script_model_cpu.pt
   script_model_cuda.pt
 )
-add_dependencies(test_aot_inductor aoti_custom_class aoti_script_model)
+add_dependencies(test_aoti_inference aoti_custom_class aoti_script_model)
 # TODO temporary until we can delete the old gtest polyfills.
-target_compile_definitions(test_aot_inductor PRIVATE USE_GTEST)
+target_compile_definitions(test_aoti_inference PRIVATE USE_GTEST)
 # Define a custom command to generate the library
 add_custom_command(
@@ -51,24 +51,24 @@ add_custom_command(
   DEPENDS ${AOT_INDUCTOR_TEST_ROOT}/test.py
 )
-target_link_libraries(test_aot_inductor PRIVATE
+target_link_libraries(test_aoti_inference PRIVATE
   torch
   gtest
   -Wl,--no-as-needed aoti_custom_class
 )
 if(USE_CUDA)
-  target_include_directories(test_aot_inductor PRIVATE ${ATen_CUDA_INCLUDE})
-  target_compile_definitions(test_aot_inductor PRIVATE USE_CUDA)
+  target_include_directories(test_aoti_inference PRIVATE ${ATen_CUDA_INCLUDE})
+  target_compile_definitions(test_aoti_inference PRIVATE USE_CUDA)
 endif()
-target_compile_definitions(test_aot_inductor PRIVATE
+target_compile_definitions(test_aoti_inference PRIVATE
   CMAKE_CURRENT_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}
 )
 if(INSTALL_TEST)
-  install(TARGETS test_aot_inductor DESTINATION bin)
+  install(TARGETS test_aoti_inference DESTINATION bin)
   # Install PDB files for MSVC builds
   if(MSVC AND BUILD_SHARED_LIBS)
-    install(FILES $<TARGET_PDB_FILE:test_aot_inductor> DESTINATION bin OPTIONAL)
+    install(FILES $<TARGET_PDB_FILE:test_aoti_inference> DESTINATION bin OPTIONAL)
   endif()
 endif()

View File

@@ -283,7 +283,7 @@ void test_aoti_double_buffering_with_tensor_constants() {
 } // namespace
 namespace torch {
-namespace inductor {
+namespace aot_inductor {
 TEST(AotInductorTest, BasicTestCpu) {
   test_aoti("cpu", false);
@@ -324,5 +324,5 @@ TEST(AotInductorTest, UpdateInactiveConstantsWithTensorConstantsCuda) {
 }
 #endif
-} // namespace inductor
+} // namespace aot_inductor
 } // namespace torch

View File

@@ -18,6 +18,7 @@ std::string add_negative_flag(const std::string& flag) {
 int main(int argc, char* argv[]) {
   ::testing::InitGoogleTest(&argc, argv);
   if (!torch::cuda::is_available()) {
     std::cout << "CUDA not available. Disabling CUDA and MultiCUDA tests"
               << std::endl;

View File

@@ -7,6 +7,12 @@
 #include <limits>
 #include <omp.h>
+// WARNING: be extra careful when including more ATen/c10 header files here!
+// Because AOTInductor generated code will copy-paste this cpp_prefix.h for
+// the CPU backend, we have to make sure the used headers are implemented
+// in a header-only way, i.e. all the function and class definitions are
+// in .h files instead of .cpp files, to avoid ABI backward-compatibility breakage.
 #include <ATen/NumericUtils.h>
 #include <ATen/core/PhiloxRNGEngine.h>
 #include <ATen/native/Math.h>