[AOTI] Add ABI-compatibility tests (#123848)

Summary: In AOTInductor-generated CPU model code, there can be direct references to some aten/c10 utility functions and data structures, e.g. at::vec and c10::Half. These are performance-critical, so it doesn't make sense to create a C shim for them. Instead, we make sure they are implemented in a header-only way, and we use this set of tests to guard future changes.

There are more header files to be updated, but we will do that in follow-up PRs.
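As background for why header-only matters, the sketch below is hypothetical and not taken from this commit (the kernel names and signatures are made up for illustration), but it shows the flavor of code AOTInductor generates for CPU models: it includes aten/c10 headers such as ATen/cpu/vec/vec.h and c10/util/Half.h directly, with no C shim in between, so everything those headers provide must be defined inline in headers for the compiled model to remain ABI-compatible.

// Hypothetical sketch (not part of this commit): the kind of direct aten/c10
// usage that AOTInductor-generated CPU code contains.
#include <ATen/cpu/vec/vec.h>
#include <c10/util/Half.h>
#include <cstdint>

// Vectorized elementwise add, using at::vec::Vectorized directly.
void generated_add_kernel(const float* a, const float* b, float* out, int64_t n) {
  using Vec = at::vec::Vectorized<float>;
  int64_t i = 0;
  for (; i + Vec::size() <= n; i += Vec::size()) {
    (Vec::loadu(a + i) + Vec::loadu(b + i)).store(out + i);
  }
  for (; i < n; ++i) {
    out[i] = a[i] + b[i];  // scalar tail
  }
}

// Half-precision cast, relying on c10::Half's header-only conversions.
void generated_half_cast_kernel(const c10::Half* in, float* out, int64_t n) {
  for (int64_t i = 0; i < n; ++i) {
    out[i] = static_cast<float>(in[i]) * 2.0f;
  }
}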

Pull Request resolved: https://github.com/pytorch/pytorch/pull/123848
Approved by: https://github.com/jansel
ghstack dependencies: #123847
Bin Bao
2024-04-11 19:55:18 -07:00
committed by PyTorch MergeBot
parent cbefaf2a37
commit 4946638f06
15 changed files with 160 additions and 16 deletions

View File

@@ -334,7 +334,7 @@ test_inductor() {
   # TODO: need a faster way to build
   if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
     BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop
-    CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aot_inductor
+    CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference
   fi
 }

.github/labeler.yml
View File

@@ -36,7 +36,8 @@
 - torch/distributed/_tensor/**
 - torch/distributed/fsdp/**
 - torch/csrc/inductor/**
-- test/cpp/aot_inductor/**
+- test/cpp/aoti_abi_check/**
+- test/cpp/aoti_inference/**
 "module: cpu":
 - aten/src/ATen/cpu/**

View File

@@ -1336,8 +1336,11 @@ if(BUILD_TEST)
   endif()
   if(BUILD_AOT_INDUCTOR_TEST)
     add_subdirectory(
-      ${TORCH_ROOT}/test/cpp/aot_inductor
-      ${CMAKE_BINARY_DIR}/test_aot_inductor)
+      ${TORCH_ROOT}/test/cpp/aoti_abi_check
+      ${CMAKE_BINARY_DIR}/test_aoti_abi_check)
+    add_subdirectory(
+      ${TORCH_ROOT}/test/cpp/aoti_inference
+      ${CMAKE_BINARY_DIR}/test_aoti_inference)
   endif()
 endif()

View File

@@ -0,0 +1,27 @@
set(AOTI_ABI_CHECK_TEST_ROOT ${TORCH_ROOT}/test/cpp/aoti_abi_check)
# Build the cpp gtest binary containing the cpp-only tests.
set(AOTI_ABI_CHECK_TEST_SRCS
${AOTI_ABI_CHECK_TEST_ROOT}/main.cpp
${AOTI_ABI_CHECK_TEST_ROOT}/test_dtype.cpp
)
add_executable(test_aoti_abi_check
${AOTI_ABI_CHECK_TEST_SRCS}
)
# TODO temporary until we can delete the old gtest polyfills.
target_compile_definitions(test_aoti_abi_check PRIVATE USE_GTEST)
# WARNING: DO NOT LINK torch!!!
# The purpose is to check if the used aten/c10 headers are written in a header-only way
target_link_libraries(test_aoti_abi_check PRIVATE gtest)
target_include_directories(test_aoti_abi_check PRIVATE ${ATen_CPU_INCLUDE})
if(INSTALL_TEST)
install(TARGETS test_aoti_abi_check DESTINATION bin)
# Install PDB files for MSVC builds
if(MSVC AND BUILD_SHARED_LIBS)
install(FILES $<TARGET_PDB_FILE:test_aoti_abi_check> DESTINATION bin OPTIONAL)
endif()
endif()

View File

@@ -0,0 +1 @@
Tests in this directory are meant to guard that certain ATen/c10 util functions and data structures are implemented in a header-only fashion, to make sure AOTInductor-generated CPU model code is ABI backward-compatible.

View File

@@ -0,0 +1,6 @@
#include <gtest/gtest.h>
int main(int argc, char* argv[]) {
::testing::InitGoogleTest(&argc, argv);
return RUN_ALL_TESTS();
}

View File

@@ -0,0 +1,99 @@
#include <gtest/gtest.h>
#include <c10/util/BFloat16-math.h>
#include <c10/util/BFloat16.h>
#include <c10/util/Float8_e4m3fn.h>
#include <c10/util/Float8_e4m3fnuz.h>
#include <c10/util/Float8_e5m2.h>
#include <c10/util/Float8_e5m2fnuz.h>
#include <c10/util/Half.h>
namespace torch {
namespace aot_inductor {
TEST(TestDtype, TestBFloat16) {
c10::BFloat16 a = 1.0f;
c10::BFloat16 b = 2.0f;
c10::BFloat16 add = 3.0f;
c10::BFloat16 sub = -1.0f;
c10::BFloat16 mul = 2.0f;
c10::BFloat16 div = 0.5f;
EXPECT_EQ(a + b, add);
EXPECT_EQ(a - b, sub);
EXPECT_EQ(a * b, mul);
EXPECT_EQ(a / b, div);
}
TEST(TestDtype, TestFloat8_e4m3fn) {
c10::Float8_e4m3fn a = 1.0f;
c10::Float8_e4m3fn b = 2.0f;
c10::Float8_e4m3fn add = 3.0f;
c10::Float8_e4m3fn sub = -1.0f;
c10::Float8_e4m3fn mul = 2.0f;
c10::Float8_e4m3fn div = 0.5f;
EXPECT_EQ(a + b, add);
EXPECT_EQ(a - b, sub);
EXPECT_EQ(a * b, mul);
EXPECT_EQ(a / b, div);
}
TEST(TestDtype, TestFloat8_e4m3fnuz) {
c10::Float8_e4m3fnuz a = 1.0f;
c10::Float8_e4m3fnuz b = 2.0f;
c10::Float8_e4m3fnuz add = 3.0f;
c10::Float8_e4m3fnuz sub = -1.0f;
c10::Float8_e4m3fnuz mul = 2.0f;
c10::Float8_e4m3fnuz div = 0.5f;
EXPECT_EQ(a + b, add);
EXPECT_EQ(a - b, sub);
EXPECT_EQ(a * b, mul);
EXPECT_EQ(a / b, div);
}
TEST(TestDtype, TestFloat8_e5m2) {
c10::Float8_e5m2 a = 1.0f;
c10::Float8_e5m2 b = 2.0f;
c10::Float8_e5m2 add = 3.0f;
c10::Float8_e5m2 sub = -1.0f;
c10::Float8_e5m2 mul = 2.0f;
c10::Float8_e5m2 div = 0.5f;
EXPECT_EQ(a + b, add);
EXPECT_EQ(a - b, sub);
EXPECT_EQ(a * b, mul);
EXPECT_EQ(a / b, div);
}
TEST(TestDtype, TestFloat8_e5m2fnuz) {
c10::Float8_e5m2fnuz a = 1.0f;
c10::Float8_e5m2fnuz b = 2.0f;
c10::Float8_e5m2fnuz add = 3.0f;
c10::Float8_e5m2fnuz sub = -1.0f;
c10::Float8_e5m2fnuz mul = 2.0f;
c10::Float8_e5m2fnuz div = 0.5f;
EXPECT_EQ(a + b, add);
EXPECT_EQ(a - b, sub);
EXPECT_EQ(a * b, mul);
EXPECT_EQ(a / b, div);
}
TEST(TestDtype, TestHalf) {
c10::Half a = 1.0f;
c10::Half b = 2.0f;
c10::Half add = 3.0f;
c10::Half sub = -1.0f;
c10::Half mul = 2.0f;
c10::Half div = 0.5f;
EXPECT_EQ(a + b, add);
EXPECT_EQ(a - b, sub);
EXPECT_EQ(a * b, mul);
EXPECT_EQ(a / b, div);
}
} // namespace aot_inductor
} // namespace torch

View File

@@ -1,5 +1,5 @@
-set(AOT_INDUCTOR_TEST_ROOT ${TORCH_ROOT}/test/cpp/aot_inductor)
+set(AOT_INDUCTOR_TEST_ROOT ${TORCH_ROOT}/test/cpp/aoti_inference)
 # Build custom TorchScript op for AOTInductor
 add_library(aoti_custom_class SHARED aoti_custom_class.cpp)
@@ -31,7 +31,7 @@ set(INDUCTOR_TEST_SRCS
   ${AOT_INDUCTOR_TEST_ROOT}/test.cpp
 )
-add_executable(test_aot_inductor
+add_executable(test_aoti_inference
   ${TORCH_ROOT}/test/cpp/common/main.cpp
   ${INDUCTOR_TEST_SRCS}
   data.pt
@@ -39,10 +39,10 @@ add_executable(test_aot_inductor
   script_model_cpu.pt
   script_model_cuda.pt
 )
-add_dependencies(test_aot_inductor aoti_custom_class aoti_script_model)
+add_dependencies(test_aoti_inference aoti_custom_class aoti_script_model)
 # TODO temporary until we can delete the old gtest polyfills.
-target_compile_definitions(test_aot_inductor PRIVATE USE_GTEST)
+target_compile_definitions(test_aoti_inference PRIVATE USE_GTEST)
 # Define a custom command to generate the library
 add_custom_command(
@@ -51,24 +51,24 @@ add_custom_command(
   DEPENDS ${AOT_INDUCTOR_TEST_ROOT}/test.py
 )
-target_link_libraries(test_aot_inductor PRIVATE
+target_link_libraries(test_aoti_inference PRIVATE
   torch
   gtest
   -Wl,--no-as-needed aoti_custom_class
 )
 if(USE_CUDA)
-  target_include_directories(test_aot_inductor PRIVATE ${ATen_CUDA_INCLUDE})
-  target_compile_definitions(test_aot_inductor PRIVATE USE_CUDA)
+  target_include_directories(test_aoti_inference PRIVATE ${ATen_CUDA_INCLUDE})
+  target_compile_definitions(test_aoti_inference PRIVATE USE_CUDA)
 endif()
-target_compile_definitions(test_aot_inductor PRIVATE
+target_compile_definitions(test_aoti_inference PRIVATE
   CMAKE_CURRENT_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}
 )
 if(INSTALL_TEST)
-  install(TARGETS test_aot_inductor DESTINATION bin)
+  install(TARGETS test_aoti_inference DESTINATION bin)
   # Install PDB files for MSVC builds
   if(MSVC AND BUILD_SHARED_LIBS)
-    install(FILES $<TARGET_PDB_FILE:test_aot_inductor> DESTINATION bin OPTIONAL)
+    install(FILES $<TARGET_PDB_FILE:test_aoti_inference> DESTINATION bin OPTIONAL)
   endif()
 endif()

View File

@@ -283,7 +283,7 @@ void test_aoti_double_buffering_with_tensor_constants() {
 } // namespace
 namespace torch {
-namespace inductor {
+namespace aot_inductor {
 TEST(AotInductorTest, BasicTestCpu) {
   test_aoti("cpu", false);
@@ -324,5 +324,5 @@ TEST(AotInductorTest, UpdateInactiveConstantsWithTensorConstantsCuda) {
 }
 #endif
-} // namespace inductor
+} // namespace aot_inductor
 } // namespace torch

View File

@@ -18,6 +18,7 @@ std::string add_negative_flag(const std::string& flag) {
 int main(int argc, char* argv[]) {
   ::testing::InitGoogleTest(&argc, argv);
   if (!torch::cuda::is_available()) {
     std::cout << "CUDA not available. Disabling CUDA and MultiCUDA tests"
               << std::endl;

View File

@@ -7,6 +7,12 @@
 #include <limits>
 #include <omp.h>
+// WARNING: be extra careful when including more ATen/c10 header files here!
+// Because AOTInductor generated code will copy-paste this cpp_prefix.h for
+// the CPU backend, we have to make sure the used headers are implemented
+// in a header-only way, i.e. all the function and class definitions are
+// in .h files instead of .cpp files, to avoid ABI backward-compatibility breakage.
 #include <ATen/NumericUtils.h>
 #include <ATen/core/PhiloxRNGEngine.h>
 #include <ATen/native/Math.h>