mirror of https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[AOTI] Add ABI-compatibility tests (#123848)

Summary: In AOTInductor-generated CPU model code, there can be direct references to some aten/c10 utility functions and data structures, e.g. at::vec and c10::Half. These are performance-critical, so it does not make sense to create C shims for them. Instead, we make sure they are implemented in a header-only way, and we use this set of tests to guard future changes. More header files need to be updated, but that will be done in follow-up PRs.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/123848
Approved by: https://github.com/jansel
ghstack dependencies: #123847
committed by PyTorch MergeBot
parent cbefaf2a37
commit 4946638f06
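Before the file diffs, here is a minimal, hypothetical sketch (not part of this commit; the kernel name and buffers are invented for illustration) of the kind of generated-CPU-code-style arithmetic that reaches directly into at::vec and c10::Half. It only needs ATen/c10 headers and does not link libtorch, which is the property the new test_aoti_abi_check suite guards:

// Hypothetical illustration only -- not part of the commit.
// Shows the kind of header-only aten/c10 usage that AOTInductor-generated
// CPU code relies on: it compiles against ATen/c10 headers without linking
// libtorch, which is the property the new ABI-compatibility tests guard.
#include <cstdint>

#include <ATen/cpu/vec/vec.h>
#include <c10/util/Half.h>

// Invented example kernel: add two float buffers with at::vec, then
// narrow the result to c10::Half, all via header-only utilities.
void example_add_to_half(const float* a, const float* b, c10::Half* out, int64_t n) {
  using Vec = at::vec::Vectorized<float>;
  int64_t i = 0;
  for (; i + Vec::size() <= n; i += Vec::size()) {
    Vec sum = Vec::loadu(a + i) + Vec::loadu(b + i);
    float tmp[Vec::size()];
    sum.store(tmp);
    for (int64_t j = 0; j < Vec::size(); ++j) {
      out[i + j] = c10::Half(tmp[j]);  // header-only float -> half conversion
    }
  }
  for (; i < n; ++i) {  // scalar tail
    out[i] = c10::Half(a[i] + b[i]);
  }
}

If any of these utilities stopped being header-only, a standalone translation unit like this would fail to compile or link, which is exactly what the new tests are meant to catch early.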
@@ -334,7 +334,7 @@ test_inductor() {
   # TODO: need a faster way to build
   if [[ "$BUILD_ENVIRONMENT" != *rocm* ]]; then
     BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop
-    CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aot_inductor
+    CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference
   fi
 }
.github/labeler.yml (3 changes)
@@ -36,7 +36,8 @@
 - torch/distributed/_tensor/**
 - torch/distributed/fsdp/**
 - torch/csrc/inductor/**
-- test/cpp/aot_inductor/**
+- test/cpp/aoti_abi_check/**
+- test/cpp/aoti_inference/**

 "module: cpu":
 - aten/src/ATen/cpu/**
@@ -1336,8 +1336,11 @@ if(BUILD_TEST)
   endif()
   if(BUILD_AOT_INDUCTOR_TEST)
     add_subdirectory(
-      ${TORCH_ROOT}/test/cpp/aot_inductor
-      ${CMAKE_BINARY_DIR}/test_aot_inductor)
+      ${TORCH_ROOT}/test/cpp/aoti_abi_check
+      ${CMAKE_BINARY_DIR}/test_aoti_abi_check)
+    add_subdirectory(
+      ${TORCH_ROOT}/test/cpp/aoti_inference
+      ${CMAKE_BINARY_DIR}/test_aoti_inference)
   endif()
 endif()
test/cpp/aoti_abi_check/CMakeLists.txt (new file, 27 lines)
@@ -0,0 +1,27 @@
+set(AOTI_ABI_CHECK_TEST_ROOT ${TORCH_ROOT}/test/cpp/aoti_abi_check)
+
+# Build the cpp gtest binary containing the cpp-only tests.
+set(AOTI_ABI_CHECK_TEST_SRCS
+  ${AOTI_ABI_CHECK_TEST_ROOT}/main.cpp
+  ${AOTI_ABI_CHECK_TEST_ROOT}/test_dtype.cpp
+)
+
+add_executable(test_aoti_abi_check
+  ${AOTI_ABI_CHECK_TEST_SRCS}
+)
+
+# TODO temporary until we can delete the old gtest polyfills.
+target_compile_definitions(test_aoti_abi_check PRIVATE USE_GTEST)
+
+# WARNING: DO NOT LINK torch!!!
+# The purpose is to check if the used aten/c10 headers are written in a header-only way
+target_link_libraries(test_aoti_abi_check PRIVATE gtest)
+target_include_directories(test_aoti_abi_check PRIVATE ${ATen_CPU_INCLUDE})
+
+if(INSTALL_TEST)
+  install(TARGETS test_aoti_abi_check DESTINATION bin)
+  # Install PDB files for MSVC builds
+  if(MSVC AND BUILD_SHARED_LIBS)
+    install(FILES $<TARGET_PDB_FILE:test_aoti_abi_check> DESTINATION bin OPTIONAL)
+  endif()
+endif()
test/cpp/aoti_abi_check/README.md (new file, 1 line)
@@ -0,0 +1 @@
+Tests in this directory are meant to guard that certain ATen/c10 util functions and data structures are implemented in a header-only fashion, to make sure AOTInductor-generated CPU model code stays ABI backward-compatible.
test/cpp/aoti_abi_check/main.cpp (new file, 6 lines)
@@ -0,0 +1,6 @@
+#include <gtest/gtest.h>
+
+int main(int argc, char* argv[]) {
+  ::testing::InitGoogleTest(&argc, argv);
+  return RUN_ALL_TESTS();
+}
test/cpp/aoti_abi_check/test_dtype.cpp (new file, 99 lines)
@@ -0,0 +1,99 @@
+#include <gtest/gtest.h>
+
+#include <c10/util/BFloat16-math.h>
+#include <c10/util/BFloat16.h>
+#include <c10/util/Float8_e4m3fn.h>
+#include <c10/util/Float8_e4m3fnuz.h>
+#include <c10/util/Float8_e5m2.h>
+#include <c10/util/Float8_e5m2fnuz.h>
+#include <c10/util/Half.h>
+
+namespace torch {
+namespace aot_inductor {
+
+TEST(TestDtype, TestBFloat16) {
+  c10::BFloat16 a = 1.0f;
+  c10::BFloat16 b = 2.0f;
+  c10::BFloat16 add = 3.0f;
+  c10::BFloat16 sub = -1.0f;
+  c10::BFloat16 mul = 2.0f;
+  c10::BFloat16 div = 0.5f;
+
+  EXPECT_EQ(a + b, add);
+  EXPECT_EQ(a - b, sub);
+  EXPECT_EQ(a * b, mul);
+  EXPECT_EQ(a / b, div);
+}
+
+TEST(TestDtype, TestFloat8_e4m3fn) {
+  c10::Float8_e4m3fn a = 1.0f;
+  c10::Float8_e4m3fn b = 2.0f;
+  c10::Float8_e4m3fn add = 3.0f;
+  c10::Float8_e4m3fn sub = -1.0f;
+  c10::Float8_e4m3fn mul = 2.0f;
+  c10::Float8_e4m3fn div = 0.5f;
+
+  EXPECT_EQ(a + b, add);
+  EXPECT_EQ(a - b, sub);
+  EXPECT_EQ(a * b, mul);
+  EXPECT_EQ(a / b, div);
+}
+
+TEST(TestDtype, TestFloat8_e4m3fuz) {
+  c10::Float8_e4m3fnuz a = 1.0f;
+  c10::Float8_e4m3fnuz b = 2.0f;
+  c10::Float8_e4m3fnuz add = 3.0f;
+  c10::Float8_e4m3fnuz sub = -1.0f;
+  c10::Float8_e4m3fnuz mul = 2.0f;
+  c10::Float8_e4m3fnuz div = 0.5f;
+
+  EXPECT_EQ(a + b, add);
+  EXPECT_EQ(a - b, sub);
+  EXPECT_EQ(a * b, mul);
+  EXPECT_EQ(a / b, div);
+}
+
+TEST(TestDtype, TestFloat8_e5m2) {
+  c10::Float8_e5m2 a = 1.0f;
+  c10::Float8_e5m2 b = 2.0f;
+  c10::Float8_e5m2 add = 3.0f;
+  c10::Float8_e5m2 sub = -1.0f;
+  c10::Float8_e5m2 mul = 2.0f;
+  c10::Float8_e5m2 div = 0.5f;
+
+  EXPECT_EQ(a + b, add);
+  EXPECT_EQ(a - b, sub);
+  EXPECT_EQ(a * b, mul);
+  EXPECT_EQ(a / b, div);
+}
+
+TEST(TestDtype, TestFloat8_e5m2fnuz) {
+  c10::Float8_e5m2fnuz a = 1.0f;
+  c10::Float8_e5m2fnuz b = 2.0f;
+  c10::Float8_e5m2fnuz add = 3.0f;
+  c10::Float8_e5m2fnuz sub = -1.0f;
+  c10::Float8_e5m2fnuz mul = 2.0f;
+  c10::Float8_e5m2fnuz div = 0.5f;
+
+  EXPECT_EQ(a + b, add);
+  EXPECT_EQ(a - b, sub);
+  EXPECT_EQ(a * b, mul);
+  EXPECT_EQ(a / b, div);
+}
+
+TEST(TestDtype, TestHalf) {
+  c10::Half a = 1.0f;
+  c10::Half b = 2.0f;
+  c10::Half add = 3.0f;
+  c10::Half sub = -1.0f;
+  c10::Half mul = 2.0f;
+  c10::Half div = 0.5f;
+
+  EXPECT_EQ(a + b, add);
+  EXPECT_EQ(a - b, sub);
+  EXPECT_EQ(a * b, mul);
+  EXPECT_EQ(a / b, div);
+}
+
+} // namespace aot_inductor
+} // namespace torch
@@ -1,5 +1,5 @@

-set(AOT_INDUCTOR_TEST_ROOT ${TORCH_ROOT}/test/cpp/aot_inductor)
+set(AOT_INDUCTOR_TEST_ROOT ${TORCH_ROOT}/test/cpp/aoti_inference)

 # Build custom TorchScript op for AOTInductor
 add_library(aoti_custom_class SHARED aoti_custom_class.cpp)
@@ -31,7 +31,7 @@ set(INDUCTOR_TEST_SRCS
   ${AOT_INDUCTOR_TEST_ROOT}/test.cpp
 )

-add_executable(test_aot_inductor
+add_executable(test_aoti_inference
   ${TORCH_ROOT}/test/cpp/common/main.cpp
   ${INDUCTOR_TEST_SRCS}
   data.pt
@@ -39,10 +39,10 @@ add_executable(test_aot_inductor
   script_model_cpu.pt
   script_model_cuda.pt
 )
-add_dependencies(test_aot_inductor aoti_custom_class aoti_script_model)
+add_dependencies(test_aoti_inference aoti_custom_class aoti_script_model)

 # TODO temporary until we can delete the old gtest polyfills.
-target_compile_definitions(test_aot_inductor PRIVATE USE_GTEST)
+target_compile_definitions(test_aoti_inference PRIVATE USE_GTEST)

 # Define a custom command to generate the library
 add_custom_command(
@@ -51,24 +51,24 @@ add_custom_command(
   DEPENDS ${AOT_INDUCTOR_TEST_ROOT}/test.py
 )

-target_link_libraries(test_aot_inductor PRIVATE
+target_link_libraries(test_aoti_inference PRIVATE
   torch
   gtest
   -Wl,--no-as-needed aoti_custom_class
 )

 if(USE_CUDA)
-  target_include_directories(test_aot_inductor PRIVATE ${ATen_CUDA_INCLUDE})
-  target_compile_definitions(test_aot_inductor PRIVATE USE_CUDA)
+  target_include_directories(test_aoti_inference PRIVATE ${ATen_CUDA_INCLUDE})
+  target_compile_definitions(test_aoti_inference PRIVATE USE_CUDA)
 endif()
-target_compile_definitions(test_aot_inductor PRIVATE
+target_compile_definitions(test_aoti_inference PRIVATE
   CMAKE_CURRENT_BINARY_DIR=${CMAKE_CURRENT_BINARY_DIR}
 )

 if(INSTALL_TEST)
-  install(TARGETS test_aot_inductor DESTINATION bin)
+  install(TARGETS test_aoti_inference DESTINATION bin)
   # Install PDB files for MSVC builds
   if(MSVC AND BUILD_SHARED_LIBS)
-    install(FILES $<TARGET_PDB_FILE:test_aot_inductor> DESTINATION bin OPTIONAL)
+    install(FILES $<TARGET_PDB_FILE:test_aoti_inference> DESTINATION bin OPTIONAL)
   endif()
 endif()
@@ -283,7 +283,7 @@ void test_aoti_double_buffering_with_tensor_constants() {
 } // namespace

 namespace torch {
-namespace inductor {
+namespace aot_inductor {

 TEST(AotInductorTest, BasicTestCpu) {
   test_aoti("cpu", false);
@@ -324,5 +324,5 @@ TEST(AotInductorTest, UpdateInactiveConstantsWithTensorConstantsCuda) {
 }
 #endif

-} // namespace inductor
+} // namespace aot_inductor
 } // namespace torch
@@ -18,6 +18,7 @@ std::string add_negative_flag(const std::string& flag) {

 int main(int argc, char* argv[]) {
   ::testing::InitGoogleTest(&argc, argv);
+
   if (!torch::cuda::is_available()) {
     std::cout << "CUDA not available. Disabling CUDA and MultiCUDA tests"
               << std::endl;
@@ -7,6 +7,12 @@
 #include <limits>
 #include <omp.h>

+// WARNING: be extra careful when including more ATen/c10 header files here!
+// Because AOTInductor generated code will copy-paste this cpp_prefix.h for
+// the CPU backend, we have to make sure the used headers are implemented
+// in a header-only way, i.e. all the function and class definitions are
+// in .h files instead of .cpp files, to avoid ABI backward-compatibility breakage.
+
 #include <ATen/NumericUtils.h>
 #include <ATen/core/PhiloxRNGEngine.h>
 #include <ATen/native/Math.h>
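As a usage note, the updated CI step above reflects how the new suites are intended to be run locally: build the C++ test binaries with BUILD_AOT_INDUCTOR_TEST=1 python setup.py develop, then invoke them through the test runner with CPP_TESTS_DIR="${BUILD_BIN_DIR}" LD_LIBRARY_PATH="${TORCH_LIB_DIR}" python test/run_test.py --cpp --verbose -i cpp/test_aoti_abi_check cpp/test_aoti_inference.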