Add SVE implementation of embedding_lookup_idx (#133995)

Adds an SVE-accelerated version of the embedding_lookup_idx perfkernels. The kernels are generated by a Python codegen script, similar to `caffe2/perfkernels/hp_emblookup_codegen.py`.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/133995
Approved by: https://github.com/malfet, https://github.com/huydhn
This commit is contained in:
Siddhartha Menon
2024-10-15 18:52:44 +00:00
committed by PyTorch MergeBot
parent b09d6f3a7d
commit e1e6417d4c
8 changed files with 7265 additions and 9 deletions

View File

@ -101,6 +101,16 @@ endif()
# Also, we will turn off deprecated-declarations
# due to protobuf.
# ---[ Check if the compiler has SVE support.
# The result is read from CXX_SVE_FOUND after the ARM package lookup; when it
# is set, CAFFE2_PERF_WITH_SVE=1 is added as a compile definition.
find_package(ARM) # checks SVE
if(NOT CXX_SVE_FOUND)
  message(STATUS "Compiler does not support SVE extension. Will not build perfkernels.")
else()
  message(STATUS "Compiler supports SVE extension. Will build perfkernels.")
  # Also see CMakeLists.txt under caffe2/perfkernels.
  add_compile_definitions(CAFFE2_PERF_WITH_SVE=1)
endif()
if(IOS AND (${IOS_ARCH} MATCHES "armv7*"))
add_definitions("-mfpu=neon-fp16")
add_definitions("-arch" ${IOS_ARCH})

View File

@ -21,10 +21,10 @@ if("${ACL_VERSION_FILE}" STREQUAL "")
message(WARNING "Build may fail: Could not determine ACL version (minimum required is ${ACL_MINIMUM_VERSION})")
else()
file(READ ${ACL_VERSION_FILE} ACL_VERSION_STRING)
string(REGEX MATCH "v([0-9]+\\.[0-9]+)" ACL_VERSION ${ACL_VERSION_STRING})
string(REGEX MATCH "v([0-9]+\\.[0-9]+)" ACL_VERSION "${ACL_VERSION_STRING}")
set(ACL_VERSION "${CMAKE_MATCH_1}")
if(${ACL_VERSION} VERSION_EQUAL "0.0")
if("${ACL_VERSION}" VERSION_EQUAL "0.0")
# Unreleased ACL versions come with version string "v0.0-unreleased", and may not be compatible with oneDNN.
# It is recommended to use the latest release of ACL.
message(WARNING "Build may fail: Using unreleased ACL version (minimum required is ${ACL_MINIMUM_VERSION})")