mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[reland][ROCm] use hiprtc precompiled header (#55965)
Summary:
Revert "Revert D27449031 (2a7df657fe
): [pytorch][PR] [ROCm] use hiprtc precompiled header". Reland PR https://github.com/pytorch/pytorch/issues/54350.
This reverts commit 204ac21bf1457022caab197001788239720b96d6.
The original PR was reverted under suspicion that it was causing CI instability, but it was instead due to a hardware failure.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/55965
Reviewed By: jbschlosser
Differential Revision: D27755907
Pulled By: malfet
fbshipit-source-id: 75bf0b9d888df3dee62f00a366b1123757e0474e
This commit is contained in:
committed by
Facebook GitHub Bot
parent
f02454f957
commit
e1752ffa04
@ -1215,6 +1215,7 @@ if(USE_ROCM)
|
||||
list(APPEND HIP_CXX_FLAGS -Wno-implicit-int-float-conversion)
|
||||
list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN)
|
||||
list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
|
||||
list(APPEND HIP_CXX_FLAGS -DROCM_VERSION=${ROCM_VERSION_DEV_INT})
|
||||
list(APPEND HIP_CXX_FLAGS -std=c++14)
|
||||
|
||||
if(CMAKE_BUILD_TYPE MATCHES Debug)
|
||||
|
@ -161,12 +161,14 @@ if(HIP_FOUND)
|
||||
set(ROCM_VERSION_DEV_MINOR ${CMAKE_MATCH_2})
|
||||
set(ROCM_VERSION_DEV_PATCH ${CMAKE_MATCH_3})
|
||||
set(ROCM_VERSION_DEV "${ROCM_VERSION_DEV_MAJOR}.${ROCM_VERSION_DEV_MINOR}.${ROCM_VERSION_DEV_PATCH}")
|
||||
math(EXPR ROCM_VERSION_DEV_INT "(${ROCM_VERSION_DEV_MAJOR}*10000) + (${ROCM_VERSION_DEV_MINOR}*100) + ${ROCM_VERSION_DEV_PATCH}")
|
||||
endif()
|
||||
message("\n***** ROCm version from ${ROCM_PATH}/.info/version-dev ****\n")
|
||||
message("ROCM_VERSION_DEV: ${ROCM_VERSION_DEV}")
|
||||
message("ROCM_VERSION_DEV_MAJOR: ${ROCM_VERSION_DEV_MAJOR}")
|
||||
message("ROCM_VERSION_DEV_MINOR: ${ROCM_VERSION_DEV_MINOR}")
|
||||
message("ROCM_VERSION_DEV_PATCH: ${ROCM_VERSION_DEV_PATCH}")
|
||||
message("ROCM_VERSION_DEV_INT: ${ROCM_VERSION_DEV_INT}")
|
||||
|
||||
message("\n***** Library versions from dpkg *****\n")
|
||||
execute_process(COMMAND dpkg -l COMMAND grep rocm-dev COMMAND awk "{print $2 \" VERSION: \" $3}")
|
||||
|
@ -28,8 +28,10 @@ std::string FusionExecutor::getStructuredCode(const std::string& kernel) {
|
||||
// generating cuda code;
|
||||
std::string code = "";
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
#if ROCM_VERSION < 40200
|
||||
code += std::string("#include <hip/hip_runtime.h>\n") +
|
||||
std::string("#include <hip/hip_fp16.h>\n");
|
||||
#endif
|
||||
#endif
|
||||
code += std::string("namespace ") + FusionExecutor::kernelNamespace() +
|
||||
" {\n" + executor_utils::kernelPreamble() + kernel + "}\n";
|
||||
|
@ -281,6 +281,9 @@ NvrtcFunction nvrtcCompile(
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
std::vector<const char*> args = {"--std=c++14"};
|
||||
#if ROCM_VERSION >= 40200
|
||||
args.push_back("-hip-pch");
|
||||
#endif
|
||||
#else
|
||||
const std::string compute = std::string("--gpu-architecture=") +
|
||||
#if CUDA_VERSION >= 11010
|
||||
|
@ -659,6 +659,24 @@ std::string generateKernel(
|
||||
env.s("RandInit", "");
|
||||
}
|
||||
|
||||
// HIP headers must be included until precompiled header feature is available
|
||||
// clang-format off
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
#if ROCM_VERSION < 40200
|
||||
if (use_cuda && has_half_tensor) {
|
||||
env.s("RuntimeHeader", R"(
|
||||
#include <hip/hip_runtime.h>
|
||||
#include <hip/hip_fp16.h>
|
||||
)");
|
||||
} else if (use_cuda) {
|
||||
env.s("RuntimeHeader", R"(
|
||||
#include <hip/hip_runtime.h>
|
||||
)");
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
// clang-format on
|
||||
|
||||
// Instantiates the CUDA or CPU-specific templates
|
||||
env.s("tensorOffsets", tensorOffsets.str());
|
||||
env.s("tensorChecks", tensorChecks.str());
|
||||
|
@ -121,7 +121,10 @@ FusedKernelCUDA::FusedKernelCUDA(
|
||||
&program, code_.c_str(), nullptr, 0, nullptr, nullptr));
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
std::vector<const char*> args = {};
|
||||
std::vector<const char*> args = {"--std=c++14"};
|
||||
#if ROCM_VERSION >= 40200
|
||||
args.push_back("-hip-pch");
|
||||
#endif
|
||||
#else
|
||||
const std::string compute = std::string("--gpu-architecture=") +
|
||||
#if CUDA_VERSION >= 11010
|
||||
|
@ -15,7 +15,7 @@ cases*/
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
static auto type_declarations_template = CodeTemplate(R"(
|
||||
#include <hip/hip_runtime.h>
|
||||
${RuntimeHeader}
|
||||
${HalfHeader}
|
||||
${RandHeader}
|
||||
|
||||
@ -213,8 +213,6 @@ void ${kernelName}(IndexType totalElements, ${formals} ${RandParam}) {
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
constexpr auto half_support_literal =
|
||||
R"(
|
||||
#include <hip/hip_fp16.h>
|
||||
|
||||
typedef __half half;
|
||||
)";
|
||||
#else
|
||||
|
@ -887,7 +887,6 @@ static std::ostream& operator<<(
|
||||
|
||||
#ifdef USE_ROCM
|
||||
static const char* device_resource_string = R"(
|
||||
#include <hip/hip_runtime.h>
|
||||
#define POS_INFINITY INFINITY
|
||||
#define NEG_INFINITY -INFINITY
|
||||
|
||||
@ -930,17 +929,26 @@ void CudaCodeGen::Initialize() {
|
||||
metavar_rewriter_ =
|
||||
std::make_unique<GPUMetaVarRewriter>(cuda_analysis_.get());
|
||||
|
||||
// Check whether the statement uses the Half type, if so add the
|
||||
// half_support_literal.
|
||||
Stmt* stmt_v = stmt();
|
||||
HalfChecker halfChecker(buffer_args());
|
||||
stmt_v->accept(&halfChecker);
|
||||
|
||||
#if __HIP_PLATFORM_HCC__
|
||||
#if ROCM_VERSION < 40200
|
||||
os() << "#include <hip/hip_runtime.h>" << std::endl;
|
||||
if (halfChecker.hasHalf()) {
|
||||
os() << "#include <hip/hip_fp16.h>" << std::endl;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
os() << device_resource_string << shared_resource_string;
|
||||
|
||||
if (has_random_) {
|
||||
os() << philox_random_string << std::endl;
|
||||
}
|
||||
|
||||
// Check whether the statement uses the Half type, if so add the
|
||||
// half_support_literal.
|
||||
Stmt* stmt_v = stmt();
|
||||
HalfChecker halfChecker(buffer_args());
|
||||
stmt_v->accept(&halfChecker);
|
||||
if (halfChecker.hasHalf()) {
|
||||
os() << fuser::cuda::half_support_literal << std::endl;
|
||||
}
|
||||
@ -1203,7 +1211,10 @@ void CudaCodeGen::CompileToNVRTC(
|
||||
&program, code.c_str(), nullptr, 0, nullptr, nullptr));
|
||||
|
||||
#ifdef __HIP_PLATFORM_HCC__
|
||||
std::vector<const char*> args = {};
|
||||
std::vector<const char*> args = {"--std=c++14"};
|
||||
#if ROCM_VERSION >= 40200
|
||||
args.push_back("-hip-pch");
|
||||
#endif
|
||||
#else
|
||||
const std::string compute = std::string("--gpu-architecture=") +
|
||||
#if CUDA_VERSION >= 11010
|
||||
|
Reference in New Issue
Block a user