[reland][ROCm] use hiprtc precompiled header (#55965)

Summary:
Revert "Revert D27449031 (2a7df657fe): [pytorch][PR] [ROCm] use hiprtc precompiled header".  Reland PR https://github.com/pytorch/pytorch/issues/54350.

This reverts commit 204ac21bf1457022caab197001788239720b96d6.

The original PR was reverted under suspicion that it was causing CI instability, but it was instead due to a hardware failure.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/55965

Reviewed By: jbschlosser

Differential Revision: D27755907

Pulled By: malfet

fbshipit-source-id: 75bf0b9d888df3dee62f00a366b1123757e0474e
This commit is contained in:
Jeff Daily
2021-04-15 15:44:25 -07:00
committed by Facebook GitHub Bot
parent f02454f957
commit e1752ffa04
8 changed files with 49 additions and 11 deletions

View File

@@ -1215,6 +1215,7 @@ if(USE_ROCM)
list(APPEND HIP_CXX_FLAGS -Wno-implicit-int-float-conversion)
list(APPEND HIP_CXX_FLAGS -DCAFFE2_USE_MIOPEN)
list(APPEND HIP_CXX_FLAGS -DTHRUST_DEVICE_SYSTEM=THRUST_DEVICE_SYSTEM_HIP)
list(APPEND HIP_CXX_FLAGS -DROCM_VERSION=${ROCM_VERSION_DEV_INT})
list(APPEND HIP_CXX_FLAGS -std=c++14)
if(CMAKE_BUILD_TYPE MATCHES Debug)

View File

@@ -161,12 +161,14 @@ if(HIP_FOUND)
set(ROCM_VERSION_DEV_MINOR ${CMAKE_MATCH_2})
set(ROCM_VERSION_DEV_PATCH ${CMAKE_MATCH_3})
set(ROCM_VERSION_DEV "${ROCM_VERSION_DEV_MAJOR}.${ROCM_VERSION_DEV_MINOR}.${ROCM_VERSION_DEV_PATCH}")
math(EXPR ROCM_VERSION_DEV_INT "(${ROCM_VERSION_DEV_MAJOR}*10000) + (${ROCM_VERSION_DEV_MINOR}*100) + ${ROCM_VERSION_DEV_PATCH}")
endif()
message("\n***** ROCm version from ${ROCM_PATH}/.info/version-dev ****\n")
message("ROCM_VERSION_DEV: ${ROCM_VERSION_DEV}")
message("ROCM_VERSION_DEV_MAJOR: ${ROCM_VERSION_DEV_MAJOR}")
message("ROCM_VERSION_DEV_MINOR: ${ROCM_VERSION_DEV_MINOR}")
message("ROCM_VERSION_DEV_PATCH: ${ROCM_VERSION_DEV_PATCH}")
message("ROCM_VERSION_DEV_INT: ${ROCM_VERSION_DEV_INT}")
message("\n***** Library versions from dpkg *****\n")
execute_process(COMMAND dpkg -l COMMAND grep rocm-dev COMMAND awk "{print $2 \" VERSION: \" $3}")

View File

@@ -28,8 +28,10 @@ std::string FusionExecutor::getStructuredCode(const std::string& kernel) {
// generating cuda code;
std::string code = "";
#ifdef __HIP_PLATFORM_HCC__
#if ROCM_VERSION < 40200
code += std::string("#include <hip/hip_runtime.h>\n") +
std::string("#include <hip/hip_fp16.h>\n");
#endif
#endif
code += std::string("namespace ") + FusionExecutor::kernelNamespace() +
" {\n" + executor_utils::kernelPreamble() + kernel + "}\n";

View File

@@ -281,6 +281,9 @@ NvrtcFunction nvrtcCompile(
#ifdef __HIP_PLATFORM_HCC__
std::vector<const char*> args = {"--std=c++14"};
#if ROCM_VERSION >= 40200
args.push_back("-hip-pch");
#endif
#else
const std::string compute = std::string("--gpu-architecture=") +
#if CUDA_VERSION >= 11010

View File

@@ -659,6 +659,24 @@ std::string generateKernel(
env.s("RandInit", "");
}
// HIP headers must be included until precompiled header feature is available
// clang-format off
#ifdef __HIP_PLATFORM_HCC__
#if ROCM_VERSION < 40200
if (use_cuda && has_half_tensor) {
env.s("RuntimeHeader", R"(
#include <hip/hip_runtime.h>
#include <hip/hip_fp16.h>
)");
} else if (use_cuda) {
env.s("RuntimeHeader", R"(
#include <hip/hip_runtime.h>
)");
}
#endif
#endif
// clang-format on
// Instantiates the CUDA or CPU-specific templates
env.s("tensorOffsets", tensorOffsets.str());
env.s("tensorChecks", tensorChecks.str());

View File

@@ -121,7 +121,10 @@ FusedKernelCUDA::FusedKernelCUDA(
&program, code_.c_str(), nullptr, 0, nullptr, nullptr));
#ifdef __HIP_PLATFORM_HCC__
std::vector<const char*> args = {};
std::vector<const char*> args = {"--std=c++14"};
#if ROCM_VERSION >= 40200
args.push_back("-hip-pch");
#endif
#else
const std::string compute = std::string("--gpu-architecture=") +
#if CUDA_VERSION >= 11010

View File

@@ -15,7 +15,7 @@ cases*/
#ifdef __HIP_PLATFORM_HCC__
static auto type_declarations_template = CodeTemplate(R"(
#include <hip/hip_runtime.h>
${RuntimeHeader}
${HalfHeader}
${RandHeader}
@@ -213,8 +213,6 @@ void ${kernelName}(IndexType totalElements, ${formals} ${RandParam}) {
#ifdef __HIP_PLATFORM_HCC__
constexpr auto half_support_literal =
R"(
#include <hip/hip_fp16.h>
typedef __half half;
)";
#else

View File

@@ -887,7 +887,6 @@ static std::ostream& operator<<(
#ifdef USE_ROCM
static const char* device_resource_string = R"(
#include <hip/hip_runtime.h>
#define POS_INFINITY INFINITY
#define NEG_INFINITY -INFINITY
@@ -930,17 +929,26 @@ void CudaCodeGen::Initialize() {
metavar_rewriter_ =
std::make_unique<GPUMetaVarRewriter>(cuda_analysis_.get());
// Check whether the statement uses the Half type, if so add the
// half_support_literal.
Stmt* stmt_v = stmt();
HalfChecker halfChecker(buffer_args());
stmt_v->accept(&halfChecker);
#if __HIP_PLATFORM_HCC__
#if ROCM_VERSION < 40200
os() << "#include <hip/hip_runtime.h>" << std::endl;
if (halfChecker.hasHalf()) {
os() << "#include <hip/hip_fp16.h>" << std::endl;
}
#endif
#endif
os() << device_resource_string << shared_resource_string;
if (has_random_) {
os() << philox_random_string << std::endl;
}
// Check whether the statement uses the Half type, if so add the
// half_support_literal.
Stmt* stmt_v = stmt();
HalfChecker halfChecker(buffer_args());
stmt_v->accept(&halfChecker);
if (halfChecker.hasHalf()) {
os() << fuser::cuda::half_support_literal << std::endl;
}
@@ -1203,7 +1211,10 @@ void CudaCodeGen::CompileToNVRTC(
&program, code.c_str(), nullptr, 0, nullptr, nullptr));
#ifdef __HIP_PLATFORM_HCC__
std::vector<const char*> args = {};
std::vector<const char*> args = {"--std=c++14"};
#if ROCM_VERSION >= 40200
args.push_back("-hip-pch");
#endif
#else
const std::string compute = std::string("--gpu-architecture=") +
#if CUDA_VERSION >= 11010