Revert "[ROCm] Add ROCm AMDGPU support for inductor cpp codegen (#105141)"

This reverts commit 8ff00360a4daab7848307a9a0b1c81b1da873d0c.

Reverted https://github.com/pytorch/pytorch/pull/105141 on behalf of https://github.com/DanilBaibak due to breaking an internal build ([comment](https://github.com/pytorch/pytorch/pull/105141#issuecomment-1715629007))
PyTorch MergeBot
2023-09-12 12:29:55 +00:00
parent 5531a23b20
commit 5a7c008b30
6 changed files with 12 additions and 74 deletions


@@ -41,7 +41,7 @@ except unittest.SkipTest:
 RUN_CPU = HAS_CPU and not torch.backends.mps.is_available() and not IS_MACOS
-RUN_CUDA = HAS_CUDA and not TEST_WITH_ASAN
+RUN_CUDA = HAS_CUDA and not TEST_WITH_ASAN and not TEST_WITH_ROCM
 class CppWrapperTemplate:
@@ -91,37 +91,6 @@ test_failures_cuda_wrapper = {
     ),
 }
-if TEST_WITH_ROCM:
-    # Current skips for ROCm
-    rocm_exclude_list = [
-        "test_addmm",
-        "test_batch_norm_2d_2_cuda",
-        "test_bmm1_cuda",
-        "test_cat_cuda",
-        "test_convolution1_cuda",
-        "test_custom_op_cuda",
-        "test_foreach_cpp_wrapper",
-        "test_linear_relu",
-        "test_index_put_deterministic_fallback_cuda",
-        "test_index_tensor_cuda",
-        "test_multi_device_cuda",
-        "test_mm_plus_mm2",
-        "test_scaled_dot_product_efficient_attention_cuda",
-        "test_sum_dtype_cuda",
-        "test_transpose_cuda",
-        "test_index_tensor_cuda",
-    ]
-    # Create skip entries for both the cuda and cuda_dynamic_shapes variants
-    for test_name in rocm_exclude_list:
-        dynamic_shapes_test_name = f"{test_name}_dynamic_shapes"
-        test_failures_cuda_wrapper[test_name] = test_torchinductor.TestFailure(
-            ("cuda_wrapper",), is_skip=True
-        )
-        test_failures_cuda_wrapper[
-            dynamic_shapes_test_name
-        ] = test_torchinductor.TestFailure(("cuda_wrapper",), is_skip=True)
 def make_test_case(name, device, tests, condition=True, slow=False, func_inputs=None):
     test_name = f"{name}_{device}" if device else name
@@ -342,9 +311,7 @@ if RUN_CUDA:
     ]:
         make_test_case(item.name, item.device, item.tests)
-    test_torchinductor.copy_tests(
-        CudaWrapperTemplate, TestCudaWrapper, "cuda_wrapper", test_failures_cuda_wrapper
-    )
+    test_torchinductor.copy_tests(CudaWrapperTemplate, TestCudaWrapper, "cuda_wrapper")
     DynamicShapesCudaWrapperTemplate = (
         test_torchinductor_dynamic_shapes.make_dynamic_cls(CudaWrapperTemplate)

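For context on the block removed above: the pattern in the inductor test suite is to expand a plain list of test names into skip entries for both the static-shape test and its `_dynamic_shapes` variant, so that TestCudaWrapper and the generated dynamic-shapes class both honour the skip. A minimal self-contained sketch of that pattern, using a namedtuple as a stand-in for `test_torchinductor.TestFailure` (an illustrative assumption, not the real class):

from collections import namedtuple

# Hypothetical stand-in for test_torchinductor.TestFailure.
TestFailure = namedtuple("TestFailure", ["suffixes", "is_skip"])

test_failures_cuda_wrapper = {}
rocm_exclude_list = ["test_addmm", "test_bmm1_cuda"]  # two names from the removed list

for test_name in rocm_exclude_list:
    # Skip both the plain test and its dynamic-shapes variant, so the skip
    # applies to TestCudaWrapper and to the generated dynamic-shapes class alike.
    for name in (test_name, f"{test_name}_dynamic_shapes"):
        test_failures_cuda_wrapper[name] = TestFailure(("cuda_wrapper",), is_skip=True)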

@@ -195,7 +195,6 @@ hipify_python.hipify(
     output_directory=out_dir,
     includes=includes,
     ignores=ignores,
-    extra_files=["torch/_inductor/codegen/wrapper.py"],
     out_of_place_only=args.out_of_place_only,
     hip_clang_launch=is_hip_clang(),
 )

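The only change in this file is dropping the `extra_files` argument from the hipify call. As a rough sketch of the call's shape, with placeholder values standing in for what build_amd.py actually computes (`project_directory` is an assumed kwarg; only the arguments visible in the diff are taken from the source):

from torch.utils.hipify import hipify_python

# Placeholder inputs; the real script derives these from its CLI arguments.
out_dir = "."
includes = ["aten/*", "caffe2/*"]
ignores = []

hipify_python.hipify(
    project_directory=out_dir,   # assumption: project root passed alongside the output dir
    output_directory=out_dir,
    includes=includes,
    ignores=ignores,
    # The reverted PR additionally passed:
    # extra_files=["torch/_inductor/codegen/wrapper.py"],
    out_of_place_only=False,
    hip_clang_launch=True,
)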

@@ -327,10 +327,10 @@ def get_path(basename: str, extension: str, specified_dir: str = ""):
 def get_hash(content: Union[str, bytes], extra: str = "", hash_type: str = "code"):
-    assert hash_type in ["code", "cubin", "hsaco"], "Hash type not supported"
+    assert hash_type in ["code", "cubin"], "Hash type not supported"
     if hash_type == "code":
         return code_hash(content, extra)
-    if hash_type == "cubin" or "hsaco":
+    if hash_type == "cubin":
         return code_hash(repr(content))
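A side note on the removed condition: `if hash_type == "cubin" or "hsaco":` is always true in Python, because the right-hand operand is a non-empty (hence truthy) string literal rather than a comparison. A generic illustration of the pitfall and the usual membership-test form (not a proposed patch to this file):

hash_type = "code"

# Always truthy: evaluates to False or "hsaco", which is "hsaco".
print(hash_type == "cubin" or "hsaco")   # -> hsaco

# The intended check compares hash_type against both values.
print(hash_type in ("cubin", "hsaco"))   # -> False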
@@ -820,13 +820,10 @@ def get_include_and_linking_paths(
         else:
             macros = f"-D{macros}"
         if cuda:
-            if torch.version.hip is not None:
-                libs += ["c10_hip", "torch_hip"]
+            if config.is_fbcode():
+                libs += ["cuda"]
             else:
-                if config.is_fbcode():
-                    libs += ["cuda"]
-                else:
-                    libs += ["c10_cuda", "cuda", "torch_cuda"]
+                libs += ["c10_cuda", "cuda", "torch_cuda"]
     else:
         # Note - this is effectively a header only inclusion. Usage of some header files may result in
         # symbol not found, if those header files require a library.
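Read with its indentation restored, the removed branch just picks the HIP runtime libraries when `torch.version.hip` is set and falls back to the CUDA ones otherwise. A small standalone sketch of that selection, with `is_fbcode` as a plain parameter standing in for `config.is_fbcode()`:

from typing import List, Optional

def gpu_link_libs(hip_version: Optional[str], is_fbcode: bool) -> List[str]:
    # Mirrors the removed branch: HIP runtime libs on ROCm, CUDA libs otherwise.
    if hip_version is not None:
        return ["c10_hip", "torch_hip"]
    if is_fbcode:
        return ["cuda"]
    return ["c10_cuda", "cuda", "torch_cuda"]

print(gpu_link_libs(None, False))   # ['c10_cuda', 'cuda', 'torch_cuda']
print(gpu_link_libs("5.6", False))  # ['c10_hip', 'torch_hip']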
@@ -962,19 +959,13 @@ class CudaKernelParamCache:
     @classmethod
     def set(cls, key, params, cubin):
-        bin_type = "cubin" if torch.version.hip is None else "hsaco"
         _, path = write(
             cubin,
-            bin_type,
-            hash_type=bin_type,
+            "cubin",
+            hash_type="cubin",
             specified_dir=config.aot_inductor.output_path,
         )
-        if torch.version.hip is None:
-            params["cubin_path"] = path
-        else:
-            params["hsaco_path"] = path
+        params["cubin_path"] = path
         cls.cache[key] = params
     @classmethod

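The lines removed here made the kernel-param cache aware of the binary format: ROCm builds produce an hsaco object rather than a cubin, so both the file/hash type passed to `write()` and the key stored in `params` switch on `torch.version.hip`. A compact sketch of that dispatch, with `write` injected as a stand-in for the codecache helper:

import torch

def cache_kernel_binary(params: dict, binary: bytes, write) -> dict:
    # cubin on CUDA builds, hsaco on ROCm builds (torch.version.hip is set on ROCm).
    bin_type = "cubin" if torch.version.hip is None else "hsaco"
    _, path = write(binary, bin_type, hash_type=bin_type)
    # Store the path under the matching key so the wrapper codegen can find it later.
    params["cubin_path" if torch.version.hip is None else "hsaco_path"] = path
    return params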

@@ -1643,9 +1643,7 @@ class CudaWrapperCodeGen(CppWrapperCodeGen):
     def generate_load_kernel(self, name, params):
         mangled_name = params.get("mangled_name", None)
         assert mangled_name is not None, "missing mangled_name"
-        cubin_path = params.get(
-            "cubin_path" if torch.version.hip is None else "hsaco_path", None
-        )
+        cubin_path = params.get("cubin_path", None)
         assert os.path.exists(
             cubin_path
         ), "cubin file should already exist at this moment"

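On the consumer side, the removed lookup in `generate_load_kernel` mirrored that key choice when loading the kernel binary. A one-function sketch of the lookup, assuming `params` is whatever CudaKernelParamCache stored:

import os
import torch

def kernel_binary_path(params: dict) -> str:
    # Mirror of the removed lookup: pick the key that matches the current backend.
    key = "cubin_path" if torch.version.hip is None else "hsaco_path"
    path = params.get(key, None)
    assert path is not None and os.path.exists(path), "kernel binary should already exist"
    return path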

@@ -344,19 +344,7 @@ class CachingAutotuner(KernelInterface):
            "shared_mem": launcher.bin.shared,
            "stream": stream,
        }
-        if torch.version.hip is None:
-            CudaKernelParamCache.set(key, params, launcher.bin.asm["cubin"])
-        else:
-            # There is some divergence between CUDA and ROCm here.
-            # On ROCm's triton we only have the the path to the binary, not the binary itself.
-            # For ROCm we will copy the binary to the new location instead of writing to file
-            import pathlib
-            launcher.bin.asm["hsaco"] = pathlib.Path(
-                launcher.bin.asm["hsaco_path"]
-            ).read_bytes()
-            CudaKernelParamCache.set(key, params, launcher.bin.asm["hsaco"])
+        CudaKernelParamCache.set(key, params, launcher.bin.asm["cubin"])
     def coordinate_descent_tuning(self, launcher, *args, **kwargs):
         """

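The removed block is where the CUDA/ROCm divergence is actually explained: on ROCm, Triton's `asm` dict exposes only a path to the compiled hsaco file, so the bytes have to be read back from disk before they can be handed to CudaKernelParamCache, whereas on CUDA the cubin bytes are already present. A standalone sketch of that normalization step (the `asm` layout follows the removed code, not a documented Triton API):

import pathlib

def kernel_bytes(asm: dict, is_rocm: bool) -> bytes:
    if not is_rocm:
        # CUDA: the compiled binary itself is already in the asm dict.
        return asm["cubin"]
    # ROCm: only a path to the hsaco file is available, so read the bytes back.
    return pathlib.Path(asm["hsaco_path"]).read_bytes()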

@@ -2235,10 +2235,6 @@ CUDA_IDENTIFIER_MAP = collections.OrderedDict(
             "CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES",
             ("hipFuncAttributeSharedSizeBytes", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED),
         ),
-        (
-            "CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES",
-            ("hipFuncAttributeMaxDynamicSharedMemorySize", CONV_TYPE, API_RUNTIME),
-        ),
         (
             "CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES",
             ("hipFuncAttributeConstSizeBytes", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED),
@@ -2920,7 +2916,6 @@ CUDA_IDENTIFIER_MAP = collections.OrderedDict(
         ("cuEventQuery", ("hipEventQuery", CONV_EVENT, API_DRIVER)),
         ("cuEventRecord", ("hipEventRecord", CONV_EVENT, API_DRIVER)),
         ("cuEventSynchronize", ("hipEventSynchronize", CONV_EVENT, API_DRIVER)),
-        ("cuFuncSetAttribute", ("hipFuncSetAttribute", CONV_EVENT, API_DRIVER)),
         (
             "cuFuncGetAttribute",
             ("hipFuncGetAttribute", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED),