Mirror of https://github.com/pytorch/pytorch.git, synced 2025-11-11 22:34:53 +08:00
Revert "[ROCm] Add ROCm AMDGPU support for inductor cpp codegen (#105141)"
This reverts commit 8ff00360a4daab7848307a9a0b1c81b1da873d0c. Reverted https://github.com/pytorch/pytorch/pull/105141 on behalf of https://github.com/DanilBaibak due to Break internal build ([comment](https://github.com/pytorch/pytorch/pull/105141#issuecomment-1715629007))
@@ -41,7 +41,7 @@ except unittest.SkipTest:
 
 
 RUN_CPU = HAS_CPU and not torch.backends.mps.is_available() and not IS_MACOS
-RUN_CUDA = HAS_CUDA and not TEST_WITH_ASAN
+RUN_CUDA = HAS_CUDA and not TEST_WITH_ASAN and not TEST_WITH_ROCM
 
 
 class CppWrapperTemplate:
@@ -91,37 +91,6 @@ test_failures_cuda_wrapper = {
     ),
 }
-
-if TEST_WITH_ROCM:
-    # Current skips for ROCm
-    rocm_exclude_list = [
-        "test_addmm",
-        "test_batch_norm_2d_2_cuda",
-        "test_bmm1_cuda",
-        "test_cat_cuda",
-        "test_convolution1_cuda",
-        "test_custom_op_cuda",
-        "test_foreach_cpp_wrapper",
-        "test_linear_relu",
-        "test_index_put_deterministic_fallback_cuda",
-        "test_index_tensor_cuda",
-        "test_multi_device_cuda",
-        "test_mm_plus_mm2",
-        "test_scaled_dot_product_efficient_attention_cuda",
-        "test_sum_dtype_cuda",
-        "test_transpose_cuda",
-        "test_index_tensor_cuda",
-    ]
-
-    # Create skip entries for both the cuda and cuda_dynamic_shapes variants
-    for test_name in rocm_exclude_list:
-        dynamic_shapes_test_name = f"{test_name}_dynamic_shapes"
-        test_failures_cuda_wrapper[test_name] = test_torchinductor.TestFailure(
-            ("cuda_wrapper",), is_skip=True
-        )
-        test_failures_cuda_wrapper[
-            dynamic_shapes_test_name
-        ] = test_torchinductor.TestFailure(("cuda_wrapper",), is_skip=True)
 
 
 def make_test_case(name, device, tests, condition=True, slow=False, func_inputs=None):
     test_name = f"{name}_{device}" if device else name
@@ -342,9 +311,7 @@ if RUN_CUDA:
     ]:
         make_test_case(item.name, item.device, item.tests)
 
-    test_torchinductor.copy_tests(
-        CudaWrapperTemplate, TestCudaWrapper, "cuda_wrapper", test_failures_cuda_wrapper
-    )
+    test_torchinductor.copy_tests(CudaWrapperTemplate, TestCudaWrapper, "cuda_wrapper")
 
     DynamicShapesCudaWrapperTemplate = (
         test_torchinductor_dynamic_shapes.make_dynamic_cls(CudaWrapperTemplate)
@@ -195,7 +195,6 @@ hipify_python.hipify(
     output_directory=out_dir,
     includes=includes,
     ignores=ignores,
-    extra_files=["torch/_inductor/codegen/wrapper.py"],
     out_of_place_only=args.out_of_place_only,
     hip_clang_launch=is_hip_clang(),
 )
@@ -327,10 +327,10 @@ def get_path(basename: str, extension: str, specified_dir: str = ""):
 
 
 def get_hash(content: Union[str, bytes], extra: str = "", hash_type: str = "code"):
-    assert hash_type in ["code", "cubin", "hsaco"], "Hash type not supported"
+    assert hash_type in ["code", "cubin"], "Hash type not supported"
     if hash_type == "code":
         return code_hash(content, extra)
-    if hash_type == "cubin" or "hsaco":
+    if hash_type == "cubin":
         return code_hash(repr(content))
 
 
@@ -820,13 +820,10 @@ def get_include_and_linking_paths(
             else:
                 macros = f"-D{macros}"
         if cuda:
-            if torch.version.hip is not None:
-                libs += ["c10_hip", "torch_hip"]
-            else:
-                if config.is_fbcode():
-                    libs += ["cuda"]
-                else:
-                    libs += ["c10_cuda", "cuda", "torch_cuda"]
+            if config.is_fbcode():
+                libs += ["cuda"]
+            else:
+                libs += ["c10_cuda", "cuda", "torch_cuda"]
     else:
         # Note - this is effectively a header only inclusion. Usage of some header files may result in
         # symbol not found, if those header files require a library.
@@ -962,19 +959,13 @@ class CudaKernelParamCache:
 
     @classmethod
     def set(cls, key, params, cubin):
-        bin_type = "cubin" if torch.version.hip is None else "hsaco"
         _, path = write(
             cubin,
-            bin_type,
-            hash_type=bin_type,
+            "cubin",
+            hash_type="cubin",
             specified_dir=config.aot_inductor.output_path,
         )
-
-        if torch.version.hip is None:
-            params["cubin_path"] = path
-        else:
-            params["hsaco_path"] = path
-
+        params["cubin_path"] = path
         cls.cache[key] = params
 
     @classmethod
@@ -1643,9 +1643,7 @@ class CudaWrapperCodeGen(CppWrapperCodeGen):
     def generate_load_kernel(self, name, params):
         mangled_name = params.get("mangled_name", None)
         assert mangled_name is not None, "missing mangled_name"
-        cubin_path = params.get(
-            "cubin_path" if torch.version.hip is None else "hsaco_path", None
-        )
+        cubin_path = params.get("cubin_path", None)
         assert os.path.exists(
             cubin_path
         ), "cubin file should already exist at this moment"
@@ -344,19 +344,7 @@ class CachingAutotuner(KernelInterface):
             "shared_mem": launcher.bin.shared,
             "stream": stream,
         }
-
-        if torch.version.hip is None:
-            CudaKernelParamCache.set(key, params, launcher.bin.asm["cubin"])
-        else:
-            # There is some divergence between CUDA and ROCm here.
-            # On ROCm's triton we only have the the path to the binary, not the binary itself.
-            # For ROCm we will copy the binary to the new location instead of writing to file
-            import pathlib
-
-            launcher.bin.asm["hsaco"] = pathlib.Path(
-                launcher.bin.asm["hsaco_path"]
-            ).read_bytes()
-            CudaKernelParamCache.set(key, params, launcher.bin.asm["hsaco"])
+        CudaKernelParamCache.set(key, params, launcher.bin.asm["cubin"])
 
     def coordinate_descent_tuning(self, launcher, *args, **kwargs):
         """
@@ -2235,10 +2235,6 @@ CUDA_IDENTIFIER_MAP = collections.OrderedDict(
             "CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES",
             ("hipFuncAttributeSharedSizeBytes", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED),
         ),
-        (
-            "CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES",
-            ("hipFuncAttributeMaxDynamicSharedMemorySize", CONV_TYPE, API_RUNTIME),
-        ),
         (
             "CU_FUNC_ATTRIBUTE_CONST_SIZE_BYTES",
             ("hipFuncAttributeConstSizeBytes", CONV_TYPE, API_DRIVER, HIP_UNSUPPORTED),
@@ -2920,7 +2916,6 @@ CUDA_IDENTIFIER_MAP = collections.OrderedDict(
         ("cuEventQuery", ("hipEventQuery", CONV_EVENT, API_DRIVER)),
         ("cuEventRecord", ("hipEventRecord", CONV_EVENT, API_DRIVER)),
         ("cuEventSynchronize", ("hipEventSynchronize", CONV_EVENT, API_DRIVER)),
-        ("cuFuncSetAttribute", ("hipFuncSetAttribute", CONV_EVENT, API_DRIVER)),
         (
             "cuFuncGetAttribute",
             ("hipFuncGetAttribute", CONV_MODULE, API_DRIVER, HIP_UNSUPPORTED),