mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 12:54:11 +08:00
[ROCm][Inductor][CK backend] Install rocm-composable-kernel python package on ROCm Linux CI docker images (#162288)
Reopened from #158747, which was reverted because the wheel cannot be built without setuptools-scm in the PyTorch index URL. We reconsider the original PR's idea of introducing CK as a PyTorch dependency on ROCm Linux and instead install the CK Python package in CI only, since (1) rocm-composable-kernel depends on setuptools-scm, which depends on tomli, and the existing index URLs would need to be modified to host the new packages, and (2) there is also a packaging [bug](https://github.com/pypa/setuptools/issues/3269#issuecomment-1254507377) in Ubuntu 22.04 which prevents correct dynamic version calculation with the default system pip. Extras: -> this PR reconsiders how the TORCHINDUCTOR_CK_DIR env variable is used; previously, this variable was used to point to the rocm-composable-kernel package installation path on the filesystem; now, the path is inferred by trying to import ck4inductor -> the tests are updated to reflect this change -> since clang in CI points to a bash script which invokes sccache, we cannot patch PATH to exclude sccache, so this logic is removed from the testing code -> the scaled_mm test crashes during benchmarking when the benchmarking happens in the main process, and times out during benchmarking when it happens in a subprocess, on gfx942, so it is disabled. TBD: roll back rocm-mi300 workflow before merging. Pull Request resolved: https://github.com/pytorch/pytorch/pull/162288 Approved by: https://github.com/jeffdaily
This commit is contained in:
committed by
PyTorch MergeBot
parent
5f630d28d7
commit
ab0694f1c6
@ -56,9 +56,13 @@ ENV INSTALLED_VISION ${VISION}
|
||||
|
||||
# Install rocm
|
||||
ARG ROCM_VERSION
|
||||
RUN mkdir ci_commit_pins
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ./ci_commit_pins/rocm-composable-kernel.txt ci_commit_pins/rocm-composable-kernel.txt
|
||||
COPY ./common/install_rocm.sh install_rocm.sh
|
||||
RUN bash ./install_rocm.sh
|
||||
RUN rm install_rocm.sh
|
||||
RUN rm install_rocm.sh common_utils.sh
|
||||
RUN rm -r ci_commit_pins
|
||||
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
|
||||
RUN bash ./install_rocm_magma.sh ${ROCM_VERSION}
|
||||
RUN rm install_rocm_magma.sh
|
||||
|
1
.ci/docker/ci_commit_pins/rocm-composable-kernel.txt
Normal file
1
.ci/docker/ci_commit_pins/rocm-composable-kernel.txt
Normal file
@ -0,0 +1 @@
|
||||
7fe50dc3da2069d6645d9deb8c017a876472a977
|
@ -2,6 +2,11 @@
|
||||
|
||||
set -ex
|
||||
|
||||
# for pip_install function
|
||||
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
||||
|
||||
ROCM_COMPOSABLE_KERNEL_VERSION="$(cat $(dirname $0)/../ci_commit_pins/rocm-composable-kernel.txt)"
|
||||
|
||||
ver() {
|
||||
printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ');
|
||||
}
|
||||
@ -113,6 +118,8 @@ EOF
|
||||
rm -rf HIP clr
|
||||
fi
|
||||
|
||||
pip_install "git+https://github.com/rocm/composable_kernel@$ROCM_COMPOSABLE_KERNEL_VERSION"
|
||||
|
||||
# Cleanup
|
||||
apt-get autoclean && apt-get clean
|
||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||
@ -176,6 +183,8 @@ install_centos() {
|
||||
sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
|
||||
done
|
||||
|
||||
pip_install "git+https://github.com/rocm/composable_kernel@$ROCM_COMPOSABLE_KERNEL_VERSION"
|
||||
|
||||
# Cleanup
|
||||
yum clean all
|
||||
rm -rf /var/cache/yum
|
||||
|
@ -52,9 +52,13 @@ ENV INSTALLED_VISION ${VISION}
|
||||
|
||||
# Install rocm
|
||||
ARG ROCM_VERSION
|
||||
RUN mkdir ci_commit_pins
|
||||
COPY ./common/common_utils.sh common_utils.sh
|
||||
COPY ./ci_commit_pins/rocm-composable-kernel.txt ci_commit_pins/rocm-composable-kernel.txt
|
||||
COPY ./common/install_rocm.sh install_rocm.sh
|
||||
RUN bash ./install_rocm.sh
|
||||
RUN rm install_rocm.sh
|
||||
RUN rm install_rocm.sh common_utils.sh
|
||||
RUN rm -r ci_commit_pins
|
||||
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
|
||||
RUN bash ./install_rocm_magma.sh ${ROCM_VERSION}
|
||||
RUN rm install_rocm_magma.sh
|
||||
|
1
.github/workflows/rocm-mi300.yml
vendored
1
.github/workflows/rocm-mi300.yml
vendored
@ -70,4 +70,5 @@ jobs:
|
||||
build-environment: linux-noble-rocm-py3.12-mi300
|
||||
docker-image: ${{ needs.linux-noble-rocm-py3_12-build.outputs.docker-image }}
|
||||
test-matrix: ${{ needs.linux-noble-rocm-py3_12-build.outputs.test-matrix }}
|
||||
tests-to-include: "inductor/test_ck_backend"
|
||||
secrets: inherit
|
||||
|
@ -1,5 +1,4 @@
|
||||
# Owner(s): ["module: inductor"]
|
||||
import functools
|
||||
import logging
|
||||
import os
|
||||
import unittest
|
||||
@ -13,6 +12,7 @@ except ImportError:
|
||||
import torch
|
||||
from torch._inductor import config
|
||||
from torch._inductor.test_case import run_tests, TestCase
|
||||
from torch._inductor.utils import try_import_ck_lib
|
||||
from torch.testing._internal.common_cuda import tf32_off
|
||||
from torch.testing._internal.common_utils import (
|
||||
instantiate_parametrized_tests,
|
||||
@ -32,20 +32,8 @@ if HAS_CUDA_AND_TRITON:
|
||||
log = logging.getLogger(__name__)
|
||||
|
||||
|
||||
@functools.lru_cache(None)
|
||||
def _get_path_without_sccache() -> str:
|
||||
"""
|
||||
Get the PATH environment variable without sccache.
|
||||
"""
|
||||
path_envs = os.environ.get("PATH", "").split(":")
|
||||
path_envs = [env for env in path_envs if "/opt/cache/bin" not in env]
|
||||
return ":".join(path_envs)
|
||||
|
||||
|
||||
_test_env = {
|
||||
"PATH": _get_path_without_sccache(),
|
||||
"DISABLE_SCCACHE": "1",
|
||||
}
|
||||
# patch env for tests if needed
|
||||
_test_env = {}
|
||||
|
||||
|
||||
@instantiate_parametrized_tests
|
||||
@ -61,13 +49,10 @@ class TestCKBackend(TestCase):
|
||||
)
|
||||
|
||||
torch.random.manual_seed(1234)
|
||||
try:
|
||||
import ck4inductor # @manual
|
||||
|
||||
self.ck_dir = os.path.dirname(ck4inductor.__file__)
|
||||
os.environ["TORCHINDUCTOR_CK_DIR"] = self.ck_dir
|
||||
except ImportError as e:
|
||||
raise unittest.SkipTest("Composable Kernel library not installed") from e
|
||||
self.ck_dir, _, _, _ = try_import_ck_lib()
|
||||
if not self.ck_dir:
|
||||
raise unittest.SkipTest("Composable Kernel library is not installed")
|
||||
|
||||
try:
|
||||
os.environ["INDUCTOR_TEST_DISABLE_FRESH_CACHE"] = "1"
|
||||
@ -288,6 +273,9 @@ class TestCKBackend(TestCase):
|
||||
|
||||
torch.testing.assert_close(Y_compiled, Y_eager)
|
||||
|
||||
@unittest.skip(
|
||||
"FIXME(tenpercent): kernel compilation errors on gfx942 as of 09/01/25"
|
||||
)
|
||||
@unittest.skipIf(not torch.version.hip, "ROCM only")
|
||||
@unittest.mock.patch.dict(os.environ, _test_env)
|
||||
@parametrize("max_autotune_gemm_backends", ("CK", "ATen,Triton,CK"))
|
||||
|
@ -4,7 +4,7 @@ import os
|
||||
from typing import Optional
|
||||
|
||||
from torch._inductor import config
|
||||
from torch._inductor.utils import is_linux
|
||||
from torch._inductor.utils import is_linux, try_import_ck_lib
|
||||
|
||||
|
||||
log = logging.getLogger(__name__)
|
||||
@ -18,18 +18,23 @@ def _rocm_include_paths(dst_file_ext: str) -> list[str]:
|
||||
if config.rocm.rocm_home
|
||||
else cpp_extension._join_rocm_home("include")
|
||||
)
|
||||
if not config.rocm.ck_dir:
|
||||
log.warning("Unspecified Composable Kernel include dir")
|
||||
|
||||
if config.is_fbcode():
|
||||
from libfb.py import parutil
|
||||
|
||||
ck_path = parutil.get_dir_path("composable-kernel-headers")
|
||||
else:
|
||||
if not config.rocm.ck_dir:
|
||||
ck_dir, _, _, _ = try_import_ck_lib()
|
||||
if not ck_dir:
|
||||
log.warning("Unspecified Composable Kernel directory")
|
||||
config.rocm.ck_dir = ck_dir
|
||||
ck_path = config.rocm.ck_dir or cpp_extension._join_rocm_home(
|
||||
"composable_kernel"
|
||||
)
|
||||
|
||||
log.debug("Using ck path %s", ck_path)
|
||||
|
||||
ck_include = os.path.join(ck_path, "include")
|
||||
ck_library_include = os.path.join(ck_path, "library", "include")
|
||||
|
||||
|
@ -1985,16 +1985,7 @@ def use_ck_template(layout: Layout) -> bool:
|
||||
log.warning("Please pip install Composable Kernel package")
|
||||
return False
|
||||
|
||||
if config.is_fbcode():
|
||||
config.rocm.ck_dir = ck_package_dirname
|
||||
|
||||
if not config.rocm.ck_dir:
|
||||
log.warning("Please set TORCHINDUCTOR_CK_DIR env variable")
|
||||
return False
|
||||
|
||||
if ck_package_dirname != config.rocm.ck_dir:
|
||||
log.warning("Invalid path to CK library")
|
||||
return False
|
||||
config.rocm.ck_dir = ck_package_dirname
|
||||
|
||||
return True
|
||||
|
||||
|
Reference in New Issue
Block a user