mirror of
https://github.com/pytorch/pytorch.git
synced 2025-10-20 21:14:14 +08:00
[ROCm][Inductor][CK backend] Install rocm-composable-kernel python package on ROCm Linux CI docker images (#162288)
Reopened from #158747, which was reverted because the wheel cannot be built without setuptools-scm being available in the PyTorch index URL.

We reconsider the original PR idea of introducing CK as a PyTorch dependency on ROCm Linux, and instead install the CK Python package in CI only, since (1) rocm-composable-kernel depends on setuptools-scm, which depends on tomli, and the existing index URLs would need to be modified to host the new packages; and (2) there is also a packaging [bug](https://github.com/pypa/setuptools/issues/3269#issuecomment-1254507377) in Ubuntu 22.04 which prevents correct dynamic version calculation with the default system pip.

Extras:
-> This PR reconsiders how the TORCHINDUCTOR_CK_DIR env variable is used; previously, this variable was used to point to the rocm-composable-kernel package installation path on the filesystem; now, the path is inferred by trying to import ck4inductor.
-> The tests are updated to reflect this change.
-> Since in CI clang points to a bash script which invokes sccache, we cannot patch PATH to not contain sccache, so this logic is removed from the testing code.
-> The scaled_mm test crashes during benchmarking when the benchmarking happens in the main process, and times out when it happens in a subprocess, on gfx942, so it is disabled.

TBD: roll back the rocm-mi300 workflow before merging.

Pull Request resolved: https://github.com/pytorch/pytorch/pull/162288
Approved by: https://github.com/jeffdaily
This commit is contained in:
committed by
PyTorch MergeBot
parent
5f630d28d7
commit
ab0694f1c6
@ -56,9 +56,13 @@ ENV INSTALLED_VISION ${VISION}
|
|||||||
|
|
||||||
# Install rocm
|
# Install rocm
|
||||||
ARG ROCM_VERSION
|
ARG ROCM_VERSION
|
||||||
|
RUN mkdir ci_commit_pins
|
||||||
|
COPY ./common/common_utils.sh common_utils.sh
|
||||||
|
COPY ./ci_commit_pins/rocm-composable-kernel.txt ci_commit_pins/rocm-composable-kernel.txt
|
||||||
COPY ./common/install_rocm.sh install_rocm.sh
|
COPY ./common/install_rocm.sh install_rocm.sh
|
||||||
RUN bash ./install_rocm.sh
|
RUN bash ./install_rocm.sh
|
||||||
RUN rm install_rocm.sh
|
RUN rm install_rocm.sh common_utils.sh
|
||||||
|
RUN rm -r ci_commit_pins
|
||||||
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
|
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
|
||||||
RUN bash ./install_rocm_magma.sh ${ROCM_VERSION}
|
RUN bash ./install_rocm_magma.sh ${ROCM_VERSION}
|
||||||
RUN rm install_rocm_magma.sh
|
RUN rm install_rocm_magma.sh
|
||||||
|
|||||||
1
.ci/docker/ci_commit_pins/rocm-composable-kernel.txt
Normal file
1
.ci/docker/ci_commit_pins/rocm-composable-kernel.txt
Normal file
@ -0,0 +1 @@
|
|||||||
|
7fe50dc3da2069d6645d9deb8c017a876472a977
|
||||||
@ -2,6 +2,11 @@
|
|||||||
|
|
||||||
set -ex
|
set -ex
|
||||||
|
|
||||||
|
# for pip_install function
|
||||||
|
source "$(dirname "${BASH_SOURCE[0]}")/common_utils.sh"
|
||||||
|
|
||||||
|
ROCM_COMPOSABLE_KERNEL_VERSION="$(cat $(dirname $0)/../ci_commit_pins/rocm-composable-kernel.txt)"
|
||||||
|
|
||||||
ver() {
|
ver() {
|
||||||
printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ');
|
printf "%3d%03d%03d%03d" $(echo "$1" | tr '.' ' ');
|
||||||
}
|
}
|
||||||
@ -113,6 +118,8 @@ EOF
|
|||||||
rm -rf HIP clr
|
rm -rf HIP clr
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
pip_install "git+https://github.com/rocm/composable_kernel@$ROCM_COMPOSABLE_KERNEL_VERSION"
|
||||||
|
|
||||||
# Cleanup
|
# Cleanup
|
||||||
apt-get autoclean && apt-get clean
|
apt-get autoclean && apt-get clean
|
||||||
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
|
||||||
@ -176,6 +183,8 @@ install_centos() {
|
|||||||
sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
|
sqlite3 $kdb "PRAGMA journal_mode=off; PRAGMA VACUUM;"
|
||||||
done
|
done
|
||||||
|
|
||||||
|
pip_install "git+https://github.com/rocm/composable_kernel@$ROCM_COMPOSABLE_KERNEL_VERSION"
|
||||||
|
|
||||||
# Cleanup
|
# Cleanup
|
||||||
yum clean all
|
yum clean all
|
||||||
rm -rf /var/cache/yum
|
rm -rf /var/cache/yum
|
||||||
|
|||||||
@ -52,9 +52,13 @@ ENV INSTALLED_VISION ${VISION}
|
|||||||
|
|
||||||
# Install rocm
|
# Install rocm
|
||||||
ARG ROCM_VERSION
|
ARG ROCM_VERSION
|
||||||
|
RUN mkdir ci_commit_pins
|
||||||
|
COPY ./common/common_utils.sh common_utils.sh
|
||||||
|
COPY ./ci_commit_pins/rocm-composable-kernel.txt ci_commit_pins/rocm-composable-kernel.txt
|
||||||
COPY ./common/install_rocm.sh install_rocm.sh
|
COPY ./common/install_rocm.sh install_rocm.sh
|
||||||
RUN bash ./install_rocm.sh
|
RUN bash ./install_rocm.sh
|
||||||
RUN rm install_rocm.sh
|
RUN rm install_rocm.sh common_utils.sh
|
||||||
|
RUN rm -r ci_commit_pins
|
||||||
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
|
COPY ./common/install_rocm_magma.sh install_rocm_magma.sh
|
||||||
RUN bash ./install_rocm_magma.sh ${ROCM_VERSION}
|
RUN bash ./install_rocm_magma.sh ${ROCM_VERSION}
|
||||||
RUN rm install_rocm_magma.sh
|
RUN rm install_rocm_magma.sh
|
||||||
|
|||||||
1
.github/workflows/rocm-mi300.yml
vendored
1
.github/workflows/rocm-mi300.yml
vendored
@ -70,4 +70,5 @@ jobs:
|
|||||||
build-environment: linux-noble-rocm-py3.12-mi300
|
build-environment: linux-noble-rocm-py3.12-mi300
|
||||||
docker-image: ${{ needs.linux-noble-rocm-py3_12-build.outputs.docker-image }}
|
docker-image: ${{ needs.linux-noble-rocm-py3_12-build.outputs.docker-image }}
|
||||||
test-matrix: ${{ needs.linux-noble-rocm-py3_12-build.outputs.test-matrix }}
|
test-matrix: ${{ needs.linux-noble-rocm-py3_12-build.outputs.test-matrix }}
|
||||||
|
tests-to-include: "inductor/test_ck_backend"
|
||||||
secrets: inherit
|
secrets: inherit
|
||||||
|
|||||||
@ -1,5 +1,4 @@
|
|||||||
# Owner(s): ["module: inductor"]
|
# Owner(s): ["module: inductor"]
|
||||||
import functools
|
|
||||||
import logging
|
import logging
|
||||||
import os
|
import os
|
||||||
import unittest
|
import unittest
|
||||||
@ -13,6 +12,7 @@ except ImportError:
|
|||||||
import torch
|
import torch
|
||||||
from torch._inductor import config
|
from torch._inductor import config
|
||||||
from torch._inductor.test_case import run_tests, TestCase
|
from torch._inductor.test_case import run_tests, TestCase
|
||||||
|
from torch._inductor.utils import try_import_ck_lib
|
||||||
from torch.testing._internal.common_cuda import tf32_off
|
from torch.testing._internal.common_cuda import tf32_off
|
||||||
from torch.testing._internal.common_utils import (
|
from torch.testing._internal.common_utils import (
|
||||||
instantiate_parametrized_tests,
|
instantiate_parametrized_tests,
|
||||||
@ -32,20 +32,8 @@ if HAS_CUDA_AND_TRITON:
|
|||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
|
|
||||||
|
|
||||||
@functools.lru_cache(None)
|
# patch env for tests if needed
|
||||||
def _get_path_without_sccache() -> str:
|
_test_env = {}
|
||||||
"""
|
|
||||||
Get the PATH environment variable without sccache.
|
|
||||||
"""
|
|
||||||
path_envs = os.environ.get("PATH", "").split(":")
|
|
||||||
path_envs = [env for env in path_envs if "/opt/cache/bin" not in env]
|
|
||||||
return ":".join(path_envs)
|
|
||||||
|
|
||||||
|
|
||||||
_test_env = {
|
|
||||||
"PATH": _get_path_without_sccache(),
|
|
||||||
"DISABLE_SCCACHE": "1",
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
@instantiate_parametrized_tests
|
@instantiate_parametrized_tests
|
||||||
@ -61,13 +49,10 @@ class TestCKBackend(TestCase):
|
|||||||
)
|
)
|
||||||
|
|
||||||
torch.random.manual_seed(1234)
|
torch.random.manual_seed(1234)
|
||||||
try:
|
|
||||||
import ck4inductor # @manual
|
|
||||||
|
|
||||||
self.ck_dir = os.path.dirname(ck4inductor.__file__)
|
self.ck_dir, _, _, _ = try_import_ck_lib()
|
||||||
os.environ["TORCHINDUCTOR_CK_DIR"] = self.ck_dir
|
if not self.ck_dir:
|
||||||
except ImportError as e:
|
raise unittest.SkipTest("Composable Kernel library is not installed")
|
||||||
raise unittest.SkipTest("Composable Kernel library not installed") from e
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
os.environ["INDUCTOR_TEST_DISABLE_FRESH_CACHE"] = "1"
|
os.environ["INDUCTOR_TEST_DISABLE_FRESH_CACHE"] = "1"
|
||||||
@ -288,6 +273,9 @@ class TestCKBackend(TestCase):
|
|||||||
|
|
||||||
torch.testing.assert_close(Y_compiled, Y_eager)
|
torch.testing.assert_close(Y_compiled, Y_eager)
|
||||||
|
|
||||||
|
@unittest.skip(
|
||||||
|
"FIXME(tenpercent): kernel compilation errors on gfx942 as of 09/01/25"
|
||||||
|
)
|
||||||
@unittest.skipIf(not torch.version.hip, "ROCM only")
|
@unittest.skipIf(not torch.version.hip, "ROCM only")
|
||||||
@unittest.mock.patch.dict(os.environ, _test_env)
|
@unittest.mock.patch.dict(os.environ, _test_env)
|
||||||
@parametrize("max_autotune_gemm_backends", ("CK", "ATen,Triton,CK"))
|
@parametrize("max_autotune_gemm_backends", ("CK", "ATen,Triton,CK"))
|
||||||
|
|||||||
@ -4,7 +4,7 @@ import os
|
|||||||
from typing import Optional
|
from typing import Optional
|
||||||
|
|
||||||
from torch._inductor import config
|
from torch._inductor import config
|
||||||
from torch._inductor.utils import is_linux
|
from torch._inductor.utils import is_linux, try_import_ck_lib
|
||||||
|
|
||||||
|
|
||||||
log = logging.getLogger(__name__)
|
log = logging.getLogger(__name__)
|
||||||
@ -18,18 +18,23 @@ def _rocm_include_paths(dst_file_ext: str) -> list[str]:
|
|||||||
if config.rocm.rocm_home
|
if config.rocm.rocm_home
|
||||||
else cpp_extension._join_rocm_home("include")
|
else cpp_extension._join_rocm_home("include")
|
||||||
)
|
)
|
||||||
if not config.rocm.ck_dir:
|
|
||||||
log.warning("Unspecified Composable Kernel include dir")
|
|
||||||
|
|
||||||
if config.is_fbcode():
|
if config.is_fbcode():
|
||||||
from libfb.py import parutil
|
from libfb.py import parutil
|
||||||
|
|
||||||
ck_path = parutil.get_dir_path("composable-kernel-headers")
|
ck_path = parutil.get_dir_path("composable-kernel-headers")
|
||||||
else:
|
else:
|
||||||
|
if not config.rocm.ck_dir:
|
||||||
|
ck_dir, _, _, _ = try_import_ck_lib()
|
||||||
|
if not ck_dir:
|
||||||
|
log.warning("Unspecified Composable Kernel directory")
|
||||||
|
config.rocm.ck_dir = ck_dir
|
||||||
ck_path = config.rocm.ck_dir or cpp_extension._join_rocm_home(
|
ck_path = config.rocm.ck_dir or cpp_extension._join_rocm_home(
|
||||||
"composable_kernel"
|
"composable_kernel"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
log.debug("Using ck path %s", ck_path)
|
||||||
|
|
||||||
ck_include = os.path.join(ck_path, "include")
|
ck_include = os.path.join(ck_path, "include")
|
||||||
ck_library_include = os.path.join(ck_path, "library", "include")
|
ck_library_include = os.path.join(ck_path, "library", "include")
|
||||||
|
|
||||||
|
|||||||
@ -1985,17 +1985,8 @@ def use_ck_template(layout: Layout) -> bool:
|
|||||||
log.warning("Please pip install Composable Kernel package")
|
log.warning("Please pip install Composable Kernel package")
|
||||||
return False
|
return False
|
||||||
|
|
||||||
if config.is_fbcode():
|
|
||||||
config.rocm.ck_dir = ck_package_dirname
|
config.rocm.ck_dir = ck_package_dirname
|
||||||
|
|
||||||
if not config.rocm.ck_dir:
|
|
||||||
log.warning("Please set TORCHINDUCTOR_CK_DIR env variable")
|
|
||||||
return False
|
|
||||||
|
|
||||||
if ck_package_dirname != config.rocm.ck_dir:
|
|
||||||
log.warning("Invalid path to CK library")
|
|
||||||
return False
|
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user