[CI][docker] Use install_cusparselt when possible in docker image (#150600)

Spot-checked builds for a line like `Found CUSPARSELT: /usr/local/cuda/lib64/libcusparseLt.so`. I don't know if there's another way to verify it.

I am slowly trying to reduce the duplicated code in the Docker image install scripts.
Pros:
* less duplicated code

Cons:
* more Docker `COPY` instructions
Pull Request resolved: https://github.com/pytorch/pytorch/pull/150600
Approved by: https://github.com/atalman
This commit is contained in:
Catherine Lee
2025-04-24 18:52:10 +00:00
committed by PyTorch MergeBot
parent ff075d0815
commit b11c9e1808
9 changed files with 17 additions and 54 deletions

View File

@ -46,6 +46,7 @@ RUN rm -rf /usr/local/cuda-*
ADD ./common/install_cuda.sh install_cuda.sh
COPY ./common/install_nccl.sh install_nccl.sh
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
COPY ./common/install_cusparselt.sh install_cusparselt.sh
ENV CUDA_HOME=/usr/local/cuda-${CUDA_VERSION}
# Preserve CUDA_VERSION for the builds
ENV CUDA_VERSION=${CUDA_VERSION}

View File

@ -4,39 +4,6 @@ set -ex
CUDNN_VERSION=9.5.1.17
function install_cusparselt_040 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.4.0.7-archive.tar.xz
tar xf libcusparse_lt-linux-x86_64-0.4.0.7-archive.tar.xz
cp -a libcusparse_lt-linux-x86_64-0.4.0.7-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-x86_64-0.4.0.7-archive/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cusparselt
}
function install_cusparselt_062 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.6.2.3-archive.tar.xz
tar xf libcusparse_lt-linux-x86_64-0.6.2.3-archive.tar.xz
cp -a libcusparse_lt-linux-x86_64-0.6.2.3-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-x86_64-0.6.2.3-archive/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cusparselt
}
function install_cusparselt_063 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz
tar xf libcusparse_lt-linux-x86_64-0.6.3.2-archive.tar.xz
cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-x86_64-0.6.3.2-archive/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cusparselt
}
function install_118 {
CUDNN_VERSION=9.1.0.70
echo "Installing CUDA 11.8 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.4.0"
@ -59,7 +26,7 @@ function install_118 {
CUDA_VERSION=11.8 bash install_nccl.sh
install_cusparselt_040
CUDA_VERSION=11.8 bash install_cusparselt.sh
ldconfig
}
@ -86,7 +53,7 @@ function install_124 {
CUDA_VERSION=12.4 bash install_nccl.sh
install_cusparselt_062
CUDA_VERSION=12.4 bash install_cusparselt.sh
ldconfig
}
@ -112,7 +79,7 @@ function install_126 {
CUDA_VERSION=12.6 bash install_nccl.sh
install_cusparselt_063
CUDA_VERSION=12.6 bash install_cusparselt.sh
ldconfig
}
@ -238,7 +205,7 @@ function install_128 {
CUDA_VERSION=12.8 bash install_nccl.sh
install_cusparselt_063
CUDA_VERSION=12.8 bash install_cusparselt.sh
ldconfig
}

View File

@ -5,17 +5,6 @@ set -ex
CUDNN_VERSION=9.8.0.87
function install_cusparselt_063 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-sbsa/libcusparse_lt-linux-sbsa-0.6.3.2-archive.tar.xz
tar xf libcusparse_lt-linux-sbsa-0.6.3.2-archive.tar.xz
cp -a libcusparse_lt-linux-sbsa-0.6.3.2-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-sbsa-0.6.3.2-archive/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cusparselt
}
function install_128 {
echo "Installing CUDA 12.8.0 and cuDNN ${CUDNN_VERSION} and NCCL and cuSparseLt-0.6.3"
rm -rf /usr/local/cuda-12.8 /usr/local/cuda
@ -37,7 +26,7 @@ function install_128 {
CUDA_VERSION=12.8 bash install_nccl.sh
install_cusparselt_063
CUDA_VERSION=12.8 bash install_cusparselt.sh
ldconfig
}

View File

@ -51,6 +51,7 @@ ADD ./common/install_cuda.sh install_cuda.sh
ADD ./common/install_magma.sh install_magma.sh
COPY ./common/install_nccl.sh install_nccl.sh
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
COPY ./common/install_cusparselt.sh install_cusparselt.sh
ENV CUDA_HOME /usr/local/cuda
FROM cuda as cuda11.8

View File

@ -32,7 +32,8 @@ ARG CUDA_VERSION
COPY ./common/install_cuda.sh install_cuda.sh
COPY ./common/install_nccl.sh install_nccl.sh
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu*
COPY ./common/install_cusparselt.sh install_cusparselt.sh
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu* install_cusparselt.sh
ENV DESIRED_CUDA ${CUDA_VERSION}
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH

View File

@ -66,7 +66,8 @@ ARG BASE_CUDA_VERSION=10.2
ADD ./common/install_cuda.sh install_cuda.sh
COPY ./common/install_nccl.sh install_nccl.sh
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu*
COPY ./common/install_cusparselt.sh install_cusparselt.sh
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu* install_cusparselt.sh
FROM base as intel
# MKL

View File

@ -38,7 +38,8 @@ ARG BASE_CUDA_VERSION=11.8
ADD ./common/install_cuda.sh install_cuda.sh
COPY ./common/install_nccl.sh install_nccl.sh
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh install_nccl.sh ci_commit_pins/nccl-cu*
COPY ./common/install_cusparselt.sh install_cusparselt.sh
RUN bash ./install_cuda.sh ${BASE_CUDA_VERSION} && rm install_cuda.sh install_nccl.sh ci_commit_pins/nccl-cu* install_cusparselt.sh
FROM base as intel
# MKL

View File

@ -68,8 +68,9 @@ ARG BASE_CUDA_VERSION
# Install CUDA
ADD ./common/install_cuda_aarch64.sh install_cuda_aarch64.sh
COPY ./common/install_nccl.sh install_nccl.sh
COPY ./common/install_cusparselt.sh install_cusparselt.sh
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
RUN bash ./install_cuda_aarch64.sh ${BASE_CUDA_VERSION} && rm install_cuda_aarch64.sh install_nccl.sh ci_commit_pins/nccl-cu*
RUN bash ./install_cuda_aarch64.sh ${BASE_CUDA_VERSION} && rm install_cuda_aarch64.sh install_nccl.sh ci_commit_pins/nccl-cu* install_cusparselt.sh
FROM base as magma
ARG BASE_CUDA_VERSION

View File

@ -54,7 +54,8 @@ ARG CUDA_VERSION
COPY ./common/install_cuda.sh install_cuda.sh
COPY ./common/install_nccl.sh install_nccl.sh
COPY ./ci_commit_pins/nccl-cu* /ci_commit_pins/
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu*
COPY ./common/install_cusparselt.sh install_cusparselt.sh
RUN bash ./install_cuda.sh ${CUDA_VERSION} && rm install_cuda.sh install_nccl.sh /ci_commit_pins/nccl-cu* install_cusparselt.sh
ENV DESIRED_CUDA ${CUDA_VERSION}
ENV PATH /usr/local/nvidia/bin:/usr/local/cuda/bin:$PATH
# No effect if cuda not installed