[CI][CUDA][cuSPARSELt] cuSPARSELt 0.6.3 and cu121-related cleanups (#145793)

Make the CI cuSPARSELt installation consistent with the nightly binary.
Remove cu121-related Docker build jobs and inductor runs.
Update test failures relating to cu121.

Retry of https://github.com/pytorch/pytorch/pull/145696
Pull Request resolved: https://github.com/pytorch/pytorch/pull/145793
Approved by: https://github.com/eqy, https://github.com/tinglvv
Wei Wang
2025-01-28 21:01:55 +00:00
committed by PyTorch MergeBot
parent ccc2878c97
commit 6bcb545d9c
11 changed files with 33 additions and 222 deletions
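
For reference, a minimal standalone sketch of the cuSPARSELt 0.6.3 installation flow that the CI docker images now share with the nightly binaries. The archive name, download URL, copy targets, and ldconfig step are taken from the install scripts in the diffs below; the ARCH_PATH variable and its x86_64 default are illustrative assumptions for this sketch, not part of the commit.

# Hedged sketch only: mirrors the cuSPARSELt install logic shown in the diffs below.
set -ex
ARCH_PATH=${ARCH_PATH:-x86_64}   # assumption: 'sbsa' would be used on aarch64 hosts
CUSPARSELT_NAME="libcusparse_lt-linux-${ARCH_PATH}-0.6.3.2-archive"
mkdir tmp_cusparselt && cd tmp_cusparselt
curl --retry 3 -OLs "https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${ARCH_PATH}/${CUSPARSELT_NAME}.tar.xz"
tar xf "${CUSPARSELT_NAME}.tar.xz"
cp -a "${CUSPARSELT_NAME}"/include/* /usr/local/cuda/include/
cp -a "${CUSPARSELT_NAME}"/lib/* /usr/local/cuda/lib64/
cd .. && rm -rf tmp_cusparselt
ldconfig

On aarch64 hosts the same flow applies with the sbsa archive path, as the conditional in the updated install script below shows.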

View File

@@ -105,20 +105,6 @@ case "$image" in
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9)
CUDA_VERSION=12.1.1
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks)
CUDA_VERSION=12.4.1
CUDNN_VERSION=9
@@ -134,36 +120,6 @@ case "$image" in
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks)
CUDA_VERSION=12.1.1
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-cuda12.1-cudnn9-py3.12-gcc9-inductor-benchmarks)
CUDA_VERSION=12.1.1
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.12
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
INDUCTOR_BENCHMARKS=yes
;;
pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks)
CUDA_VERSION=12.4.1
CUDNN_VERSION=9
@@ -208,20 +164,6 @@ case "$image" in
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9)
CUDA_VERSION=12.1.1
CUDNN_VERSION=9
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
PROTOBUF=yes
DB=yes
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
CONDA_CMAKE=yes
TRITON=yes
;;
pytorch-linux-focal-py3-clang10-onnx)
ANACONDA_PYTHON_VERSION=3.9
CLANG_VERSION=10

View File

@@ -16,17 +16,6 @@ function install_cusparselt_040 {
rm -rf tmp_cusparselt
}
function install_cusparselt_052 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
wget -q https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/libcusparse_lt-linux-x86_64-0.5.2.1-archive.tar.xz
tar xf libcusparse_lt-linux-x86_64-0.5.2.1-archive.tar.xz
cp -a libcusparse_lt-linux-x86_64-0.5.2.1-archive/include/* /usr/local/cuda/include/
cp -a libcusparse_lt-linux-x86_64-0.5.2.1-archive/lib/* /usr/local/cuda/lib64/
popd
rm -rf tmp_cusparselt
}
function install_cusparselt_062 {
# cuSparseLt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && pushd tmp_cusparselt
@@ -83,39 +72,6 @@ function install_118 {
ldconfig
}
function install_121 {
echo "Installing CUDA 12.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.5.2"
rm -rf /usr/local/cuda-12.1 /usr/local/cuda
# install CUDA 12.1.0 in the same container
wget -q https://developer.download.nvidia.com/compute/cuda/12.1.1/local_installers/cuda_12.1.1_530.30.02_linux.run
chmod +x cuda_12.1.1_530.30.02_linux.run
./cuda_12.1.1_530.30.02_linux.run --toolkit --silent
rm -f cuda_12.1.1_530.30.02_linux.run
rm -f /usr/local/cuda && ln -s /usr/local/cuda-12.1 /usr/local/cuda
# cuDNN license: https://developer.nvidia.com/cudnn/license_agreement
mkdir tmp_cudnn && cd tmp_cudnn
wget -q https://developer.download.nvidia.com/compute/cudnn/redist/cudnn/linux-x86_64/cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz -O cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
tar xf cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive.tar.xz
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/include/* /usr/local/cuda/include/
cp -a cudnn-linux-x86_64-${CUDNN_VERSION}_cuda12-archive/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf tmp_cudnn
# NCCL license: https://docs.nvidia.com/deeplearning/nccl/#licenses
# Follow build: https://github.com/NVIDIA/nccl/tree/master?tab=readme-ov-file#build
git clone -b $NCCL_VERSION --depth 1 https://github.com/NVIDIA/nccl.git
cd nccl && make -j src.build
cp -a build/include/* /usr/local/cuda/include/
cp -a build/lib/* /usr/local/cuda/lib64/
cd ..
rm -rf nccl
install_cusparselt_052
ldconfig
}
function install_124 {
CUDNN_VERSION=9.1.0.70
echo "Installing CUDA 12.4.1 and cuDNN ${CUDNN_VERSION} and NCCL ${NCCL_VERSION} and cuSparseLt-0.6.2"
@@ -214,37 +170,6 @@ function prune_118 {
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2022.3.0 $CUDA_BASE/nsight-systems-2022.4.2/
}
function prune_121 {
echo "Pruning CUDA 12.1"
#####################################################################################
# CUDA 12.1 prune static libs
#####################################################################################
export NVPRUNE="/usr/local/cuda-12.1/bin/nvprune"
export CUDA_LIB_DIR="/usr/local/cuda-12.1/lib64"
export GENCODE="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
export GENCODE_CUDNN="-gencode arch=compute_50,code=sm_50 -gencode arch=compute_60,code=sm_60 -gencode arch=compute_61,code=sm_61 -gencode arch=compute_70,code=sm_70 -gencode arch=compute_75,code=sm_75 -gencode arch=compute_80,code=sm_80 -gencode arch=compute_86,code=sm_86 -gencode arch=compute_90,code=sm_90"
if [[ -n "$OVERRIDE_GENCODE" ]]; then
export GENCODE=$OVERRIDE_GENCODE
fi
# all CUDA libs except CuDNN and CuBLAS
ls $CUDA_LIB_DIR/ | grep "\.a" | grep -v "culibos" | grep -v "cudart" | grep -v "cudnn" | grep -v "cublas" | grep -v "metis" \
| xargs -I {} bash -c \
"echo {} && $NVPRUNE $GENCODE $CUDA_LIB_DIR/{} -o $CUDA_LIB_DIR/{}"
# prune CuDNN and CuBLAS
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublas_static.a -o $CUDA_LIB_DIR/libcublas_static.a
$NVPRUNE $GENCODE_CUDNN $CUDA_LIB_DIR/libcublasLt_static.a -o $CUDA_LIB_DIR/libcublasLt_static.a
#####################################################################################
# CUDA 12.1 prune visual tools
#####################################################################################
export CUDA_BASE="/usr/local/cuda-12.1/"
rm -rf $CUDA_BASE/libnvvp $CUDA_BASE/nsightee_plugins $CUDA_BASE/nsight-compute-2023.1.0 $CUDA_BASE/nsight-systems-2023.1.2/
}
function prune_124 {
echo "Pruning CUDA 12.4"
#####################################################################################

View File

@@ -57,7 +57,7 @@ function install_124 {
cd ..
rm -rf nccl
install_cusparselt_062
install_cusparselt_063
ldconfig
}

View File

@@ -5,7 +5,15 @@ set -ex
# cuSPARSELt license: https://docs.nvidia.com/cuda/cusparselt/license.html
mkdir tmp_cusparselt && cd tmp_cusparselt
if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-6]$ ]]; then
if [[ ${CUDA_VERSION:0:4} =~ ^12\.[5-8]$ ]]; then
arch_path='sbsa'
export TARGETARCH=${TARGETARCH:-$(uname -m)}
if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
arch_path='x86_64'
fi
CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.6.3.2-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} == "12.4" ]]; then
arch_path='sbsa'
export TARGETARCH=${TARGETARCH:-$(uname -m)}
if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
@@ -13,17 +21,11 @@ if [[ ${CUDA_VERSION:0:4} =~ ^12\.[2-6]$ ]]; then
fi
CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.6.2.3-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} == "12.1" ]]; then
arch_path='sbsa'
export TARGETARCH=${TARGETARCH:-$(uname -m)}
if [ ${TARGETARCH} = 'amd64' ] || [ "${TARGETARCH}" = 'x86_64' ]; then
arch_path='x86_64'
fi
CUSPARSELT_NAME="libcusparse_lt-linux-${arch_path}-0.5.2.1-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-${arch_path}/${CUSPARSELT_NAME}.tar.xz
elif [[ ${CUDA_VERSION:0:4} == "11.8" ]]; then
CUSPARSELT_NAME="libcusparse_lt-linux-x86_64-0.4.0.7-archive"
curl --retry 3 -OLs https://developer.download.nvidia.com/compute/cusparselt/redist/libcusparse_lt/linux-x86_64/${CUSPARSELT_NAME}.tar.xz
else
echo "Not sure which libcusparselt version to install for this ${CUDA_VERSION}"
fi
tar xf ${CUSPARSELT_NAME}.tar.xz

View File

@@ -102,30 +102,6 @@ MOCKED_DISABLED_UNSTABLE_JOBS = {
"manywheel-py3_8-cuda11_8-build",
"",
],
"inductor / cuda12.1-py3.10-gcc9-sm86 / test (inductor)": [
"pytorchbot",
"107079",
"https://github.com/pytorch/pytorch/issues/107079",
"inductor",
"cuda12.1-py3.10-gcc9-sm86",
"test (inductor)",
],
"inductor / cuda12.1-py3.10-gcc9-sm86 / test (inductor_huggingface)": [
"pytorchbot",
"109153",
"https://github.com/pytorch/pytorch/issues/109153",
"inductor",
"cuda12.1-py3.10-gcc9-sm86",
"test (inductor_huggingface)",
],
"inductor / cuda12.1-py3.10-gcc9-sm86 / test (inductor_huggingface_dynamic)": [
"pytorchbot",
"109154",
"https://github.com/pytorch/pytorch/issues/109154",
"inductor",
"cuda12.1-py3.10-gcc9-sm86",
"test (inductor_huggingface_dynamic)",
],
}
MOCKED_PR_INFO = {
@@ -637,37 +613,6 @@ class TestConfigFilter(TestCase):
"expected": '{"include": [{"config": "default", "unstable": "unstable"}]}',
"description": "Both binary build and test jobs are unstable",
},
{
"workflow": "inductor",
"job_name": "cuda12.1-py3.10-gcc9-sm86 / build",
"test_matrix": """
{ include: [
{ config: "inductor" },
{ config: "inductor_huggingface", shard: 1 },
{ config: "inductor_huggingface", shard: 2 },
{ config: "inductor_timm", shard: 1 },
{ config: "inductor_timm", shard: 2 },
{ config: "inductor_torchbench" },
{ config: "inductor_huggingface_dynamic" },
{ config: "inductor_torchbench_dynamic" },
{ config: "inductor_distributed" },
]}
""",
"expected": """
{ "include": [
{ "config": "inductor", "unstable": "unstable" },
{ "config": "inductor_huggingface", "shard": 1, "unstable": "unstable" },
{ "config": "inductor_huggingface", "shard": 2, "unstable": "unstable" },
{ "config": "inductor_timm", "shard": 1 },
{ "config": "inductor_timm", "shard": 2 },
{ "config": "inductor_torchbench" },
{ "config": "inductor_huggingface_dynamic", "unstable": "unstable" },
{ "config": "inductor_torchbench_dynamic" },
{ "config": "inductor_distributed" }
]}
""",
"description": "Marking multiple unstable configurations",
},
]
for case in testcases:

View File

@@ -535,8 +535,8 @@ class TestTryMerge(TestCase):
def test_remove_job_name_suffix(self, *args: Any) -> None:
test_cases = [
{
"name": "linux-bionic-cuda12.1-py3.10-gcc9-sm86 / test (default, 1, 5, linux.g5.4xlarge.nvidia.gpu)",
"expected": "linux-bionic-cuda12.1-py3.10-gcc9-sm86 / test (default)",
"name": "linux-bionic-cuda12.6-py3.10-gcc9-sm86 / test (default, 1, 5, linux.g5.4xlarge.nvidia.gpu)",
"expected": "linux-bionic-cuda12.6-py3.10-gcc9-sm86 / test (default)",
},
{
"name": "android-emulator-build-test / build-and-test (default, 1, 1, ubuntu-20.04-16x)",

View File

@@ -52,9 +52,6 @@ jobs:
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9,
pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda12.4-cudnn9-py3.12-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9,
pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda12.1-cudnn9-py3.12-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda12.4-cudnn9-py3.13-gcc9-inductor-benchmarks,
pytorch-linux-focal-cuda11.8-cudnn9-py3-gcc9,
pytorch-linux-focal-py3.9-clang10,

View File

@@ -37,7 +37,7 @@ jobs:
id: calculate-docker-image
uses: pytorch/test-infra/.github/actions/calculate-docker-image@main
with:
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9
working-directory: pytorch
- name: Use following to pull public copy of the image

View File

@@ -21,15 +21,15 @@ jobs:
curr_branch: ${{ github.head_ref || github.ref_name }}
curr_ref_type: ${{ github.ref_type }}
linux-focal-cuda12_1-py3_10-gcc9-torchbench-build-gcp:
name: cuda12.1-py3.10-gcc9-sm80
linux-focal-cuda12_4-py3_10-gcc9-torchbench-build-gcp:
name: cuda12.4-py3.10-gcc9-sm80
uses: ./.github/workflows/_linux-build.yml
needs:
- get-default-label-prefix
with:
runner_prefix: "${{ needs.get-default-label-prefix.outputs.label-type }}"
build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.0'
test-matrix: |
{ include: [
@@ -37,12 +37,12 @@
]}
secrets: inherit
linux-focal-cuda12_1-py3_10-gcc9-torchbench-test-gcp:
name: cuda12.1-py3.10-gcc9-sm80
linux-focal-cuda12_4-py3_10-gcc9-torchbench-test-gcp:
name: cuda12.4-py3.10-gcc9-sm80
uses: ./.github/workflows/_linux-test.yml
needs: linux-focal-cuda12_1-py3_10-gcc9-torchbench-build-gcp
needs: linux-focal-cuda12_4-py3_10-gcc9-torchbench-build-gcp
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
docker-image: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-torchbench-build-gcp.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_1-py3_10-gcc9-torchbench-build-gcp.outputs.test-matrix }}
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
docker-image: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-torchbench-build-gcp.outputs.docker-image }}
test-matrix: ${{ needs.linux-focal-cuda12_4-py3_10-gcc9-torchbench-build-gcp.outputs.test-matrix }}
secrets: inherit

View File

@@ -237,12 +237,12 @@ jobs:
secrets: inherit
# NB: Keep this in sync with inductor-perf-test-nightly.yml
linux-focal-cuda12_1-py3_10-gcc9-inductor-build:
name: cuda12.1-py3.10-gcc9-sm80
linux-focal-cuda12_4-py3_10-gcc9-inductor-build:
name: cuda12.4-py3.10-gcc9-sm80
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
build-environment: linux-focal-cuda12.1-py3.10-gcc9-sm80
docker-image-name: pytorch-linux-focal-cuda12.1-cudnn9-py3-gcc9-inductor-benchmarks
build-environment: linux-focal-cuda12.4-py3.10-gcc9-sm80
docker-image-name: pytorch-linux-focal-cuda12.4-cudnn9-py3-gcc9-inductor-benchmarks
cuda-arch-list: '8.0'
secrets: inherit

View File

@@ -1209,12 +1209,12 @@ class TestSparseSemiStructuredCUSPARSELT(TestCase):
# CUDA 11.8 has cuSPARSELt v0.4.0 support
if version == (11, 8):
assert torch.backends.cusparselt.version() == 400
# CUDA 12.1 has cuSPARSELt v0.5.2 support
elif version == (12, 1):
assert torch.backends.cusparselt.version() == 502
# CUDA 12.4+ has cuSPARSELt v0.6.2 support
# PyTorch CUDA 12.4 using cuSPARSELt v0.6.2
elif version >= (12, 4):
assert torch.backends.cusparselt.version() == 602
# PyTorch CUDA 12.6+ using cuSPARSELt v0.6.3
elif version >= (12, 6):
assert torch.backends.cusparselt.version() == 603
else:
assert torch.backends.cusparselt.version() is None