Compare commits

...

6 Commits

Author SHA1 Message Date
60900338b5 fix cuda 13.0 cuda-arch-list to be 8.9 for L4 gpu test 2025-11-11 08:47:28 +00:00
7527228257 specify cuda_arch_list for 13.0 tests 2025-11-11 08:47:28 +00:00
cb75f06c20 also add the cusparse cmake flag 2025-11-11 08:47:28 +00:00
a06f2ed18e use 13.0.2 2025-11-11 08:47:28 +00:00
f6810ef181 add win build too 2025-11-11 08:47:28 +00:00
01e5c341b8 Add eager tests cuda 13.0 2025-11-11 08:47:28 +00:00
7 changed files with 173 additions and 1 deletions

View File

@ -116,7 +116,7 @@ case "$tag" in
INSTALL_MINGW=yes
;;
pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11)
CUDA_VERSION=13.0.0
CUDA_VERSION=13.0.2
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=11
VISION=yes
@ -125,6 +125,16 @@ case "$tag" in
UCC_COMMIT=${_UCC_COMMIT}
TRITON=yes
;;
pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc9)
CUDA_VERSION=13.0.2
ANACONDA_PYTHON_VERSION=3.10
GCC_VERSION=9
VISION=yes
KATEX=yes
UCX_COMMIT=${_UCX_COMMIT}
UCC_COMMIT=${_UCC_COMMIT}
TRITON=yes
;;
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks)
CUDA_VERSION=12.8.1
ANACONDA_PYTHON_VERSION=3.10

View File

@ -54,6 +54,7 @@ jobs:
pytorch-linux-jammy-cuda12.8-cudnn9-py3.12-gcc11-vllm,
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9-inductor-benchmarks,
pytorch-linux-jammy-cuda12.8-cudnn9-py3-gcc9,
pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc9,
pytorch-linux-jammy-cuda12.4-cudnn9-py3-gcc11,
pytorch-linux-jammy-py3.10-clang12,
pytorch-linux-jammy-py3.11-clang12,

View File

@ -204,6 +204,39 @@ jobs:
test-matrix: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc11-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-cuda13_0-py3_10-gcc9-debug-build:
name: linux-jammy-cuda13.0-py3.10-gcc9-debug
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-cuda13.0-py3.10-gcc9-debug
docker-image-name: ci-image:pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc9
cuda-arch-list: 8.6
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu", owners: ["oncall:debug-build"] },
{ config: "default", shard: 2, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu", owners: ["oncall:debug-build"] },
{ config: "default", shard: 3, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu", owners: ["oncall:debug-build"] },
{ config: "default", shard: 4, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu", owners: ["oncall:debug-build"] },
{ config: "default", shard: 5, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu", owners: ["oncall:debug-build"] },
{ config: "default", shard: 6, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu", owners: ["oncall:debug-build"] },
{ config: "default", shard: 7, num_shards: 7, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu", owners: ["oncall:debug-build"] },
]}
secrets: inherit
linux-jammy-cuda13_0-py3_10-gcc9-debug-test:
name: linux-jammy-cuda13.0-py3.10-gcc9-debug
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-jammy-cuda13_0-py3_10-gcc9-debug-build
- target-determination
with:
build-environment: linux-jammy-cuda13.0-py3.10-gcc9-debug
docker-image: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc9-debug-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc9-debug-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-cuda12_8-py3-gcc11-slow-gradcheck-build:
name: linux-jammy-cuda12.8-py3-gcc11-slow-gradcheck
uses: ./.github/workflows/_linux-build.yml

View File

@ -268,6 +268,35 @@ jobs:
]}
secrets: inherit
linux-jammy-cuda13_0-py3_10-gcc9-build:
name: linux-jammy-cuda13.0-py3.10-gcc9
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-cuda13.0-py3.10-gcc9
docker-image-name: ci-image:pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc9
cuda-arch-list: 8.9
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
]}
secrets: inherit
linux-jammy-cuda13_0-py3_10-gcc9-test:
name: linux-jammy-cuda13.0-py3.10-gcc9
uses: ./.github/workflows/_linux-test.yml
needs: linux-jammy-cuda13_0-py3_10-gcc9-build
with:
build-environment: linux-jammy-cuda13.0-py3.10-gcc9
docker-image: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc9-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc9-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-cpu-py3_10-gcc11-bazel-test:
name: linux-jammy-cpu-py3.10-gcc11-bazel-test
uses: ./.github/workflows/_bazel-build-test.yml

View File

@ -78,6 +78,35 @@ jobs:
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-sm86-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-cuda13_0-py3_10-gcc11-sm86-build:
name: linux-jammy-cuda13.0-py3.10-gcc11-sm86
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-cuda13.0-py3.10-gcc11-sm86
docker-image-name: ci-image:pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11
cuda-arch-list: 8.6
test-matrix: |
{ include: [
{ config: "slow", shard: 1, num_shards: 3, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "slow", shard: 2, num_shards: 3, runner: "linux.g5.4xlarge.nvidia.gpu" },
{ config: "slow", shard: 3, num_shards: 3, runner: "linux.g5.4xlarge.nvidia.gpu" },
]}
secrets: inherit
linux-jammy-cuda13_0-py3_10-gcc11-sm86-test:
name: linux-jammy-cuda13.0-py3.10-gcc11-sm86
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-jammy-cuda13_0-py3_10-gcc11-sm86-build
- target-determination
with:
build-environment: linux-jammy-cuda13.0-py3.10-gcc11-sm86
docker-image: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc11-sm86-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc11-sm86-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-py3_10-clang12-build:
name: linux-jammy-py3.10-clang12
uses: ./.github/workflows/_linux-build.yml

View File

@ -63,6 +63,23 @@ jobs:
]}
secrets: inherit
libtorch-linux-jammy-cuda13_0-py3_10-gcc11-debug-build:
name: libtorch-linux-jammy-cuda13.0-py3.10-gcc11-debug
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
build-environment: libtorch-linux-jammy-cuda13.0-py3.10-gcc11
cuda-arch-list: '7.5 8.9'
docker-image-name: ci-image:pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11
build-generates-artifacts: false
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
runner: "linux.c7i.4xlarge"
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1 },
]}
secrets: inherit
linux-jammy-cuda12_8-py3_10-gcc11-build:
name: linux-jammy-cuda12.8-py3.10-gcc11
uses: ./.github/workflows/_linux-build.yml
@ -99,6 +116,41 @@ jobs:
test-matrix: ${{ needs.linux-jammy-cuda12_8-py3_10-gcc11-build.outputs.test-matrix }}
secrets: inherit
linux-jammy-cuda13_0-py3_10-gcc11-build:
name: linux-jammy-cuda13.0-py3.10-gcc11
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-cuda13.0-py3.10-gcc11
docker-image-name: ci-image:pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11
cuda-arch-list: '7.5 8.9'
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 2, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 3, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 4, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "default", shard: 5, num_shards: 5, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g6.4xlarge.experimental.nvidia.gpu" },
{ config: "distributed", shard: 1, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" },
{ config: "distributed", shard: 2, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" },
{ config: "distributed", shard: 3, num_shards: 3, runner: "${{ needs.get-label-type.outputs.label-type }}linux.g4dn.12xlarge.nvidia.gpu" },
{ config: "pr_time_benchmarks", shard: 1, num_shards: 1, runner: "linux.g4dn.metal.nvidia.gpu" },
]}
secrets: inherit
linux-jammy-cuda13_0-py3_10-gcc11-test:
name: linux-jammy-cuda13.0-py3.10-gcc11
uses: ./.github/workflows/_linux-test.yml
needs:
- linux-jammy-cuda13_0-py3_10-gcc11-build
- target-determination
with:
timeout-minutes: 360
build-environment: linux-jammy-cuda13.0-py3.10-gcc11
docker-image: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc11-build.outputs.docker-image }}
test-matrix: ${{ needs.linux-jammy-cuda13_0-py3_10-gcc11-build.outputs.test-matrix }}
secrets: inherit
# no-ops builds test USE_PER_OPERATOR_HEADERS=0 where ATen/ops is not generated
linux-jammy-cuda12_8-py3_10-gcc11-no-ops-build:
@ -115,6 +167,21 @@ jobs:
]}
secrets: inherit
linux-jammy-cuda13_0-py3_10-gcc11-no-ops-build:
name: linux-jammy-cuda13.0-py3.10-gcc11-no-ops
uses: ./.github/workflows/_linux-build.yml
needs: get-label-type
with:
runner_prefix: "${{ needs.get-label-type.outputs.label-type }}"
build-environment: linux-jammy-cuda13.0-py3.10-gcc11-no-ops
cuda-arch-list: '7.5 8.9'
docker-image-name: ci-image:pytorch-linux-jammy-cuda13.0-cudnn9-py3-gcc11
test-matrix: |
{ include: [
{ config: "default", shard: 1, num_shards: 1 },
]}
secrets: inherit
macos-py3-arm64-build:
if: github.repository_owner == 'pytorch'
name: macos-py3-arm64

View File

@ -1394,6 +1394,9 @@ if(NOT INTERN_BUILD_MOBILE)
# https://github.com/pytorch/pytorch/pull/55292
string(APPEND CMAKE_CUDA_FLAGS " -DCUB_WRAPPED_NAMESPACE=at_cuda_detail")
# Suppress cusparse warnings
string(APPEND CMAKE_CUDA_FLAGS " -DDISABLE_CUSPARSE_DEPRECATED")
message(STATUS "Found CUDA with FP16 support, compiling with torch.cuda.HalfTensor")
string(APPEND CMAKE_CUDA_FLAGS " -DCUDA_HAS_FP16=1"
" -D__CUDA_NO_HALF_OPERATORS__"